...

Text file src/internal/bytealg/equal_arm64.s

Documentation: internal/bytealg

     1// Copyright 2018 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8// memequal_varlen(a, b unsafe.Pointer) bool
      //
      // Variable-length equality entry point: the byte count is not passed as an
      // argument but is loaded from offset 8 of the closure addressed by R26.
      // A zero size returns 1 (true) in R0 immediately. Any other size branches
      // to runtime·memequal with R0 and R1 left exactly as they arrived and the
      // loaded size in R2, matching the register inputs documented on memequal.
     9TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
    10	MOVD	8(R26), R2    // compiler stores size at offset 8 in the closure
    11	CBZ	R2, eq	// size == 0: nothing to compare, report equal
    12	B	runtime·memequal<ABIInternal>(SB)	// plain branch (no link): memequal's RET returns to our caller
    13eq:
    14	MOVD	$1, R0	// result = true
    15	RET
    16
    17// input:
    18// R0: pointer a
    19// R1: pointer b
    20// R2: data len
    21// at return: result in R0
    22// memequal(a, b unsafe.Pointer, size uintptr) bool
      //
      // Returns 1 in R0 when the R2 bytes at R0 and at R1 are identical, 0 otherwise.
      // Strategy, in the order the code tries it:
      //   - len == 0 or a == b: equal, no memory is read.
      //   - len == 1: single byte compare (label one).
      //   - len < 16: label tail, which uses overlapping 8-byte or 4-byte loads,
      //     or a 2-byte step followed by a 1-byte step.
      //   - otherwise: 64-byte vector chunks, then 16-byte register-pair chunks,
      //     then tail for whatever is left (fewer than 16 bytes).
      // Registers written: R0-R7, R9, V0-V11 and the condition flags. R0 and R1 are
      // advanced by the post-increment (.P) loads as data is consumed.
    23TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
    24	// short path to handle 0-byte case
    25	CBZ     R2, equal
    26	// short path to handle equal pointers
    27	CMP     R0, R1
    28	BEQ     equal
    29	CMP	$1, R2
    30	// handle 1-byte special case for better performance
    31	BEQ	one
    32	CMP	$16, R2
    33	// handle specially if length < 16
    34	BLO	tail	// unsigned compare: len < 16
    35	BIC	$0x3f, R2, R3	// R3 = len with the low 6 bits cleared = bytes in whole 64-byte chunks
    36	CBZ	R3, chunk16	// fewer than 64 bytes: skip the vector loop
    37	// work with 64-byte chunks
    38	ADD	R3, R0, R6	// end of chunks
    39chunk64_loop:
    40	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]	// next 64 bytes of a; R0 post-incremented
    41	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]	// next 64 bytes of b; R1 post-incremented
    42	VCMEQ	V0.D2, V4.D2, V8.D2	// per 64-bit lane: all ones if equal, zero if different
    43	VCMEQ	V1.D2, V5.D2, V9.D2
    44	VCMEQ	V2.D2, V6.D2, V10.D2
    45	VCMEQ	V3.D2, V7.D2, V11.D2
    46	VAND	V8.B16, V9.B16, V8.B16	// fold the four compare masks into V8
    47	VAND	V8.B16, V10.B16, V8.B16
    48	VAND	V8.B16, V11.B16, V8.B16
    49	CMP	R0, R6	// reached end of chunks? flags are consumed by the BNE below
    50	VMOV	V8.D[0], R4	// low 64-bit lane of the combined mask
    51	VMOV	V8.D[1], R5	// high 64-bit lane of the combined mask
    52	CBZ	R4, not_equal	// a zero lane means at least one doubleword in that lane position differed
    53	CBZ	R5, not_equal
    54	BNE	chunk64_loop	// uses the CMP R0, R6 result: more 64-byte chunks remain
    55	AND	$0x3f, R2, R2	// R2 = bytes left after the 64-byte chunks
    56	CBZ	R2, equal
    57chunk16:
    58	// work with 16-byte chunks
    59	BIC	$0xf, R2, R3	// R3 = bytes in whole 16-byte chunks
    60	CBZ	R3, tail	// fewer than 16 bytes left
    61	ADD	R3, R0, R6	// end of chunks
    62chunk16_loop:
    63	LDP.P	16(R0), (R4, R5)	// next 16 bytes of a; R0 += 16
    64	LDP.P	16(R1), (R7, R9)	// next 16 bytes of b; R1 += 16
    65	EOR	R4, R7	// R7 = R4 ^ R7; nonzero means the first 8 bytes differ
    66	CBNZ	R7, not_equal
    67	EOR	R5, R9	// R9 = R5 ^ R9; nonzero means the second 8 bytes differ
    68	CBNZ	R9, not_equal
    69	CMP	R0, R6
    70	BNE	chunk16_loop
    71	AND	$0xf, R2, R2	// R2 = bytes left after the 16-byte chunks
    72	CBZ	R2, equal
    73tail:
    74	// special compare of tail with length < 16
    75	TBZ	$3, R2, lt_8	// bit 3 clear: fewer than 8 bytes remain
      	// 8 <= len <= 15: compare the first 8 bytes and the last 8 bytes; the two
      	// windows may overlap, and together they cover every remaining byte.
    76	MOVD	(R0), R4
    77	MOVD	(R1), R5
    78	EOR	R4, R5
    79	CBNZ	R5, not_equal
    80	SUB	$8, R2, R6	// offset of the last 8 bytes
    81	MOVD	(R0)(R6), R4
    82	MOVD	(R1)(R6), R5
    83	EOR	R4, R5
    84	CBNZ	R5, not_equal
    85	B	equal
    86	PCALIGN	$16	// align the code at lt_8 to a 16-byte boundary
    87lt_8:
    88	TBZ	$2, R2, lt_4	// bit 2 clear: fewer than 4 bytes remain
      	// 4 <= len <= 7: same overlapping-window idea with 4-byte loads.
    89	MOVWU	(R0), R4
    90	MOVWU	(R1), R5
    91	EOR	R4, R5
    92	CBNZ	R5, not_equal
    93	SUB	$4, R2, R6	// offset of the last 4 bytes
    94	MOVWU	(R0)(R6), R4
    95	MOVWU	(R1)(R6), R5
    96	EOR	R4, R5
    97	CBNZ	R5, not_equal
    98	B	equal
    99	PCALIGN	$16	// align the code at lt_4 to a 16-byte boundary
   100lt_4:
   101	TBZ	$1, R2, lt_2	// bit 1 clear: at most one byte remains
   102	MOVHU.P	2(R0), R4	// compare 2 bytes and step both pointers past them
   103	MOVHU.P	2(R1), R5
   104	CMP	R4, R5
   105	BNE	not_equal
   106lt_2:
   107	TBZ	$0, R2, equal	// bit 0 clear: no odd byte left, everything matched
   108one:
   109	MOVBU	(R0), R4	// final (or only) byte of each buffer
   110	MOVBU	(R1), R5
   111	CMP	R4, R5
   112	BNE	not_equal
   113equal:
   114	MOVD	$1, R0	// result = true
   115	RET
   116not_equal:
   117	MOVB	ZR, R0	// result = false
   118	RET

View as plain text