...

Text file src/internal/bytealg/equal_arm64.s

Documentation: internal/bytealg

     1// Copyright 2018 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8// memequal(a, b unsafe.Pointer, size uintptr) bool: reports whether the size bytes at a and at b are identical.
     9TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25	// registers used: R0 = a, R1 = b, R2 = size; result in R0
    10	// short path to handle 0-byte case
    11	CBZ	R2, equal	// size == 0: equal, no memory is read
    12	// short path to handle equal pointers
    13	CMP	R0, R1
    14	BEQ	equal	// a == b: same memory, equal without reading it
    15	B	memeqbody<>(SB)	// plain branch, not a call: memeqbody's RET returns to memequal's caller
    16equal:
    17	MOVD	$1, R0	// result = true
    18	RET
    19
    20// memequal_varlen(a, b unsafe.Pointer) bool: byte-wise equality where the length is not an argument but is loaded from 8(R26).
    21TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17	// registers used: R0 = a, R1 = b; result in R0
    22	CMP	R0, R1
    23	BEQ	eq	// a == b: equal; the size is not even loaded
    24	MOVD	8(R26), R2    // compiler stores size at offset 8 in the closure
    25	CBZ	R2, eq	// size == 0: nothing to compare
    26	B	memeqbody<>(SB)	// plain branch with R0 = a, R1 = b, R2 = size; memeqbody's RET returns to our caller
    27eq:
    28	MOVD	$1, R0	// result = true
    29	RET
    30
    31// input:
    32// R0: pointer a
    33// R1: pointer b
    34// R2: data len
    35// at return: result in R0 (1 if the len bytes at a and b are all equal, 0 otherwise)
    36TEXT memeqbody<>(SB),NOSPLIT,$0	// scratch: R3-R7, R9, V0-V11 and the condition flags; R0, R1, R2 are modified
    37	CMP	$1, R2
    38	// handle 1-byte special case for better performance
    39	BEQ	one
    40	CMP	$16, R2
    41	// handle specially if length < 16
    42	BLO	tail	// unsigned compare: len < 16
    43	BIC	$0x3f, R2, R3	// R3 = len rounded down to a multiple of 64
    44	CBZ	R3, chunk16	// fewer than 64 bytes: skip the vector loop
    45	// work with 64-byte chunks
    46	ADD	R3, R0, R6	// end of chunks
    47chunk64_loop:
    48	VLD1.P	(R0), [V0.D2, V1.D2, V2.D2, V3.D2]	// load 64 bytes of a; post-increment R0
    49	VLD1.P	(R1), [V4.D2, V5.D2, V6.D2, V7.D2]	// load 64 bytes of b; post-increment R1
    50	VCMEQ	V0.D2, V4.D2, V8.D2	// per 64-bit lane: all ones if equal, zero if not
    51	VCMEQ	V1.D2, V5.D2, V9.D2
    52	VCMEQ	V2.D2, V6.D2, V10.D2
    53	VCMEQ	V3.D2, V7.D2, V11.D2
    54	VAND	V8.B16, V9.B16, V8.B16	// AND the four masks together into V8:
    55	VAND	V8.B16, V10.B16, V8.B16	// a lane stays all ones only if it matched
    56	VAND	V8.B16, V11.B16, V8.B16	// in all four register pairs
    57	CMP	R0, R6	// sets the flags consumed by BNE below; the VMOV/CBZ in between do not change them
    58	VMOV	V8.D[0], R4	// low 64-bit lane of the combined mask
    59	VMOV	V8.D[1], R5	// high 64-bit lane of the combined mask
    60	CBZ	R4, not_equal	// a zero lane means some doubleword in this chunk differed
    61	CBZ	R5, not_equal
    62	BNE	chunk64_loop	// loop until R0 reaches R6 (end of the 64-byte chunks)
    63	AND	$0x3f, R2, R2	// R2 = bytes left after the 64-byte chunks (len mod 64)
    64	CBZ	R2, equal
    65chunk16:
    66	// work with 16-byte chunks
    67	BIC	$0xf, R2, R3	// R3 = remaining len rounded down to a multiple of 16
    68	CBZ	R3, tail	// fewer than 16 bytes left
    69	ADD	R3, R0, R6	// end of chunks
    70chunk16_loop:
    71	LDP.P	16(R0), (R4, R5)	// load 16 bytes of a; post-increment R0
    72	LDP.P	16(R1), (R7, R9)	// load 16 bytes of b; post-increment R1
    73	EOR	R4, R7	// R7 ^= R4: nonzero iff the first 8 bytes differ
    74	CBNZ	R7, not_equal
    75	EOR	R5, R9	// R9 ^= R5: nonzero iff the second 8 bytes differ
    76	CBNZ	R9, not_equal
    77	CMP	R0, R6
    78	BNE	chunk16_loop	// loop until R0 reaches R6 (end of the 16-byte chunks)
    79	AND	$0xf, R2, R2	// R2 = bytes left after the 16-byte chunks (< 16)
    80	CBZ	R2, equal
    81tail:
    82	// special compare of tail with length < 16
    83	TBZ	$3, R2, lt_8	// bit 3 clear: fewer than 8 bytes left
    84	MOVD	(R0), R4	// 8 <= len < 16: compare the first 8 bytes ...
    85	MOVD	(R1), R5
    86	EOR	R4, R5
    87	CBNZ	R5, not_equal
    88	SUB	$8, R2, R6	// offset of the last 8 bytes
    89	MOVD	(R0)(R6), R4	// ... then the last 8 bytes, which overlap the first load, so every byte is covered
    90	MOVD	(R1)(R6), R5
    91	EOR	R4, R5
    92	CBNZ	R5, not_equal
    93	B	equal
    94lt_8:
    95	TBZ	$2, R2, lt_4	// bit 2 clear: fewer than 4 bytes left
    96	MOVWU	(R0), R4	// 4 <= len < 8: compare the first 4 bytes ...
    97	MOVWU	(R1), R5
    98	EOR	R4, R5
    99	CBNZ	R5, not_equal
   100	SUB	$4, R2, R6	// offset of the last 4 bytes
   101	MOVWU	(R0)(R6), R4	// ... then the last 4 bytes, overlapping the first load
   102	MOVWU	(R1)(R6), R5
   103	EOR	R4, R5
   104	CBNZ	R5, not_equal
   105	B	equal
   106lt_4:
   107	TBZ	$1, R2, lt_2	// bit 1 clear: at most 1 byte left
   108	MOVHU.P	2(R0), R4	// compare 2 bytes; post-increment leaves R0/R1 at a possible third byte
   109	MOVHU.P	2(R1), R5
   110	CMP	R4, R5
   111	BNE	not_equal
   112lt_2:
   113	TBZ	$0, R2, equal	// bit 0 clear: no odd byte left
   114one:
   115	MOVBU	(R0), R4	// compare the single remaining byte
   116	MOVBU	(R1), R5
   117	CMP	R4, R5
   118	BNE	not_equal
   119equal:
   120	MOVD	$1, R0	// result = true
   121	RET
   122not_equal:
   123	MOVB	ZR, R0	// result = false
   124	RET

View as plain text