...

Text file src/internal/bytealg/index_arm64.s

Documentation: internal/bytealg

     1// Copyright 2018 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8// func Index(a, b []byte) int
     9// input (slice form; the two cap registers are not read):
    10//   R0: a ptr (haystack)
    11//   R1: a len (haystack)
    12//   R2: a cap (haystack) (unused)
    13//   R3: b ptr (needle)
    14//   R4: b len (needle) (2 <= len <= 32)
    15//   R5: b cap (needle) (unused)
    16// return (produced by IndexString, which this function branches to):
    17//   R0: result
    18TEXT ·Index<ABIInternal>(SB),NOSPLIT,$0-56
    19	MOVD	R3, R2	// R2 = b ptr, overwriting the unused a cap
    20	MOVD	R4, R3	// R3 = b len; must come after the move above, which reads the old R3
    21	B	·IndexString<ABIInternal>(SB)	// plain branch (no link): registers now match IndexString's documented inputs
    22
    23// func IndexString(a, b string) int
    24// input (b is called 'sep' in the comments below):
    25//   R0: a ptr (haystack)
    26//   R1: a len (haystack)
    27//   R2: b ptr (needle)
    28//   R3: b len (needle) (2 <= len <= 32)
    29// return:
    30//   R0: result: byte offset of the first match of b in a, or -1 if there is none
    31TEXT ·IndexString<ABIInternal>(SB),NOSPLIT,$0-40
    32	// The main idea is to load 'sep' (the needle b) into register(s) once,
    33	// so that it does not have to be reloaded from memory
    34	// for each subsequent substring comparison.
    35	SUB	R3, R1, R4	// R4 = len(a) - len(sep)
    36	// after the ADD below, R4 holds the address of the last candidate start in a
    37	ADD	R0, R4, R4	// R4 = a + len(a) - len(sep)
    38	ADD	$1, R0, R8	// R8 = a + 1: every loop post-increments R0 past the candidate, so 'found' subtracts R8 to get the index
    39
    40	CMP	$8, R3
    41	BHI	greater_8	// len(sep) > 8
    42	TBZ	$3, R3, len_2_7	// len(sep) <= 8 here, so bit 3 clear means len(sep) < 8
    43len_8:
    44	// R5 contains all 8 bytes of sep
    45	MOVD	(R2), R5
    46loop_8:
    47	// per iteration: bounds check, then R6 = the 8 bytes of a at the current candidate
    48	CMP	R4, R0
    49	BHI	not_found	// unsigned R0 > R4: past the last candidate start
    50	MOVD.P	1(R0), R6	// load 8 bytes at R0, then R0 += 1
    51	CMP	R5, R6
    52	BNE	loop_8
    53	B	found
    54len_2_7:
    55	TBZ	$2, R3, len_2_3	// bit 2 clear: len(sep) is 2 or 3 (given the documented len >= 2)
    56	TBZ	$1, R3, len_4_5	// bit 2 set, bit 1 clear: len(sep) is 4 or 5
    57	TBZ	$0, R3, len_6	// bits 2 and 1 set, bit 0 clear: len(sep) is 6; otherwise fall through to 7
    58len_7:
    59	// R5 and R6 together contain the 7 bytes of sep
    60	MOVWU	(R2), R5	// R5 = sep bytes 0-3
    61	// R6 overlaps R5 by 1 byte (byte 3 is in both)
    62	MOVWU	3(R2), R6	// R6 = sep bytes 3-6
    63loop_7:
    64	CMP	R4, R0
    65	BHI	not_found
    66	MOVWU.P	1(R0), R3	// R3 is reused as scratch (len(sep) is not read again on this path); R0 += 1 after the load
    67	CMP	R5, R3
    68	BNE	loop_7
    69	MOVWU	2(R0), R3	// R0 is already candidate+1, so this reads candidate bytes 3-6
    70	CMP	R6, R3
    71	BNE	loop_7
    72	B	found
    73len_6:
    74	// R5 and R6 together contain the 6 bytes of sep
    75	MOVWU	(R2), R5	// R5 = sep bytes 0-3
    76	MOVHU	4(R2), R6	// R6 = sep bytes 4-5
    77loop_6:
    78	CMP	R4, R0
    79	BHI	not_found
    80	MOVWU.P	1(R0), R3	// R3 reused as scratch; R0 += 1 after the load
    81	CMP	R5, R3
    82	BNE	loop_6
    83	MOVHU	3(R0), R3	// R0 is already candidate+1, so this reads candidate bytes 4-5
    84	CMP	R6, R3
    85	BNE	loop_6
    86	B	found
    87len_4_5:
    88	TBZ	$0, R3, len_4	// bit 0 clear: len(sep) is 4; otherwise fall through to 5
    89len_5:
    90	// R5 and R7 together contain the 5 bytes of sep
    91	MOVWU	(R2), R5	// R5 = sep bytes 0-3
    92	MOVBU	4(R2), R7	// R7 = sep byte 4
    93loop_5:
    94	CMP	R4, R0
    95	BHI	not_found
    96	MOVWU.P	1(R0), R3	// R3 reused as scratch; R0 += 1 after the load
    97	CMP	R5, R3
    98	BNE	loop_5
    99	MOVBU	3(R0), R3	// R0 is already candidate+1, so this reads candidate byte 4
   100	CMP	R7, R3
   101	BNE	loop_5
   102	B	found
   103len_4:
   104	// R5 contains all 4 bytes of sep
   105	MOVWU	(R2), R5
   106loop_4:
   107	CMP	R4, R0
   108	BHI	not_found
   109	MOVWU.P	1(R0), R6	// R6 = 4 bytes at the candidate; R0 += 1 after the load
   110	CMP	R5, R6
   111	BNE	loop_4
   112	B	found
   113len_2_3:
   114	TBZ	$0, R3, len_2	// bit 0 clear: len(sep) is 2; otherwise fall through to 3
   115len_3:
   116	// R6 and R7 together contain the 3 bytes of sep
   117	MOVHU	(R2), R6	// R6 = sep bytes 0-1
   118	MOVBU	2(R2), R7	// R7 = sep byte 2
   119loop_3:
   120	CMP	R4, R0
   121	BHI	not_found
   122	MOVHU.P	1(R0), R3	// R3 reused as scratch; R0 += 1 after the load
   123	CMP	R6, R3
   124	BNE	loop_3
   125	MOVBU	1(R0), R3	// R0 is already candidate+1, so this reads candidate byte 2
   126	CMP	R7, R3
   127	BNE	loop_3
   128	B	found
   129len_2:
   130	// R5 contains both bytes of sep
   131	MOVHU	(R2), R5
   132loop_2:
   133	CMP	R4, R0
   134	BHI	not_found
   135	MOVHU.P	1(R0), R6	// R6 = 2 bytes at the candidate; R0 += 1 after the load
   136	CMP	R5, R6
   137	BNE	loop_2	// on a match, fall through into 'found'
   138found:
   139	SUB	R8, R0, R0	// R0 = R0 - (a + 1); R0 is match start + 1 here, so this is the match index
   140	RET
   141not_found:
   142	MOVD	$-1, R0
   143	RET
   144greater_8:
   145	SUB	$9, R3, R11	// R11 = len(sep) - 9: offset from the post-incremented R0 (candidate+1) to the candidate's last 8 bytes
   146	CMP	$16, R3
   147	BHI	greater_16	// len(sep) > 16
   148len_9_16:
   149	MOVD.P	8(R2), R5	// R5 = the first 8 bytes of sep; R2 += 8 after the load
   150	SUB	$16, R3, R7	// R7 = len(sep) - 16 (<= 0 on this path): offset from the advanced R2 to sep's last 8 bytes
   151	MOVD	(R2)(R7), R6	// R6 = the last 8 bytes of sep (overlaps R5 when len(sep) < 16)
   152loop_9_16:
   153	// compare the first 8 bytes first; R7 is reused as scratch from here on
   154	CMP	R4, R0
   155	BHI	not_found
   156	MOVD.P	1(R0), R7	// R7 = first 8 bytes of the candidate; R0 += 1 after the load
   157	CMP	R5, R7
   158	BNE	loop_9_16
   159	MOVD	(R0)(R11), R7	// R7 = last 8 bytes of the candidate
   160	CMP	R6, R7		// compare the last 8 bytes
   161	BNE	loop_9_16
   162	B	found
   163greater_16:
   164	CMP	$24, R3
   165	BHI	len_25_32	// len(sep) > 24
   166len_17_24:
   167	LDP.P	16(R2), (R5, R6)	// R5 and R6 = the first 16 bytes of sep; R2 += 16 after the load
   168	SUB	$24, R3, R10		// R10 = len(sep) - 24 (<= 0 on this path): offset from the advanced R2 to sep's last 8 bytes
   169	MOVD	(R2)(R10), R7		// R7 = the last 8 bytes of sep
   170loop_17_24:
   171	// compare the first 16 bytes first; R10 is reused as scratch from here on
   172	CMP	R4, R0
   173	BHI	not_found
   174	MOVD.P	1(R0), R10	// R10 = candidate bytes 0-7; R0 += 1 after the load
   175	CMP	R5, R10
   176	BNE	loop_17_24
   177	MOVD	7(R0), R10	// R0 is already candidate+1, so this reads candidate bytes 8-15
   178	CMP	R6, R10
   179	BNE	loop_17_24
   180	MOVD	(R0)(R11), R10	// R10 = last 8 bytes of the candidate
   181	CMP	R7, R10		// compare the last 8 bytes
   182	BNE	loop_17_24
   183	B	found
   184len_25_32:
   185	LDP.P	16(R2), (R5, R6)	// R5 and R6 = sep bytes 0-15; R2 += 16 after the load
   186	MOVD.P	8(R2), R7	// R5, R6 and R7 now contain the first 24 bytes of sep; R2 += 8 after the load
   187	SUB	$32, R3, R12	// R12 = len(sep) - 32: offset from the advanced R2 to sep's last 8 bytes
   188	MOVD	(R2)(R12), R10	// R10 = the last 8 bytes of sep
   189loop_25_32:
   190	// compare the first 24 bytes first; R12 is reused as scratch from here on
   191	CMP	R4, R0
   192	BHI	not_found
   193	MOVD.P	1(R0), R12	// R12 = candidate bytes 0-7; R0 += 1 after the load
   194	CMP	R5, R12
   195	BNE	loop_25_32
   196	MOVD	7(R0), R12	// R0 is already candidate+1, so this reads candidate bytes 8-15
   197	CMP	R6, R12
   198	BNE	loop_25_32
   199	MOVD	15(R0), R12	// candidate bytes 16-23
   200	CMP	R7, R12
   201	BNE	loop_25_32
   202	MOVD	(R0)(R11), R12	// R12 = last 8 bytes of the candidate
   203	CMP	R10, R12	// compare the last 8 bytes
   204	BNE	loop_25_32
   205	B	found

View as plain text