...

Text file src/internal/bytealg/index_s390x.s

Documentation: internal/bytealg

     1// Copyright 2018 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "textflag.h"
     7
     8// Index entry point: a and b arrive as 24-byte slice headers. Caller must confirm availability of vx facility before calling.
     9TEXT ·Index(SB),NOSPLIT|NOFRAME,$0-56
    10	MOVD	$ret+48(FP), R5       // R5=&ret, the slot indexbody writes the result to
    11	LMG	b_base+24(FP), R3, R4 // R3=&sep[0], R4=len(sep)
    12	LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
    13	BR	indexbody<>(SB)       // shared search body; it executes the RET
    14
    15// IndexString entry point: a and b arrive as 16-byte string headers. Caller must confirm availability of vx facility before calling.
    16TEXT ·IndexString(SB),NOSPLIT|NOFRAME,$0-40
    17	MOVD	$ret+32(FP), R5       // R5=&ret, the slot indexbody writes the result to
    18	LMG	b_base+16(FP), R3, R4 // R3=&sep[0], R4=len(sep)
    19	LMG	a_base+0(FP), R1, R2  // R1=&s[0],   R2=len(s)
    20	BR	indexbody<>(SB)       // shared search body; it executes the RET
    21
    22// indexbody stores through R5 the offset in s of the first occurrence of sep,
    23// or -1 if sep does not occur or is empty. len(sep) > 64 is not implemented.
    24// R1=&s[0], R2=len(s)
    25// R3=&sep[0], R4=len(sep)
    26// R5=&ret (int)
    27// Caller must confirm availability of vx facility before calling.
    28TEXT indexbody<>(SB),NOSPLIT|NOFRAME,$0
    29	CMPBGT	R4, R2, notfound // sep longer than s: no match possible
    30	ADD	R1, R2 // R2=&s[len(s)]
    31	SUB	R4, R2 // R2=&s[len(s)-len(sep)] (last valid index)
    32	CMPBEQ	R4, $0, notfound // empty sep is reported as not found
    33	SUB	$1, R4 // R4=len(sep)-1 for use as VLL index
    34	VLL	R4, (R3), V0 // contains first 16 bytes of sep
    35	MOVD	R1, R7 // R7=current candidate position, starting at &s[0]
    36index2plus:
    37	CMPBNE	R4, $1, index3plus // this path handles len(sep)==2
    38	MOVD	$15(R7), R9
    39	CMPBGE	R9, R2, index2to16 // R7+15 >= last index: use the one-position-at-a-time path
    40	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
    41	VONE	V16
    42	VREPH	$0, V0, V1         // V1=sep[0:2] replicated into every halfword
    43	// R9 < R2 was established above and neither register has changed since.
    44index2loop:
    45	VL	0(R7), V2          // 16 bytes, even indices
    46	VL	1(R7), V4          // 16 bytes, odd indices
    47	VCEQH	V1, V2, V5         // compare even indices
    48	VCEQH	V1, V4, V6         // compare odd indices
    49	VSEL	V5, V6, V31, V7    // merge even and odd indices
    50	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
    51	BLT	foundV17
    52	MOVD	$16(R7), R7        // R7+=16
    53	ADD	$15, R7, R9
    54	CMPBLE	R9, R2, index2loop // continue if (R7+15) <= R2 (last index to search)
    55	CMPBLE	R7, R2, index2to16 // fewer than 16 positions left: finish one at a time
    56	BR	notfound
    57
    58index3plus:
    59	CMPBNE	R4, $2, index4plus // this path handles len(sep)==3
    60	ADD	$15, R7, R9
    61	CMPBGE	R9, R2, index2to16 // R7+15 >= last index: use the one-position-at-a-time path
    62	MOVD	$1, R0             // VLL index 1 = load 2 bytes
    63	VGBM	$0xaaaa, V31       // 0xff00ff00ff00ff00...
    64	VONE	V16
    65	VREPH	$0, V0, V1         // V1=sep[0:2] replicated into every halfword
    66	VREPB	$2, V0, V8         // V8=sep[2] replicated into every byte
    67index3loop:
    68	VL	(R7), V2           // load 16-bytes into V2
    69	VLL	R0, 16(R7), V3     // load 2-bytes into V3
    70	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
    71	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
    72	VCEQH	V1, V2, V5         // compare 2-byte even indices
    73	VCEQH	V1, V4, V6         // compare 2-byte odd indices
    74	VCEQB	V8, V9, V10        // compare last bytes
    75	VSEL	V5, V6, V31, V7    // merge even and odd indices
    76	VN	V7, V10, V7        // AND indices with last byte
    77	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
    78	BLT	foundV17
    79	MOVD	$16(R7), R7        // R7+=16
    80	ADD	$15, R7, R9
    81	CMPBLE	R9, R2, index3loop // continue if (R7+15) <= R2 (last index to search)
    82	CMPBLE	R7, R2, index2to16 // fewer than 16 positions left: finish one at a time
    83	BR	notfound
    84
    85index4plus:
    86	CMPBNE	R4, $3, index5plus // this path handles len(sep)==4
    87	ADD	$15, R7, R9
    88	CMPBGE	R9, R2, index2to16 // R7+15 >= last index: use the one-position-at-a-time path
    89	MOVD	$2, R0             // VLL index 2 = load 3 bytes
    90	VGBM	$0x8888, V29       // 0xff000000ff000000...
    91	VGBM	$0x2222, V30       // 0x0000ff000000ff00...
    92	VGBM	$0xcccc, V31       // 0xffff0000ffff0000...
    93	VONE	V16
    94	VREPF	$0, V0, V1         // V1=sep[0:4] replicated into every word
    95index4loop:
    96	VL	(R7), V2           // load 16-bytes into V2
    97	VLL	R0, 16(R7), V3     // load 3-bytes into V3
    98	VSLDB	$1, V2, V3, V4     // V4=(V2:V3)<<1
    99	VSLDB	$2, V2, V3, V9     // V9=(V2:V3)<<2
   100	VSLDB	$3, V2, V3, V10    // V10=(V2:V3)<<3
   101	VCEQF	V1, V2, V5         // compare index 0, 4, ...
   102	VCEQF	V1, V4, V6         // compare index 1, 5, ...
   103	VCEQF	V1, V9, V11        // compare index 2, 6, ...
   104	VCEQF	V1, V10, V12       // compare index 3, 7, ...
   105	VSEL	V5, V6, V29, V13   // merge index 0, 1, 4, 5, ...
   106	VSEL	V11, V12, V30, V14 // merge index 2, 3, 6, 7, ...
   107	VSEL	V13, V14, V31, V7  // final merge
   108	VFEEBS	V16, V7, V17       // find leftmost index, set condition to 1 if found
   109	BLT	foundV17
   110	MOVD	$16(R7), R7        // R7+=16
   111	ADD	$15, R7, R9
   112	CMPBLE	R9, R2, index4loop // continue if (R7+15) <= R2 (last index to search)
   113	CMPBLE	R7, R2, index2to16 // fewer than 16 positions left: finish one at a time
   114	BR	notfound
   115
   116index5plus:
   117	CMPBGT	R4, $15, index17plus // len(sep) > 16 does not fit in V0 alone
   118index2to16:
   119	CMPBGT	R7, R2, notfound
   120	MOVD	$1(R7), R8
   121	CMPBGT	R8, R2, index2to16tail // only one candidate position left
   122index2to16loop:
   123	// unrolled 2x
   124	VLL	R4, (R7), V1         // candidate starting at R7
   125	VLL	R4, 1(R7), V2        // candidate starting at R7+1
   126	VCEQGS	V0, V1, V3
   127	BEQ	found
   128	MOVD	$1(R7), R7
   129	VCEQGS	V0, V2, V4
   130	BEQ	found
   131	MOVD	$1(R7), R7
   132	CMPBLT	R7, R2, index2to16loop // at least two candidate positions remain
   133	CMPBGT	R7, R2, notfound
   134index2to16tail:
   135	VLL	R4, (R7), V1         // final candidate, R7 == R2
   136	VCEQGS	V0, V1, V2
   137	BEQ	found
   138	BR	notfound
   139
   140index17plus:
   141	CMPBGT	R4, $31, index33plus
   142	SUB	$16, R4, R0          // R0=VLL index for the bytes of sep after the first 16
   143	VLL	R0, 16(R3), V1
   144	VONE	V7
   145index17to32loop:
   146	VL	(R7), V2
   147	VLL	R0, 16(R7), V3
   148	VCEQG	V0, V2, V4
   149	VCEQG	V1, V3, V5
   150	VN	V4, V5, V6
   151	VCEQGS	V6, V7, V8           // V6 equals all-ones only if every doubleword matched
   152	BEQ	found
   153	MOVD	$1(R7), R7
   154	CMPBLE	R7, R2, index17to32loop
   155	BR	notfound
   156
   157index33plus:
   158	CMPBGT	R4, $47, index49plus
   159	SUB	$32, R4, R0          // R0=VLL index for the bytes of sep after the first 32
   160	VL	16(R3), V1
   161	VLL	R0, 32(R3), V2
   162	VONE	V11
   163index33to48loop:
   164	VL	(R7), V3
   165	VL	16(R7), V4
   166	VLL	R0, 32(R7), V5
   167	VCEQG	V0, V3, V6
   168	VCEQG	V1, V4, V7
   169	VCEQG	V2, V5, V8
   170	VN	V6, V7, V9
   171	VN	V8, V9, V10
   172	VCEQGS	V10, V11, V12        // V10 equals all-ones only if every doubleword matched
   173	BEQ	found
   174	MOVD	$1(R7), R7
   175	CMPBLE	R7, R2, index33to48loop
   176	BR	notfound
   177
   178index49plus:
   179	CMPBGT	R4, $63, index65plus
   180	SUB	$48, R4, R0          // R0=VLL index for the bytes of sep after the first 48
   181	VL	16(R3), V1
   182	VL	32(R3), V2
   183	VLL	R0, 48(R3), V3
   184	VONE	V15
   185index49to64loop:
   186	VL	(R7), V4
   187	VL	16(R7), V5
   188	VL	32(R7), V6
   189	VLL	R0, 48(R7), V7
   190	VCEQG	V0, V4, V8
   191	VCEQG	V1, V5, V9
   192	VCEQG	V2, V6, V10
   193	VCEQG	V3, V7, V11
   194	VN	V8, V9, V12
   195	VN	V10, V11, V13
   196	VN	V12, V13, V14
   197	VCEQGS	V14, V15, V16        // V14 equals all-ones only if every doubleword matched
   198	BEQ	found
   199	MOVD	$1(R7), R7
   200	CMPBLE	R7, R2, index49to64loop // loop exit falls through into notfound
   201notfound:
   202	MOVD	$-1, (R5)
   203	RET
   204
   205index65plus:
   206	// not implemented
   207	MOVD	$0, (R0) // NOTE(review): presumably a deliberate faulting store for unsupported lengths; confirm
   208	RET
   209
   210foundV17: // index is in doubleword V17[0]
   211	VLGVG	$0, V17, R8
   212	ADD	R8, R7
   213found:
   214	SUB	R1, R7 // convert the match address into an offset from &s[0]
   215	MOVD	R7, (R5)
   216	RET

View as plain text