// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build ppc64 || ppc64le
6
#include "go_asm.h"
#include "textflag.h"

// Helper names for x-form (indexed) loads in BE ordering.
//
// _LDBEX, _LWBEX and _LHBEX load a doubleword, word and halfword
// respectively. On ppc64le they expand to the byte-reversed loads, on
// ppc64 to the plain loads. The small-length paths of cmpbody use them
// and then compare the loaded values with CMPU.
#ifdef GOARCH_ppc64le
#define _LDBEX	MOVDBR
#define _LWBEX	MOVWBR
#define _LHBEX	MOVHBR
#else
#define _LDBEX	MOVD
#define _LWBEX	MOVW
#define _LHBEX	MOVH
#endif
20
// SETB_CR0(rout) and SETB_CR1(rout) turn the comparison result held in
// CR0 or CR1 into a three-way value in rout: -1 if LT is set, 0 if EQ
// is set, and 1 otherwise.
//
// SETB_INIT() must be executed before any of the other macros are used.
// It expands to nothing when SETB is available, and otherwise loads the
// constants the emulation needs into R20 and R21.
#ifdef GOPPC64_power9
#define SETB_CR0(rout) SETB CR0, rout
#define SETB_CR1(rout) SETB CR1, rout
#define SETB_INIT()
#define SETB_CR0_NE(rout) SETB_CR0(rout)
#else
// A helper macro to emulate SETB on P8. This assumes
// -1 is in R20, and 1 is in R21. crxlt and crxeq must
// also be the same CR field.
//
// The first ISEL produces 0 (equal) or 1 (not equal); the second
// replaces that with -1 when the LT bit is set.
#define _SETB(crxlt, crxeq, rout) \
	ISEL	crxeq,R0,R21,rout \
	ISEL	crxlt,R20,rout,rout

// A special case when it is known the comparison
// will always be not equal. The result must be -1 or 1.
#define SETB_CR0_NE(rout) \
	ISEL	CR0LT,R20,R21,rout

#define SETB_CR0(rout) _SETB(CR0LT, CR0EQ, rout)
#define SETB_CR1(rout) _SETB(CR1LT, CR1EQ, rout)
#define SETB_INIT() \
	MOVD	$-1,R20 \
	MOVD	$1,R21
#endif
45
// Compare sets up the registers expected by cmpbody and tail-branches
// to it. The result is left in R3 as -1, 0, or 1.
TEXT ·Compare<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-56
	// incoming:
	// R3 a addr
	// R4 a len
	// R6 b addr
	// R7 b len
	//
	// on entry to cmpbody:
	// R3 ordering of len(a) vs len(b) (-1, 0, 1); this is the return
	//    value if the first min(len(a),len(b)) bytes are equal
	// R5 a addr
	// R6 b addr
	// R9 min(len(a),len(b))
	SETB_INIT()
	MOVD	R3,R5		// a addr must be copied out before R3 is overwritten below
	CMP	R4,R7,CR0	// CR0 = len(a) vs len(b)
	CMP	R3,R6,CR7	// CR7 = a addr vs b addr
	ISEL	CR0LT,R4,R7,R9	// R9 = len(a) < len(b) ? len(a) : len(b)
	SETB_CR0(R3)		// R3 = -1, 0, or 1 from the length comparison
	BC	$12,30,LR	// beqlr cr7: same address, lengths alone decide the result
	BR	cmpbody<>(SB)
66
// cmpstring sets up the registers expected by cmpbody and tail-branches
// to it. The result is left in R3 as -1, 0, or 1.
TEXT runtime·cmpstring<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-40
	// incoming:
	// R3 a addr -> R5
	// R4 a len
	// R5 b addr -> R6
	// R6 b len
	//
	// on entry to cmpbody:
	// R3 ordering of len(a) vs len(b) (-1, 0, 1); this is the return
	//    value if the first min(len(a),len(b)) bytes are equal
	// R5 a addr
	// R6 b addr
	// R9 min(len(a),len(b))
	SETB_INIT()
	CMP	R4,R6,CR0	// CR0 = len(a) vs len(b)
	CMP	R3,R5,CR7	// CR7 = a addr vs b addr
	ISEL	CR0LT,R4,R6,R9	// R9 = len(a) < len(b) ? len(a) : len(b)
	MOVD	R5,R6		// b addr; b len in R6 is no longer needed
	MOVD	R3,R5		// a addr must be copied out before R3 is overwritten below
	SETB_CR0(R3)		// R3 = -1, 0, or 1 from the length comparison
	BC	$12,30,LR	// beqlr cr7: same address, lengths alone decide the result
	BR	cmpbody<>(SB)
88
#ifdef GOARCH_ppc64le
// byteswap is the VPERM control vector used at the "different" label of
// cmpbody. It reverses the bytes within each doubleword of V3 and V4
// before the doublewords are moved to GPRs and compared.
DATA byteswap<>+0(SB)/8, $0x0706050403020100
DATA byteswap<>+8(SB)/8, $0x0f0e0d0c0b0a0908
GLOBL byteswap<>+0(SB), RODATA, $16
// SWAP is the vector register that holds byteswap.
#define SWAP V21
#endif
95
// cmpbody compares the first R9 bytes at R5 and R6 and returns the
// three-way result directly to the caller of Compare/cmpstring.
//
// on entry:
// R3 value to return if all R9 compared bytes are equal
// R5 a addr
// R6 b addr
// R9 number of bytes to compare
// R20, R21 hold -1 and 1 unless built for GOPPC64_power9 (see SETB_INIT)
//
// on return:
// R3 -1, 0, or 1
//
// Overwrites R4, R5, R6, R9-R12, R14, R16, CTR, CR0-CR3, CR6, V1, V3, V4,
// and SWAP (V21) on ppc64le.
TEXT cmpbody<>(SB),NOSPLIT|NOFRAME,$0-0
start:
	CMP	R9,$16,CR0
	CMP	R9,$32,CR1
	CMP	R9,$64,CR2
	MOVD	$16,R10		// offset 16, used by the vector paths
	BLT	cmp8		// < 16B
	BLT	CR1,cmp16	// < 32B
	BLT	CR2,cmp32	// < 64B

cmp64:	// >= 64B
	DCBT	(R5)		// optimize for size>=64
	DCBT	(R6)		// cache hint

	SRD	$6,R9,R14	// There is at least one iteration.
	MOVD	R14,CTR
	ANDCC	$63,R9,R9	// R9 = tail length; CR0 EQ is set if there is no tail
	CMP	R9,$16,CR1	// Do setup for tail check early on.
	CMP	R9,$32,CR2
	CMP	R9,$48,CR3
	ADD	$-16,R9,R9	// R9 = offset of the last 16 bytes of the tail

	MOVD	$32,R11		// set offsets to load into vector
	MOVD	$48,R12		// set offsets to load into vector

	PCALIGN	$16
cmp64_loop:
	LXVD2X	(R5)(R0),V3	// load bytes of A at offset 0 into vector
	LXVD2X	(R6)(R0),V4	// load bytes of B at offset 0 into vector
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different	// jump out if they differ

	LXVD2X	(R5)(R10),V3	// load bytes of A at offset 16 into vector
	LXVD2X	(R6)(R10),V4	// load bytes of B at offset 16 into vector
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R5)(R11),V3	// load bytes of A at offset 32 into vector
	LXVD2X	(R6)(R11),V4	// load bytes of B at offset 32 into vector
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R5)(R12),V3	// load bytes of A at offset 48 into vector
	LXVD2X	(R6)(R12),V4	// load bytes of B at offset 48 into vector
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	ADD	$64,R5,R5	// increment to next 64 bytes of A
	ADD	$64,R6,R6	// increment to next 64 bytes of B
	BDNZ	cmp64_loop
	BC	$12,2,LR	// beqlr: CR0 from the ANDCC above, no tail left

	// Finish out tail with minimal overlapped checking.
	// Note, 0 tail is handled by beqlr above.
	// CR1, CR2 and CR3 still hold the tail length compared against
	// 16, 32 and 48 from before the loop.
	BLE	CR1,cmp64_tail_gt0
	BLE	CR2,cmp64_tail_gt16
	BLE	CR3,cmp64_tail_gt32

cmp64_tail_gt48: // 49 - 63 B
	LXVD2X	(R0)(R5),V3
	LXVD2X	(R0)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R5)(R10),V3
	LXVD2X	(R6)(R10),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R5)(R11),V3
	LXVD2X	(R6)(R11),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	BR	cmp64_tail_gt0

	PCALIGN	$16
cmp64_tail_gt32: // 33 - 48B
	LXVD2X	(R0)(R5),V3
	LXVD2X	(R0)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R5)(R10),V3
	LXVD2X	(R6)(R10),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	BR	cmp64_tail_gt0

	PCALIGN	$16
cmp64_tail_gt16: // 17 - 32B
	LXVD2X	(R0)(R5),V3
	LXVD2X	(R0)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	BR	cmp64_tail_gt0

	PCALIGN	$16
cmp64_tail_gt0: // 1 - 16B
	// R9 is (tail length - 16), so this checks the final 16 bytes and
	// may overlap bytes that were already compared.
	LXVD2X	(R5)(R9),V3
	LXVD2X	(R6)(R9),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	RET

	PCALIGN	$16
cmp32:	// 32 - 63B
	ANDCC	$31,R9,R9	// R9 = length - 32; CR0 EQ is set if exactly 32B

	LXVD2X	(R0)(R5),V3
	LXVD2X	(R0)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R10)(R5),V3
	LXVD2X	(R10)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	BC	$12,2,LR	// beqlr
	ADD	R9,R10,R10	// R10 = R9 + 16

	// Check the final 32 bytes, overlapping the first 32 as needed.
	LXVD2X	(R9)(R5),V3
	LXVD2X	(R9)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different

	LXVD2X	(R10)(R5),V3
	LXVD2X	(R10)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different
	RET

	PCALIGN	$16
cmp16:	// 16 - 31B
	ANDCC	$15,R9,R9	// R9 = length - 16; CR0 EQ is set if exactly 16B
	LXVD2X	(R0)(R5),V3
	LXVD2X	(R0)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different
	BC	$12,2,LR	// beqlr

	// Check the final 16 bytes, overlapping the first 16 as needed.
	LXVD2X	(R9)(R5),V3
	LXVD2X	(R9)(R6),V4
	VCMPEQUDCC	V3,V4,V1
	BGE	CR6,different
	RET

	PCALIGN	$16
different:
	// V3 and V4 hold the 16 bytes of A and B that failed the equality
	// check. Compare them one doubleword at a time as unsigned integers.
#ifdef GOARCH_ppc64le
	MOVD	$byteswap<>+00(SB),R16
	LXVD2X	(R16)(R0),SWAP	// Set up swap string

	VPERM	V3,V3,SWAP,V3	// reverse the bytes within each doubleword
	VPERM	V4,V4,SWAP,V4
#endif

	// VS35 and VS36 are the VSX register names for V3 and V4.
	MFVSRD	VS35,R16	// move upper doublewords of A and B into GPR for comparison
	MFVSRD	VS36,R10

	CMPU	R16,R10
	BEQ	lower
	SETB_CR0_NE(R3)
	RET

	PCALIGN	$16
lower:
	VSLDOI	$8,V3,V3,V3	// move lower doublewords of A and B into GPR for comparison
	MFVSRD	VS35,R16
	VSLDOI	$8,V4,V4,V4
	MFVSRD	VS36,R10

	CMPU	R16,R10		// cannot be equal: the upper doublewords matched but the vectors differ
	SETB_CR0_NE(R3)
	RET

	PCALIGN	$16
cmp8:	// 8 - 15B (0 - 15B if GOPPC64_power10)
#ifdef GOPPC64_power10
	SLD	$56,R9,R9	// move the length into the most-significant byte of R9 for LXVLL
	LXVLL	R5,R9,V3	// Load bytes starting from MSB to LSB, unused are zero filled.
	LXVLL	R6,R9,V4
	VCMPUQ	V3,V4,CR0	// Compare as a 128b integer.
	SETB_CR0(R6)
	ISEL	CR0EQ,R3,R6,R3	// If equal, length determines the return value.
	RET
#else
	CMP	R9,$8
	BLT	cmp4
	ANDCC	$7,R9,R9	// R9 = length - 8, offset of the last 8 bytes
	_LDBEX	(R0)(R5),R10	// first 8 bytes of A and B
	_LDBEX	(R0)(R6),R11
	_LDBEX	(R9)(R5),R12	// last 8 bytes of A and B (may overlap the first 8)
	_LDBEX	(R9)(R6),R14
	CMPU	R10,R11,CR0
	SETB_CR0(R5)		// R5 = result for the first 8 bytes (addresses no longer needed)
	CMPU	R12,R14,CR1
	SETB_CR1(R6)		// R6 = result for the last 8 bytes
	CRAND	CR0EQ,CR1EQ,CR1EQ // If both equal, length determines return value.
	ISEL	CR0EQ,R6,R5,R4	// first 8 equal ? result of last 8 : result of first 8
	ISEL	CR1EQ,R3,R4,R3
	RET

	PCALIGN	$16
cmp4:	// 4 - 7B
	CMP	R9,$4
	BLT	cmp2
	ANDCC	$3,R9,R9	// R9 = length - 4, offset of the last 4 bytes
	_LWBEX	(R0)(R5),R10
	_LWBEX	(R0)(R6),R11
	_LWBEX	(R9)(R5),R12
	_LWBEX	(R9)(R6),R14
	RLDIMI	$32,R10,$0,R12	// R12 = first word of A (high 32 bits) : last word of A (low 32 bits)
	RLDIMI	$32,R11,$0,R14	// R14 = same for B
	CMPU	R12,R14
	BR	cmp0

	PCALIGN	$16
cmp2:	// 2 - 3B
	CMP	R9,$2
	BLT	cmp1
	ANDCC	$1,R9,R9	// R9 = length - 2, offset of the last 2 bytes
	_LHBEX	(R0)(R5),R10
	_LHBEX	(R0)(R6),R11
	_LHBEX	(R9)(R5),R12
	_LHBEX	(R9)(R6),R14
	RLDIMI	$32,R10,$0,R12	// R12 = first halfword of A (high 32 bits) : last halfword of A (low 32 bits)
	RLDIMI	$32,R11,$0,R14	// R14 = same for B
	CMPU	R12,R14
	BR	cmp0

	PCALIGN	$16
cmp1:	// 0 - 1B
	CMP	R9,$0
	BEQ	cmp0		// nothing to compare; CR0 EQ makes cmp0 return R3
	MOVBZ	(R5),R10
	MOVBZ	(R6),R11
	CMPU	R10,R11
cmp0:
	// CR0 holds the byte comparison result. If equal, length
	// determines the return value.
	SETB_CR0(R6)
	ISEL	CR0EQ,R3,R6,R3
	RET
#endif