1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build ppc64 || ppc64le
6
7#include "textflag.h"
8
9// For more details about how various memory models are
10// enforced on POWER, the following paper provides more
11// details about how they enforce C/C++ like models. This
12// gives context about why the strange looking code
13// sequences below work.
14//
15// http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
16
17// uint32 ·Load(uint32 volatile* ptr)
18TEXT ·Load(SB),NOSPLIT|NOFRAME,$-8-12
19 MOVD ptr+0(FP), R3
20 SYNC
21 MOVWZ 0(R3), R3
22 CMPW R3, R3, CR7
23 BC 4, 30, 1(PC) // bne- cr7,0x4
24 ISYNC
25 MOVW R3, ret+8(FP)
26 RET
27
28// uint8 ·Load8(uint8 volatile* ptr)
29TEXT ·Load8(SB),NOSPLIT|NOFRAME,$-8-9
30 MOVD ptr+0(FP), R3
31 SYNC
32 MOVBZ 0(R3), R3
33 CMP R3, R3, CR7
34 BC 4, 30, 1(PC) // bne- cr7,0x4
35 ISYNC
36 MOVB R3, ret+8(FP)
37 RET
38
39// uint64 ·Load64(uint64 volatile* ptr)
40TEXT ·Load64(SB),NOSPLIT|NOFRAME,$-8-16
41 MOVD ptr+0(FP), R3
42 SYNC
43 MOVD 0(R3), R3
44 CMP R3, R3, CR7
45 BC 4, 30, 1(PC) // bne- cr7,0x4
46 ISYNC
47 MOVD R3, ret+8(FP)
48 RET
49
50// void *·Loadp(void *volatile *ptr)
51TEXT ·Loadp(SB),NOSPLIT|NOFRAME,$-8-16
52 MOVD ptr+0(FP), R3
53 SYNC
54 MOVD 0(R3), R3
55 CMP R3, R3, CR7
56 BC 4, 30, 1(PC) // bne- cr7,0x4
57 ISYNC
58 MOVD R3, ret+8(FP)
59 RET
60
61// uint32 ·LoadAcq(uint32 volatile* ptr)
62TEXT ·LoadAcq(SB),NOSPLIT|NOFRAME,$-8-12
63 MOVD ptr+0(FP), R3
64 MOVWZ 0(R3), R3
65 CMPW R3, R3, CR7
66 BC 4, 30, 1(PC) // bne- cr7, 0x4
67 ISYNC
68 MOVW R3, ret+8(FP)
69 RET
70
71// uint64 ·LoadAcq64(uint64 volatile* ptr)
72TEXT ·LoadAcq64(SB),NOSPLIT|NOFRAME,$-8-16
73 MOVD ptr+0(FP), R3
74 MOVD 0(R3), R3
75 CMP R3, R3, CR7
76 BC 4, 30, 1(PC) // bne- cr7, 0x4
77 ISYNC
78 MOVD R3, ret+8(FP)
79 RET
80
81// bool cas(uint32 *ptr, uint32 old, uint32 new)
82// Atomically:
83// if(*val == old){
84// *val = new;
85// return 1;
86// } else
87// return 0;
88TEXT ·Cas(SB), NOSPLIT, $0-17
89 MOVD ptr+0(FP), R3
90 MOVWZ old+8(FP), R4
91 MOVWZ new+12(FP), R5
92 LWSYNC
93cas_again:
94 LWAR (R3), R6
95 CMPW R6, R4
96 BNE cas_fail
97 STWCCC R5, (R3)
98 BNE cas_again
99 MOVD $1, R3
100 LWSYNC
101 MOVB R3, ret+16(FP)
102 RET
103cas_fail:
104 LWSYNC
105 MOVB R0, ret+16(FP)
106 RET
107
108// bool ·Cas64(uint64 *ptr, uint64 old, uint64 new)
109// Atomically:
110// if(*val == old){
111// *val = new;
112// return 1;
113// } else {
114// return 0;
115// }
116TEXT ·Cas64(SB), NOSPLIT, $0-25
117 MOVD ptr+0(FP), R3
118 MOVD old+8(FP), R4
119 MOVD new+16(FP), R5
120 LWSYNC
121cas64_again:
122 LDAR (R3), R6
123 CMP R6, R4
124 BNE cas64_fail
125 STDCCC R5, (R3)
126 BNE cas64_again
127 MOVD $1, R3
128 LWSYNC
129 MOVB R3, ret+24(FP)
130 RET
131cas64_fail:
132 LWSYNC
133 MOVB R0, ret+24(FP)
134 RET
135
136TEXT ·CasRel(SB), NOSPLIT, $0-17
137 MOVD ptr+0(FP), R3
138 MOVWZ old+8(FP), R4
139 MOVWZ new+12(FP), R5
140 LWSYNC
141cas_again:
142 LWAR (R3), $0, R6 // 0 = Mutex release hint
143 CMPW R6, R4
144 BNE cas_fail
145 STWCCC R5, (R3)
146 BNE cas_again
147 MOVD $1, R3
148 MOVB R3, ret+16(FP)
149 RET
150cas_fail:
151 MOVB R0, ret+16(FP)
152 RET
153
154TEXT ·Casint32(SB), NOSPLIT, $0-17
155 BR ·Cas(SB)
156
157TEXT ·Casint64(SB), NOSPLIT, $0-25
158 BR ·Cas64(SB)
159
160TEXT ·Casuintptr(SB), NOSPLIT, $0-25
161 BR ·Cas64(SB)
162
163TEXT ·Loaduintptr(SB), NOSPLIT|NOFRAME, $0-16
164 BR ·Load64(SB)
165
166TEXT ·LoadAcquintptr(SB), NOSPLIT|NOFRAME, $0-16
167 BR ·LoadAcq64(SB)
168
169TEXT ·Loaduint(SB), NOSPLIT|NOFRAME, $0-16
170 BR ·Load64(SB)
171
172TEXT ·Storeint32(SB), NOSPLIT, $0-12
173 BR ·Store(SB)
174
175TEXT ·Storeint64(SB), NOSPLIT, $0-16
176 BR ·Store64(SB)
177
178TEXT ·Storeuintptr(SB), NOSPLIT, $0-16
179 BR ·Store64(SB)
180
181TEXT ·StoreReluintptr(SB), NOSPLIT, $0-16
182 BR ·StoreRel64(SB)
183
184TEXT ·Xadduintptr(SB), NOSPLIT, $0-24
185 BR ·Xadd64(SB)
186
187TEXT ·Loadint32(SB), NOSPLIT, $0-12
188 BR ·Load(SB)
189
190TEXT ·Loadint64(SB), NOSPLIT, $0-16
191 BR ·Load64(SB)
192
193TEXT ·Xaddint32(SB), NOSPLIT, $0-20
194 BR ·Xadd(SB)
195
196TEXT ·Xaddint64(SB), NOSPLIT, $0-24
197 BR ·Xadd64(SB)
198
199// bool casp(void **val, void *old, void *new)
200// Atomically:
201// if(*val == old){
202// *val = new;
203// return 1;
204// } else
205// return 0;
206TEXT ·Casp1(SB), NOSPLIT, $0-25
207 BR ·Cas64(SB)
208
209// uint32 xadd(uint32 volatile *ptr, int32 delta)
210// Atomically:
211// *val += delta;
212// return *val;
213TEXT ·Xadd(SB), NOSPLIT, $0-20
214 MOVD ptr+0(FP), R4
215 MOVW delta+8(FP), R5
216 LWSYNC
217 LWAR (R4), R3
218 ADD R5, R3
219 STWCCC R3, (R4)
220 BNE -3(PC)
221 MOVW R3, ret+16(FP)
222 RET
223
224// uint64 Xadd64(uint64 volatile *val, int64 delta)
225// Atomically:
226// *val += delta;
227// return *val;
228TEXT ·Xadd64(SB), NOSPLIT, $0-24
229 MOVD ptr+0(FP), R4
230 MOVD delta+8(FP), R5
231 LWSYNC
232 LDAR (R4), R3
233 ADD R5, R3
234 STDCCC R3, (R4)
235 BNE -3(PC)
236 MOVD R3, ret+16(FP)
237 RET
238
239// uint8 Xchg(ptr *uint8, new uint8)
240// Atomically:
241// old := *ptr;
242// *ptr = new;
243// return old;
244TEXT ·Xchg8(SB), NOSPLIT, $0-17
245 MOVD ptr+0(FP), R4
246 MOVB new+8(FP), R5
247 LWSYNC
248 LBAR (R4), R3
249 STBCCC R5, (R4)
250 BNE -2(PC)
251 ISYNC
252 MOVB R3, ret+16(FP)
253 RET
254
255// uint32 Xchg(ptr *uint32, new uint32)
256// Atomically:
257// old := *ptr;
258// *ptr = new;
259// return old;
260TEXT ·Xchg(SB), NOSPLIT, $0-20
261 MOVD ptr+0(FP), R4
262 MOVW new+8(FP), R5
263 LWSYNC
264 LWAR (R4), R3
265 STWCCC R5, (R4)
266 BNE -2(PC)
267 ISYNC
268 MOVW R3, ret+16(FP)
269 RET
270
271// uint64 Xchg64(ptr *uint64, new uint64)
272// Atomically:
273// old := *ptr;
274// *ptr = new;
275// return old;
276TEXT ·Xchg64(SB), NOSPLIT, $0-24
277 MOVD ptr+0(FP), R4
278 MOVD new+8(FP), R5
279 LWSYNC
280 LDAR (R4), R3
281 STDCCC R5, (R4)
282 BNE -2(PC)
283 ISYNC
284 MOVD R3, ret+16(FP)
285 RET
286
287TEXT ·Xchgint32(SB), NOSPLIT, $0-20
288 BR ·Xchg(SB)
289
290TEXT ·Xchgint64(SB), NOSPLIT, $0-24
291 BR ·Xchg64(SB)
292
293TEXT ·Xchguintptr(SB), NOSPLIT, $0-24
294 BR ·Xchg64(SB)
295
296TEXT ·StorepNoWB(SB), NOSPLIT, $0-16
297 BR ·Store64(SB)
298
299TEXT ·Store(SB), NOSPLIT, $0-12
300 MOVD ptr+0(FP), R3
301 MOVW val+8(FP), R4
302 SYNC
303 MOVW R4, 0(R3)
304 RET
305
306TEXT ·Store8(SB), NOSPLIT, $0-9
307 MOVD ptr+0(FP), R3
308 MOVB val+8(FP), R4
309 SYNC
310 MOVB R4, 0(R3)
311 RET
312
313TEXT ·Store64(SB), NOSPLIT, $0-16
314 MOVD ptr+0(FP), R3
315 MOVD val+8(FP), R4
316 SYNC
317 MOVD R4, 0(R3)
318 RET
319
320TEXT ·StoreRel(SB), NOSPLIT, $0-12
321 MOVD ptr+0(FP), R3
322 MOVW val+8(FP), R4
323 LWSYNC
324 MOVW R4, 0(R3)
325 RET
326
327TEXT ·StoreRel64(SB), NOSPLIT, $0-16
328 MOVD ptr+0(FP), R3
329 MOVD val+8(FP), R4
330 LWSYNC
331 MOVD R4, 0(R3)
332 RET
333
334// void ·Or8(byte volatile*, byte);
335TEXT ·Or8(SB), NOSPLIT, $0-9
336 MOVD ptr+0(FP), R3
337 MOVBZ val+8(FP), R4
338 LWSYNC
339again:
340 LBAR (R3), R6
341 OR R4, R6
342 STBCCC R6, (R3)
343 BNE again
344 RET
345
346// void ·And8(byte volatile*, byte);
347TEXT ·And8(SB), NOSPLIT, $0-9
348 MOVD ptr+0(FP), R3
349 MOVBZ val+8(FP), R4
350 LWSYNC
351again:
352 LBAR (R3), R6
353 AND R4, R6
354 STBCCC R6, (R3)
355 BNE again
356 RET
357
358// func Or(addr *uint32, v uint32)
359TEXT ·Or(SB), NOSPLIT, $0-12
360 MOVD ptr+0(FP), R3
361 MOVW val+8(FP), R4
362 LWSYNC
363again:
364 LWAR (R3), R6
365 OR R4, R6
366 STWCCC R6, (R3)
367 BNE again
368 RET
369
370// func And(addr *uint32, v uint32)
371TEXT ·And(SB), NOSPLIT, $0-12
372 MOVD ptr+0(FP), R3
373 MOVW val+8(FP), R4
374 LWSYNC
375again:
376 LWAR (R3),R6
377 AND R4, R6
378 STWCCC R6, (R3)
379 BNE again
380 RET
381
382// func Or32(addr *uint32, v uint32) old uint32
383TEXT ·Or32(SB), NOSPLIT, $0-20
384 MOVD ptr+0(FP), R3
385 MOVW val+8(FP), R4
386 LWSYNC
387again:
388 LWAR (R3), R6
389 OR R4, R6, R7
390 STWCCC R7, (R3)
391 BNE again
392 MOVW R6, ret+16(FP)
393 RET
394
395// func And32(addr *uint32, v uint32) old uint32
396TEXT ·And32(SB), NOSPLIT, $0-20
397 MOVD ptr+0(FP), R3
398 MOVW val+8(FP), R4
399 LWSYNC
400again:
401 LWAR (R3),R6
402 AND R4, R6, R7
403 STWCCC R7, (R3)
404 BNE again
405 MOVW R6, ret+16(FP)
406 RET
407
408// func Or64(addr *uint64, v uint64) old uint64
409TEXT ·Or64(SB), NOSPLIT, $0-24
410 MOVD ptr+0(FP), R3
411 MOVD val+8(FP), R4
412 LWSYNC
413again:
414 LDAR (R3), R6
415 OR R4, R6, R7
416 STDCCC R7, (R3)
417 BNE again
418 MOVD R6, ret+16(FP)
419 RET
420
421// func And64(addr *uint64, v uint64) old uint64
422TEXT ·And64(SB), NOSPLIT, $0-24
423 MOVD ptr+0(FP), R3
424 MOVD val+8(FP), R4
425 LWSYNC
426again:
427 LDAR (R3),R6
428 AND R4, R6, R7
429 STDCCC R7, (R3)
430 BNE again
431 MOVD R6, ret+16(FP)
432 RET
433
434// func Anduintptr(addr *uintptr, v uintptr) old uintptr
435TEXT ·Anduintptr(SB), NOSPLIT, $0-24
436 JMP ·And64(SB)
437
438// func Oruintptr(addr *uintptr, v uintptr) old uintptr
439TEXT ·Oruintptr(SB), NOSPLIT, $0-24
440 JMP ·Or64(SB)
View as plain text