src/runtime/race_arm64.s
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "funcdata.h"
#include "textflag.h"
#include "tls_arm64.h"
#include "cgo/abi_arm64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow us to remove the cyclic runtime/race dependency on cmd/go.
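//
// For reference, a minimal sketch of how such thunks surface on the Go side
// (the real declarations live in race.go; the pragmas shown here are
// illustrative rather than a verbatim copy):
//
//	//go:noescape
//	func racereadpc(addr unsafe.Pointer, callpc, pc uintptr)
//
//	//go:noescape
//	func racewritepc(addr unsafe.Pointer, callpc, pc uintptr)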

// A brief recap of the arm64 calling convention.
// Arguments are passed in R0...R7; the rest go on the stack.
// Callee-saved registers are: R19...R28.
// Temporary registers are: R9...R15.
// SP must be 16-byte aligned.

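// As a concrete example, the first thunk below makes the C call
// __tsan_read(thr, addr, pc), which under this convention expects
// R0 = thr, R1 = addr, R2 = pc.
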
// When calling racecalladdr, R9 is the call target address.

// The race ctx, ThreadState *thr below, is passed in R0 and loaded in racecalladdr.

// Darwin may return an unaligned thread pointer. Align it. (See tls_arm64.s)
// No-op on other OSes.
#ifdef TLS_darwin
#define TP_ALIGN	AND	$~7, R0
#else
#define TP_ALIGN
#endif

// Load g from TLS. (See tls_arm64.s)
#define load_g \
	MRS_TPIDR_R0 \
	TP_ALIGN \
	MOVD	runtime·tls_g(SB), R11 \
	MOVD	(R0)(R11), g

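// In effect, load_g performs, in rough Go-flavored pseudocode (assuming
// tls_g holds the offset of the g slot from the thread pointer):
//
//	g = *(*g)(unsafe.Pointer(tlsBase + tls_g))
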
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_read(SB), R9
	JMP	racecalladdr<>(SB)

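// For context: the compiler's race instrumentation emits calls to
// raceread/racewrite around memory accesses, so instrumented code is
// conceptually doing something like this sketch for a read of x:
//
//	runtime.raceread(uintptr(unsafe.Pointer(&x)))
//	_ = x
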
// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads the caller's pc.
	JMP	runtime·raceread(SB)

// func runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_read_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R1	// addr
	MOVD	LR, R2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVD	$__tsan_write(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads the caller's pc.
	JMP	runtime·racewrite(SB)

// func runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	callpc+8(FP), R2
	MOVD	pc+16(FP), R3
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVD	$__tsan_write_pc(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads the caller's pc.
	JMP	runtime·racereadrange(SB)

// func runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_read_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would make the caller's PC ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVD	R1, R2	// size
	MOVD	R0, R1	// addr
	MOVD	LR, R3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads the caller's pc.
	JMP	runtime·racewriterange(SB)

// func runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVD	addr+0(FP), R1
	MOVD	size+8(FP), R2
	MOVD	pc+16(FP), R3
	ADD	$4, R3	// pc is function start, tsan wants return address.
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVD	$__tsan_write_range(SB), R9
	JMP	racecalladdr<>(SB)

// If addr (R1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R1
	BLT	data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R1
	BLT	call
data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R1
	BLT	ret
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R1
	BGE	ret	// racedataend is exclusive, so addr == racedataend is out of range.
call:
	JMP	racecall<>(SB)
ret:
	RET

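// A Go-flavored sketch of the filter above, assuming the half-open ranges
// stated in the comment (racearenastart etc. are the runtime's globals):
//
//	func inRange(addr uintptr) bool {
//		return (racearenastart <= addr && addr < racearenaend) ||
//			(racedatastart <= addr && addr < racedataend)
//	}
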
// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter<ABIInternal>(SB), NOSPLIT, $0-8
	MOVD	R0, R9	// callpc
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R9 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// goroutine racectx
	MOVD	R9, R1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVD	$__tsan_func_enter(SB), R9
	BL	racecall<>(SB)
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit<ABIInternal>(SB), NOSPLIT, $0-0
	load_g
	MOVD	g_racectx(g), R0	// race context
	// void __tsan_func_exit(ThreadState *thr);
	MOVD	$__tsan_func_exit(SB), R9
	JMP	racecall<>(SB)

// Atomic operations for sync/atomic package.
// R3 = addr of the arguments passed to this function; it can
// be fetched at 40(RSP) in racecallatomic after two BLs.
// R0, R1, R2 are set in racecallatomic.

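// For orientation, each tsan Go-atomic entry point called below takes four
// arguments matching the register setup in racecallatomic (a sketch; the
// exact C-side parameter types live in the tsan sources):
//
//	// void __tsan_go_atomic32_load(ThreadState *thr, void *callpc, void *pc, void *args);
//
// where args points at the Go argument block (addr, operands, results).
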
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_load(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT, $0-12
	GO_ARGS
	MOVD	$__tsan_go_atomic32_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT, $0-16
	GO_ARGS
	MOVD	$__tsan_go_atomic64_store(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVW	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVW	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVW	R0, ret+16(FP)
	RET

TEXT	sync∕atomic·AddInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_add(SB), R9
	BL	racecallatomic<>(SB)
	MOVD	add+8(FP), R0	// convert fetch_add to add_fetch
	MOVD	ret+16(FP), R1
	ADD	R0, R1, R0
	MOVD	R0, ret+16(FP)
	RET

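// About the fetch_add/add_fetch conversion above: tsan's fetch_add writes
// the *old* value into the result slot, while Go's Add* must return the
// *new* value, so the thunks re-add the delta. In Go terms (a sketch only;
// tsanFetchAdd is a hypothetical stand-in for the tsan call):
//
//	old := tsanFetchAdd(addr, delta)
//	return old + delta
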
TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// And
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_and(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_and(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

// Or
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT, $0-20
	GO_ARGS
	MOVD	$__tsan_go_atomic32_fetch_or(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT, $0-24
	GO_ARGS
	MOVD	$__tsan_go_atomic64_fetch_or(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT, $0-17
	GO_ARGS
	MOVD	$__tsan_go_atomic32_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT, $0-25
	GO_ARGS
	MOVD	$__tsan_go_atomic64_compare_exchange(SB), R9
	BL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// R9 = addr of target function
TEXT	racecallatomic<>(SB), NOSPLIT, $0
	// Set up these registers:
	// R0 = *ThreadState
	// R1 = caller pc
	// R2 = pc
	// R3 = addr of incoming arg list

	// Trigger SIGSEGV early.
	MOVD	40(RSP), R3	// 1st arg is addr; after two BLs, get it at 40(RSP)
	MOVB	(R3), R13	// segv here if addr is bad
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	MOVD	runtime·racearenastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_data
	MOVD	runtime·racearenaend(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ok
racecallatomic_data:
	MOVD	runtime·racedatastart(SB), R10
	CMP	R10, R3
	BLT	racecallatomic_ignore
	MOVD	runtime·racedataend(SB), R10
	CMP	R10, R3
	BGE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVD	R9, R21	// remember the original function
	MOVD	$__tsan_go_ignore_sync_begin(SB), R9
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	MOVD	R21, R9	// restore the original function
	// Call the atomic function.
	// racecall will call LLVM race code which might clobber R28 (g).
	load_g
	MOVD	g_racectx(g), R0	// goroutine context
	MOVD	16(RSP), R1	// caller pc
	MOVD	R9, R2	// pc
	ADD	$40, RSP, R3	// arguments
	BL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVD	$__tsan_go_ignore_sync_end(SB), R9
	MOVD	g_racectx(g), R0	// goroutine context
	BL	racecall<>(SB)
	RET

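// The ignore path above brackets the call, conceptually (a Go-flavored
// sketch; ignoreSyncBegin/End stand in for the __tsan_go_ignore_sync_*
// calls, and the atomic op itself still executes):
//
//	ignoreSyncBegin()
//	callAtomic(op, args)
//	ignoreSyncEnd()
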
// func runtime·racecall(void(*f)(...), ...)
// Calls C function f from race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVD	fn+0(FP), R9
	MOVD	arg0+8(FP), R0
	MOVD	arg1+16(FP), R1
	MOVD	arg2+24(FP), R2
	MOVD	arg3+32(FP), R3
	JMP	racecall<>(SB)

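// On the Go side, race.go invokes this wrapper along these lines (a sketch;
// see race.go for the real call sites and their exact arguments):
//
//	racecall(&__tsan_fini, 0, 0, 0, 0)
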
// Switches SP to g0 stack and calls (R9). Arguments already set.
// Clobbers R19, R20.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVD	g_m(g), R10
	// Switch to g0 stack.
	MOVD	RSP, R19	// callee-saved, preserved across the CALL
	MOVD	R30, R20	// callee-saved, preserved across the CALL
	MOVD	m_g0(R10), R11
	CMP	R11, g
	BEQ	call	// already on g0
	MOVD	(g_sched+gobuf_sp)(R11), R12
	MOVD	R12, RSP
call:
	// Decrement SP past where the frame pointer is saved in the Go arm64
	// ABI (one word below the stack pointer) so the race detector library
	// code doesn't clobber it.
	SUB	$16, RSP
	BL	R9
	MOVD	R19, RSP
	JMP	(R20)

// C->Go callback thunk that allows calling runtime·racesymbolize from C code.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// R0 contains the command code. R1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CBNZ	R0, rest
	MOVD	g, R13
#ifdef TLS_darwin
	MOVD	R27, R12	// save R27 a.k.a. REGTMP (callee-save in C). load_g clobbers it
#endif
	load_g
#ifdef TLS_darwin
	MOVD	R12, R27
#endif
	MOVD	g_m(g), R0
	MOVD	m_p(R0), R0
	MOVD	p_raceprocctx(R0), R0
	MOVD	R0, (R1)
	MOVD	R13, g
	JMP	(LR)
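
	// In Go terms, the fast path above does roughly this (sketch only),
	// without growing the stack or switching goroutines:
	//
	//	*(*uintptr)(unsafe.Pointer(arg)) = getg().m.p.ptr().raceprocctx
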
rest:
	// Save callee-saved registers (Go code won't respect that).
	// 8(RSP) and 16(RSP) are for the args passed through racecallback.
	SUB	$176, RSP
	MOVD	LR, 0(RSP)

	SAVE_R19_TO_R28(8*3)
	SAVE_F8_TO_F15(8*13)
	MOVD	R29, (8*21)(RSP)
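	// Layout of the 176-byte frame reserved above:
	//	0(RSP)		saved LR
	//	8(RSP), 16(RSP)	racecallback args
	//	24..96(RSP)	R19-R28 (SAVE_R19_TO_R28 at offset 8*3)
	//	104..160(RSP)	F8-F15 (SAVE_F8_TO_F15 at offset 8*13)
	//	168(RSP)	R29 (8*21)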
	// Set g = g0.
	// load_g will clobber R0; save R0 first.
	MOVD	R0, R13
	load_g
	// Restore R0.
	MOVD	R13, R0
	MOVD	g_m(g), R13
	MOVD	m_g0(R13), R14
	CMP	R14, g
	BEQ	noswitch	// branch if already on g0
	MOVD	R14, g

	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)

	// All registers are smashed after Go code, reload.
	MOVD	g_m(g), R13
	MOVD	m_curg(R13), g	// g = m->curg
ret:
	// Restore callee-saved registers.
	MOVD	0(RSP), LR
	MOVD	(8*21)(RSP), R29
	RESTORE_F8_TO_F15(8*13)
	RESTORE_R19_TO_R28(8*3)
	ADD	$176, RSP
	JMP	(LR)

noswitch:
	// already on g0
	MOVD	R0, 8(RSP)	// func arg
	MOVD	R1, 16(RSP)	// func arg
	BL	runtime·racecallback(SB)
	JMP	ret

#ifndef TLSG_IS_VARIABLE
// tls_g, g value for each thread in TLS
GLOBL runtime·tls_g+0(SB), TLSBSS+DUPOK, $8
#endif