...

Text file src/runtime/race_amd64.s

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build race

#include "go_asm.h"
#include "go_tls.h"
#include "funcdata.h"
#include "textflag.h"
#include "cgo/abi_amd64.h"

// The following thunks allow calling the gcc-compiled race runtime directly
// from Go code without going all the way through cgo.
// First, it's much faster (up to 50% speedup for real Go programs).
// Second, it eliminates race-related special cases from cgocall and the scheduler.
// Third, in the long term it will allow removing the cyclic runtime/race dependency on cmd/go.

// A brief recap of the amd64 calling convention.
// Arguments are passed in DI, SI, DX, CX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, R12-R15.
// SP must be 16-byte aligned.
// On Windows:
// Arguments are passed in CX, DX, R8, R9; the rest go on the stack.
// Callee-saved registers are: BX, BP, DI, SI, R12-R15.
// SP must be 16-byte aligned. Windows also requires "stack-backing" for the 4 register arguments:
// https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention
// We do not do this, because it seems to be intended for vararg/unprototyped functions.
// The gcc-compiled race runtime does not try to use that space.

#ifdef GOOS_windows
#define RARG0 CX
#define RARG1 DX
#define RARG2 R8
#define RARG3 R9
#else
#define RARG0 DI
#define RARG1 SI
#define RARG2 DX
#define RARG3 CX
#endif

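// As a hedged illustration (not actual compiler output): under -race, the
// compiler instruments each memory access with a call into these thunks,
// conceptually like this Go sketch:
//
//	func load(p *int) int {
//		raceread(uintptr(unsafe.Pointer(p))) // report the read to tsan
//		return *p                            // then perform the actual load
//	}
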
// func runtime·raceread(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·raceread<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_read(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_read(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceRead(addr uintptr)
TEXT	runtime·RaceRead(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because raceread reads caller pc.
	JMP	runtime·raceread(SB)

// void runtime·racereadpc(void *addr, void *callpc, void *pc)
TEXT	runtime·racereadpc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_read_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewrite(addr uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewrite<ABIInternal>(SB), NOSPLIT, $0-8
	MOVQ	AX, RARG1
	MOVQ	(SP), RARG2
	// void __tsan_write(ThreadState *thr, void *addr, void *pc);
	MOVQ	$__tsan_write(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWrite(addr uintptr)
TEXT	runtime·RaceWrite(SB), NOSPLIT, $0-8
	// This needs to be a tail call, because racewrite reads caller pc.
	JMP	runtime·racewrite(SB)

// void runtime·racewritepc(void *addr, void *callpc, void *pc)
TEXT	runtime·racewritepc(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	callpc+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_pc(ThreadState *thr, void *addr, void *callpc, void *pc);
	MOVQ	$__tsan_write_pc(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racereadrange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racereadrange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceReadRange(addr, size uintptr)
TEXT	runtime·RaceReadRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racereadrange reads caller pc.
	JMP	runtime·racereadrange(SB)

// void runtime·racereadrangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racereadrangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_read_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_read_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·racewriterange(addr, size uintptr)
// Called from instrumented code.
// Defined as ABIInternal so as to avoid introducing a wrapper,
// which would render runtime.getcallerpc ineffective.
TEXT	runtime·racewriterange<ABIInternal>(SB), NOSPLIT, $0-16
	MOVQ	AX, RARG1
	MOVQ	BX, RARG2
	MOVQ	(SP), RARG3
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// func runtime·RaceWriteRange(addr, size uintptr)
TEXT	runtime·RaceWriteRange(SB), NOSPLIT, $0-16
	// This needs to be a tail call, because racewriterange reads caller pc.
	JMP	runtime·racewriterange(SB)

// void runtime·racewriterangepc1(void *addr, uintptr sz, void *pc)
TEXT	runtime·racewriterangepc1(SB), NOSPLIT, $0-24
	MOVQ	addr+0(FP), RARG1
	MOVQ	size+8(FP), RARG2
	MOVQ	pc+16(FP), RARG3
	ADDQ	$1, RARG3 // pc is function start, tsan wants return address
	// void __tsan_write_range(ThreadState *thr, void *addr, uintptr size, void *pc);
	MOVQ	$__tsan_write_range(SB), AX
	JMP	racecalladdr<>(SB)

// If addr (RARG1) is out of range, do nothing.
// Otherwise, set up the goroutine context and invoke racecall. Other arguments are already set.
TEXT	racecalladdr<>(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	RARG1, runtime·racearenastart(SB)
	JB	data
	CMPQ	RARG1, runtime·racearenaend(SB)
	JB	call
data:
	CMPQ	RARG1, runtime·racedatastart(SB)
	JB	ret
	CMPQ	RARG1, runtime·racedataend(SB)
	JAE	ret
call:
	MOVQ	AX, AX		// w/o this 6a miscompiles this function
	JMP	racecall<>(SB)
ret:
	RET
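
// Restated in Go, the filter above is roughly this sketch (the bounds are the
// runtime variables compared against above):
//
//	func shouldReport(addr uintptr) bool {
//		if racearenastart <= addr && addr < racearenaend {
//			return true // heap objects
//		}
//		return racedatastart <= addr && addr < racedataend // data+bss
//	}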

// func runtime·racefuncenter(pc uintptr)
// Called from instrumented code.
TEXT	runtime·racefuncenter(SB), NOSPLIT, $0-8
	MOVQ	callpc+0(FP), R11
	JMP	racefuncenter<>(SB)

// Common code for racefuncenter
// R11 = caller's return address
TEXT	racefuncenter<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	DX, BX		// save function entry context (for closures)
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	R11, RARG1
	// void __tsan_func_enter(ThreadState *thr, void *pc);
	MOVQ	$__tsan_func_enter(SB), AX
	// racecall<> preserves BX
	CALL	racecall<>(SB)
	MOVQ	BX, DX	// restore function entry context
	RET

// func runtime·racefuncexit()
// Called from instrumented code.
TEXT	runtime·racefuncexit(SB), NOSPLIT, $0-0
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	// void __tsan_func_exit(ThreadState *thr);
	MOVQ	$__tsan_func_exit(SB), AX
	JMP	racecall<>(SB)

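// Hedged sketch of how this pair is used by instrumented code (the compiler
// emits explicit calls at function entry and before each return, not a defer):
//
//	func f() {
//		racefuncenter(getcallerpc()) // -> __tsan_func_enter(racectx, pc)
//		// ... instrumented body ...
//		racefuncexit()               // -> __tsan_func_exit(racectx)
//	}
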
// Atomic operations for sync/atomic package.

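// For example (hedged usage sketch), an ordinary sync/atomic call in a racing
// program resolves directly to the thunks below:
//
//	var n int32
//	v := atomic.AddInt32(&n, 1) // -> __tsan_go_atomic32_fetch_add via AddInt32
//	_ = v
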
// Load
TEXT	sync∕atomic·LoadInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_load(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·LoadUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·LoadInt32(SB)

TEXT	sync∕atomic·LoadUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

TEXT	sync∕atomic·LoadPointer(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·LoadInt64(SB)

// Store
TEXT	sync∕atomic·StoreInt32(SB), NOSPLIT|NOFRAME, $0-12
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreInt64(SB), NOSPLIT|NOFRAME, $0-16
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_store(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·StoreUint32(SB), NOSPLIT, $0-12
	GO_ARGS
	JMP	sync∕atomic·StoreInt32(SB)

TEXT	sync∕atomic·StoreUint64(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

TEXT	sync∕atomic·StoreUintptr(SB), NOSPLIT, $0-16
	GO_ARGS
	JMP	sync∕atomic·StoreInt64(SB)

// Swap
TEXT	sync∕atomic·SwapInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·SwapUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·SwapInt32(SB)

TEXT	sync∕atomic·SwapUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

TEXT	sync∕atomic·SwapUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·SwapInt64(SB)

// Add
TEXT	sync∕atomic·AddInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVL	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDL	AX, ret+16(FP)
	RET
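
// The two instructions above convert tsan's fetch_add (which returns the old
// value) into Go's add semantics (which returns the new value); in Go terms
// (sketch; tsanFetchAdd32 is a hypothetical name for __tsan_go_atomic32_fetch_add):
//
//	old := tsanFetchAdd32(addr, delta) // tsan stores the old value in ret
//	ret = old + delta                  // atomic.AddInt32 must return the new one
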
TEXT	sync∕atomic·AddInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_add(SB), AX
	CALL	racecallatomic<>(SB)
	MOVQ	add+8(FP), AX	// convert fetch_add to add_fetch
	ADDQ	AX, ret+16(FP)
	RET

TEXT	sync∕atomic·AddUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AddInt32(SB)

TEXT	sync∕atomic·AddUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

TEXT	sync∕atomic·AddUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AddInt64(SB)

// And
TEXT	sync∕atomic·AndInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_and(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_and(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·AndUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·AndInt32(SB)

TEXT	sync∕atomic·AndUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

TEXT	sync∕atomic·AndUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·AndInt64(SB)

// Or
TEXT	sync∕atomic·OrInt32(SB), NOSPLIT|NOFRAME, $0-20
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_fetch_or(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrInt64(SB), NOSPLIT|NOFRAME, $0-24
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_fetch_or(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·OrUint32(SB), NOSPLIT, $0-20
	GO_ARGS
	JMP	sync∕atomic·OrInt32(SB)

TEXT	sync∕atomic·OrUint64(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

TEXT	sync∕atomic·OrUintptr(SB), NOSPLIT, $0-24
	GO_ARGS
	JMP	sync∕atomic·OrInt64(SB)

// CompareAndSwap
TEXT	sync∕atomic·CompareAndSwapInt32(SB), NOSPLIT|NOFRAME, $0-17
	GO_ARGS
	MOVQ	$__tsan_go_atomic32_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapInt64(SB), NOSPLIT|NOFRAME, $0-25
	GO_ARGS
	MOVQ	$__tsan_go_atomic64_compare_exchange(SB), AX
	CALL	racecallatomic<>(SB)
	RET

TEXT	sync∕atomic·CompareAndSwapUint32(SB), NOSPLIT, $0-17
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt32(SB)

TEXT	sync∕atomic·CompareAndSwapUint64(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

TEXT	sync∕atomic·CompareAndSwapUintptr(SB), NOSPLIT, $0-25
	GO_ARGS
	JMP	sync∕atomic·CompareAndSwapInt64(SB)

// Generic atomic operation implementation.
// AX already contains the target function.
TEXT	racecallatomic<>(SB), NOSPLIT|NOFRAME, $0-0
	// Trigger SIGSEGV early.
	MOVQ	16(SP), R12	// 16(SP) is addr, the first Go argument of the sync/atomic function
	MOVBLZX	(R12), R13
	// Check that addr is within [arenastart, arenaend) or within [racedatastart, racedataend).
	CMPQ	R12, runtime·racearenastart(SB)
	JB	racecallatomic_data
	CMPQ	R12, runtime·racearenaend(SB)
	JB	racecallatomic_ok
racecallatomic_data:
	CMPQ	R12, runtime·racedatastart(SB)
	JB	racecallatomic_ignore
	CMPQ	R12, runtime·racedataend(SB)
	JAE	racecallatomic_ignore
racecallatomic_ok:
	// Addr is within the good range, call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	JMP	racecall<>(SB)	// does not return
racecallatomic_ignore:
	// Addr is outside the good range.
	// Call __tsan_go_ignore_sync_begin to ignore synchronization during the atomic op.
	// An attempt to synchronize on the address would cause a crash.
	MOVQ	AX, BX	// remember the original function
	MOVQ	$__tsan_go_ignore_sync_begin(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	CALL	racecall<>(SB)
	MOVQ	BX, AX	// restore the original function
	// Call the atomic function.
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	MOVQ	8(SP), RARG1	// caller pc
	MOVQ	(SP), RARG2	// pc
	LEAQ	16(SP), RARG3	// arguments
	CALL	racecall<>(SB)
	// Call __tsan_go_ignore_sync_end.
	MOVQ	$__tsan_go_ignore_sync_end(SB), AX
	MOVQ	g_racectx(R14), RARG0	// goroutine context
	JMP	racecall<>(SB)

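// For reference, RARG3 above points at the Go argument frame of the
// intercepted sync/atomic function. For LoadInt32 ($0-12), the assumed layout
// of that frame is (sketch):
//
//	type loadInt32Frame struct {
//		addr *int32 // addr+0(FP); __tsan_go_atomic32_load reads *addr
//		ret  int32  // ret+8(FP); filled in with the loaded value
//	}
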
// void runtime·racecall(void(*f)(...), ...)
// Calls C function f from the race runtime and passes up to 4 arguments to it.
// The arguments are never heap-object-preserving pointers, so we pretend there are no arguments.
TEXT	runtime·racecall(SB), NOSPLIT, $0-0
	MOVQ	fn+0(FP), AX
	MOVQ	arg0+8(FP), RARG0
	MOVQ	arg1+16(FP), RARG1
	MOVQ	arg2+24(FP), RARG2
	MOVQ	arg3+32(FP), RARG3
	JMP	racecall<>(SB)
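
// The corresponding Go-side declaration is expected to look like this
// (hedged sketch; cf. runtime/race.go):
//
//	//go:noescape
//	func racecall(fn *byte, arg0, arg1, arg2, arg3 uintptr)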

// Switches SP to g0 stack and calls (AX). Arguments already set.
TEXT	racecall<>(SB), NOSPLIT|NOFRAME, $0-0
	MOVQ	g_m(R14), R13
	// Switch to g0 stack.
	MOVQ	SP, R12		// callee-saved, preserved across the CALL
	MOVQ	m_g0(R13), R10
	CMPQ	R10, R14
	JE	call	// already on g0
	MOVQ	(g_sched+gobuf_sp)(R10), SP
call:
	ANDQ	$~15, SP	// alignment for gcc ABI
	CALL	AX
	MOVQ	R12, SP
	// Back to Go world, set special registers.
	// The g register (R14) is preserved in C.
	XORPS	X15, X15
	RET

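// In Go-like pseudocode, the switch above is roughly (hedged sketch;
// gobuf_sp is the saved g0 stack pointer):
//
//	sp := SP
//	if g != g.m.g0 {
//		SP = g.m.g0.sched.sp // borrow the g0 stack
//	}
//	SP &^= 15 // round down to a 16-byte boundary, as the C ABI requires
//	fn()      // the tsan function in AX
//	SP = sp   // back to the Go stack
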
// C->Go callback thunk that allows C code to call runtime·racecallback.
// A direct Go->C race call has only switched SP; finish the g->g0 switch by setting the correct g.
// The overall effect of the Go->C->Go call chain is similar to that of mcall.
// RARG0 contains the command code. RARG1 contains the command-specific context.
// See racecallback for command codes.
TEXT	runtime·racecallbackthunk(SB), NOSPLIT|NOFRAME, $0-0
	// Handle command raceGetProcCmd (0) here.
	// First, the code below assumes that we are on curg, while raceGetProcCmd
	// can be executed on g0. Second, it is called frequently, so it will
	// benefit from this fast path.
	CMPQ	RARG0, $0
	JNE	rest
	get_tls(RARG0)
	MOVQ	g(RARG0), RARG0
	MOVQ	g_m(RARG0), RARG0
	MOVQ	m_p(RARG0), RARG0
	MOVQ	p_raceprocctx(RARG0), RARG0
	MOVQ	RARG0, (RARG1)
	RET

rest:
	// Transition from C ABI to Go ABI.
	PUSH_REGS_HOST_TO_ABI0()
	// Set g = g0.
	get_tls(R12)
	MOVQ	g(R12), R14
	MOVQ	g_m(R14), R13
	MOVQ	m_g0(R13), R15
	CMPQ	R13, R15
	JEQ	noswitch	// branch if already on g0
	MOVQ	R15, g(R12)	// g = m->g0
	MOVQ	R15, R14	// set g register
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	// All registers are smashed after Go code, reload.
	get_tls(R12)
	MOVQ	g(R12), R13
	MOVQ	g_m(R13), R13
	MOVQ	m_curg(R13), R14
	MOVQ	R14, g(R12)	// g = m->curg
ret:
	POP_REGS_HOST_TO_ABI0()
	RET

noswitch:
	// already on g0
	PUSHQ	RARG1	// func arg
	PUSHQ	RARG0	// func arg
	CALL	runtime·racecallback(SB)
	POPQ	R12
	POPQ	R12
	JMP	ret
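
// On the Go side, racecallback dispatches on the command code, roughly like
// this (hedged sketch; see racecallback and the race*Cmd constants in race.go):
//
//	func racecallback(cmd uintptr, ctx unsafe.Pointer) {
//		switch cmd {
//		case raceGetProcCmd:
//			// handled by the fast path in racecallbackthunk above
//		case raceSymbolizeCodeCmd:
//			raceSymbolizeCode((*symbolizeCodeContext)(ctx))
//		case raceSymbolizeDataCmd:
//			raceSymbolizeData((*symbolizeDataContext)(ctx))
//		}
//	}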
