...

Text file src/runtime/asm_amd64.s

Documentation: runtime

     1// Copyright 2009 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "go_asm.h"
     6#include "go_tls.h"
     7#include "funcdata.h"
     8#include "textflag.h"
     9#include "cgo/abi_amd64.h"
    10
    11// _rt0_amd64 is common startup code for most amd64 systems when using
    12// internal linking. This is the entry point for the program from the
    13// kernel for an ordinary -buildmode=exe program. The stack holds the
    14// number of arguments and the C-style argv.
    15TEXT _rt0_amd64(SB),NOSPLIT,$-8
    16	MOVQ	0(SP), DI	// argc
    17	LEAQ	8(SP), SI	// argv
    18	JMP	runtime·rt0_go(SB)
    19
    20// main is common startup code for most amd64 systems when using
    21// external linking. The C startup code will call the symbol "main"
    22// passing argc and argv in the usual C ABI registers DI and SI.
    23TEXT main(SB),NOSPLIT,$-8
    24	JMP	runtime·rt0_go(SB)
    25
    26// _rt0_amd64_lib is common startup code for most amd64 systems when
    27// using -buildmode=c-archive or -buildmode=c-shared. The linker will
    28// arrange to invoke this function as a global constructor (for
    29// c-archive) or when the shared library is loaded (for c-shared).
    30// We expect argc and argv to be passed in the usual C ABI registers
    31// DI and SI.
    32TEXT _rt0_amd64_lib(SB),NOSPLIT|NOFRAME,$0
    33	// Transition from C ABI to Go ABI.
    34	PUSH_REGS_HOST_TO_ABI0()
    35
    36	MOVQ	DI, _rt0_amd64_lib_argc<>(SB)
    37	MOVQ	SI, _rt0_amd64_lib_argv<>(SB)
    38
    39	// Synchronous initialization.
    40	CALL	runtime·libpreinit(SB)
    41
    42	// Create a new thread to finish Go runtime initialization.
    43	MOVQ	_cgo_sys_thread_create(SB), AX
    44	TESTQ	AX, AX
    45	JZ	nocgo
    46
    47	// We're calling back to C.
    48	// Align stack per ELF ABI requirements.
    49	MOVQ	SP, BX  // Callee-save in C ABI
    50	ANDQ	$~15, SP
    51	MOVQ	$_rt0_amd64_lib_go(SB), DI
    52	MOVQ	$0, SI
    53	CALL	AX
    54	MOVQ	BX, SP
    55	JMP	restore
    56
    57nocgo:
    58	ADJSP	$16
    59	MOVQ	$0x800000, 0(SP)		// stacksize
    60	MOVQ	$_rt0_amd64_lib_go(SB), AX
    61	MOVQ	AX, 8(SP)			// fn
    62	CALL	runtime·newosproc0(SB)
    63	ADJSP	$-16
    64
    65restore:
    66	POP_REGS_HOST_TO_ABI0()
    67	RET
    68
    69// _rt0_amd64_lib_go initializes the Go runtime.
    70// This is started in a separate thread by _rt0_amd64_lib.
    71TEXT _rt0_amd64_lib_go(SB),NOSPLIT,$0
    72	MOVQ	_rt0_amd64_lib_argc<>(SB), DI
    73	MOVQ	_rt0_amd64_lib_argv<>(SB), SI
    74	JMP	runtime·rt0_go(SB)
    75
    76DATA _rt0_amd64_lib_argc<>(SB)/8, $0
    77GLOBL _rt0_amd64_lib_argc<>(SB),NOPTR, $8
    78DATA _rt0_amd64_lib_argv<>(SB)/8, $0
    79GLOBL _rt0_amd64_lib_argv<>(SB),NOPTR, $8
    80
    81#ifdef GOAMD64_v2
    82DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v2 microarchitecture support.\n"
    83#endif
    84
    85#ifdef GOAMD64_v3
    86DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v3 microarchitecture support.\n"
    87#endif
    88
    89#ifdef GOAMD64_v4
    90DATA bad_cpu_msg<>+0x00(SB)/84, $"This program can only be run on AMD64 processors with v4 microarchitecture support.\n"
    91#endif
    92
    93GLOBL bad_cpu_msg<>(SB), RODATA, $84
    94
    95// Define a list of AMD64 microarchitecture level features
    96// https://en.wikipedia.org/wiki/X86-64#Microarchitecture_levels
    97
     98                     // SSE3     SSSE3    CMPXCHG16B SSE4.1    SSE4.2    POPCNT
    99#define V2_FEATURES_CX (1 << 0 | 1 << 9 | 1 << 13  | 1 << 19 | 1 << 20 | 1 << 23)
   100                         // LAHF/SAHF
   101#define V2_EXT_FEATURES_CX (1 << 0)
   102                                      // FMA       MOVBE     OSXSAVE   AVX       F16C
   103#define V3_FEATURES_CX (V2_FEATURES_CX | 1 << 12 | 1 << 22 | 1 << 27 | 1 << 28 | 1 << 29)
    104                                              // ABM (FOR LZCNT)
   105#define V3_EXT_FEATURES_CX (V2_EXT_FEATURES_CX | 1 << 5)
   106                         // BMI1     AVX2     BMI2
   107#define V3_EXT_FEATURES_BX (1 << 3 | 1 << 5 | 1 << 8)
   108                       // XMM      YMM
   109#define V3_OS_SUPPORT_AX (1 << 1 | 1 << 2)
   110
   111#define V4_FEATURES_CX V3_FEATURES_CX
   112
   113#define V4_EXT_FEATURES_CX V3_EXT_FEATURES_CX
   114                                              // AVX512F   AVX512DQ  AVX512CD  AVX512BW  AVX512VL
   115#define V4_EXT_FEATURES_BX (V3_EXT_FEATURES_BX | 1 << 16 | 1 << 17 | 1 << 28 | 1 << 30 | 1 << 31)
   116                                          // OPMASK   ZMM
   117#define V4_OS_SUPPORT_AX (V3_OS_SUPPORT_AX | 1 << 5 | (1 << 6 | 1 << 7))
   118
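// For illustration, the v2 leaf-1 ECX mask above works out to
// V2_FEATURES_CX = 1<<0 | 1<<9 | 1<<13 | 1<<19 | 1<<20 | 1<<23 = 0x00982201,
// and the checks in rt0_go below require (CPUID.01H:ECX & mask) == mask, i.e.
// every listed feature bit must be set; the v3/v4 masks extend this the same way.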
   119#ifdef GOAMD64_v2
   120#define NEED_MAX_CPUID 0x80000001
   121#define NEED_FEATURES_CX V2_FEATURES_CX
   122#define NEED_EXT_FEATURES_CX V2_EXT_FEATURES_CX
   123#endif
   124
   125#ifdef GOAMD64_v3
   126#define NEED_MAX_CPUID 0x80000001
   127#define NEED_FEATURES_CX V3_FEATURES_CX
   128#define NEED_EXT_FEATURES_CX V3_EXT_FEATURES_CX
   129#define NEED_EXT_FEATURES_BX V3_EXT_FEATURES_BX
   130#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
   131#endif
   132
   133#ifdef GOAMD64_v4
   134#define NEED_MAX_CPUID 0x80000001
   135#define NEED_FEATURES_CX V4_FEATURES_CX
   136#define NEED_EXT_FEATURES_CX V4_EXT_FEATURES_CX
   137#define NEED_EXT_FEATURES_BX V4_EXT_FEATURES_BX
   138
   139// Darwin requires a different approach to check AVX512 support, see CL 285572.
   140#ifdef GOOS_darwin
   141#define NEED_OS_SUPPORT_AX V3_OS_SUPPORT_AX
   142// These values are from:
   143// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
   144#define commpage64_base_address         0x00007fffffe00000
   145#define commpage64_cpu_capabilities64   (commpage64_base_address+0x010)
   146#define commpage64_version              (commpage64_base_address+0x01E)
   147#define AVX512F                         0x0000004000000000
   148#define AVX512CD                        0x0000008000000000
   149#define AVX512DQ                        0x0000010000000000
   150#define AVX512BW                        0x0000020000000000
   151#define AVX512VL                        0x0000100000000000
   152#define NEED_DARWIN_SUPPORT             (AVX512F | AVX512DQ | AVX512CD | AVX512BW | AVX512VL)
   153#else
   154#define NEED_OS_SUPPORT_AX V4_OS_SUPPORT_AX
   155#endif
   156
   157#endif
   158
   159TEXT runtime·rt0_go(SB),NOSPLIT|NOFRAME|TOPFRAME,$0
   160	// copy arguments forward on an even stack
   161	MOVQ	DI, AX		// argc
   162	MOVQ	SI, BX		// argv
   163	SUBQ	$(5*8), SP		// 3args 2auto
   164	ANDQ	$~15, SP
   165	MOVQ	AX, 24(SP)
   166	MOVQ	BX, 32(SP)
   167
   168	// create istack out of the given (operating system) stack.
   169	// _cgo_init may update stackguard.
   170	MOVQ	$runtime·g0(SB), DI
   171	LEAQ	(-64*1024)(SP), BX
   172	MOVQ	BX, g_stackguard0(DI)
   173	MOVQ	BX, g_stackguard1(DI)
   174	MOVQ	BX, (g_stack+stack_lo)(DI)
   175	MOVQ	SP, (g_stack+stack_hi)(DI)
   176
   177	// find out information about the processor we're on
   178	MOVL	$0, AX
   179	CPUID
   180	CMPL	AX, $0
   181	JE	nocpuinfo
   182
   183	CMPL	BX, $0x756E6547  // "Genu"
   184	JNE	notintel
   185	CMPL	DX, $0x49656E69  // "ineI"
   186	JNE	notintel
   187	CMPL	CX, $0x6C65746E  // "ntel"
   188	JNE	notintel
   189	MOVB	$1, runtime·isIntel(SB)
   190
   191notintel:
   192	// Load EAX=1 cpuid flags
   193	MOVL	$1, AX
   194	CPUID
   195	MOVL	AX, runtime·processorVersionInfo(SB)
   196
   197nocpuinfo:
   198	// if there is an _cgo_init, call it.
   199	MOVQ	_cgo_init(SB), AX
   200	TESTQ	AX, AX
   201	JZ	needtls
   202	// arg 1: g0, already in DI
   203	MOVQ	$setg_gcc<>(SB), SI // arg 2: setg_gcc
   204	MOVQ	$0, DX	// arg 3, 4: not used when using platform's TLS
   205	MOVQ	$0, CX
   206#ifdef GOOS_android
   207	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
   208	// arg 4: TLS base, stored in slot 0 (Android's TLS_SLOT_SELF).
   209	// Compensate for tls_g (+16).
   210	MOVQ	-16(TLS), CX
   211#endif
   212#ifdef GOOS_windows
   213	MOVQ	$runtime·tls_g(SB), DX 	// arg 3: &tls_g
   214	// Adjust for the Win64 calling convention.
   215	MOVQ	CX, R9 // arg 4
   216	MOVQ	DX, R8 // arg 3
   217	MOVQ	SI, DX // arg 2
   218	MOVQ	DI, CX // arg 1
   219#endif
   220	CALL	AX
   221
   222	// update stackguard after _cgo_init
   223	MOVQ	$runtime·g0(SB), CX
   224	MOVQ	(g_stack+stack_lo)(CX), AX
   225	ADDQ	$const_stackGuard, AX
   226	MOVQ	AX, g_stackguard0(CX)
   227	MOVQ	AX, g_stackguard1(CX)
   228
   229#ifndef GOOS_windows
   230	JMP ok
   231#endif
   232needtls:
   233#ifdef GOOS_plan9
   234	// skip TLS setup on Plan 9
   235	JMP ok
   236#endif
   237#ifdef GOOS_solaris
   238	// skip TLS setup on Solaris
   239	JMP ok
   240#endif
   241#ifdef GOOS_illumos
   242	// skip TLS setup on illumos
   243	JMP ok
   244#endif
   245#ifdef GOOS_darwin
   246	// skip TLS setup on Darwin
   247	JMP ok
   248#endif
   249#ifdef GOOS_openbsd
   250	// skip TLS setup on OpenBSD
   251	JMP ok
   252#endif
   253
   254#ifdef GOOS_windows
   255	CALL	runtime·wintls(SB)
   256#endif
   257
   258	LEAQ	runtime·m0+m_tls(SB), DI
   259	CALL	runtime·settls(SB)
   260
   261	// store through it, to make sure it works
   262	get_tls(BX)
   263	MOVQ	$0x123, g(BX)
   264	MOVQ	runtime·m0+m_tls(SB), AX
   265	CMPQ	AX, $0x123
   266	JEQ 2(PC)
   267	CALL	runtime·abort(SB)
   268ok:
   269	// set the per-goroutine and per-mach "registers"
   270	get_tls(BX)
   271	LEAQ	runtime·g0(SB), CX
   272	MOVQ	CX, g(BX)
   273	LEAQ	runtime·m0(SB), AX
   274
   275	// save m->g0 = g0
   276	MOVQ	CX, m_g0(AX)
   277	// save m0 to g0->m
   278	MOVQ	AX, g_m(CX)
   279
   280	CLD				// convention is D is always left cleared
   281
   282	// Check GOAMD64 requirements
   283	// We need to do this after setting up TLS, so that
   284	// we can report an error if there is a failure. See issue 49586.
   285#ifdef NEED_FEATURES_CX
   286	MOVL	$0, AX
   287	CPUID
   288	CMPL	AX, $0
   289	JE	bad_cpu
   290	MOVL	$1, AX
   291	CPUID
   292	ANDL	$NEED_FEATURES_CX, CX
   293	CMPL	CX, $NEED_FEATURES_CX
   294	JNE	bad_cpu
   295#endif
   296
   297#ifdef NEED_MAX_CPUID
   298	MOVL	$0x80000000, AX
   299	CPUID
   300	CMPL	AX, $NEED_MAX_CPUID
   301	JL	bad_cpu
   302#endif
   303
   304#ifdef NEED_EXT_FEATURES_BX
   305	MOVL	$7, AX
   306	MOVL	$0, CX
   307	CPUID
   308	ANDL	$NEED_EXT_FEATURES_BX, BX
   309	CMPL	BX, $NEED_EXT_FEATURES_BX
   310	JNE	bad_cpu
   311#endif
   312
   313#ifdef NEED_EXT_FEATURES_CX
   314	MOVL	$0x80000001, AX
   315	CPUID
   316	ANDL	$NEED_EXT_FEATURES_CX, CX
   317	CMPL	CX, $NEED_EXT_FEATURES_CX
   318	JNE	bad_cpu
   319#endif
   320
   321#ifdef NEED_OS_SUPPORT_AX
   322	XORL    CX, CX
   323	XGETBV
   324	ANDL	$NEED_OS_SUPPORT_AX, AX
   325	CMPL	AX, $NEED_OS_SUPPORT_AX
   326	JNE	bad_cpu
   327#endif
   328
   329#ifdef NEED_DARWIN_SUPPORT
   330	MOVQ	$commpage64_version, BX
   331	CMPW	(BX), $13  // cpu_capabilities64 undefined in versions < 13
   332	JL	bad_cpu
   333	MOVQ	$commpage64_cpu_capabilities64, BX
   334	MOVQ	(BX), BX
   335	MOVQ	$NEED_DARWIN_SUPPORT, CX
   336	ANDQ	CX, BX
   337	CMPQ	BX, CX
   338	JNE	bad_cpu
   339#endif
   340
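	// Rough Go-style sketch of the v2 variant of the checks above (illustrative
	// only; cpuid/xgetbv stand for the CPUID and XGETBV instructions):
	//	if maxLeaf, _, _, _ := cpuid(0, 0); maxLeaf == 0 { goto bad_cpu }
	//	_, _, ecx, _ := cpuid(1, 0)
	//	if ecx&V2_FEATURES_CX != V2_FEATURES_CX { goto bad_cpu }
	//	if maxExt, _, _, _ := cpuid(0x80000000, 0); maxExt < 0x80000001 { goto bad_cpu }
	//	_, _, ecx, _ = cpuid(0x80000001, 0)
	//	if ecx&V2_EXT_FEATURES_CX != V2_EXT_FEATURES_CX { goto bad_cpu }
	// v3/v4 additionally check leaf-7 EBX bits and XGETBV(0) EAX
	// (Darwin also consults the commpage for AVX-512).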
   341	CALL	runtime·check(SB)
   342
   343	MOVL	24(SP), AX		// copy argc
   344	MOVL	AX, 0(SP)
   345	MOVQ	32(SP), AX		// copy argv
   346	MOVQ	AX, 8(SP)
   347	CALL	runtime·args(SB)
   348	CALL	runtime·osinit(SB)
   349	CALL	runtime·schedinit(SB)
   350
   351	// create a new goroutine to start program
   352	MOVQ	$runtime·mainPC(SB), AX		// entry
   353	PUSHQ	AX
   354	CALL	runtime·newproc(SB)
   355	POPQ	AX
   356
   357	// start this M
   358	CALL	runtime·mstart(SB)
   359
   360	CALL	runtime·abort(SB)	// mstart should never return
   361	RET
   362
   363bad_cpu: // show that the program requires a certain microarchitecture level.
   364	MOVQ	$2, 0(SP)
   365	MOVQ	$bad_cpu_msg<>(SB), AX
   366	MOVQ	AX, 8(SP)
   367	MOVQ	$84, 16(SP)
   368	CALL	runtime·write(SB)
   369	MOVQ	$1, 0(SP)
   370	CALL	runtime·exit(SB)
   371	CALL	runtime·abort(SB)
   372	RET
   373
   374	// Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
   375	// intended to be called by debuggers.
   376	MOVQ	$runtime·debugPinnerV1<ABIInternal>(SB), AX
   377	MOVQ	$runtime·debugCallV2<ABIInternal>(SB), AX
   378	RET
   379
   380// mainPC is a function value for runtime.main, to be passed to newproc.
   381// The reference to runtime.main is made via ABIInternal, since the
   382// actual function (not the ABI0 wrapper) is needed by newproc.
   383DATA	runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
   384GLOBL	runtime·mainPC(SB),RODATA,$8
   385
   386TEXT runtime·breakpoint(SB),NOSPLIT,$0-0
   387	BYTE	$0xcc
   388	RET
   389
   390TEXT runtime·asminit(SB),NOSPLIT,$0-0
   391	// No per-thread init.
   392	RET
   393
   394TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
   395	CALL	runtime·mstart0(SB)
   396	RET // not reached
   397
   398/*
   399 *  go-routine
   400 */
   401
   402// func gogo(buf *gobuf)
   403// restore state from Gobuf; longjmp
   404TEXT runtime·gogo(SB), NOSPLIT, $0-8
   405	MOVQ	buf+0(FP), BX		// gobuf
   406	MOVQ	gobuf_g(BX), DX
   407	MOVQ	0(DX), CX		// make sure g != nil
   408	JMP	gogo<>(SB)
   409
   410TEXT gogo<>(SB), NOSPLIT, $0
   411	get_tls(CX)
   412	MOVQ	DX, g(CX)
   413	MOVQ	DX, R14		// set the g register
   414	MOVQ	gobuf_sp(BX), SP	// restore SP
   415	MOVQ	gobuf_ret(BX), AX
   416	MOVQ	gobuf_ctxt(BX), DX
   417	MOVQ	gobuf_bp(BX), BP
   418	MOVQ	$0, gobuf_sp(BX)	// clear to help garbage collector
   419	MOVQ	$0, gobuf_ret(BX)
   420	MOVQ	$0, gobuf_ctxt(BX)
   421	MOVQ	$0, gobuf_bp(BX)
   422	MOVQ	gobuf_pc(BX), BX
   423	JMP	BX
   424
   425// func mcall(fn func(*g))
   426// Switch to m->g0's stack, call fn(g).
   427// Fn must never return. It should gogo(&g->sched)
   428// to keep running g.
   429TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT, $0-8
   430	MOVQ	AX, DX	// DX = fn
   431
   432	// Save state in g->sched. The caller's SP and PC are restored by gogo to
   433	// resume execution in the caller's frame (implicit return). The caller's BP
   434	// is also restored to support frame pointer unwinding.
   435	MOVQ	SP, BX	// hide (SP) reads from vet
   436	MOVQ	8(BX), BX	// caller's PC
   437	MOVQ	BX, (g_sched+gobuf_pc)(R14)
   438	LEAQ	fn+0(FP), BX	// caller's SP
   439	MOVQ	BX, (g_sched+gobuf_sp)(R14)
   440	// Get the caller's frame pointer by dereferencing BP. Storing BP as it is
   441	// can cause a frame pointer cycle, see CL 476235.
   442	MOVQ	(BP), BX // caller's BP
   443	MOVQ	BX, (g_sched+gobuf_bp)(R14)
   444
   445	// switch to m->g0 & its stack, call fn
   446	MOVQ	g_m(R14), BX
   447	MOVQ	m_g0(BX), SI	// SI = g.m.g0
   448	CMPQ	SI, R14	// if g == m->g0 call badmcall
   449	JNE	goodm
   450	JMP	runtime·badmcall(SB)
   451goodm:
   452	MOVQ	R14, AX		// AX (and arg 0) = g
   453	MOVQ	SI, R14		// g = g.m.g0
   454	get_tls(CX)		// Set G in TLS
   455	MOVQ	R14, g(CX)
   456	MOVQ	(g_sched+gobuf_sp)(R14), SP	// sp = g0.sched.sp
   457	PUSHQ	AX	// open up space for fn's arg spill slot
   458	MOVQ	0(DX), R12
   459	CALL	R12		// fn(g)
   460	// The Windows native stack unwinder incorrectly classifies the next instruction
   461	// as part of the function epilogue, producing a wrong call stack.
   462	// Add a NOP to work around this issue. See go.dev/issue/67007.
   463	BYTE	$0x90
   464	POPQ	AX
   465	JMP	runtime·badmcall2(SB)
   466	RET
   467
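// Roughly, in Go terms: mcall saves the caller's PC/SP/BP into g.sched,
// switches g and SP over to g.m.g0, and then calls fn(g) on the g0 stack.
// For example, gopark in proc.go parks the current goroutine via
// mcall(park_m); park_m never returns to the caller, and the scheduler later
// resumes the goroutine with gogo.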
   468// systemstack_switch is a dummy routine that systemstack leaves at the bottom
   469// of the G stack. We need to distinguish the routine that
   470// lives at the bottom of the G stack from the one that lives
   471// at the top of the system stack because the one at the top of
   472// the system stack terminates the stack walk (see topofstack()).
   473// The frame layout needs to match systemstack
   474// so that it can pretend to be systemstack_switch.
   475TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
   476	UNDEF
   477	// Make sure this function is not leaf,
   478	// so the frame is saved.
   479	CALL	runtime·abort(SB)
   480	RET
   481
   482// func systemstack(fn func())
   483TEXT runtime·systemstack(SB), NOSPLIT, $0-8
   484	MOVQ	fn+0(FP), DI	// DI = fn
   485	get_tls(CX)
   486	MOVQ	g(CX), AX	// AX = g
   487	MOVQ	g_m(AX), BX	// BX = m
   488
   489	CMPQ	AX, m_gsignal(BX)
   490	JEQ	noswitch
   491
   492	MOVQ	m_g0(BX), DX	// DX = g0
   493	CMPQ	AX, DX
   494	JEQ	noswitch
   495
   496	CMPQ	AX, m_curg(BX)
   497	JNE	bad
   498
   499	// Switch stacks.
   500	// The original frame pointer is stored in BP,
   501	// which is useful for stack unwinding.
   502	// Save our state in g->sched. Pretend to
   503	// be systemstack_switch if the G stack is scanned.
   504	CALL	gosave_systemstack_switch<>(SB)
   505
   506	// switch to g0
   507	MOVQ	DX, g(CX)
   508	MOVQ	DX, R14 // set the g register
   509	MOVQ	(g_sched+gobuf_sp)(DX), SP
   510
   511	// call target function
   512	MOVQ	DI, DX
   513	MOVQ	0(DI), DI
   514	CALL	DI
   515
   516	// switch back to g
   517	get_tls(CX)
   518	MOVQ	g(CX), AX
   519	MOVQ	g_m(AX), BX
   520	MOVQ	m_curg(BX), AX
   521	MOVQ	AX, g(CX)
   522	MOVQ	(g_sched+gobuf_sp)(AX), SP
   523	MOVQ	(g_sched+gobuf_bp)(AX), BP
   524	MOVQ	$0, (g_sched+gobuf_sp)(AX)
   525	MOVQ	$0, (g_sched+gobuf_bp)(AX)
   526	RET
   527
   528noswitch:
   529	// already on m stack; tail call the function
   530	// Using a tail call here cleans up tracebacks since we won't stop
   531	// at an intermediate systemstack.
   532	MOVQ	DI, DX
   533	MOVQ	0(DI), DI
   534	// The function epilogue is not called on a tail call.
   535	// Pop BP from the stack to simulate it.
   536	POPQ	BP
   537	JMP	DI
   538
   539bad:
   540	// Bad: g is not gsignal, not g0, not curg. What is it?
   541	MOVQ	$runtime·badsystemstack(SB), AX
   542	CALL	AX
   543	INT	$3
   544
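// Usage note: elsewhere in the runtime this is reached through
// systemstack(func() { ... }), which runs the closure on the m->g0 (system)
// stack and switches back when it returns; callers already on g0 or gsignal
// take the noswitch tail-call path above instead.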
   545// func switchToCrashStack0(fn func())
   546TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
   547	MOVQ	g_m(R14), BX // curm
   548
   549	// set g to gcrash
   550	LEAQ	runtime·gcrash(SB), R14 // g = &gcrash
   551	MOVQ	BX, g_m(R14)            // g.m = curm
   552	MOVQ	R14, m_g0(BX)           // curm.g0 = g
   553	get_tls(CX)
   554	MOVQ	R14, g(CX)
   555
   556	// switch to crashstack
   557	MOVQ	(g_stack+stack_hi)(R14), BX
   558	SUBQ	$(4*8), BX
   559	MOVQ	BX, SP
   560
   561	// call target function
   562	MOVQ	AX, DX
   563	MOVQ	0(AX), AX
   564	CALL	AX
   565
   566	// should never return
   567	CALL	runtime·abort(SB)
   568	UNDEF
   569
   570/*
   571 * support for morestack
   572 */
   573
   574// Called during function prolog when more stack is needed.
   575//
   576// The traceback routines see morestack on a g0 as being
   577// the top of a stack (for example, morestack calling newstack
   578// calling the scheduler calling newm calling gc), so we must
   579// record an argument size. For that purpose, it has no arguments.
   580TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
   581	// Cannot grow scheduler stack (m->g0).
   582	get_tls(CX)
   583	MOVQ	g(CX), DI     // DI = g
   584	MOVQ	g_m(DI), BX   // BX = m
   585
   586	// Set g->sched to context in f.
   587	MOVQ	0(SP), AX // f's PC
   588	MOVQ	AX, (g_sched+gobuf_pc)(DI)
   589	LEAQ	8(SP), AX // f's SP
   590	MOVQ	AX, (g_sched+gobuf_sp)(DI)
   591	MOVQ	BP, (g_sched+gobuf_bp)(DI)
   592	MOVQ	DX, (g_sched+gobuf_ctxt)(DI)
   593
   594	MOVQ	m_g0(BX), SI  // SI = m.g0
   595	CMPQ	DI, SI
   596	JNE	3(PC)
   597	CALL	runtime·badmorestackg0(SB)
   598	CALL	runtime·abort(SB)
   599
   600	// Cannot grow signal stack (m->gsignal).
   601	MOVQ	m_gsignal(BX), SI
   602	CMPQ	DI, SI
   603	JNE	3(PC)
   604	CALL	runtime·badmorestackgsignal(SB)
   605	CALL	runtime·abort(SB)
   606
   607	// Called from f.
   608	// Set m->morebuf to f's caller.
   609	NOP	SP	// tell vet SP changed - stop checking offsets
   610	MOVQ	8(SP), AX	// f's caller's PC
   611	MOVQ	AX, (m_morebuf+gobuf_pc)(BX)
   612	LEAQ	16(SP), AX	// f's caller's SP
   613	MOVQ	AX, (m_morebuf+gobuf_sp)(BX)
   614	MOVQ	DI, (m_morebuf+gobuf_g)(BX)
   615
   616	// Call newstack on m->g0's stack.
   617	MOVQ	m_g0(BX), BX
   618	MOVQ	BX, g(CX)
   619	MOVQ	(g_sched+gobuf_sp)(BX), SP
   620	MOVQ	(g_sched+gobuf_bp)(BX), BP
   621	CALL	runtime·newstack(SB)
   622	CALL	runtime·abort(SB)	// crash if newstack returns
   623	RET
   624
   625// morestack but not preserving ctxt.
   626TEXT runtime·morestack_noctxt(SB),NOSPLIT,$0
   627	MOVL	$0, DX
   628	JMP	runtime·morestack(SB)
   629
   630// spillArgs stores return values from registers to a *internal/abi.RegArgs in R12.
   631TEXT ·spillArgs(SB),NOSPLIT,$0-0
   632	MOVQ AX, 0(R12)
   633	MOVQ BX, 8(R12)
   634	MOVQ CX, 16(R12)
   635	MOVQ DI, 24(R12)
   636	MOVQ SI, 32(R12)
   637	MOVQ R8, 40(R12)
   638	MOVQ R9, 48(R12)
   639	MOVQ R10, 56(R12)
   640	MOVQ R11, 64(R12)
   641	MOVQ X0, 72(R12)
   642	MOVQ X1, 80(R12)
   643	MOVQ X2, 88(R12)
   644	MOVQ X3, 96(R12)
   645	MOVQ X4, 104(R12)
   646	MOVQ X5, 112(R12)
   647	MOVQ X6, 120(R12)
   648	MOVQ X7, 128(R12)
   649	MOVQ X8, 136(R12)
   650	MOVQ X9, 144(R12)
   651	MOVQ X10, 152(R12)
   652	MOVQ X11, 160(R12)
   653	MOVQ X12, 168(R12)
   654	MOVQ X13, 176(R12)
   655	MOVQ X14, 184(R12)
   656	RET
   657
   658// unspillArgs loads args into registers from a *internal/abi.RegArgs in R12.
   659TEXT ·unspillArgs(SB),NOSPLIT,$0-0
   660	MOVQ 0(R12), AX
   661	MOVQ 8(R12), BX
   662	MOVQ 16(R12), CX
   663	MOVQ 24(R12), DI
   664	MOVQ 32(R12), SI
   665	MOVQ 40(R12), R8
   666	MOVQ 48(R12), R9
   667	MOVQ 56(R12), R10
   668	MOVQ 64(R12), R11
   669	MOVQ 72(R12), X0
   670	MOVQ 80(R12), X1
   671	MOVQ 88(R12), X2
   672	MOVQ 96(R12), X3
   673	MOVQ 104(R12), X4
   674	MOVQ 112(R12), X5
   675	MOVQ 120(R12), X6
   676	MOVQ 128(R12), X7
   677	MOVQ 136(R12), X8
   678	MOVQ 144(R12), X9
   679	MOVQ 152(R12), X10
   680	MOVQ 160(R12), X11
   681	MOVQ 168(R12), X12
   682	MOVQ 176(R12), X13
   683	MOVQ 184(R12), X14
   684	RET
   685
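// Layout note, derived from the offsets above: the nine integer registers
// AX, BX, CX, DI, SI, R8-R11 occupy bytes 0-71 of the RegArgs block and the
// fifteen vector registers X0-X14 occupy bytes 72-191; only the low 64 bits
// of each X register are spilled and reloaded here.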
   686// reflectcall: call a function with the given argument list
   687// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
   688// we don't have variable-sized frames, so we use a small number
   689// of constant-sized-frame functions to encode a few bits of size in the pc.
   690// Caution: ugly multiline assembly macros in your future!
   691
   692#define DISPATCH(NAME,MAXSIZE)		\
   693	CMPQ	CX, $MAXSIZE;		\
   694	JA	3(PC);			\
   695	MOVQ	$NAME(SB), AX;		\
   696	JMP	AX
   697// Note: can't just "JMP NAME(SB)" - bad inlining results.
   698
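// For example, a reflectcall with frameSize = 100 falls through the call16,
// call32, and call64 comparisons (CX > MAXSIZE) and dispatches to
// runtime·call128, the smallest fixed-size frame that fits.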
   699TEXT ·reflectcall(SB), NOSPLIT, $0-48
   700	MOVLQZX frameSize+32(FP), CX
   701	DISPATCH(runtime·call16, 16)
   702	DISPATCH(runtime·call32, 32)
   703	DISPATCH(runtime·call64, 64)
   704	DISPATCH(runtime·call128, 128)
   705	DISPATCH(runtime·call256, 256)
   706	DISPATCH(runtime·call512, 512)
   707	DISPATCH(runtime·call1024, 1024)
   708	DISPATCH(runtime·call2048, 2048)
   709	DISPATCH(runtime·call4096, 4096)
   710	DISPATCH(runtime·call8192, 8192)
   711	DISPATCH(runtime·call16384, 16384)
   712	DISPATCH(runtime·call32768, 32768)
   713	DISPATCH(runtime·call65536, 65536)
   714	DISPATCH(runtime·call131072, 131072)
   715	DISPATCH(runtime·call262144, 262144)
   716	DISPATCH(runtime·call524288, 524288)
   717	DISPATCH(runtime·call1048576, 1048576)
   718	DISPATCH(runtime·call2097152, 2097152)
   719	DISPATCH(runtime·call4194304, 4194304)
   720	DISPATCH(runtime·call8388608, 8388608)
   721	DISPATCH(runtime·call16777216, 16777216)
   722	DISPATCH(runtime·call33554432, 33554432)
   723	DISPATCH(runtime·call67108864, 67108864)
   724	DISPATCH(runtime·call134217728, 134217728)
   725	DISPATCH(runtime·call268435456, 268435456)
   726	DISPATCH(runtime·call536870912, 536870912)
   727	DISPATCH(runtime·call1073741824, 1073741824)
   728	MOVQ	$runtime·badreflectcall(SB), AX
   729	JMP	AX
   730
   731#define CALLFN(NAME,MAXSIZE)			\
   732TEXT NAME(SB), WRAPPER, $MAXSIZE-48;		\
   733	NO_LOCAL_POINTERS;			\
   734	/* copy arguments to stack */		\
   735	MOVQ	stackArgs+16(FP), SI;		\
   736	MOVLQZX stackArgsSize+24(FP), CX;		\
   737	MOVQ	SP, DI;				\
   738	REP;MOVSB;				\
   739	/* set up argument registers */		\
   740	MOVQ    regArgs+40(FP), R12;		\
   741	CALL    ·unspillArgs(SB);		\
   742	/* call function */			\
   743	MOVQ	f+8(FP), DX;			\
   744	PCDATA  $PCDATA_StackMapIndex, $0;	\
   745	MOVQ	(DX), R12;			\
   746	CALL	R12;				\
   747	/* copy register return values back */		\
   748	MOVQ    regArgs+40(FP), R12;		\
   749	CALL    ·spillArgs(SB);		\
   750	MOVLQZX	stackArgsSize+24(FP), CX;		\
   751	MOVLQZX	stackRetOffset+28(FP), BX;		\
   752	MOVQ	stackArgs+16(FP), DI;		\
   753	MOVQ	stackArgsType+0(FP), DX;		\
   754	MOVQ	SP, SI;				\
   755	ADDQ	BX, DI;				\
   756	ADDQ	BX, SI;				\
   757	SUBQ	BX, CX;				\
   758	CALL	callRet<>(SB);			\
   759	RET
   760
   761// callRet copies return values back at the end of call*. This is a
   762// separate function so it can allocate stack space for the arguments
   763// to reflectcallmove. It does not follow the Go ABI; it expects its
   764// arguments in registers.
   765TEXT callRet<>(SB), NOSPLIT, $40-0
   766	NO_LOCAL_POINTERS
   767	MOVQ	DX, 0(SP)
   768	MOVQ	DI, 8(SP)
   769	MOVQ	SI, 16(SP)
   770	MOVQ	CX, 24(SP)
   771	MOVQ	R12, 32(SP)
   772	CALL	runtime·reflectcallmove(SB)
   773	RET
   774
   775CALLFN(·call16, 16)
   776CALLFN(·call32, 32)
   777CALLFN(·call64, 64)
   778CALLFN(·call128, 128)
   779CALLFN(·call256, 256)
   780CALLFN(·call512, 512)
   781CALLFN(·call1024, 1024)
   782CALLFN(·call2048, 2048)
   783CALLFN(·call4096, 4096)
   784CALLFN(·call8192, 8192)
   785CALLFN(·call16384, 16384)
   786CALLFN(·call32768, 32768)
   787CALLFN(·call65536, 65536)
   788CALLFN(·call131072, 131072)
   789CALLFN(·call262144, 262144)
   790CALLFN(·call524288, 524288)
   791CALLFN(·call1048576, 1048576)
   792CALLFN(·call2097152, 2097152)
   793CALLFN(·call4194304, 4194304)
   794CALLFN(·call8388608, 8388608)
   795CALLFN(·call16777216, 16777216)
   796CALLFN(·call33554432, 33554432)
   797CALLFN(·call67108864, 67108864)
   798CALLFN(·call134217728, 134217728)
   799CALLFN(·call268435456, 268435456)
   800CALLFN(·call536870912, 536870912)
   801CALLFN(·call1073741824, 1073741824)
   802
   803TEXT runtime·procyield(SB),NOSPLIT,$0-0
   804	MOVL	cycles+0(FP), AX
   805again:
   806	PAUSE
   807	SUBL	$1, AX
   808	JNZ	again
   809	RET
   810
   811
   812TEXT ·publicationBarrier<ABIInternal>(SB),NOSPLIT,$0-0
   813	// Stores are already ordered on x86, so this is just a
   814	// compile barrier.
   815	RET
   816
   817// Save state of caller into g->sched,
   818// but using fake PC from systemstack_switch.
   819// Must only be called from functions with frame pointer
   820// and without locals ($0) or else unwinding from
   821// systemstack_switch is incorrect.
   822// Smashes R9.
   823TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
   824	// Take systemstack_switch PC and add 8 bytes to skip
   825	// the prologue. The final location does not matter
   826	// as long as we are between the prologue and the epilogue.
   827	MOVQ	$runtime·systemstack_switch+8(SB), R9
   828	MOVQ	R9, (g_sched+gobuf_pc)(R14)
   829	LEAQ	8(SP), R9
   830	MOVQ	R9, (g_sched+gobuf_sp)(R14)
   831	MOVQ	$0, (g_sched+gobuf_ret)(R14)
   832	MOVQ	BP, (g_sched+gobuf_bp)(R14)
   833	// Assert ctxt is zero. See func save.
   834	MOVQ	(g_sched+gobuf_ctxt)(R14), R9
   835	TESTQ	R9, R9
   836	JZ	2(PC)
   837	CALL	runtime·abort(SB)
   838	RET
   839
   840// func asmcgocall_no_g(fn, arg unsafe.Pointer)
   841// Call fn(arg) aligned appropriately for the gcc ABI.
   842// Called on a system stack, and there may be no g yet (during needm).
   843TEXT ·asmcgocall_no_g(SB),NOSPLIT,$32-16
   844	MOVQ	fn+0(FP), AX
   845	MOVQ	arg+8(FP), BX
   846	MOVQ	SP, DX
   847	ANDQ	$~15, SP	// alignment
   848	MOVQ	DX, 8(SP)
   849	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   850	MOVQ	BX, CX		// CX = first argument in Win64
   851	CALL	AX
   852	MOVQ	8(SP), DX
   853	MOVQ	DX, SP
   854	RET
   855
   856// asmcgocall_landingpad calls AX with BX as argument.
   857// Must be called on the system stack.
   858TEXT ·asmcgocall_landingpad(SB),NOSPLIT,$0-0
   859#ifdef GOOS_windows
   860	// Make sure we have enough room for 4 stack-backed fast-call
   861	// registers as per Windows amd64 calling convention.
   862	ADJSP	$32
   863	// On Windows, asmcgocall_landingpad acts as landing pad for exceptions
   864	// thrown in the cgo call. Exceptions that reach this function will be
   865	// handled by runtime.sehtramp thanks to the SEH metadata added
   866	// by the compiler.
   867	// Note that runtime.sehtramp can't be attached directly to asmcgocall
   868	// because its initial stack pointer can be outside the system stack bounds,
   869	// and Windows stops the stack unwinding without calling the exception handler
   870	// when it reaches that point.
   871	MOVQ	BX, CX		// CX = first argument in Win64
   872	CALL	AX
   873	// The exception handler is not called if the next instruction is part of
   874	// the epilogue, which includes the RET instruction, so we need to add a NOP here.
   875	BYTE	$0x90
   876	ADJSP	$-32
   877	RET
   878#endif
   879	// Tail call AX on non-Windows, as the extra stack frame is not needed.
   880	MOVQ	BX, DI		// DI = first argument in AMD64 ABI
   881	JMP	AX
   882
   883// func asmcgocall(fn, arg unsafe.Pointer) int32
   884// Call fn(arg) on the scheduler stack,
   885// aligned appropriately for the gcc ABI.
   886// See cgocall.go for more details.
   887TEXT ·asmcgocall(SB),NOSPLIT,$0-20
   888	MOVQ	fn+0(FP), AX
   889	MOVQ	arg+8(FP), BX
   890
   891	MOVQ	SP, DX
   892
   893	// Figure out if we need to switch to m->g0 stack.
   894	// We get called to create new OS threads too, and those
   895	// come in on the m->g0 stack already. Or we might already
   896	// be on the m->gsignal stack.
   897	get_tls(CX)
   898	MOVQ	g(CX), DI
   899	CMPQ	DI, $0
   900	JEQ	nosave
   901	MOVQ	g_m(DI), R8
   902	MOVQ	m_gsignal(R8), SI
   903	CMPQ	DI, SI
   904	JEQ	nosave
   905	MOVQ	m_g0(R8), SI
   906	CMPQ	DI, SI
   907	JEQ	nosave
   908
   909	// Switch to system stack.
   910	// The original frame pointer is stored in BP,
   911	// which is useful for stack unwinding.
   912	CALL	gosave_systemstack_switch<>(SB)
   913	MOVQ	SI, g(CX)
   914	MOVQ	(g_sched+gobuf_sp)(SI), SP
   915
   916	// Now on a scheduling stack (a pthread-created stack).
   917	SUBQ	$16, SP
   918	ANDQ	$~15, SP	// alignment for gcc ABI
   919	MOVQ	DI, 8(SP)	// save g
   920	MOVQ	(g_stack+stack_hi)(DI), DI
   921	SUBQ	DX, DI
   922	MOVQ	DI, 0(SP)	// save depth in stack (can't just save SP, as stack might be copied during a callback)
   923	CALL	runtime·asmcgocall_landingpad(SB)
   924
   925	// Restore registers, g, stack pointer.
   926	get_tls(CX)
   927	MOVQ	8(SP), DI
   928	MOVQ	(g_stack+stack_hi)(DI), SI
   929	SUBQ	0(SP), SI
   930	MOVQ	DI, g(CX)
   931	MOVQ	SI, SP
   932
   933	MOVL	AX, ret+16(FP)
   934	RET
   935
   936nosave:
   937	// Running on a system stack, perhaps even without a g.
   938	// Having no g can happen during thread creation or thread teardown
   939	// (see needm/dropm on Solaris, for example).
   940	// This code is like the above sequence but without saving/restoring g
   941	// and without worrying about the stack moving out from under us
   942	// (because we're on a system stack, not a goroutine stack).
   943	// The above code could be used directly if already on a system stack,
   944	// but then the only path through this code would be a rare case on Solaris.
   945	// Using this code for all "already on system stack" calls exercises it more,
   946	// which should help keep it correct.
   947	SUBQ	$16, SP
   948	ANDQ	$~15, SP
   949	MOVQ	$0, 8(SP)		// where above code stores g, in case someone looks during debugging
   950	MOVQ	DX, 0(SP)	// save original stack pointer
   951	CALL	runtime·asmcgocall_landingpad(SB)
   952	MOVQ	0(SP), SI	// restore original stack pointer
   953	MOVQ	SI, SP
   954	MOVL	AX, ret+16(FP)
   955	RET
   956
   957#ifdef GOOS_windows
   958// Dummy TLS that's used on Windows so that we don't crash trying
   959// to restore the G register in needm. needm and its callees are
   960// very careful never to actually use the G, the TLS just can't be
   961// unset since we're in Go code.
   962GLOBL zeroTLS<>(SB),RODATA,$const_tlsSize
   963#endif
   964
   965// func cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
   966// See cgocall.go for more details.
   967TEXT ·cgocallback(SB),NOSPLIT,$24-24
   968	NO_LOCAL_POINTERS
   969
    970	// When fn is nil, skip cgocallbackg and just dropm; frame is the saved g.
    971	// This is used to drop the m while the thread is exiting.
   972	MOVQ	fn+0(FP), AX
   973	CMPQ	AX, $0
   974	JNE	loadg
   975	// Restore the g from frame.
   976	get_tls(CX)
   977	MOVQ	frame+8(FP), BX
   978	MOVQ	BX, g(CX)
   979	JMP	dropm
   980
   981loadg:
    982	// g is nil if Go did not create the current thread, or, on pthread
    983	// platforms, if this thread has never called into Go.
   984	// Call needm to obtain one m for temporary use.
   985	// In this case, we're running on the thread stack, so there's
   986	// lots of space, but the linker doesn't know. Hide the call from
   987	// the linker analysis by using an indirect call through AX.
   988	get_tls(CX)
   989#ifdef GOOS_windows
   990	MOVL	$0, BX
   991	CMPQ	CX, $0
   992	JEQ	2(PC)
   993#endif
   994	MOVQ	g(CX), BX
   995	CMPQ	BX, $0
   996	JEQ	needm
   997	MOVQ	g_m(BX), BX
   998	MOVQ	BX, savedm-8(SP)	// saved copy of oldm
   999	JMP	havem
  1000needm:
  1001#ifdef GOOS_windows
  1002	// Set up a dummy TLS value. needm is careful not to use it,
  1003	// but it needs to be there to prevent autogenerated code from
  1004	// crashing when it loads from it.
  1005	// We don't need to clear it or anything later because needm
  1006	// will set up TLS properly.
  1007	MOVQ	$zeroTLS<>(SB), DI
  1008	CALL	runtime·settls(SB)
  1009#endif
  1010	// On some platforms (Windows) we cannot call needm through
  1011	// an ABI wrapper because there's no TLS set up, and the ABI
  1012	// wrapper will try to restore the G register (R14) from TLS.
  1013	// Clear X15 because Go expects it and we're not calling
  1014	// through a wrapper, but otherwise avoid setting the G
  1015	// register in the wrapper and call needm directly. It
  1016	// takes no arguments and doesn't return any values so
  1017	// there's no need to handle that. Clear R14 so that there's
  1018	// a bad value in there, in case needm tries to use it.
  1019	XORPS	X15, X15
  1020	XORQ    R14, R14
  1021	MOVQ	$runtime·needAndBindM<ABIInternal>(SB), AX
  1022	CALL	AX
  1023	MOVQ	$0, savedm-8(SP)
  1024	get_tls(CX)
  1025	MOVQ	g(CX), BX
  1026	MOVQ	g_m(BX), BX
  1027
  1028	// Set m->sched.sp = SP, so that if a panic happens
  1029	// during the function we are about to execute, it will
  1030	// have a valid SP to run on the g0 stack.
  1031	// The next few lines (after the havem label)
  1032	// will save this SP onto the stack and then write
  1033	// the same SP back to m->sched.sp. That seems redundant,
  1034	// but if an unrecovered panic happens, unwindm will
  1035	// restore the g->sched.sp from the stack location
  1036	// and then systemstack will try to use it. If we don't set it here,
  1037	// that restored SP will be uninitialized (typically 0) and
  1038	// will not be usable.
  1039	MOVQ	m_g0(BX), SI
  1040	MOVQ	SP, (g_sched+gobuf_sp)(SI)
  1041
  1042havem:
  1043	// Now there's a valid m, and we're running on its m->g0.
  1044	// Save current m->g0->sched.sp on stack and then set it to SP.
  1045	// Save current sp in m->g0->sched.sp in preparation for
  1046	// switch back to m->curg stack.
  1047	// NOTE: unwindm knows that the saved g->sched.sp is at 0(SP).
  1048	MOVQ	m_g0(BX), SI
  1049	MOVQ	(g_sched+gobuf_sp)(SI), AX
  1050	MOVQ	AX, 0(SP)
  1051	MOVQ	SP, (g_sched+gobuf_sp)(SI)
  1052
  1053	// Switch to m->curg stack and call runtime.cgocallbackg.
  1054	// Because we are taking over the execution of m->curg
  1055	// but *not* resuming what had been running, we need to
  1056	// save that information (m->curg->sched) so we can restore it.
  1057	// We can restore m->curg->sched.sp easily, because calling
  1058	// runtime.cgocallbackg leaves SP unchanged upon return.
  1059	// To save m->curg->sched.pc, we push it onto the curg stack and
  1060	// open a frame the same size as cgocallback's g0 frame.
  1061	// Once we switch to the curg stack, the pushed PC will appear
  1062	// to be the return PC of cgocallback, so that the traceback
  1063	// will seamlessly trace back into the earlier calls.
  1064	MOVQ	m_curg(BX), SI
  1065	MOVQ	SI, g(CX)
  1066	MOVQ	(g_sched+gobuf_sp)(SI), DI  // prepare stack as DI
  1067	MOVQ	(g_sched+gobuf_pc)(SI), BX
  1068	MOVQ	BX, -8(DI)  // "push" return PC on the g stack
  1069	// Gather our arguments into registers.
  1070	MOVQ	fn+0(FP), BX
  1071	MOVQ	frame+8(FP), CX
  1072	MOVQ	ctxt+16(FP), DX
  1073	// Compute the size of the frame, including return PC and, if
  1074	// GOEXPERIMENT=framepointer, the saved base pointer
  1075	LEAQ	fn+0(FP), AX
  1076	SUBQ	SP, AX   // AX is our actual frame size
  1077	SUBQ	AX, DI   // Allocate the same frame size on the g stack
  1078	MOVQ	DI, SP
  1079
  1080	MOVQ	BX, 0(SP)
  1081	MOVQ	CX, 8(SP)
  1082	MOVQ	DX, 16(SP)
  1083	MOVQ	$runtime·cgocallbackg(SB), AX
  1084	CALL	AX	// indirect call to bypass nosplit check. We're on a different stack now.
  1085
  1086	// Compute the size of the frame again. FP and SP have
  1087	// completely different values here than they did above,
  1088	// but only their difference matters.
  1089	LEAQ	fn+0(FP), AX
  1090	SUBQ	SP, AX
  1091
  1092	// Restore g->sched (== m->curg->sched) from saved values.
  1093	get_tls(CX)
  1094	MOVQ	g(CX), SI
  1095	MOVQ	SP, DI
  1096	ADDQ	AX, DI
  1097	MOVQ	-8(DI), BX
  1098	MOVQ	BX, (g_sched+gobuf_pc)(SI)
  1099	MOVQ	DI, (g_sched+gobuf_sp)(SI)
  1100
  1101	// Switch back to m->g0's stack and restore m->g0->sched.sp.
  1102	// (Unlike m->curg, the g0 goroutine never uses sched.pc,
  1103	// so we do not have to restore it.)
  1104	MOVQ	g(CX), BX
  1105	MOVQ	g_m(BX), BX
  1106	MOVQ	m_g0(BX), SI
  1107	MOVQ	SI, g(CX)
  1108	MOVQ	(g_sched+gobuf_sp)(SI), SP
  1109	MOVQ	0(SP), AX
  1110	MOVQ	AX, (g_sched+gobuf_sp)(SI)
  1111
  1112	// If the m on entry was nil, we called needm above to borrow an m,
  1113	// 1. for the duration of the call on non-pthread platforms,
   1114	// 2. or for as long as the C thread stays alive on pthread platforms.
  1115	// If the m on entry wasn't nil,
  1116	// 1. the thread might be a Go thread,
  1117	// 2. or it wasn't the first call from a C thread on pthread platforms,
  1118	//    since then we skip dropm to reuse the m in the first call.
  1119	MOVQ	savedm-8(SP), BX
  1120	CMPQ	BX, $0
  1121	JNE	done
  1122
  1123	// Skip dropm to reuse it in the next call, when a pthread key has been created.
  1124	MOVQ	_cgo_pthread_key_created(SB), AX
   1125	// If _cgo_pthread_key_created is a nil pointer, cgo is disabled and we need to dropm.
  1126	CMPQ	AX, $0
  1127	JEQ	dropm
  1128	CMPQ	(AX), $0
  1129	JNE	done
  1130
  1131dropm:
  1132	MOVQ	$runtime·dropm(SB), AX
  1133	CALL	AX
  1134#ifdef GOOS_windows
  1135	// We need to clear the TLS pointer in case the next
  1136	// thread that comes into Go tries to reuse that space
  1137	// but uses the same M.
  1138	XORQ	DI, DI
  1139	CALL	runtime·settls(SB)
  1140#endif
  1141done:
  1142
  1143	// Done!
  1144	RET
  1145
  1146// func setg(gg *g)
  1147// set g. for use by needm.
  1148TEXT runtime·setg(SB), NOSPLIT, $0-8
  1149	MOVQ	gg+0(FP), BX
  1150	get_tls(CX)
  1151	MOVQ	BX, g(CX)
  1152	RET
  1153
  1154// void setg_gcc(G*); set g called from gcc.
  1155TEXT setg_gcc<>(SB),NOSPLIT,$0
  1156	get_tls(AX)
  1157	MOVQ	DI, g(AX)
  1158	MOVQ	DI, R14 // set the g register
  1159	RET
  1160
  1161TEXT runtime·abort(SB),NOSPLIT,$0-0
  1162	INT	$3
  1163loop:
  1164	JMP	loop
  1165
  1166// check that SP is in range [g->stack.lo, g->stack.hi)
  1167TEXT runtime·stackcheck(SB), NOSPLIT|NOFRAME, $0-0
  1168	get_tls(CX)
  1169	MOVQ	g(CX), AX
  1170	CMPQ	(g_stack+stack_hi)(AX), SP
  1171	JHI	2(PC)
  1172	CALL	runtime·abort(SB)
  1173	CMPQ	SP, (g_stack+stack_lo)(AX)
  1174	JHI	2(PC)
  1175	CALL	runtime·abort(SB)
  1176	RET
  1177
  1178// func cputicks() int64
  1179TEXT runtime·cputicks(SB),NOSPLIT,$0-0
  1180	CMPB	internal∕cpu·X86+const_offsetX86HasRDTSCP(SB), $1
  1181	JNE	fences
  1182	// Instruction stream serializing RDTSCP is supported.
  1183	// RDTSCP is supported by Intel Nehalem (2008) and
  1184	// AMD K8 Rev. F (2006) and newer.
  1185	RDTSCP
  1186done:
  1187	SHLQ	$32, DX
  1188	ADDQ	DX, AX
  1189	MOVQ	AX, ret+0(FP)
  1190	RET
  1191fences:
  1192	// MFENCE is instruction stream serializing and flushes the
  1193	// store buffers on AMD. The serialization semantics of LFENCE on AMD
  1194	// are dependent on MSR C001_1029 and CPU generation.
  1195	// LFENCE on Intel does wait for all previous instructions to have executed.
  1196	// Intel recommends MFENCE;LFENCE in its manuals before RDTSC to have all
   1197// previous instructions executed and all previous loads and stores to be globally visible.
  1198	// Using MFENCE;LFENCE here aligns the serializing properties without
  1199	// runtime detection of CPU manufacturer.
  1200	MFENCE
  1201	LFENCE
  1202	RDTSC
  1203	JMP done
  1204
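// Note on the done path above: RDTSC/RDTSCP return the low 32 bits of the
// counter in AX and the high 32 bits in DX, so the result is assembled as
// (DX<<32) + AX; e.g. DX=0x1, AX=0x2 gives 0x100000002.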
  1205// func memhash(p unsafe.Pointer, h, s uintptr) uintptr
  1206// hash function using AES hardware instructions
  1207TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT,$0-32
  1208	// AX = ptr to data
  1209	// BX = seed
  1210	// CX = size
  1211	CMPB	runtime·useAeshash(SB), $0
  1212	JEQ	noaes
  1213	JMP	aeshashbody<>(SB)
  1214noaes:
  1215	JMP	runtime·memhashFallback<ABIInternal>(SB)
  1216
  1217// func strhash(p unsafe.Pointer, h uintptr) uintptr
  1218TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT,$0-24
  1219	// AX = ptr to string struct
  1220	// BX = seed
  1221	CMPB	runtime·useAeshash(SB), $0
  1222	JEQ	noaes
  1223	MOVQ	8(AX), CX	// length of string
  1224	MOVQ	(AX), AX	// string data
  1225	JMP	aeshashbody<>(SB)
  1226noaes:
  1227	JMP	runtime·strhashFallback<ABIInternal>(SB)
  1228
  1229// AX: data
  1230// BX: hash seed
  1231// CX: length
  1232// At return: AX = return value
  1233TEXT aeshashbody<>(SB),NOSPLIT,$0-0
  1234	// Fill an SSE register with our seeds.
  1235	MOVQ	BX, X0				// 64 bits of per-table hash seed
  1236	PINSRW	$4, CX, X0			// 16 bits of length
  1237	PSHUFHW $0, X0, X0			// repeat length 4 times total
  1238	MOVO	X0, X1				// save unscrambled seed
  1239	PXOR	runtime·aeskeysched(SB), X0	// xor in per-process seed
  1240	AESENC	X0, X0				// scramble seed
  1241
  1242	CMPQ	CX, $16
  1243	JB	aes0to15
  1244	JE	aes16
  1245	CMPQ	CX, $32
  1246	JBE	aes17to32
  1247	CMPQ	CX, $64
  1248	JBE	aes33to64
  1249	CMPQ	CX, $128
  1250	JBE	aes65to128
  1251	JMP	aes129plus
  1252
  1253aes0to15:
  1254	TESTQ	CX, CX
  1255	JE	aes0
  1256
  1257	ADDQ	$16, AX
  1258	TESTW	$0xff0, AX
  1259	JE	endofpage
  1260
  1261	// 16 bytes loaded at this address won't cross
  1262	// a page boundary, so we can load it directly.
  1263	MOVOU	-16(AX), X1
  1264	ADDQ	CX, CX
  1265	MOVQ	$masks<>(SB), AX
  1266	PAND	(AX)(CX*8), X1
  1267final1:
  1268	PXOR	X0, X1	// xor data with seed
  1269	AESENC	X1, X1	// scramble combo 3 times
  1270	AESENC	X1, X1
  1271	AESENC	X1, X1
  1272	MOVQ	X1, AX	// return X1
  1273	RET
  1274
  1275endofpage:
  1276	// address ends in 1111xxxx. Might be up against
  1277	// a page boundary, so load ending at last byte.
  1278	// Then shift bytes down using pshufb.
  1279	MOVOU	-32(AX)(CX*1), X1
  1280	ADDQ	CX, CX
  1281	MOVQ	$shifts<>(SB), AX
  1282	PSHUFB	(AX)(CX*8), X1
  1283	JMP	final1
  1284
  1285aes0:
  1286	// Return scrambled input seed
  1287	AESENC	X0, X0
  1288	MOVQ	X0, AX	// return X0
  1289	RET
  1290
  1291aes16:
  1292	MOVOU	(AX), X1
  1293	JMP	final1
  1294
  1295aes17to32:
  1296	// make second starting seed
  1297	PXOR	runtime·aeskeysched+16(SB), X1
  1298	AESENC	X1, X1
  1299
  1300	// load data to be hashed
  1301	MOVOU	(AX), X2
  1302	MOVOU	-16(AX)(CX*1), X3
  1303
  1304	// xor with seed
  1305	PXOR	X0, X2
  1306	PXOR	X1, X3
  1307
  1308	// scramble 3 times
  1309	AESENC	X2, X2
  1310	AESENC	X3, X3
  1311	AESENC	X2, X2
  1312	AESENC	X3, X3
  1313	AESENC	X2, X2
  1314	AESENC	X3, X3
  1315
  1316	// combine results
  1317	PXOR	X3, X2
  1318	MOVQ	X2, AX	// return X2
  1319	RET
  1320
  1321aes33to64:
  1322	// make 3 more starting seeds
  1323	MOVO	X1, X2
  1324	MOVO	X1, X3
  1325	PXOR	runtime·aeskeysched+16(SB), X1
  1326	PXOR	runtime·aeskeysched+32(SB), X2
  1327	PXOR	runtime·aeskeysched+48(SB), X3
  1328	AESENC	X1, X1
  1329	AESENC	X2, X2
  1330	AESENC	X3, X3
  1331
  1332	MOVOU	(AX), X4
  1333	MOVOU	16(AX), X5
  1334	MOVOU	-32(AX)(CX*1), X6
  1335	MOVOU	-16(AX)(CX*1), X7
  1336
  1337	PXOR	X0, X4
  1338	PXOR	X1, X5
  1339	PXOR	X2, X6
  1340	PXOR	X3, X7
  1341
  1342	AESENC	X4, X4
  1343	AESENC	X5, X5
  1344	AESENC	X6, X6
  1345	AESENC	X7, X7
  1346
  1347	AESENC	X4, X4
  1348	AESENC	X5, X5
  1349	AESENC	X6, X6
  1350	AESENC	X7, X7
  1351
  1352	AESENC	X4, X4
  1353	AESENC	X5, X5
  1354	AESENC	X6, X6
  1355	AESENC	X7, X7
  1356
  1357	PXOR	X6, X4
  1358	PXOR	X7, X5
  1359	PXOR	X5, X4
  1360	MOVQ	X4, AX	// return X4
  1361	RET
  1362
  1363aes65to128:
  1364	// make 7 more starting seeds
  1365	MOVO	X1, X2
  1366	MOVO	X1, X3
  1367	MOVO	X1, X4
  1368	MOVO	X1, X5
  1369	MOVO	X1, X6
  1370	MOVO	X1, X7
  1371	PXOR	runtime·aeskeysched+16(SB), X1
  1372	PXOR	runtime·aeskeysched+32(SB), X2
  1373	PXOR	runtime·aeskeysched+48(SB), X3
  1374	PXOR	runtime·aeskeysched+64(SB), X4
  1375	PXOR	runtime·aeskeysched+80(SB), X5
  1376	PXOR	runtime·aeskeysched+96(SB), X6
  1377	PXOR	runtime·aeskeysched+112(SB), X7
  1378	AESENC	X1, X1
  1379	AESENC	X2, X2
  1380	AESENC	X3, X3
  1381	AESENC	X4, X4
  1382	AESENC	X5, X5
  1383	AESENC	X6, X6
  1384	AESENC	X7, X7
  1385
  1386	// load data
  1387	MOVOU	(AX), X8
  1388	MOVOU	16(AX), X9
  1389	MOVOU	32(AX), X10
  1390	MOVOU	48(AX), X11
  1391	MOVOU	-64(AX)(CX*1), X12
  1392	MOVOU	-48(AX)(CX*1), X13
  1393	MOVOU	-32(AX)(CX*1), X14
  1394	MOVOU	-16(AX)(CX*1), X15
  1395
  1396	// xor with seed
  1397	PXOR	X0, X8
  1398	PXOR	X1, X9
  1399	PXOR	X2, X10
  1400	PXOR	X3, X11
  1401	PXOR	X4, X12
  1402	PXOR	X5, X13
  1403	PXOR	X6, X14
  1404	PXOR	X7, X15
  1405
  1406	// scramble 3 times
  1407	AESENC	X8, X8
  1408	AESENC	X9, X9
  1409	AESENC	X10, X10
  1410	AESENC	X11, X11
  1411	AESENC	X12, X12
  1412	AESENC	X13, X13
  1413	AESENC	X14, X14
  1414	AESENC	X15, X15
  1415
  1416	AESENC	X8, X8
  1417	AESENC	X9, X9
  1418	AESENC	X10, X10
  1419	AESENC	X11, X11
  1420	AESENC	X12, X12
  1421	AESENC	X13, X13
  1422	AESENC	X14, X14
  1423	AESENC	X15, X15
  1424
  1425	AESENC	X8, X8
  1426	AESENC	X9, X9
  1427	AESENC	X10, X10
  1428	AESENC	X11, X11
  1429	AESENC	X12, X12
  1430	AESENC	X13, X13
  1431	AESENC	X14, X14
  1432	AESENC	X15, X15
  1433
  1434	// combine results
  1435	PXOR	X12, X8
  1436	PXOR	X13, X9
  1437	PXOR	X14, X10
  1438	PXOR	X15, X11
  1439	PXOR	X10, X8
  1440	PXOR	X11, X9
  1441	PXOR	X9, X8
  1442	// X15 must be zero on return
  1443	PXOR	X15, X15
  1444	MOVQ	X8, AX	// return X8
  1445	RET
  1446
  1447aes129plus:
  1448	// make 7 more starting seeds
  1449	MOVO	X1, X2
  1450	MOVO	X1, X3
  1451	MOVO	X1, X4
  1452	MOVO	X1, X5
  1453	MOVO	X1, X6
  1454	MOVO	X1, X7
  1455	PXOR	runtime·aeskeysched+16(SB), X1
  1456	PXOR	runtime·aeskeysched+32(SB), X2
  1457	PXOR	runtime·aeskeysched+48(SB), X3
  1458	PXOR	runtime·aeskeysched+64(SB), X4
  1459	PXOR	runtime·aeskeysched+80(SB), X5
  1460	PXOR	runtime·aeskeysched+96(SB), X6
  1461	PXOR	runtime·aeskeysched+112(SB), X7
  1462	AESENC	X1, X1
  1463	AESENC	X2, X2
  1464	AESENC	X3, X3
  1465	AESENC	X4, X4
  1466	AESENC	X5, X5
  1467	AESENC	X6, X6
  1468	AESENC	X7, X7
  1469
  1470	// start with last (possibly overlapping) block
  1471	MOVOU	-128(AX)(CX*1), X8
  1472	MOVOU	-112(AX)(CX*1), X9
  1473	MOVOU	-96(AX)(CX*1), X10
  1474	MOVOU	-80(AX)(CX*1), X11
  1475	MOVOU	-64(AX)(CX*1), X12
  1476	MOVOU	-48(AX)(CX*1), X13
  1477	MOVOU	-32(AX)(CX*1), X14
  1478	MOVOU	-16(AX)(CX*1), X15
  1479
  1480	// xor in seed
  1481	PXOR	X0, X8
  1482	PXOR	X1, X9
  1483	PXOR	X2, X10
  1484	PXOR	X3, X11
  1485	PXOR	X4, X12
  1486	PXOR	X5, X13
  1487	PXOR	X6, X14
  1488	PXOR	X7, X15
  1489
  1490	// compute number of remaining 128-byte blocks
  1491	DECQ	CX
  1492	SHRQ	$7, CX
  1493
  1494	PCALIGN $16
  1495aesloop:
  1496	// scramble state
  1497	AESENC	X8, X8
  1498	AESENC	X9, X9
  1499	AESENC	X10, X10
  1500	AESENC	X11, X11
  1501	AESENC	X12, X12
  1502	AESENC	X13, X13
  1503	AESENC	X14, X14
  1504	AESENC	X15, X15
  1505
  1506	// scramble state, xor in a block
  1507	MOVOU	(AX), X0
  1508	MOVOU	16(AX), X1
  1509	MOVOU	32(AX), X2
  1510	MOVOU	48(AX), X3
  1511	AESENC	X0, X8
  1512	AESENC	X1, X9
  1513	AESENC	X2, X10
  1514	AESENC	X3, X11
  1515	MOVOU	64(AX), X4
  1516	MOVOU	80(AX), X5
  1517	MOVOU	96(AX), X6
  1518	MOVOU	112(AX), X7
  1519	AESENC	X4, X12
  1520	AESENC	X5, X13
  1521	AESENC	X6, X14
  1522	AESENC	X7, X15
  1523
  1524	ADDQ	$128, AX
  1525	DECQ	CX
  1526	JNE	aesloop
  1527
  1528	// 3 more scrambles to finish
  1529	AESENC	X8, X8
  1530	AESENC	X9, X9
  1531	AESENC	X10, X10
  1532	AESENC	X11, X11
  1533	AESENC	X12, X12
  1534	AESENC	X13, X13
  1535	AESENC	X14, X14
  1536	AESENC	X15, X15
  1537	AESENC	X8, X8
  1538	AESENC	X9, X9
  1539	AESENC	X10, X10
  1540	AESENC	X11, X11
  1541	AESENC	X12, X12
  1542	AESENC	X13, X13
  1543	AESENC	X14, X14
  1544	AESENC	X15, X15
  1545	AESENC	X8, X8
  1546	AESENC	X9, X9
  1547	AESENC	X10, X10
  1548	AESENC	X11, X11
  1549	AESENC	X12, X12
  1550	AESENC	X13, X13
  1551	AESENC	X14, X14
  1552	AESENC	X15, X15
  1553
  1554	PXOR	X12, X8
  1555	PXOR	X13, X9
  1556	PXOR	X14, X10
  1557	PXOR	X15, X11
  1558	PXOR	X10, X8
  1559	PXOR	X11, X9
  1560	PXOR	X9, X8
  1561	// X15 must be zero on return
  1562	PXOR	X15, X15
  1563	MOVQ	X8, AX	// return X8
  1564	RET
  1565
  1566// func memhash32(p unsafe.Pointer, h uintptr) uintptr
  1567// ABIInternal for performance.
  1568TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT,$0-24
  1569	// AX = ptr to data
  1570	// BX = seed
  1571	CMPB	runtime·useAeshash(SB), $0
  1572	JEQ	noaes
  1573	MOVQ	BX, X0	// X0 = seed
  1574	PINSRD	$2, (AX), X0	// data
  1575	AESENC	runtime·aeskeysched+0(SB), X0
  1576	AESENC	runtime·aeskeysched+16(SB), X0
  1577	AESENC	runtime·aeskeysched+32(SB), X0
  1578	MOVQ	X0, AX	// return X0
  1579	RET
  1580noaes:
  1581	JMP	runtime·memhash32Fallback<ABIInternal>(SB)
  1582
  1583// func memhash64(p unsafe.Pointer, h uintptr) uintptr
  1584// ABIInternal for performance.
  1585TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT,$0-24
  1586	// AX = ptr to data
  1587	// BX = seed
  1588	CMPB	runtime·useAeshash(SB), $0
  1589	JEQ	noaes
  1590	MOVQ	BX, X0	// X0 = seed
  1591	PINSRQ	$1, (AX), X0	// data
  1592	AESENC	runtime·aeskeysched+0(SB), X0
  1593	AESENC	runtime·aeskeysched+16(SB), X0
  1594	AESENC	runtime·aeskeysched+32(SB), X0
  1595	MOVQ	X0, AX	// return X0
  1596	RET
  1597noaes:
  1598	JMP	runtime·memhash64Fallback<ABIInternal>(SB)
  1599
  1600// simple mask to get rid of data in the high part of the register.
  1601DATA masks<>+0x00(SB)/8, $0x0000000000000000
  1602DATA masks<>+0x08(SB)/8, $0x0000000000000000
  1603DATA masks<>+0x10(SB)/8, $0x00000000000000ff
  1604DATA masks<>+0x18(SB)/8, $0x0000000000000000
  1605DATA masks<>+0x20(SB)/8, $0x000000000000ffff
  1606DATA masks<>+0x28(SB)/8, $0x0000000000000000
  1607DATA masks<>+0x30(SB)/8, $0x0000000000ffffff
  1608DATA masks<>+0x38(SB)/8, $0x0000000000000000
  1609DATA masks<>+0x40(SB)/8, $0x00000000ffffffff
  1610DATA masks<>+0x48(SB)/8, $0x0000000000000000
  1611DATA masks<>+0x50(SB)/8, $0x000000ffffffffff
  1612DATA masks<>+0x58(SB)/8, $0x0000000000000000
  1613DATA masks<>+0x60(SB)/8, $0x0000ffffffffffff
  1614DATA masks<>+0x68(SB)/8, $0x0000000000000000
  1615DATA masks<>+0x70(SB)/8, $0x00ffffffffffffff
  1616DATA masks<>+0x78(SB)/8, $0x0000000000000000
  1617DATA masks<>+0x80(SB)/8, $0xffffffffffffffff
  1618DATA masks<>+0x88(SB)/8, $0x0000000000000000
  1619DATA masks<>+0x90(SB)/8, $0xffffffffffffffff
  1620DATA masks<>+0x98(SB)/8, $0x00000000000000ff
  1621DATA masks<>+0xa0(SB)/8, $0xffffffffffffffff
  1622DATA masks<>+0xa8(SB)/8, $0x000000000000ffff
  1623DATA masks<>+0xb0(SB)/8, $0xffffffffffffffff
  1624DATA masks<>+0xb8(SB)/8, $0x0000000000ffffff
  1625DATA masks<>+0xc0(SB)/8, $0xffffffffffffffff
  1626DATA masks<>+0xc8(SB)/8, $0x00000000ffffffff
  1627DATA masks<>+0xd0(SB)/8, $0xffffffffffffffff
  1628DATA masks<>+0xd8(SB)/8, $0x000000ffffffffff
  1629DATA masks<>+0xe0(SB)/8, $0xffffffffffffffff
  1630DATA masks<>+0xe8(SB)/8, $0x0000ffffffffffff
  1631DATA masks<>+0xf0(SB)/8, $0xffffffffffffffff
  1632DATA masks<>+0xf8(SB)/8, $0x00ffffffffffffff
  1633GLOBL masks<>(SB),RODATA,$256
  1634
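// Usage sketch for masks<> (see aes0to15 above): CX is doubled and then scaled
// by 8, so a length of CX indexes the 16-byte mask at offset CX*16, whose low
// CX bytes are 0xff. E.g. CX=3 selects 0x0000000000ffffff, 0x0000000000000000,
// and the PAND keeps only the 3 valid data bytes.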
  1635// func checkASM() bool
  1636TEXT ·checkASM(SB),NOSPLIT,$0-1
   1637	// check that masks<>(SB) and shifts<>(SB) are 16-byte aligned
  1638	MOVQ	$masks<>(SB), AX
  1639	MOVQ	$shifts<>(SB), BX
  1640	ORQ	BX, AX
  1641	TESTQ	$15, AX
  1642	SETEQ	ret+0(FP)
  1643	RET
  1644
  1645// these are arguments to pshufb. They move data down from
  1646// the high bytes of the register to the low bytes of the register.
  1647// index is how many bytes to move.
  1648DATA shifts<>+0x00(SB)/8, $0x0000000000000000
  1649DATA shifts<>+0x08(SB)/8, $0x0000000000000000
  1650DATA shifts<>+0x10(SB)/8, $0xffffffffffffff0f
  1651DATA shifts<>+0x18(SB)/8, $0xffffffffffffffff
  1652DATA shifts<>+0x20(SB)/8, $0xffffffffffff0f0e
  1653DATA shifts<>+0x28(SB)/8, $0xffffffffffffffff
  1654DATA shifts<>+0x30(SB)/8, $0xffffffffff0f0e0d
  1655DATA shifts<>+0x38(SB)/8, $0xffffffffffffffff
  1656DATA shifts<>+0x40(SB)/8, $0xffffffff0f0e0d0c
  1657DATA shifts<>+0x48(SB)/8, $0xffffffffffffffff
  1658DATA shifts<>+0x50(SB)/8, $0xffffff0f0e0d0c0b
  1659DATA shifts<>+0x58(SB)/8, $0xffffffffffffffff
  1660DATA shifts<>+0x60(SB)/8, $0xffff0f0e0d0c0b0a
  1661DATA shifts<>+0x68(SB)/8, $0xffffffffffffffff
  1662DATA shifts<>+0x70(SB)/8, $0xff0f0e0d0c0b0a09
  1663DATA shifts<>+0x78(SB)/8, $0xffffffffffffffff
  1664DATA shifts<>+0x80(SB)/8, $0x0f0e0d0c0b0a0908
  1665DATA shifts<>+0x88(SB)/8, $0xffffffffffffffff
  1666DATA shifts<>+0x90(SB)/8, $0x0e0d0c0b0a090807
  1667DATA shifts<>+0x98(SB)/8, $0xffffffffffffff0f
  1668DATA shifts<>+0xa0(SB)/8, $0x0d0c0b0a09080706
  1669DATA shifts<>+0xa8(SB)/8, $0xffffffffffff0f0e
  1670DATA shifts<>+0xb0(SB)/8, $0x0c0b0a0908070605
  1671DATA shifts<>+0xb8(SB)/8, $0xffffffffff0f0e0d
  1672DATA shifts<>+0xc0(SB)/8, $0x0b0a090807060504
  1673DATA shifts<>+0xc8(SB)/8, $0xffffffff0f0e0d0c
  1674DATA shifts<>+0xd0(SB)/8, $0x0a09080706050403
  1675DATA shifts<>+0xd8(SB)/8, $0xffffff0f0e0d0c0b
  1676DATA shifts<>+0xe0(SB)/8, $0x0908070605040302
  1677DATA shifts<>+0xe8(SB)/8, $0xffff0f0e0d0c0b0a
  1678DATA shifts<>+0xf0(SB)/8, $0x0807060504030201
  1679DATA shifts<>+0xf8(SB)/8, $0xff0f0e0d0c0b0a09
  1680GLOBL shifts<>(SB),RODATA,$256
  1681
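// Usage sketch for shifts<> (see endofpage above): indexed like masks<>, at
// offset CX*16. E.g. CX=3 selects the PSHUFB control 0xffffffffff0f0e0d,
// 0xffffffffffffffff, which moves the top 3 bytes of the 16-byte load down to
// positions 0-2 and zeroes the rest (an 0xff index byte produces a zero byte).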
  1682TEXT runtime·return0(SB), NOSPLIT, $0
  1683	MOVL	$0, AX
  1684	RET
  1685
  1686
  1687// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
  1688// Must obey the gcc calling convention.
  1689TEXT _cgo_topofstack(SB),NOSPLIT,$0
  1690	get_tls(CX)
  1691	MOVQ	g(CX), AX
  1692	MOVQ	g_m(AX), AX
  1693	MOVQ	m_curg(AX), AX
  1694	MOVQ	(g_stack+stack_hi)(AX), AX
  1695	RET
  1696
  1697// The top-most function running on a goroutine
  1698// returns to goexit+PCQuantum.
  1699TEXT runtime·goexit(SB),NOSPLIT|TOPFRAME|NOFRAME,$0-0
  1700	BYTE	$0x90	// NOP
  1701	CALL	runtime·goexit1(SB)	// does not return
  1702	// traceback from goexit1 must hit code range of goexit
  1703	BYTE	$0x90	// NOP
  1704
  1705// This is called from .init_array and follows the platform, not Go, ABI.
  1706TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
  1707	PUSHQ	R15 // The access to global variables below implicitly uses R15, which is callee-save
  1708	MOVQ	runtime·lastmoduledatap(SB), AX
  1709	MOVQ	DI, moduledata_next(AX)
  1710	MOVQ	DI, runtime·lastmoduledatap(SB)
  1711	POPQ	R15
  1712	RET
  1713
  1714// Initialize special registers then jump to sigpanic.
  1715// This function is injected from the signal handler for panicking
  1716// signals. It is quite painful to set X15 in the signal context,
  1717// so we do it here.
  1718TEXT ·sigpanic0(SB),NOSPLIT,$0-0
  1719	get_tls(R14)
  1720	MOVQ	g(R14), R14
  1721#ifndef GOOS_plan9
  1722	XORPS	X15, X15
  1723#endif
  1724	JMP	·sigpanic<ABIInternal>(SB)
  1725
  1726// gcWriteBarrier informs the GC about heap pointer writes.
  1727//
  1728// gcWriteBarrier returns space in a write barrier buffer which
  1729// should be filled in by the caller.
  1730// gcWriteBarrier does NOT follow the Go ABI. It accepts the
  1731// number of bytes of buffer needed in R11, and returns a pointer
  1732// to the buffer space in R11.
  1733// It clobbers FLAGS. It does not clobber any general-purpose registers,
  1734// but may clobber others (e.g., SSE registers).
  1735// Typical use would be, when doing *(CX+88) = AX
  1736//     CMPL    $0, runtime.writeBarrier(SB)
  1737//     JEQ     dowrite
  1738//     CALL    runtime.gcWriteBarrier2(SB)
  1739//     MOVQ    AX, (R11)
  1740//     MOVQ    88(CX), DX
  1741//     MOVQ    DX, 8(R11)
  1742// dowrite:
  1743//     MOVQ    AX, 88(CX)
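      // In the sequence above, the caller fills the two returned slots with
      // the pointer being written (AX) and the pointer being overwritten
      // (the old contents of 88(CX)) before performing the store itself.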
  1744TEXT gcWriteBarrier<>(SB),NOSPLIT,$112
  1745	// Save the registers clobbered by the fast path. This is slightly
  1746	// faster than having the caller spill these.
  1747	MOVQ	R12, 96(SP)
  1748	MOVQ	R13, 104(SP)
  1749retry:
  1750	// TODO: Consider passing g.m.p in as an argument so it can be shared
  1751	// across a sequence of write barriers.
  1752	MOVQ	g_m(R14), R13
  1753	MOVQ	m_p(R13), R13
  1754	// Get current buffer write position.
  1755	MOVQ	(p_wbBuf+wbBuf_next)(R13), R12	// original next position
  1756	ADDQ	R11, R12			// new next position
  1757	// Is the buffer full?
  1758	CMPQ	R12, (p_wbBuf+wbBuf_end)(R13)
  1759	JA	flush
  1760	// Commit to the larger buffer.
  1761	MOVQ	R12, (p_wbBuf+wbBuf_next)(R13)
  1762	// Make return value (the original next position)
  1763	SUBQ	R11, R12
  1764	MOVQ	R12, R11
  1765	// Restore registers.
  1766	MOVQ	96(SP), R12
  1767	MOVQ	104(SP), R13
  1768	RET
  1769
  1770flush:
  1771	// Save all general purpose registers since these could be
  1772	// clobbered by wbBufFlush and were not saved by the caller.
  1773	// It is possible for wbBufFlush to clobber other registers
  1774	// (e.g., SSE registers), but the compiler takes care of saving
  1775	// those in the caller if necessary. This strikes a balance
  1776	// with registers that are likely to be used.
  1777	//
  1778	// We don't have type information for these, but all code under
  1779	// here is NOSPLIT, so nothing will observe these.
  1780	//
  1781	// TODO: We could strike a different balance; e.g., saving X0
  1782	// and not saving GP registers that are less likely to be used.
  1783	MOVQ	DI, 0(SP)
  1784	MOVQ	AX, 8(SP)
  1785	MOVQ	BX, 16(SP)
  1786	MOVQ	CX, 24(SP)
  1787	MOVQ	DX, 32(SP)
  1788	// DI already saved
  1789	MOVQ	SI, 40(SP)
  1790	MOVQ	BP, 48(SP)
  1791	MOVQ	R8, 56(SP)
  1792	MOVQ	R9, 64(SP)
  1793	MOVQ	R10, 72(SP)
  1794	MOVQ	R11, 80(SP)
  1795	// R12 already saved
  1796	// R13 already saved
  1797	// R14 is g
  1798	MOVQ	R15, 88(SP)
  1799
  1800	CALL	runtime·wbBufFlush(SB)
  1801
  1802	MOVQ	0(SP), DI
  1803	MOVQ	8(SP), AX
  1804	MOVQ	16(SP), BX
  1805	MOVQ	24(SP), CX
  1806	MOVQ	32(SP), DX
  1807	MOVQ	40(SP), SI
  1808	MOVQ	48(SP), BP
  1809	MOVQ	56(SP), R8
  1810	MOVQ	64(SP), R9
  1811	MOVQ	72(SP), R10
  1812	MOVQ	80(SP), R11
  1813	MOVQ	88(SP), R15
  1814	JMP	retry
  1815
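      // gcWriteBarrier1 through gcWriteBarrier8 below are the entry points
      // used by compiled code; each requests N*8 bytes of buffer space in
      // R11 (room for N pointers) and tail-jumps to the common
      // implementation above.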
  1816TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1817	MOVL   $8, R11
  1818	JMP     gcWriteBarrier<>(SB)
  1819TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1820	MOVL   $16, R11
  1821	JMP     gcWriteBarrier<>(SB)
  1822TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1823	MOVL   $24, R11
  1824	JMP     gcWriteBarrier<>(SB)
  1825TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1826	MOVL   $32, R11
  1827	JMP     gcWriteBarrier<>(SB)
  1828TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1829	MOVL   $40, R11
  1830	JMP     gcWriteBarrier<>(SB)
  1831TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1832	MOVL   $48, R11
  1833	JMP     gcWriteBarrier<>(SB)
  1834TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1835	MOVL   $56, R11
  1836	JMP     gcWriteBarrier<>(SB)
  1837TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  1838	MOVL   $64, R11
  1839	JMP     gcWriteBarrier<>(SB)
  1840
  1841DATA	debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
  1842GLOBL	debugCallFrameTooLarge<>(SB), RODATA, $20	// Size duplicated below
  1843
  1844// debugCallV2 is the entry point for debugger-injected function
  1845// calls on running goroutines. It informs the runtime that a
  1846// debug call has been injected and creates a call frame for the
  1847// debugger to fill in.
  1848//
  1849// To inject a function call, a debugger should:
  1850// 1. Check that the goroutine is in state _Grunning and that
  1851//    there are at least 256 bytes free on the stack.
  1852// 2. Push the current PC on the stack (updating SP).
  1853// 3. Write the desired argument frame size at SP-16 (using the SP
  1854//    after step 2).
  1855// 4. Save all machine registers (including flags and XMM registers)
  1856//    so they can be restored later by the debugger.
  1857// 5. Set the PC to debugCallV2 and resume execution.
  1858//
  1859// If the goroutine is in state _Grunnable, then it's not generally
  1860// safe to inject a call because it may return out via other runtime
  1861// operations. Instead, the debugger should unwind the stack to find
  1862// the return to non-runtime code, add a temporary breakpoint there,
  1863// and inject the call once that breakpoint is hit.
  1864//
  1865// If the goroutine is in any other state, it's not safe to inject a call.
  1866//
  1867// This function communicates back to the debugger by setting R12 and
  1868// invoking INT3 to raise a breakpoint signal. See the comments in the
  1869// implementation for the protocol the debugger is expected to
  1870// follow. InjectDebugCall in the runtime tests demonstrates this protocol.
  1871//
  1872// The debugger must ensure that any pointers passed to the function
  1873// obey escape analysis requirements. Specifically, it must not pass
  1874// a stack pointer to an escaping argument. debugCallV2 cannot check
  1875// this invariant.
  1876//
  1877// This is ABIInternal because Go code injects its PC directly into new
  1878// goroutine stacks.
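      //
      // For reference, the R12 values used in the protocol below are:
      // 0 (call frame ready for the debugger to fill in), 1 (the injected
      // call returned), 2 (the injected call panicked; the panic value is
      // at SP), 8 (the call cannot be injected; an error string, pointer
      // and length, is at SP), and 16 (restore registers and resume).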
  1879TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT,$152-0
  1880	// Save all registers that may contain pointers so they can be
  1881	// conservatively scanned.
  1882	//
  1883	// We can't do anything that might clobber any of these
  1884	// registers before this.
  1885	MOVQ	R15, r15-(14*8+8)(SP)
  1886	MOVQ	R14, r14-(13*8+8)(SP)
  1887	MOVQ	R13, r13-(12*8+8)(SP)
  1888	MOVQ	R12, r12-(11*8+8)(SP)
  1889	MOVQ	R11, r11-(10*8+8)(SP)
  1890	MOVQ	R10, r10-(9*8+8)(SP)
  1891	MOVQ	R9, r9-(8*8+8)(SP)
  1892	MOVQ	R8, r8-(7*8+8)(SP)
  1893	MOVQ	DI, di-(6*8+8)(SP)
  1894	MOVQ	SI, si-(5*8+8)(SP)
  1895	MOVQ	BP, bp-(4*8+8)(SP)
  1896	MOVQ	BX, bx-(3*8+8)(SP)
  1897	MOVQ	DX, dx-(2*8+8)(SP)
  1898	// Save the frame size before we clobber it. Either of the last
  1899	// two saves could clobber this depending on whether there's a saved BP.
  1900	MOVQ	frameSize-24(FP), DX	// aka -16(RSP) before prologue
  1901	MOVQ	CX, cx-(1*8+8)(SP)
  1902	MOVQ	AX, ax-(0*8+8)(SP)
  1903
  1904	// Save the argument frame size.
  1905	MOVQ	DX, frameSize-128(SP)
  1906
  1907	// Perform a safe-point check.
  1908	MOVQ	retpc-8(FP), AX	// Caller's PC
  1909	MOVQ	AX, 0(SP)
  1910	CALL	runtime·debugCallCheck(SB)
  1911	MOVQ	8(SP), AX
  1912	TESTQ	AX, AX
  1913	JZ	good
  1914	// The safety check failed. Put the reason string at the top
  1915	// of the stack.
  1916	MOVQ	AX, 0(SP)
  1917	MOVQ	16(SP), AX
  1918	MOVQ	AX, 8(SP)
  1919	// Set R12 to 8 and invoke INT3. The debugger should get the
  1920	// reason a call can't be injected from the top of the stack
  1921	// and resume execution.
  1922	MOVQ	$8, R12
  1923	BYTE	$0xcc
  1924	JMP	restore
  1925
  1926good:
  1927	// Registers are saved and it's safe to make a call.
  1928	// Open up a call frame, moving the stack if necessary.
  1929	//
  1930	// Once the frame is allocated, this will set R12 to 0 and
  1931	// invoke INT3. The debugger should write the argument
  1932	// frame for the call at SP, set up argument registers, push
  1933	// the trapping PC on the stack, set the PC to the function to
  1934	// call, set RDX to point to the closure (if a closure call),
  1935	// and resume execution.
  1936	//
  1937	// If the function returns, this will set R12 to 1 and invoke
  1938	// INT3. The debugger can then inspect any return value saved
  1939	// on the stack at SP and in registers and resume execution again.
  1940	//
  1941	// If the function panics, this will set R12 to 2 and invoke INT3.
  1942	// The interface{} value of the panic will be at SP. The debugger
  1943	// can inspect the panic value and resume execution again.
  1944#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE)	\
  1945	CMPQ	AX, $MAXSIZE;			\
  1946	JA	5(PC);				\
  1947	MOVQ	$NAME(SB), AX;			\
  1948	MOVQ	AX, 0(SP);			\
  1949	CALL	runtime·debugCallWrap(SB);	\
  1950	JMP	restore
  1951
  1952	MOVQ	frameSize-128(SP), AX
  1953	DEBUG_CALL_DISPATCH(debugCall32<>, 32)
  1954	DEBUG_CALL_DISPATCH(debugCall64<>, 64)
  1955	DEBUG_CALL_DISPATCH(debugCall128<>, 128)
  1956	DEBUG_CALL_DISPATCH(debugCall256<>, 256)
  1957	DEBUG_CALL_DISPATCH(debugCall512<>, 512)
  1958	DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
  1959	DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
  1960	DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
  1961	DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
  1962	DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
  1963	DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
  1964	DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
  1965	// The frame size is too large. Report the error.
  1966	MOVQ	$debugCallFrameTooLarge<>(SB), AX
  1967	MOVQ	AX, 0(SP)
  1968	MOVQ	$20, 8(SP) // length of debugCallFrameTooLarge string
  1969	MOVQ	$8, R12
  1970	BYTE	$0xcc
  1971	JMP	restore
  1972
  1973restore:
  1974	// Calls and failures resume here.
  1975	//
  1976	// Set R12 to 16 and invoke INT3. The debugger should restore
  1977	// all registers except RIP and RSP and resume execution.
  1978	MOVQ	$16, R12
  1979	BYTE	$0xcc
  1980	// We must not modify flags after this point.
  1981
  1982	// Restore pointer-containing registers, which may have been
  1983	// modified from the debugger's copy by stack copying.
  1984	MOVQ	ax-(0*8+8)(SP), AX
  1985	MOVQ	cx-(1*8+8)(SP), CX
  1986	MOVQ	dx-(2*8+8)(SP), DX
  1987	MOVQ	bx-(3*8+8)(SP), BX
  1988	MOVQ	bp-(4*8+8)(SP), BP
  1989	MOVQ	si-(5*8+8)(SP), SI
  1990	MOVQ	di-(6*8+8)(SP), DI
  1991	MOVQ	r8-(7*8+8)(SP), R8
  1992	MOVQ	r9-(8*8+8)(SP), R9
  1993	MOVQ	r10-(9*8+8)(SP), R10
  1994	MOVQ	r11-(10*8+8)(SP), R11
  1995	MOVQ	r12-(11*8+8)(SP), R12
  1996	MOVQ	r13-(12*8+8)(SP), R13
  1997	MOVQ	r14-(13*8+8)(SP), R14
  1998	MOVQ	r15-(14*8+8)(SP), R15
  1999
  2000	RET
  2001
  2002// runtime.debugCallCheck assumes that functions defined with the
  2003// DEBUG_CALL_FN macro are safe points to inject calls.
  2004#define DEBUG_CALL_FN(NAME,MAXSIZE)		\
  2005TEXT NAME(SB),WRAPPER,$MAXSIZE-0;		\
  2006	NO_LOCAL_POINTERS;			\
  2007	MOVQ	$0, R12;				\
  2008	BYTE	$0xcc;				\
  2009	MOVQ	$1, R12;				\
  2010	BYTE	$0xcc;				\
  2011	RET
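      // Each stub below reserves MAXSIZE bytes of frame for the injected
      // call's arguments and raises two breakpoints: R12=0 (argument frame
      // ready for the debugger to fill in) and R12=1 (the injected call has
      // returned), matching the protocol described above.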
  2012DEBUG_CALL_FN(debugCall32<>, 32)
  2013DEBUG_CALL_FN(debugCall64<>, 64)
  2014DEBUG_CALL_FN(debugCall128<>, 128)
  2015DEBUG_CALL_FN(debugCall256<>, 256)
  2016DEBUG_CALL_FN(debugCall512<>, 512)
  2017DEBUG_CALL_FN(debugCall1024<>, 1024)
  2018DEBUG_CALL_FN(debugCall2048<>, 2048)
  2019DEBUG_CALL_FN(debugCall4096<>, 4096)
  2020DEBUG_CALL_FN(debugCall8192<>, 8192)
  2021DEBUG_CALL_FN(debugCall16384<>, 16384)
  2022DEBUG_CALL_FN(debugCall32768<>, 32768)
  2023DEBUG_CALL_FN(debugCall65536<>, 65536)
  2024
  2025// func debugCallPanicked(val interface{})
  2026TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
  2027	// Copy the panic value to the top of stack.
  2028	MOVQ	val_type+0(FP), AX
  2029	MOVQ	AX, 0(SP)
  2030	MOVQ	val_data+8(FP), AX
  2031	MOVQ	AX, 8(SP)
  2032	MOVQ	$2, R12
  2033	BYTE	$0xcc
  2034	RET
  2035
  2036// Note: these functions use a special calling convention to save generated code space.
  2037// Arguments are passed in registers, but the space for those arguments is allocated
  2038// in the caller's stack frame. These stubs write the args into that stack space and
  2039// then tail call to the corresponding runtime handler.
  2040// The tail call makes these stubs disappear in backtraces.
  2041// Defined as ABIInternal since they do not use the stack-based Go ABI.
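      // For example, judging from the moves below, panicIndex receives the
      // failing index in AX and the length in CX from compiled code; copying
      // CX to BX leaves the pair in AX and BX, the first two ABIInternal
      // integer argument registers expected by goPanicIndex.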
  2042TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
  2043	MOVQ	CX, BX
  2044	JMP	runtime·goPanicIndex<ABIInternal>(SB)
  2045TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
  2046	MOVQ	CX, BX
  2047	JMP	runtime·goPanicIndexU<ABIInternal>(SB)
  2048TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
  2049	MOVQ	CX, AX
  2050	MOVQ	DX, BX
  2051	JMP	runtime·goPanicSliceAlen<ABIInternal>(SB)
  2052TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
  2053	MOVQ	CX, AX
  2054	MOVQ	DX, BX
  2055	JMP	runtime·goPanicSliceAlenU<ABIInternal>(SB)
  2056TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
  2057	MOVQ	CX, AX
  2058	MOVQ	DX, BX
  2059	JMP	runtime·goPanicSliceAcap<ABIInternal>(SB)
  2060TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
  2061	MOVQ	CX, AX
  2062	MOVQ	DX, BX
  2063	JMP	runtime·goPanicSliceAcapU<ABIInternal>(SB)
  2064TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
  2065	MOVQ	CX, BX
  2066	JMP	runtime·goPanicSliceB<ABIInternal>(SB)
  2067TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
  2068	MOVQ	CX, BX
  2069	JMP	runtime·goPanicSliceBU<ABIInternal>(SB)
  2070TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
  2071	MOVQ	DX, AX
  2072	JMP	runtime·goPanicSlice3Alen<ABIInternal>(SB)
  2073TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
  2074	MOVQ	DX, AX
  2075	JMP	runtime·goPanicSlice3AlenU<ABIInternal>(SB)
  2076TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
  2077	MOVQ	DX, AX
  2078	JMP	runtime·goPanicSlice3Acap<ABIInternal>(SB)
  2079TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
  2080	MOVQ	DX, AX
  2081	JMP	runtime·goPanicSlice3AcapU<ABIInternal>(SB)
  2082TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
  2083	MOVQ	CX, AX
  2084	MOVQ	DX, BX
  2085	JMP	runtime·goPanicSlice3B<ABIInternal>(SB)
  2086TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
  2087	MOVQ	CX, AX
  2088	MOVQ	DX, BX
  2089	JMP	runtime·goPanicSlice3BU<ABIInternal>(SB)
  2090TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
  2091	MOVQ	CX, BX
  2092	JMP	runtime·goPanicSlice3C<ABIInternal>(SB)
  2093TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
  2094	MOVQ	CX, BX
  2095	JMP	runtime·goPanicSlice3CU<ABIInternal>(SB)
  2096TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
  2097	MOVQ	DX, AX
  2098	JMP	runtime·goPanicSliceConvert<ABIInternal>(SB)
  2099
  2100#ifdef GOOS_android
  2101// Use the free TLS_SLOT_APP slot #2 on Android Q.
  2102// Earlier Android versions are set up in gcc_android.c.
  2103DATA runtime·tls_g+0(SB)/8, $16
  2104GLOBL runtime·tls_g+0(SB), NOPTR, $8
  2105#endif
  2106#ifdef GOOS_windows
  2107GLOBL runtime·tls_g+0(SB), NOPTR, $8
  2108#endif
  2109
  2110// The compiler and assembler's -spectre=ret mode rewrites
  2111// all indirect CALL AX / JMP AX instructions to be
  2112// CALL retpolineAX / JMP retpolineAX.
  2113// See https://support.google.com/faqs/answer/7625886.
  2114#define RETPOLINE(reg) \
  2115	/*   CALL setup */     BYTE $0xE8; BYTE $(2+2); BYTE $0; BYTE $0; BYTE $0;	\
  2116	/* nospec: */									\
  2117	/*   PAUSE */           BYTE $0xF3; BYTE $0x90;					\
  2118	/*   JMP nospec */      BYTE $0xEB; BYTE $-(2+2);				\
  2119	/* setup: */									\
  2120	/*   MOVQ AX, 0(SP) */  BYTE $0x48|((reg&8)>>1); BYTE $0x89;			\
  2121	                        BYTE $0x04|((reg&7)<<3); BYTE $0x24;			\
  2122	/*   RET */             BYTE $0xC3
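      // In short: the CALL pushes a return address that points at the
      // PAUSE/JMP speculation trap, setup then overwrites that return
      // address on the stack with the branch target held in the register,
      // and the final RET jumps to the real target while any mispredicted
      // speculation spins harmlessly in the trap.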
  2123
  2124TEXT runtime·retpolineAX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(0)
  2125TEXT runtime·retpolineCX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(1)
  2126TEXT runtime·retpolineDX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(2)
  2127TEXT runtime·retpolineBX(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(3)
  2128/* SP is 4, can't happen / magic encodings */
  2129TEXT runtime·retpolineBP(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(5)
  2130TEXT runtime·retpolineSI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(6)
  2131TEXT runtime·retpolineDI(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(7)
  2132TEXT runtime·retpolineR8(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(8)
  2133TEXT runtime·retpolineR9(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(9)
  2134TEXT runtime·retpolineR10(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(10)
  2135TEXT runtime·retpolineR11(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(11)
  2136TEXT runtime·retpolineR12(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(12)
  2137TEXT runtime·retpolineR13(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(13)
  2138TEXT runtime·retpolineR14(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(14)
  2139TEXT runtime·retpolineR15(SB),NOSPLIT|NOFRAME,$0; RETPOLINE(15)
  2140
  2141TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
  2142	MOVQ BP, AX
  2143	RET
