src/runtime/asm_arm64.s
1// Copyright 2015 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "go_tls.h"
7#include "tls_arm64.h"
8#include "funcdata.h"
9#include "textflag.h"
10
11TEXT runtime·rt0_go(SB),NOSPLIT|TOPFRAME,$0
12 // SP = stack; R0 = argc; R1 = argv
13
14 SUB $32, RSP
15 MOVW R0, 8(RSP) // argc
16 MOVD R1, 16(RSP) // argv
17
18#ifdef TLS_darwin
19 // Initialize TLS.
20 MOVD ZR, g // clear g, make sure it's not junk.
21 SUB $32, RSP
22 MRS_TPIDR_R0
23 AND $~7, R0
24 MOVD R0, 16(RSP) // arg2: TLS base
25 MOVD $runtime·tls_g(SB), R2
26 MOVD R2, 8(RSP) // arg1: &tlsg
27 BL ·tlsinit(SB)
28 ADD $32, RSP
29#endif
30
31 // create istack out of the given (operating system) stack.
32 // _cgo_init may update stackguard.
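	// For illustration only (a provisional layout; _cgo_init and the
	// stackGuard adjustment below may revise it): with 64 KiB assumed usable,
	//	hi := rsp               // current OS stack pointer
	//	lo := hi - 64*1024      // bottom of the provisional g0 stack
	//	g0.stack = stack{lo, hi}
	//	g0.stackguard0 = lo     // reset to lo+stackGuard after cgo init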
33 MOVD $runtime·g0(SB), g
34 MOVD RSP, R7
35 MOVD $(-64*1024)(R7), R0
36 MOVD R0, g_stackguard0(g)
37 MOVD R0, g_stackguard1(g)
38 MOVD R0, (g_stack+stack_lo)(g)
39 MOVD R7, (g_stack+stack_hi)(g)
40
41 // if there is a _cgo_init, call it using the gcc ABI.
42 MOVD _cgo_init(SB), R12
43 CBZ R12, nocgo
44
45#ifdef GOOS_android
46 MRS_TPIDR_R0 // load TLS base pointer
47 MOVD R0, R3 // arg 3: TLS base pointer
48 MOVD $runtime·tls_g(SB), R2 // arg 2: &tls_g
49#else
50 MOVD $0, R2 // arg 2: not used when using platform's TLS
51#endif
52 MOVD $setg_gcc<>(SB), R1 // arg 1: setg
53 MOVD g, R0 // arg 0: G
54 SUB $16, RSP // reserve 16 bytes for sp-8 where fp may be saved.
55 BL (R12)
56 ADD $16, RSP
57
58nocgo:
59 BL runtime·save_g(SB)
60 // update stackguard after _cgo_init
61 MOVD (g_stack+stack_lo)(g), R0
62 ADD $const_stackGuard, R0
63 MOVD R0, g_stackguard0(g)
64 MOVD R0, g_stackguard1(g)
65
66 // set the per-goroutine and per-mach "registers"
67 MOVD $runtime·m0(SB), R0
68
69 // save m->g0 = g0
70 MOVD g, m_g0(R0)
71 // save m0 to g0->m
72 MOVD R0, g_m(g)
73
74 BL runtime·check(SB)
75
76#ifdef GOOS_windows
77 BL runtime·wintls(SB)
78#endif
79
80 MOVW 8(RSP), R0 // copy argc
81 MOVW R0, -8(RSP)
82 MOVD 16(RSP), R0 // copy argv
83 MOVD R0, 0(RSP)
84 BL runtime·args(SB)
85 BL runtime·osinit(SB)
86 BL runtime·schedinit(SB)
87
88 // create a new goroutine to start program
89 MOVD $runtime·mainPC(SB), R0 // entry
90 SUB $16, RSP
91 MOVD R0, 8(RSP) // arg
92 MOVD $0, 0(RSP) // dummy LR
93 BL runtime·newproc(SB)
94 ADD $16, RSP
95
96 // start this M
97 BL runtime·mstart(SB)
98
99 // Prevent dead-code elimination of debugCallV2 and debugPinnerV1, which are
100 // intended to be called by debuggers.
101 MOVD $runtime·debugPinnerV1<ABIInternal>(SB), R0
102 MOVD $runtime·debugCallV2<ABIInternal>(SB), R0
103
104 MOVD $0, R0
105 MOVD R0, (R0) // boom
106 UNDEF
107
108DATA runtime·mainPC+0(SB)/8,$runtime·main<ABIInternal>(SB)
109GLOBL runtime·mainPC(SB),RODATA,$8
110
111// Windows ARM64 needs an immediate 0xf000 argument.
112// See go.dev/issues/53837.
113#define BREAK \
114#ifdef GOOS_windows \
115 BRK $0xf000 \
116#else \
117 BRK \
118#endif \
119
120
121TEXT runtime·breakpoint(SB),NOSPLIT|NOFRAME,$0-0
122 BREAK
123 RET
124
125TEXT runtime·asminit(SB),NOSPLIT|NOFRAME,$0-0
126 RET
127
128TEXT runtime·mstart(SB),NOSPLIT|TOPFRAME,$0
129 BL runtime·mstart0(SB)
130 RET // not reached
131
132/*
133 * go-routine
134 */
135
136// void gogo(Gobuf*)
137// restore state from Gobuf; longjmp
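// For reference, the Gobuf fields restored and cleared below correspond
// roughly to (see runtime2.go):
//	type gobuf struct {
//		sp, pc uintptr
//		g      guintptr
//		ctxt   unsafe.Pointer // closure/context register (R26 here)
//		ret    uintptr
//		lr, bp uintptr
//	}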
138TEXT runtime·gogo(SB), NOSPLIT|NOFRAME, $0-8
139 MOVD buf+0(FP), R5
140 MOVD gobuf_g(R5), R6
141 MOVD 0(R6), R4 // make sure g != nil
142 B gogo<>(SB)
143
144TEXT gogo<>(SB), NOSPLIT|NOFRAME, $0
145 MOVD R6, g
146 BL runtime·save_g(SB)
147
148 MOVD gobuf_sp(R5), R0
149 MOVD R0, RSP
150 MOVD gobuf_bp(R5), R29
151 MOVD gobuf_lr(R5), LR
152 MOVD gobuf_ret(R5), R0
153 MOVD gobuf_ctxt(R5), R26
154 MOVD $0, gobuf_sp(R5)
155 MOVD $0, gobuf_bp(R5)
156 MOVD $0, gobuf_ret(R5)
157 MOVD $0, gobuf_lr(R5)
158 MOVD $0, gobuf_ctxt(R5)
159 CMP ZR, ZR // set condition codes for == test, needed by stack split
160 MOVD gobuf_pc(R5), R6
161 B (R6)
162
163// void mcall(fn func(*g))
164// Switch to m->g0's stack, call fn(g).
165// Fn must never return. It should gogo(&g->sched)
166// to keep running g.
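// For example, gopark parks the current goroutine with:
//	mcall(park_m) // park_m(gp *g) runs on g0's stack, calls schedule(), and never returns to gp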
167TEXT runtime·mcall<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-8
168 MOVD R0, R26 // context
169
170 // Save caller state in g->sched
171 MOVD RSP, R0
172 MOVD R0, (g_sched+gobuf_sp)(g)
173 MOVD R29, (g_sched+gobuf_bp)(g)
174 MOVD LR, (g_sched+gobuf_pc)(g)
175 MOVD $0, (g_sched+gobuf_lr)(g)
176
177 // Switch to m->g0 & its stack, call fn.
178 MOVD g, R3
179 MOVD g_m(g), R8
180 MOVD m_g0(R8), g
181 BL runtime·save_g(SB)
182 CMP g, R3
183 BNE 2(PC)
184 B runtime·badmcall(SB)
185
186 MOVD (g_sched+gobuf_sp)(g), R0
187 MOVD R0, RSP // sp = m->g0->sched.sp
188 MOVD (g_sched+gobuf_bp)(g), R29
189 MOVD R3, R0 // arg = g
190 MOVD $0, -16(RSP) // dummy LR
191 SUB $16, RSP
192 MOVD 0(R26), R4 // code pointer
193 BL (R4)
194 B runtime·badmcall2(SB)
195
196// systemstack_switch is a dummy routine that systemstack leaves at the bottom
197// of the G stack. We need to distinguish the routine that
198// lives at the bottom of the G stack from the one that lives
199// at the top of the system stack because the one at the top of
200// the system stack terminates the stack walk (see topofstack()).
201TEXT runtime·systemstack_switch(SB), NOSPLIT, $0-0
202 UNDEF
203 BL (LR) // make sure this function is not leaf
204 RET
205
206// func systemstack(fn func())
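// Illustrative Go-side use:
//	systemstack(func() {
//		// runs on the per-M g0 stack (or directly, if already on a
//		// system stack); useful for code that must not grow or move
//		// the calling goroutine's stack
//	})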
207TEXT runtime·systemstack(SB), NOSPLIT, $0-8
208 MOVD fn+0(FP), R3 // R3 = fn
209 MOVD R3, R26 // context
210 MOVD g_m(g), R4 // R4 = m
211
212 MOVD m_gsignal(R4), R5 // R5 = gsignal
213 CMP g, R5
214 BEQ noswitch
215
216 MOVD m_g0(R4), R5 // R5 = g0
217 CMP g, R5
218 BEQ noswitch
219
220 MOVD m_curg(R4), R6
221 CMP g, R6
222 BEQ switch
223
224 // Bad: g is not gsignal, not g0, not curg. What is it?
225 // Hide call from linker nosplit analysis.
226 MOVD $runtime·badsystemstack(SB), R3
227 BL (R3)
228 B runtime·abort(SB)
229
230switch:
231 // save our state in g->sched. Pretend to
232 // be systemstack_switch if the G stack is scanned.
233 BL gosave_systemstack_switch<>(SB)
234
235 // switch to g0
236 MOVD R5, g
237 BL runtime·save_g(SB)
238 MOVD (g_sched+gobuf_sp)(g), R3
239 MOVD R3, RSP
240 MOVD (g_sched+gobuf_bp)(g), R29
241
242 // call target function
243 MOVD 0(R26), R3 // code pointer
244 BL (R3)
245
246 // switch back to g
247 MOVD g_m(g), R3
248 MOVD m_curg(R3), g
249 BL runtime·save_g(SB)
250 MOVD (g_sched+gobuf_sp)(g), R0
251 MOVD R0, RSP
252 MOVD (g_sched+gobuf_bp)(g), R29
253 MOVD $0, (g_sched+gobuf_sp)(g)
254 MOVD $0, (g_sched+gobuf_bp)(g)
255 RET
256
257noswitch:
258 // already on m stack, just call directly
259 // Using a tail call here cleans up tracebacks since we won't stop
260 // at an intermediate systemstack.
261 MOVD 0(R26), R3 // code pointer
262 MOVD.P 16(RSP), R30 // restore LR
263 SUB $8, RSP, R29 // restore FP
264 B (R3)
265
266// func switchToCrashStack0(fn func())
267TEXT runtime·switchToCrashStack0<ABIInternal>(SB), NOSPLIT, $0-8
268 MOVD R0, R26 // context register
269 MOVD g_m(g), R1 // curm
270
271 // set g to gcrash
272 MOVD $runtime·gcrash(SB), g // g = &gcrash
273 BL runtime·save_g(SB) // clobbers R0
274 MOVD R1, g_m(g) // g.m = curm
275 MOVD g, m_g0(R1) // curm.g0 = g
276
277 // switch to crashstack
278 MOVD (g_stack+stack_hi)(g), R1
279 SUB $(4*8), R1
280 MOVD R1, RSP
281
282 // call target function
283 MOVD 0(R26), R0
284 CALL (R0)
285
286 // should never return
287 CALL runtime·abort(SB)
288 UNDEF
289
290/*
291 * support for morestack
292 */
293
294// Called during function prolog when more stack is needed.
295// Caller has already loaded:
296// R3 prolog's LR (R30)
297//
298// The traceback routines see morestack on a g0 as being
299// the top of a stack (for example, morestack calling newstack
300// calling the scheduler calling newm calling gc), so we must
301// record an argument size. For that purpose, it has no arguments.
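//
// For illustration, the compiler-generated prologue that ends up here looks
// roughly like this for a small frame (details vary with frame size):
//	MOVD	g_stackguard0(g), R16
//	CMP	R16, RSP
//	BLS	morestack_label         // not enough stack: grow it
//	... function body ...
// morestack_label:
//	MOVD	LR, R3                  // prolog's LR, as documented above
//	BL	runtime·morestack_noctxt(SB)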
302TEXT runtime·morestack(SB),NOSPLIT|NOFRAME,$0-0
303 // Cannot grow scheduler stack (m->g0).
304 MOVD g_m(g), R8
305 MOVD m_g0(R8), R4
306
307 // Called from f.
308 // Set g->sched to context in f
309 MOVD RSP, R0
310 MOVD R0, (g_sched+gobuf_sp)(g)
311 MOVD R29, (g_sched+gobuf_bp)(g)
312 MOVD LR, (g_sched+gobuf_pc)(g)
313 MOVD R3, (g_sched+gobuf_lr)(g)
314 MOVD R26, (g_sched+gobuf_ctxt)(g)
315
316 CMP g, R4
317 BNE 3(PC)
318 BL runtime·badmorestackg0(SB)
319 B runtime·abort(SB)
320
321 // Cannot grow signal stack (m->gsignal).
322 MOVD m_gsignal(R8), R4
323 CMP g, R4
324 BNE 3(PC)
325 BL runtime·badmorestackgsignal(SB)
326 B runtime·abort(SB)
327
328 // Called from f.
329 // Set m->morebuf to f's callers.
330 MOVD R3, (m_morebuf+gobuf_pc)(R8) // f's caller's PC
331 MOVD RSP, R0
332 MOVD R0, (m_morebuf+gobuf_sp)(R8) // f's caller's RSP
333 MOVD g, (m_morebuf+gobuf_g)(R8)
334
335 // Call newstack on m->g0's stack.
336 MOVD m_g0(R8), g
337 BL runtime·save_g(SB)
338 MOVD (g_sched+gobuf_sp)(g), R0
339 MOVD R0, RSP
340 MOVD (g_sched+gobuf_bp)(g), R29
341 MOVD.W $0, -16(RSP) // create a call frame on g0 (saved LR; keep 16-aligned)
342 BL runtime·newstack(SB)
343
344 // Not reached, but make sure the return PC from the call to newstack
345 // is still in this function, and not the beginning of the next.
346 UNDEF
347
348TEXT runtime·morestack_noctxt(SB),NOSPLIT|NOFRAME,$0-0
349 // Force SPWRITE. This function doesn't actually write SP,
350 // but it is called with a special calling convention where
351 // the caller doesn't save LR on the stack but passes it as a
352 // register (R3), which the unwinder currently doesn't understand.
353 // Make it SPWRITE to stop unwinding. (See issue 54332)
354 MOVD RSP, RSP
355
356 MOVW $0, R26
357 B runtime·morestack(SB)
358
359// spillArgs stores return values from registers to a *internal/abi.RegArgs in R20.
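// The 8-byte slot offsets below assume the internal/abi.RegArgs layout for
// arm64 (16 integer and 16 floating-point argument registers), roughly:
//	type RegArgs struct {
//		Ints   [16]uintptr // R0-R15, offsets 0*8 .. 15*8
//		Floats [16]uint64  // F0-F15, offsets 16*8 .. 31*8
//		// pointer bookkeeping fields follow
//	}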
360TEXT ·spillArgs(SB),NOSPLIT,$0-0
361 STP (R0, R1), (0*8)(R20)
362 STP (R2, R3), (2*8)(R20)
363 STP (R4, R5), (4*8)(R20)
364 STP (R6, R7), (6*8)(R20)
365 STP (R8, R9), (8*8)(R20)
366 STP (R10, R11), (10*8)(R20)
367 STP (R12, R13), (12*8)(R20)
368 STP (R14, R15), (14*8)(R20)
369 FSTPD (F0, F1), (16*8)(R20)
370 FSTPD (F2, F3), (18*8)(R20)
371 FSTPD (F4, F5), (20*8)(R20)
372 FSTPD (F6, F7), (22*8)(R20)
373 FSTPD (F8, F9), (24*8)(R20)
374 FSTPD (F10, F11), (26*8)(R20)
375 FSTPD (F12, F13), (28*8)(R20)
376 FSTPD (F14, F15), (30*8)(R20)
377 RET
378
379// unspillArgs loads args into registers from a *internal/abi.RegArgs in R20.
380TEXT ·unspillArgs(SB),NOSPLIT,$0-0
381 LDP (0*8)(R20), (R0, R1)
382 LDP (2*8)(R20), (R2, R3)
383 LDP (4*8)(R20), (R4, R5)
384 LDP (6*8)(R20), (R6, R7)
385 LDP (8*8)(R20), (R8, R9)
386 LDP (10*8)(R20), (R10, R11)
387 LDP (12*8)(R20), (R12, R13)
388 LDP (14*8)(R20), (R14, R15)
389 FLDPD (16*8)(R20), (F0, F1)
390 FLDPD (18*8)(R20), (F2, F3)
391 FLDPD (20*8)(R20), (F4, F5)
392 FLDPD (22*8)(R20), (F6, F7)
393 FLDPD (24*8)(R20), (F8, F9)
394 FLDPD (26*8)(R20), (F10, F11)
395 FLDPD (28*8)(R20), (F12, F13)
396 FLDPD (30*8)(R20), (F14, F15)
397 RET
398
399// reflectcall: call a function with the given argument list
400// func call(stackArgsType *_type, f *FuncVal, stackArgs *byte, stackArgsSize, stackRetOffset, frameSize uint32, regArgs *abi.RegArgs).
401// We don't have variable-sized frames, so we use a small number
402// of constant-sized-frame functions to encode a few bits of size in the pc.
403// Caution: ugly multiline assembly macros in your future!
404
405#define DISPATCH(NAME,MAXSIZE) \
406 MOVD $MAXSIZE, R27; \
407 CMP R27, R16; \
408 BGT 3(PC); \
409 MOVD $NAME(SB), R27; \
410 B (R27)
411// Note: can't just "B NAME(SB)" - bad inlining results.
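// For example, DISPATCH(runtime·call32, 32) expands to:
//	MOVD	$32, R27
//	CMP	R27, R16
//	BGT	3(PC)                   // frame larger than 32: fall through to the next DISPATCH
//	MOVD	$runtime·call32(SB), R27
//	B	(R27)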
412
413TEXT ·reflectcall(SB), NOSPLIT|NOFRAME, $0-48
414 MOVWU frameSize+32(FP), R16
415 DISPATCH(runtime·call16, 16)
416 DISPATCH(runtime·call32, 32)
417 DISPATCH(runtime·call64, 64)
418 DISPATCH(runtime·call128, 128)
419 DISPATCH(runtime·call256, 256)
420 DISPATCH(runtime·call512, 512)
421 DISPATCH(runtime·call1024, 1024)
422 DISPATCH(runtime·call2048, 2048)
423 DISPATCH(runtime·call4096, 4096)
424 DISPATCH(runtime·call8192, 8192)
425 DISPATCH(runtime·call16384, 16384)
426 DISPATCH(runtime·call32768, 32768)
427 DISPATCH(runtime·call65536, 65536)
428 DISPATCH(runtime·call131072, 131072)
429 DISPATCH(runtime·call262144, 262144)
430 DISPATCH(runtime·call524288, 524288)
431 DISPATCH(runtime·call1048576, 1048576)
432 DISPATCH(runtime·call2097152, 2097152)
433 DISPATCH(runtime·call4194304, 4194304)
434 DISPATCH(runtime·call8388608, 8388608)
435 DISPATCH(runtime·call16777216, 16777216)
436 DISPATCH(runtime·call33554432, 33554432)
437 DISPATCH(runtime·call67108864, 67108864)
438 DISPATCH(runtime·call134217728, 134217728)
439 DISPATCH(runtime·call268435456, 268435456)
440 DISPATCH(runtime·call536870912, 536870912)
441 DISPATCH(runtime·call1073741824, 1073741824)
442 MOVD $runtime·badreflectcall(SB), R0
443 B (R0)
444
445#define CALLFN(NAME,MAXSIZE) \
446TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
447 NO_LOCAL_POINTERS; \
448 /* copy arguments to stack */ \
449 MOVD stackArgs+16(FP), R3; \
450 MOVWU stackArgsSize+24(FP), R4; \
451 ADD $8, RSP, R5; \
452 BIC $0xf, R4, R6; \
453 CBZ R6, 6(PC); \
454 /* if R6=(argsize&~15) != 0 */ \
455 ADD R6, R5, R6; \
456 /* copy 16 bytes at a time */ \
457 LDP.P 16(R3), (R7, R8); \
458 STP.P (R7, R8), 16(R5); \
459 CMP R5, R6; \
460 BNE -3(PC); \
461 AND $0xf, R4, R6; \
462 CBZ R6, 6(PC); \
463 /* if R6=(argsize&15) != 0 */ \
464 ADD R6, R5, R6; \
465 /* copy 1 byte at a time for the rest */ \
466 MOVBU.P 1(R3), R7; \
467 MOVBU.P R7, 1(R5); \
468 CMP R5, R6; \
469 BNE -3(PC); \
470 /* set up argument registers */ \
471 MOVD regArgs+40(FP), R20; \
472 CALL ·unspillArgs(SB); \
473 /* call function */ \
474 MOVD f+8(FP), R26; \
475 MOVD (R26), R20; \
476 PCDATA $PCDATA_StackMapIndex, $0; \
477 BL (R20); \
478 /* copy return values back */ \
479 MOVD regArgs+40(FP), R20; \
480 CALL ·spillArgs(SB); \
481 MOVD stackArgsType+0(FP), R7; \
482 MOVD stackArgs+16(FP), R3; \
483 MOVWU stackArgsSize+24(FP), R4; \
484 MOVWU stackRetOffset+28(FP), R6; \
485 ADD $8, RSP, R5; \
486 ADD R6, R5; \
487 ADD R6, R3; \
488 SUB R6, R4; \
489 BL callRet<>(SB); \
490 RET
491
492// callRet copies return values back at the end of call*. This is a
493// separate function so it can allocate stack space for the arguments
494// to reflectcallmove. It does not follow the Go ABI; it expects its
495// arguments in registers.
496TEXT callRet<>(SB), NOSPLIT, $48-0
497 NO_LOCAL_POINTERS
498 STP (R7, R3), 8(RSP)
499 STP (R5, R4), 24(RSP)
500 MOVD R20, 40(RSP)
501 BL runtime·reflectcallmove(SB)
502 RET
503
504CALLFN(·call16, 16)
505CALLFN(·call32, 32)
506CALLFN(·call64, 64)
507CALLFN(·call128, 128)
508CALLFN(·call256, 256)
509CALLFN(·call512, 512)
510CALLFN(·call1024, 1024)
511CALLFN(·call2048, 2048)
512CALLFN(·call4096, 4096)
513CALLFN(·call8192, 8192)
514CALLFN(·call16384, 16384)
515CALLFN(·call32768, 32768)
516CALLFN(·call65536, 65536)
517CALLFN(·call131072, 131072)
518CALLFN(·call262144, 262144)
519CALLFN(·call524288, 524288)
520CALLFN(·call1048576, 1048576)
521CALLFN(·call2097152, 2097152)
522CALLFN(·call4194304, 4194304)
523CALLFN(·call8388608, 8388608)
524CALLFN(·call16777216, 16777216)
525CALLFN(·call33554432, 33554432)
526CALLFN(·call67108864, 67108864)
527CALLFN(·call134217728, 134217728)
528CALLFN(·call268435456, 268435456)
529CALLFN(·call536870912, 536870912)
530CALLFN(·call1073741824, 1073741824)
531
532// func memhash32(p unsafe.Pointer, h uintptr) uintptr
533TEXT runtime·memhash32<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
534 MOVB runtime·useAeshash(SB), R10
535 CBZ R10, noaes
536 MOVD $runtime·aeskeysched+0(SB), R3
537
538 VEOR V0.B16, V0.B16, V0.B16
539 VLD1 (R3), [V2.B16]
540 VLD1 (R0), V0.S[1]
541 VMOV R1, V0.S[0]
542
543 AESE V2.B16, V0.B16
544 AESMC V0.B16, V0.B16
545 AESE V2.B16, V0.B16
546 AESMC V0.B16, V0.B16
547 AESE V2.B16, V0.B16
548
549 VMOV V0.D[0], R0
550 RET
551noaes:
552 B runtime·memhash32Fallback<ABIInternal>(SB)
553
554// func memhash64(p unsafe.Pointer, h uintptr) uintptr
555TEXT runtime·memhash64<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
556 MOVB runtime·useAeshash(SB), R10
557 CBZ R10, noaes
558 MOVD $runtime·aeskeysched+0(SB), R3
559
560 VEOR V0.B16, V0.B16, V0.B16
561 VLD1 (R3), [V2.B16]
562 VLD1 (R0), V0.D[1]
563 VMOV R1, V0.D[0]
564
565 AESE V2.B16, V0.B16
566 AESMC V0.B16, V0.B16
567 AESE V2.B16, V0.B16
568 AESMC V0.B16, V0.B16
569 AESE V2.B16, V0.B16
570
571 VMOV V0.D[0], R0
572 RET
573noaes:
574 B runtime·memhash64Fallback<ABIInternal>(SB)
575
576// func memhash(p unsafe.Pointer, h, size uintptr) uintptr
577TEXT runtime·memhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-32
578 MOVB runtime·useAeshash(SB), R10
579 CBZ R10, noaes
580 B aeshashbody<>(SB)
581noaes:
582 B runtime·memhashFallback<ABIInternal>(SB)
583
584// func strhash(p unsafe.Pointer, h uintptr) uintptr
585TEXT runtime·strhash<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-24
586 MOVB runtime·useAeshash(SB), R10
587 CBZ R10, noaes
588 LDP (R0), (R0, R2) // string data / length
589 B aeshashbody<>(SB)
590noaes:
591 B runtime·strhashFallback<ABIInternal>(SB)
592
593// R0: data
594// R1: seed data
595// R2: length
596// At return, R0 = return value
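// Inputs are dispatched below by length class:
//	0-15 bytes   -> aes0to15 (aes0 for an empty input)
//	16 bytes     -> aes16
//	17-32 bytes  -> aes17to32
//	33-64 bytes  -> aes33to64
//	65-128 bytes -> aes65to128
//	129+ bytes   -> aes129plus (128 bytes per loop iteration)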
597TEXT aeshashbody<>(SB),NOSPLIT|NOFRAME,$0
598 VEOR V30.B16, V30.B16, V30.B16
599 VMOV R1, V30.D[0]
600 VMOV R2, V30.D[1] // load length into seed
601
602 MOVD $runtime·aeskeysched+0(SB), R4
603 VLD1.P 16(R4), [V0.B16]
604 AESE V30.B16, V0.B16
605 AESMC V0.B16, V0.B16
606 CMP $16, R2
607 BLO aes0to15
608 BEQ aes16
609 CMP $32, R2
610 BLS aes17to32
611 CMP $64, R2
612 BLS aes33to64
613 CMP $128, R2
614 BLS aes65to128
615 B aes129plus
616
617aes0to15:
618 CBZ R2, aes0
619 VEOR V2.B16, V2.B16, V2.B16
620 TBZ $3, R2, less_than_8
621 VLD1.P 8(R0), V2.D[0]
622
623less_than_8:
624 TBZ $2, R2, less_than_4
625 VLD1.P 4(R0), V2.S[2]
626
627less_than_4:
628 TBZ $1, R2, less_than_2
629 VLD1.P 2(R0), V2.H[6]
630
631less_than_2:
632 TBZ $0, R2, done
633 VLD1 (R0), V2.B[14]
634done:
635 AESE V0.B16, V2.B16
636 AESMC V2.B16, V2.B16
637 AESE V0.B16, V2.B16
638 AESMC V2.B16, V2.B16
639 AESE V0.B16, V2.B16
640 AESMC V2.B16, V2.B16
641
642 VMOV V2.D[0], R0
643 RET
644
645aes0:
646 VMOV V0.D[0], R0
647 RET
648
649aes16:
650 VLD1 (R0), [V2.B16]
651 B done
652
653aes17to32:
654 // make second seed
655 VLD1 (R4), [V1.B16]
656 AESE V30.B16, V1.B16
657 AESMC V1.B16, V1.B16
658 SUB $16, R2, R10
659 VLD1.P (R0)(R10), [V2.B16]
660 VLD1 (R0), [V3.B16]
661
662 AESE V0.B16, V2.B16
663 AESMC V2.B16, V2.B16
664 AESE V1.B16, V3.B16
665 AESMC V3.B16, V3.B16
666
667 AESE V0.B16, V2.B16
668 AESMC V2.B16, V2.B16
669 AESE V1.B16, V3.B16
670 AESMC V3.B16, V3.B16
671
672 AESE V0.B16, V2.B16
673 AESE V1.B16, V3.B16
674
675 VEOR V3.B16, V2.B16, V2.B16
676
677 VMOV V2.D[0], R0
678 RET
679
680aes33to64:
681 VLD1 (R4), [V1.B16, V2.B16, V3.B16]
682 AESE V30.B16, V1.B16
683 AESMC V1.B16, V1.B16
684 AESE V30.B16, V2.B16
685 AESMC V2.B16, V2.B16
686 AESE V30.B16, V3.B16
687 AESMC V3.B16, V3.B16
688 SUB $32, R2, R10
689
690 VLD1.P (R0)(R10), [V4.B16, V5.B16]
691 VLD1 (R0), [V6.B16, V7.B16]
692
693 AESE V0.B16, V4.B16
694 AESMC V4.B16, V4.B16
695 AESE V1.B16, V5.B16
696 AESMC V5.B16, V5.B16
697 AESE V2.B16, V6.B16
698 AESMC V6.B16, V6.B16
699 AESE V3.B16, V7.B16
700 AESMC V7.B16, V7.B16
701
702 AESE V0.B16, V4.B16
703 AESMC V4.B16, V4.B16
704 AESE V1.B16, V5.B16
705 AESMC V5.B16, V5.B16
706 AESE V2.B16, V6.B16
707 AESMC V6.B16, V6.B16
708 AESE V3.B16, V7.B16
709 AESMC V7.B16, V7.B16
710
711 AESE V0.B16, V4.B16
712 AESE V1.B16, V5.B16
713 AESE V2.B16, V6.B16
714 AESE V3.B16, V7.B16
715
716 VEOR V6.B16, V4.B16, V4.B16
717 VEOR V7.B16, V5.B16, V5.B16
718 VEOR V5.B16, V4.B16, V4.B16
719
720 VMOV V4.D[0], R0
721 RET
722
723aes65to128:
724 VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
725 VLD1 (R4), [V5.B16, V6.B16, V7.B16]
726 AESE V30.B16, V1.B16
727 AESMC V1.B16, V1.B16
728 AESE V30.B16, V2.B16
729 AESMC V2.B16, V2.B16
730 AESE V30.B16, V3.B16
731 AESMC V3.B16, V3.B16
732 AESE V30.B16, V4.B16
733 AESMC V4.B16, V4.B16
734 AESE V30.B16, V5.B16
735 AESMC V5.B16, V5.B16
736 AESE V30.B16, V6.B16
737 AESMC V6.B16, V6.B16
738 AESE V30.B16, V7.B16
739 AESMC V7.B16, V7.B16
740
741 SUB $64, R2, R10
742 VLD1.P (R0)(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
743 VLD1 (R0), [V12.B16, V13.B16, V14.B16, V15.B16]
744 AESE V0.B16, V8.B16
745 AESMC V8.B16, V8.B16
746 AESE V1.B16, V9.B16
747 AESMC V9.B16, V9.B16
748 AESE V2.B16, V10.B16
749 AESMC V10.B16, V10.B16
750 AESE V3.B16, V11.B16
751 AESMC V11.B16, V11.B16
752 AESE V4.B16, V12.B16
753 AESMC V12.B16, V12.B16
754 AESE V5.B16, V13.B16
755 AESMC V13.B16, V13.B16
756 AESE V6.B16, V14.B16
757 AESMC V14.B16, V14.B16
758 AESE V7.B16, V15.B16
759 AESMC V15.B16, V15.B16
760
761 AESE V0.B16, V8.B16
762 AESMC V8.B16, V8.B16
763 AESE V1.B16, V9.B16
764 AESMC V9.B16, V9.B16
765 AESE V2.B16, V10.B16
766 AESMC V10.B16, V10.B16
767 AESE V3.B16, V11.B16
768 AESMC V11.B16, V11.B16
769 AESE V4.B16, V12.B16
770 AESMC V12.B16, V12.B16
771 AESE V5.B16, V13.B16
772 AESMC V13.B16, V13.B16
773 AESE V6.B16, V14.B16
774 AESMC V14.B16, V14.B16
775 AESE V7.B16, V15.B16
776 AESMC V15.B16, V15.B16
777
778 AESE V0.B16, V8.B16
779 AESE V1.B16, V9.B16
780 AESE V2.B16, V10.B16
781 AESE V3.B16, V11.B16
782 AESE V4.B16, V12.B16
783 AESE V5.B16, V13.B16
784 AESE V6.B16, V14.B16
785 AESE V7.B16, V15.B16
786
787 VEOR V12.B16, V8.B16, V8.B16
788 VEOR V13.B16, V9.B16, V9.B16
789 VEOR V14.B16, V10.B16, V10.B16
790 VEOR V15.B16, V11.B16, V11.B16
791 VEOR V10.B16, V8.B16, V8.B16
792 VEOR V11.B16, V9.B16, V9.B16
793 VEOR V9.B16, V8.B16, V8.B16
794
795 VMOV V8.D[0], R0
796 RET
797
798aes129plus:
799 PRFM (R0), PLDL1KEEP
800 VLD1.P 64(R4), [V1.B16, V2.B16, V3.B16, V4.B16]
801 VLD1 (R4), [V5.B16, V6.B16, V7.B16]
802 AESE V30.B16, V1.B16
803 AESMC V1.B16, V1.B16
804 AESE V30.B16, V2.B16
805 AESMC V2.B16, V2.B16
806 AESE V30.B16, V3.B16
807 AESMC V3.B16, V3.B16
808 AESE V30.B16, V4.B16
809 AESMC V4.B16, V4.B16
810 AESE V30.B16, V5.B16
811 AESMC V5.B16, V5.B16
812 AESE V30.B16, V6.B16
813 AESMC V6.B16, V6.B16
814 AESE V30.B16, V7.B16
815 AESMC V7.B16, V7.B16
816 ADD R0, R2, R10
817 SUB $128, R10, R10
818 VLD1.P 64(R10), [V8.B16, V9.B16, V10.B16, V11.B16]
819 VLD1 (R10), [V12.B16, V13.B16, V14.B16, V15.B16]
820 SUB $1, R2, R2
821 LSR $7, R2, R2
822
823aesloop:
824 AESE V8.B16, V0.B16
825 AESMC V0.B16, V0.B16
826 AESE V9.B16, V1.B16
827 AESMC V1.B16, V1.B16
828 AESE V10.B16, V2.B16
829 AESMC V2.B16, V2.B16
830 AESE V11.B16, V3.B16
831 AESMC V3.B16, V3.B16
832 AESE V12.B16, V4.B16
833 AESMC V4.B16, V4.B16
834 AESE V13.B16, V5.B16
835 AESMC V5.B16, V5.B16
836 AESE V14.B16, V6.B16
837 AESMC V6.B16, V6.B16
838 AESE V15.B16, V7.B16
839 AESMC V7.B16, V7.B16
840
841 VLD1.P 64(R0), [V8.B16, V9.B16, V10.B16, V11.B16]
842 AESE V8.B16, V0.B16
843 AESMC V0.B16, V0.B16
844 AESE V9.B16, V1.B16
845 AESMC V1.B16, V1.B16
846 AESE V10.B16, V2.B16
847 AESMC V2.B16, V2.B16
848 AESE V11.B16, V3.B16
849 AESMC V3.B16, V3.B16
850
851 VLD1.P 64(R0), [V12.B16, V13.B16, V14.B16, V15.B16]
852 AESE V12.B16, V4.B16
853 AESMC V4.B16, V4.B16
854 AESE V13.B16, V5.B16
855 AESMC V5.B16, V5.B16
856 AESE V14.B16, V6.B16
857 AESMC V6.B16, V6.B16
858 AESE V15.B16, V7.B16
859 AESMC V7.B16, V7.B16
860 SUB $1, R2, R2
861 CBNZ R2, aesloop
862
863 AESE V8.B16, V0.B16
864 AESMC V0.B16, V0.B16
865 AESE V9.B16, V1.B16
866 AESMC V1.B16, V1.B16
867 AESE V10.B16, V2.B16
868 AESMC V2.B16, V2.B16
869 AESE V11.B16, V3.B16
870 AESMC V3.B16, V3.B16
871 AESE V12.B16, V4.B16
872 AESMC V4.B16, V4.B16
873 AESE V13.B16, V5.B16
874 AESMC V5.B16, V5.B16
875 AESE V14.B16, V6.B16
876 AESMC V6.B16, V6.B16
877 AESE V15.B16, V7.B16
878 AESMC V7.B16, V7.B16
879
880 AESE V8.B16, V0.B16
881 AESMC V0.B16, V0.B16
882 AESE V9.B16, V1.B16
883 AESMC V1.B16, V1.B16
884 AESE V10.B16, V2.B16
885 AESMC V2.B16, V2.B16
886 AESE V11.B16, V3.B16
887 AESMC V3.B16, V3.B16
888 AESE V12.B16, V4.B16
889 AESMC V4.B16, V4.B16
890 AESE V13.B16, V5.B16
891 AESMC V5.B16, V5.B16
892 AESE V14.B16, V6.B16
893 AESMC V6.B16, V6.B16
894 AESE V15.B16, V7.B16
895 AESMC V7.B16, V7.B16
896
897 AESE V8.B16, V0.B16
898 AESE V9.B16, V1.B16
899 AESE V10.B16, V2.B16
900 AESE V11.B16, V3.B16
901 AESE V12.B16, V4.B16
902 AESE V13.B16, V5.B16
903 AESE V14.B16, V6.B16
904 AESE V15.B16, V7.B16
905
906 VEOR V0.B16, V1.B16, V0.B16
907 VEOR V2.B16, V3.B16, V2.B16
908 VEOR V4.B16, V5.B16, V4.B16
909 VEOR V6.B16, V7.B16, V6.B16
910 VEOR V0.B16, V2.B16, V0.B16
911 VEOR V4.B16, V6.B16, V4.B16
912 VEOR V4.B16, V0.B16, V0.B16
913
914 VMOV V0.D[0], R0
915 RET
916
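// procyield(cycles uint32) executes the YIELD hint instruction `cycles` times.
// The runtime uses it as a low-cost pause in spin loops, typically before
// falling back to osyield or blocking.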
917TEXT runtime·procyield(SB),NOSPLIT,$0-0
918 MOVWU cycles+0(FP), R0
919again:
920 YIELD
921 SUBW $1, R0
922 CBNZ R0, again
923 RET
924
925// Save state of caller into g->sched,
926// but using fake PC from systemstack_switch.
927// Must only be called from functions with no locals ($0)
928// or else unwinding from systemstack_switch is incorrect.
929// Smashes R0.
930TEXT gosave_systemstack_switch<>(SB),NOSPLIT|NOFRAME,$0
931 MOVD $runtime·systemstack_switch(SB), R0
932 ADD $8, R0 // get past prologue
933 MOVD R0, (g_sched+gobuf_pc)(g)
934 MOVD RSP, R0
935 MOVD R0, (g_sched+gobuf_sp)(g)
936 MOVD R29, (g_sched+gobuf_bp)(g)
937 MOVD $0, (g_sched+gobuf_lr)(g)
938 MOVD $0, (g_sched+gobuf_ret)(g)
939 // Assert ctxt is zero. See func save.
940 MOVD (g_sched+gobuf_ctxt)(g), R0
941 CBZ R0, 2(PC)
942 CALL runtime·abort(SB)
943 RET
944
945// func asmcgocall_no_g(fn, arg unsafe.Pointer)
946// Call fn(arg) aligned appropriately for the gcc ABI.
947// Called on a system stack, and there may be no g yet (during needm).
948TEXT ·asmcgocall_no_g(SB),NOSPLIT,$0-16
949 MOVD fn+0(FP), R1
950 MOVD arg+8(FP), R0
951 SUB $16, RSP // skip over saved frame pointer below RSP
952 BL (R1)
953 ADD $16, RSP // skip over saved frame pointer below RSP
954 RET
955
956// func asmcgocall(fn, arg unsafe.Pointer) int32
957// Call fn(arg) on the scheduler stack,
958// aligned appropriately for the gcc ABI.
959// See cgocall.go for more details.
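// For illustration, fn is called as a C function taking a single pointer
// argument (passed in R0); whatever the callee leaves in the low 32 bits of
// R0 is returned to Go as the int32 result, i.e. roughly:
//	// int32_t fn(void *arg);   // C-side shape of fn (sketch)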
960TEXT ·asmcgocall(SB),NOSPLIT,$0-20
961 MOVD fn+0(FP), R1
962 MOVD arg+8(FP), R0
963
964 MOVD RSP, R2 // save original stack pointer
965 CBZ g, nosave
966 MOVD g, R4
967
968 // Figure out if we need to switch to m->g0 stack.
969 // We get called to create new OS threads too, and those
970 // come in on the m->g0 stack already. Or we might already
971 // be on the m->gsignal stack.
972 MOVD g_m(g), R8
973 MOVD m_gsignal(R8), R3
974 CMP R3, g
975 BEQ nosave
976 MOVD m_g0(R8), R3
977 CMP R3, g
978 BEQ nosave
979
980 // Switch to system stack.
981 MOVD R0, R9 // gosave_systemstack_switch<> and save_g might clobber R0
982 BL gosave_systemstack_switch<>(SB)
983 MOVD R3, g
984 BL runtime·save_g(SB)
985 MOVD (g_sched+gobuf_sp)(g), R0
986 MOVD R0, RSP
987 MOVD (g_sched+gobuf_bp)(g), R29
988 MOVD R9, R0
989
990 // Now on a scheduling stack (a pthread-created stack).
991 // Save room for two of our pointers /*, plus 32 bytes of callee
992 // save area that lives on the caller stack. */
993 MOVD RSP, R13
994 SUB $16, R13
995 MOVD R13, RSP
996 MOVD R4, 0(RSP) // save old g on stack
997 MOVD (g_stack+stack_hi)(R4), R4
998 SUB R2, R4
999 MOVD R4, 8(RSP) // save depth in old g stack (can't just save SP, as stack might be copied during a callback)
1000 BL (R1)
1001 MOVD R0, R9
1002
1003 // Restore g, stack pointer. R0 is errno, so don't touch it
1004 MOVD 0(RSP), g
1005 BL runtime·save_g(SB)
1006 MOVD (g_stack+stack_hi)(g), R5
1007 MOVD 8(RSP), R6
1008 SUB R6, R5
1009 MOVD R9, R0
1010 MOVD R5, RSP
1011
1012 MOVW R0, ret+16(FP)
1013 RET
1014
1015nosave:
1016 // Running on a system stack, perhaps even without a g.
1017 // Having no g can happen during thread creation or thread teardown
1018 // (see needm/dropm on Solaris, for example).
1019 // This code is like the above sequence but without saving/restoring g
1020 // and without worrying about the stack moving out from under us
1021 // (because we're on a system stack, not a goroutine stack).
1022 // The above code could be used directly if already on a system stack,
1023 // but then the only path through this code would be a rare case on Solaris.
1024 // Using this code for all "already on system stack" calls exercises it more,
1025 // which should help keep it correct.
1026 MOVD RSP, R13
1027 SUB $16, R13
1028 MOVD R13, RSP
1029 MOVD $0, R4
1030 MOVD R4, 0(RSP) // Where above code stores g, in case someone looks during debugging.
1031 MOVD R2, 8(RSP) // Save original stack pointer.
1032 BL (R1)
1033 // Restore stack pointer.
1034 MOVD 8(RSP), R2
1035 MOVD R2, RSP
1036 MOVD R0, ret+16(FP)
1037 RET
1038
1039// cgocallback(fn, frame unsafe.Pointer, ctxt uintptr)
1040// See cgocall.go for more details.
1041TEXT ·cgocallback(SB),NOSPLIT,$24-24
1042 NO_LOCAL_POINTERS
1043
1044 // When fn is nil, skip cgocallbackg and just dropm; frame is then the saved g.
1045 // This path is used to dropm while a thread is exiting.
1046 MOVD fn+0(FP), R1
1047 CBNZ R1, loadg
1048 // Restore the g from frame.
1049 MOVD frame+8(FP), g
1050 B dropm
1051
1052loadg:
1053 // Load g from thread-local storage.
1054 BL runtime·load_g(SB)
1055
1056 // If g is nil, either Go did not create the current thread,
1057 // or this thread has never called into Go on pthread platforms.
1058 // Call needm to obtain one for temporary use.
1059 // In this case, we're running on the thread stack, so there's
1060 // lots of space, but the linker doesn't know. Hide the call from
1061 // the linker analysis by using an indirect call.
1062 CBZ g, needm
1063
1064 MOVD g_m(g), R8
1065 MOVD R8, savedm-8(SP)
1066 B havem
1067
1068needm:
1069 MOVD g, savedm-8(SP) // g is zero, so is m.
1070 MOVD $runtime·needAndBindM(SB), R0
1071 BL (R0)
1072
1073 // Set m->g0->sched.sp = SP, so that if a panic happens
1074 // during the function we are about to execute, it will
1075 // have a valid SP to run on the g0 stack.
1076 // The next few lines (after the havem label)
1077 // will save this SP onto the stack and then write
1078 // the same SP back to m->sched.sp. That seems redundant,
1079 // but if an unrecovered panic happens, unwindm will
1080 // restore the g->sched.sp from the stack location
1081 // and then systemstack will try to use it. If we don't set it here,
1082 // that restored SP will be uninitialized (typically 0) and
1083 // will not be usable.
1084 MOVD g_m(g), R8
1085 MOVD m_g0(R8), R3
1086 MOVD RSP, R0
1087 MOVD R0, (g_sched+gobuf_sp)(R3)
1088 MOVD R29, (g_sched+gobuf_bp)(R3)
1089
1090havem:
1091 // Now there's a valid m, and we're running on its m->g0.
1092 // Save current m->g0->sched.sp on stack and then set it to SP.
1093 // Save current sp in m->g0->sched.sp in preparation for
1094 // switch back to m->curg stack.
1095 // NOTE: unwindm knows that the saved g->sched.sp is at 16(RSP) aka savedsp-16(SP).
1096 // Beware that the frame size is actually 32+16.
1097 MOVD m_g0(R8), R3
1098 MOVD (g_sched+gobuf_sp)(R3), R4
1099 MOVD R4, savedsp-16(SP)
1100 MOVD RSP, R0
1101 MOVD R0, (g_sched+gobuf_sp)(R3)
1102
1103 // Switch to m->curg stack and call runtime.cgocallbackg.
1104 // Because we are taking over the execution of m->curg
1105 // but *not* resuming what had been running, we need to
1106 // save that information (m->curg->sched) so we can restore it.
1107 // We can restore m->curg->sched.sp easily, because calling
1108 // runtime.cgocallbackg leaves SP unchanged upon return.
1109 // To save m->curg->sched.pc, we push it onto the curg stack and
1110 // open a frame the same size as cgocallback's g0 frame.
1111 // Once we switch to the curg stack, the pushed PC will appear
1112 // to be the return PC of cgocallback, so that the traceback
1113 // will seamlessly trace back into the earlier calls.
1114 MOVD m_curg(R8), g
1115 BL runtime·save_g(SB)
1116 MOVD (g_sched+gobuf_sp)(g), R4 // prepare stack as R4
1117 MOVD (g_sched+gobuf_pc)(g), R5
1118 MOVD R5, -48(R4)
1119 MOVD (g_sched+gobuf_bp)(g), R5
1120 MOVD R5, -56(R4)
1121 // Gather our arguments into registers.
1122 MOVD fn+0(FP), R1
1123 MOVD frame+8(FP), R2
1124 MOVD ctxt+16(FP), R3
1125 MOVD $-48(R4), R0 // maintain 16-byte SP alignment
1126 MOVD R0, RSP // switch stack
1127 MOVD R1, 8(RSP)
1128 MOVD R2, 16(RSP)
1129 MOVD R3, 24(RSP)
1130 MOVD $runtime·cgocallbackg(SB), R0
1131 CALL (R0) // indirect call to bypass nosplit check. We're on a different stack now.
1132
1133 // Restore g->sched (== m->curg->sched) from saved values.
1134 MOVD 0(RSP), R5
1135 MOVD R5, (g_sched+gobuf_pc)(g)
1136 MOVD RSP, R4
1137 ADD $48, R4, R4
1138 MOVD R4, (g_sched+gobuf_sp)(g)
1139
1140 // Switch back to m->g0's stack and restore m->g0->sched.sp.
1141 // (Unlike m->curg, the g0 goroutine never uses sched.pc,
1142 // so we do not have to restore it.)
1143 MOVD g_m(g), R8
1144 MOVD m_g0(R8), g
1145 BL runtime·save_g(SB)
1146 MOVD (g_sched+gobuf_sp)(g), R0
1147 MOVD R0, RSP
1148 MOVD savedsp-16(SP), R4
1149 MOVD R4, (g_sched+gobuf_sp)(g)
1150
1151 // If the m on entry was nil, we called needm above to borrow an m,
1152 // 1. for the duration of the call on non-pthread platforms,
1153 // 2. or the duration of the C thread alive on pthread platforms.
1154 // If the m on entry wasn't nil,
1155 // 1. the thread might be a Go thread,
1156 // 2. or it wasn't the first call from a C thread on pthread platforms,
1157 // since then we skip dropm to reuse the m in the first call.
1158 MOVD savedm-8(SP), R6
1159 CBNZ R6, droppedm
1160
1161 // Skip dropm to reuse it in the next call, when a pthread key has been created.
1162 MOVD _cgo_pthread_key_created(SB), R6
1163 // A nil _cgo_pthread_key_created pointer means cgo is disabled, so we need dropm.
1164 CBZ R6, dropm
1165 MOVD (R6), R6
1166 CBNZ R6, droppedm
1167
1168dropm:
1169 MOVD $runtime·dropm(SB), R0
1170 BL (R0)
1171droppedm:
1172
1173 // Done!
1174 RET
1175
1176// Called from cgo wrappers, this function returns g->m->curg.stack.hi.
1177// Must obey the gcc calling convention.
1178TEXT _cgo_topofstack(SB),NOSPLIT,$24
1179 // g (R28) and REGTMP (R27) might be clobbered by load_g. They
1180 // are callee-save in the gcc calling convention, so save them.
1181 MOVD R27, savedR27-8(SP)
1182 MOVD g, saveG-16(SP)
1183
1184 BL runtime·load_g(SB)
1185 MOVD g_m(g), R0
1186 MOVD m_curg(R0), R0
1187 MOVD (g_stack+stack_hi)(R0), R0
1188
1189 MOVD saveG-16(SP), g
1190 MOVD savedR27-8(SP), R27
1191 RET
1192
1193// void setg(G*); set g. for use by needm.
1194TEXT runtime·setg(SB), NOSPLIT, $0-8
1195 MOVD gg+0(FP), g
1196 // This only happens if iscgo, so jump straight to save_g
1197 BL runtime·save_g(SB)
1198 RET
1199
1200// void setg_gcc(G*); set g called from gcc
1201TEXT setg_gcc<>(SB),NOSPLIT,$8
1202 MOVD R0, g
1203 MOVD R27, savedR27-8(SP)
1204 BL runtime·save_g(SB)
1205 MOVD savedR27-8(SP), R27
1206 RET
1207
1208TEXT runtime·emptyfunc(SB),0,$0-0
1209 RET
1210
1211TEXT runtime·abort(SB),NOSPLIT|NOFRAME,$0-0
1212 MOVD ZR, R0
1213 MOVD (R0), R0
1214 UNDEF
1215
1216TEXT runtime·return0(SB), NOSPLIT, $0
1217 MOVW $0, R0
1218 RET
1219
1220// The top-most function running on a goroutine
1221// returns to goexit+PCQuantum.
1222TEXT runtime·goexit(SB),NOSPLIT|NOFRAME|TOPFRAME,$0-0
1223 MOVD R0, R0 // NOP
1224 BL runtime·goexit1(SB) // does not return
1225
1226// This is called from .init_array and follows the platform, not Go, ABI.
1227TEXT runtime·addmoduledata(SB),NOSPLIT,$0-0
1228 SUB $0x10, RSP
1229 MOVD R27, 8(RSP) // The access to global variables below implicitly uses R27, which is callee-save
1230 MOVD runtime·lastmoduledatap(SB), R1
1231 MOVD R0, moduledata_next(R1)
1232 MOVD R0, runtime·lastmoduledatap(SB)
1233 MOVD 8(RSP), R27
1234 ADD $0x10, RSP
1235 RET
1236
1237TEXT ·checkASM(SB),NOSPLIT,$0-1
1238 MOVW $1, R3
1239 MOVB R3, ret+0(FP)
1240 RET
1241
1242// gcWriteBarrier informs the GC about heap pointer writes.
1243//
1244// gcWriteBarrier does NOT follow the Go ABI. It accepts the
1245// number of bytes of buffer needed in R25, and returns a pointer
1246// to the buffer space in R25.
1247// It clobbers condition codes.
1248// It does not clobber any general-purpose registers except R27,
1249// but may clobber others (e.g., floating point registers).
1250// The act of CALLing gcWriteBarrier will clobber R30 (LR).
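//
// For illustration, a compiler-inserted barrier for a pointer store
// "*dst = val" looks roughly like this (exact scheduling is up to the compiler):
//	CALL	runtime·gcWriteBarrier2(SB) // request 2 slots; buffer pointer returned in R25
//	MOVD	val, (R25)                  // record the value being written
//	MOVD	(dst), R27
//	MOVD	R27, 8(R25)                 // record the value being overwritten
//	MOVD	val, (dst)                  // the actual store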
1251TEXT gcWriteBarrier<>(SB),NOSPLIT,$200
1252 // Save the registers clobbered by the fast path.
1253 STP (R0, R1), 184(RSP)
1254retry:
1255 MOVD g_m(g), R0
1256 MOVD m_p(R0), R0
1257 MOVD (p_wbBuf+wbBuf_next)(R0), R1
1258 MOVD (p_wbBuf+wbBuf_end)(R0), R27
1259 // Increment wbBuf.next position.
1260 ADD R25, R1
1261 // Is the buffer full?
1262 CMP R27, R1
1263 BHI flush
1264 // Commit to the larger buffer.
1265 MOVD R1, (p_wbBuf+wbBuf_next)(R0)
1266 // Make return value (the original next position)
1267 SUB R25, R1, R25
1268 // Restore registers.
1269 LDP 184(RSP), (R0, R1)
1270 RET
1271
1272flush:
1273 // Save all general purpose registers since these could be
1274 // clobbered by wbBufFlush and were not saved by the caller.
1275 // R0 and R1 already saved
1276 STP (R2, R3), 1*8(RSP)
1277 STP (R4, R5), 3*8(RSP)
1278 STP (R6, R7), 5*8(RSP)
1279 STP (R8, R9), 7*8(RSP)
1280 STP (R10, R11), 9*8(RSP)
1281 STP (R12, R13), 11*8(RSP)
1282 STP (R14, R15), 13*8(RSP)
1283 // R16, R17 may be clobbered by linker trampoline
1284 // R18 is unused.
1285 STP (R19, R20), 15*8(RSP)
1286 STP (R21, R22), 17*8(RSP)
1287 STP (R23, R24), 19*8(RSP)
1288 STP (R25, R26), 21*8(RSP)
1289 // R27 is temp register.
1290 // R28 is g.
1291 // R29 is frame pointer (unused).
1292 // R30 is LR, which was saved by the prologue.
1293 // R31 is SP.
1294
1295 CALL runtime·wbBufFlush(SB)
1296 LDP 1*8(RSP), (R2, R3)
1297 LDP 3*8(RSP), (R4, R5)
1298 LDP 5*8(RSP), (R6, R7)
1299 LDP 7*8(RSP), (R8, R9)
1300 LDP 9*8(RSP), (R10, R11)
1301 LDP 11*8(RSP), (R12, R13)
1302 LDP 13*8(RSP), (R14, R15)
1303 LDP 15*8(RSP), (R19, R20)
1304 LDP 17*8(RSP), (R21, R22)
1305 LDP 19*8(RSP), (R23, R24)
1306 LDP 21*8(RSP), (R25, R26)
1307 JMP retry
1308
1309TEXT runtime·gcWriteBarrier1<ABIInternal>(SB),NOSPLIT,$0
1310 MOVD $8, R25
1311 JMP gcWriteBarrier<>(SB)
1312TEXT runtime·gcWriteBarrier2<ABIInternal>(SB),NOSPLIT,$0
1313 MOVD $16, R25
1314 JMP gcWriteBarrier<>(SB)
1315TEXT runtime·gcWriteBarrier3<ABIInternal>(SB),NOSPLIT,$0
1316 MOVD $24, R25
1317 JMP gcWriteBarrier<>(SB)
1318TEXT runtime·gcWriteBarrier4<ABIInternal>(SB),NOSPLIT,$0
1319 MOVD $32, R25
1320 JMP gcWriteBarrier<>(SB)
1321TEXT runtime·gcWriteBarrier5<ABIInternal>(SB),NOSPLIT,$0
1322 MOVD $40, R25
1323 JMP gcWriteBarrier<>(SB)
1324TEXT runtime·gcWriteBarrier6<ABIInternal>(SB),NOSPLIT,$0
1325 MOVD $48, R25
1326 JMP gcWriteBarrier<>(SB)
1327TEXT runtime·gcWriteBarrier7<ABIInternal>(SB),NOSPLIT,$0
1328 MOVD $56, R25
1329 JMP gcWriteBarrier<>(SB)
1330TEXT runtime·gcWriteBarrier8<ABIInternal>(SB),NOSPLIT,$0
1331 MOVD $64, R25
1332 JMP gcWriteBarrier<>(SB)
1333
1334DATA debugCallFrameTooLarge<>+0x00(SB)/20, $"call frame too large"
1335GLOBL debugCallFrameTooLarge<>(SB), RODATA, $20 // Size duplicated below
1336
1337// debugCallV2 is the entry point for debugger-injected function
1338// calls on running goroutines. It informs the runtime that a
1339// debug call has been injected and creates a call frame for the
1340// debugger to fill in.
1341//
1342// To inject a function call, a debugger should:
1343// 1. Check that the goroutine is in state _Grunning and that
1344// there are at least 288 bytes free on the stack.
1345// 2. Set SP as SP-16.
1346// 3. Store the current LR in (SP) (using the SP after step 2).
1347// 4. Store the current PC in the LR register.
1348// 5. Write the desired argument frame size at SP-16.
1349// 6. Save all machine registers (including flags and fpsimd registers)
1350// so they can be restored later by the debugger.
1351// 7. Set the PC to debugCallV2 and resume execution.
1352//
1353// If the goroutine is in state _Grunnable, then it's not generally
1354// safe to inject a call because it may return out via other runtime
1355// operations. Instead, the debugger should unwind the stack to find
1356// the return to non-runtime code, add a temporary breakpoint there,
1357// and inject the call once that breakpoint is hit.
1358//
1359// If the goroutine is in any other state, it's not safe to inject a call.
1360//
1361// This function communicates back to the debugger by setting R20 and
1362// invoking BRK to raise a breakpoint signal. Note that the signal PC of
1363// the signal triggered by the BRK instruction is the PC where the signal
1364// is trapped, not the next PC, so to resume execution, the debugger needs
1365// to set the signal PC to PC+4. See the comments in the implementation for
1366// the protocol the debugger is expected to follow. InjectDebugCall in the
1367// runtime tests demonstrates this protocol.
1368//
1369// The debugger must ensure that any pointers passed to the function
1370// obey escape analysis requirements. Specifically, it must not pass
1371// a stack pointer to an escaping argument. debugCallV2 cannot check
1372// this invariant.
1373//
1374// This is ABIInternal because Go code injects its PC directly into new
1375// goroutine stacks.
1376TEXT runtime·debugCallV2<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-0
1377 STP (R29, R30), -280(RSP)
1378 SUB $272, RSP, RSP
1379 SUB $8, RSP, R29
1380 // Save all registers that may contain pointers so they can be
1381 // conservatively scanned.
1382 //
1383 // We can't do anything that might clobber any of these
1384 // registers before this.
1385 STP (R27, g), (30*8)(RSP)
1386 STP (R25, R26), (28*8)(RSP)
1387 STP (R23, R24), (26*8)(RSP)
1388 STP (R21, R22), (24*8)(RSP)
1389 STP (R19, R20), (22*8)(RSP)
1390 STP (R16, R17), (20*8)(RSP)
1391 STP (R14, R15), (18*8)(RSP)
1392 STP (R12, R13), (16*8)(RSP)
1393 STP (R10, R11), (14*8)(RSP)
1394 STP (R8, R9), (12*8)(RSP)
1395 STP (R6, R7), (10*8)(RSP)
1396 STP (R4, R5), (8*8)(RSP)
1397 STP (R2, R3), (6*8)(RSP)
1398 STP (R0, R1), (4*8)(RSP)
1399
1400 // Perform a safe-point check.
1401 MOVD R30, 8(RSP) // Caller's PC
1402 CALL runtime·debugCallCheck(SB)
1403 MOVD 16(RSP), R0
1404 CBZ R0, good
1405
1406 // The safety check failed. Put the reason string at the top
1407 // of the stack.
1408 MOVD R0, 8(RSP)
1409 MOVD 24(RSP), R0
1410 MOVD R0, 16(RSP)
1411
1412 // Set R20 to 8 and invoke BRK. The debugger should get the
1413 // reason a call can't be injected from SP+8 and resume execution.
1414 MOVD $8, R20
1415 BREAK
1416 JMP restore
1417
1418good:
1419 // Registers are saved and it's safe to make a call.
1420 // Open up a call frame, moving the stack if necessary.
1421 //
1422 // Once the frame is allocated, this will set R20 to 0 and
1423 // invoke BRK. The debugger should write the argument
1424 // frame for the call at SP+8, set up argument registers,
1425 // set the LR as the signal PC + 4, set the PC to the function
1426 // to call, set R26 to point to the closure (if a closure call),
1427 // and resume execution.
1428 //
1429 // If the function returns, this will set R20 to 1 and invoke
1430 // BRK. The debugger can then inspect any return value saved
1431 // on the stack at SP+8 and in registers. To resume execution,
1432 // the debugger should restore the LR from (SP).
1433 //
1434 // If the function panics, this will set R20 to 2 and invoke BRK.
1435 // The interface{} value of the panic will be at SP+8. The debugger
1436 // can inspect the panic value and resume execution again.
1437#define DEBUG_CALL_DISPATCH(NAME,MAXSIZE) \
1438 CMP $MAXSIZE, R0; \
1439 BGT 5(PC); \
1440 MOVD $NAME(SB), R0; \
1441 MOVD R0, 8(RSP); \
1442 CALL runtime·debugCallWrap(SB); \
1443 JMP restore
1444
1445 MOVD 256(RSP), R0 // the argument frame size
1446 DEBUG_CALL_DISPATCH(debugCall32<>, 32)
1447 DEBUG_CALL_DISPATCH(debugCall64<>, 64)
1448 DEBUG_CALL_DISPATCH(debugCall128<>, 128)
1449 DEBUG_CALL_DISPATCH(debugCall256<>, 256)
1450 DEBUG_CALL_DISPATCH(debugCall512<>, 512)
1451 DEBUG_CALL_DISPATCH(debugCall1024<>, 1024)
1452 DEBUG_CALL_DISPATCH(debugCall2048<>, 2048)
1453 DEBUG_CALL_DISPATCH(debugCall4096<>, 4096)
1454 DEBUG_CALL_DISPATCH(debugCall8192<>, 8192)
1455 DEBUG_CALL_DISPATCH(debugCall16384<>, 16384)
1456 DEBUG_CALL_DISPATCH(debugCall32768<>, 32768)
1457 DEBUG_CALL_DISPATCH(debugCall65536<>, 65536)
1458 // The frame size is too large. Report the error.
1459 MOVD $debugCallFrameTooLarge<>(SB), R0
1460 MOVD R0, 8(RSP)
1461 MOVD $20, R0
1462 MOVD R0, 16(RSP) // length of debugCallFrameTooLarge string
1463 MOVD $8, R20
1464 BREAK
1465 JMP restore
1466
1467restore:
1468 // Calls and failures resume here.
1469 //
1470 // Set R20 to 16 and invoke BRK. The debugger should restore
1471 // all registers except for PC and RSP and resume execution.
1472 MOVD $16, R20
1473 BREAK
1474 // We must not modify flags after this point.
1475
1476 // Restore pointer-containing registers, which may have been
1477 // modified from the debugger's copy by stack copying.
1478 LDP (30*8)(RSP), (R27, g)
1479 LDP (28*8)(RSP), (R25, R26)
1480 LDP (26*8)(RSP), (R23, R24)
1481 LDP (24*8)(RSP), (R21, R22)
1482 LDP (22*8)(RSP), (R19, R20)
1483 LDP (20*8)(RSP), (R16, R17)
1484 LDP (18*8)(RSP), (R14, R15)
1485 LDP (16*8)(RSP), (R12, R13)
1486 LDP (14*8)(RSP), (R10, R11)
1487 LDP (12*8)(RSP), (R8, R9)
1488 LDP (10*8)(RSP), (R6, R7)
1489 LDP (8*8)(RSP), (R4, R5)
1490 LDP (6*8)(RSP), (R2, R3)
1491 LDP (4*8)(RSP), (R0, R1)
1492
1493 LDP -8(RSP), (R29, R27)
1494 ADD $288, RSP, RSP // Add 16 more bytes, see saveSigContext
1495 MOVD -16(RSP), R30 // restore old lr
1496 JMP (R27)
1497
1498// runtime.debugCallCheck assumes that functions defined with the
1499// DEBUG_CALL_FN macro are safe points to inject calls.
1500#define DEBUG_CALL_FN(NAME,MAXSIZE) \
1501TEXT NAME(SB),WRAPPER,$MAXSIZE-0; \
1502 NO_LOCAL_POINTERS; \
1503 MOVD $0, R20; \
1504 BREAK; \
1505 MOVD $1, R20; \
1506 BREAK; \
1507 RET
1508DEBUG_CALL_FN(debugCall32<>, 32)
1509DEBUG_CALL_FN(debugCall64<>, 64)
1510DEBUG_CALL_FN(debugCall128<>, 128)
1511DEBUG_CALL_FN(debugCall256<>, 256)
1512DEBUG_CALL_FN(debugCall512<>, 512)
1513DEBUG_CALL_FN(debugCall1024<>, 1024)
1514DEBUG_CALL_FN(debugCall2048<>, 2048)
1515DEBUG_CALL_FN(debugCall4096<>, 4096)
1516DEBUG_CALL_FN(debugCall8192<>, 8192)
1517DEBUG_CALL_FN(debugCall16384<>, 16384)
1518DEBUG_CALL_FN(debugCall32768<>, 32768)
1519DEBUG_CALL_FN(debugCall65536<>, 65536)
1520
1521// func debugCallPanicked(val interface{})
1522TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
1523 // Copy the panic value to the top of stack at SP+8.
1524 MOVD val_type+0(FP), R0
1525 MOVD R0, 8(RSP)
1526 MOVD val_data+8(FP), R0
1527 MOVD R0, 16(RSP)
1528 MOVD $2, R20
1529 BREAK
1530 RET
1531
1532// Note: these functions use a special calling convention to save generated code space.
1533// Arguments are passed in registers, but the space for those arguments is allocated
1534// in the caller's stack frame. These stubs write the args into that stack space and
1535// then tail call to the corresponding runtime handler.
1536// The tail call makes these stubs disappear in backtraces.
1537//
1538// Defined as ABIInternal since the compiler generates ABIInternal
1539// calls to them directly and they do not use the stack-based Go ABI.
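//
// For example, a failing bounds check on s[i] compiles to roughly:
//	MOVD	i, R0       // offending index
//	MOVD	len(s), R1  // length of s
//	CALL	runtime·panicIndex(SB)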
1540TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
1541 JMP runtime·goPanicIndex<ABIInternal>(SB)
1542TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
1543 JMP runtime·goPanicIndexU<ABIInternal>(SB)
1544TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
1545 MOVD R1, R0
1546 MOVD R2, R1
1547 JMP runtime·goPanicSliceAlen<ABIInternal>(SB)
1548TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
1549 MOVD R1, R0
1550 MOVD R2, R1
1551 JMP runtime·goPanicSliceAlenU<ABIInternal>(SB)
1552TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
1553 MOVD R1, R0
1554 MOVD R2, R1
1555 JMP runtime·goPanicSliceAcap<ABIInternal>(SB)
1556TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
1557 MOVD R1, R0
1558 MOVD R2, R1
1559 JMP runtime·goPanicSliceAcapU<ABIInternal>(SB)
1560TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
1561 JMP runtime·goPanicSliceB<ABIInternal>(SB)
1562TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
1563 JMP runtime·goPanicSliceBU<ABIInternal>(SB)
1564TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
1565 MOVD R2, R0
1566 MOVD R3, R1
1567 JMP runtime·goPanicSlice3Alen<ABIInternal>(SB)
1568TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
1569 MOVD R2, R0
1570 MOVD R3, R1
1571 JMP runtime·goPanicSlice3AlenU<ABIInternal>(SB)
1572TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
1573 MOVD R2, R0
1574 MOVD R3, R1
1575 JMP runtime·goPanicSlice3Acap<ABIInternal>(SB)
1576TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
1577 MOVD R2, R0
1578 MOVD R3, R1
1579 JMP runtime·goPanicSlice3AcapU<ABIInternal>(SB)
1580TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
1581 MOVD R1, R0
1582 MOVD R2, R1
1583 JMP runtime·goPanicSlice3B<ABIInternal>(SB)
1584TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
1585 MOVD R1, R0
1586 MOVD R2, R1
1587 JMP runtime·goPanicSlice3BU<ABIInternal>(SB)
1588TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
1589 JMP runtime·goPanicSlice3C<ABIInternal>(SB)
1590TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
1591 JMP runtime·goPanicSlice3CU<ABIInternal>(SB)
1592TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
1593 MOVD R2, R0
1594 MOVD R3, R1
1595 JMP runtime·goPanicSliceConvert<ABIInternal>(SB)
1596
1597TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
1598 MOVD R29, R0
1599 RET