...

Text file src/math/big/arith_arm.s

Documentation: math/big

     1// Copyright 2025 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
     6
     7//go:build !math_big_pure_go
     8
     9#include "textflag.h"
    10
    11// func addVV(z, x, y []Word) (c Word)
      //
      // addVV computes z[i] = x[i] + y[i] + carry for i = 0 .. len(z)-1,
      // propagating the carry from each word into the next, and stores the
      // final carry-out (0 or 1) in c. Only z's length is loaded; x and y are
      // read for that same number of words.
      //
      // The CPU carry flag holds the running carry for the whole function, so
      // the loop bookkeeping between the ADC.S instructions (TEQ $0 and SUB
      // without the .S suffix) is relied upon to leave that flag untouched.
    12TEXT ·addVV(SB), NOSPLIT, $0
    13	MOVW z_len+4(FP), R0	// R0 = len(z)
    14	MOVW x_base+12(FP), R1	// R1 = read pointer into x
    15	MOVW y_base+24(FP), R2	// R2 = read pointer into y
    16	MOVW z_base+0(FP), R3	// R3 = write pointer into z
    17	// compute unrolled loop lengths
    18	AND $3, R0, R4	// R4 = len(z) & 3: words handled one at a time
    19	MOVW R0>>2, R0	// R0 = len(z) >> 2: groups of four words
    20	ADD.S $0, R0	// clear carry (R0 + 0 cannot carry out; R0 is unchanged)
    21loop1:
    22	TEQ $0, R4; BEQ loop1done	// no leftover words: skip the 1X loop
    23loop1cont:
    24	// unroll 1X
    25	MOVW.P 4(R1), R5	// R5 = next word of x; x pointer advances by 4
    26	MOVW.P 4(R2), R6	// R6 = next word of y; y pointer advances by 4
    27	ADC.S R6, R5	// R5 = R5 + R6 + carry; carry flag updated
    28	MOVW.P R5, 4(R3)	// store the sum word; z pointer advances by 4
    29	SUB $1, R4	// one fewer leftover word (no .S: flags not requested)
    30	TEQ $0, R4; BNE loop1cont
    31loop1done:
    32loop4:
    33	TEQ $0, R0; BEQ loop4done	// no groups of four: skip the 4X loop
    34loop4cont:
    35	// unroll 4X
    36	MOVW.P 4(R1), R4	// R4, R5, R6, R7 = next four words of x
    37	MOVW.P 4(R1), R5
    38	MOVW.P 4(R1), R6
    39	MOVW.P 4(R1), R7
    40	MOVW.P 4(R2), R8	// R8, R9, R11, R12 = next four words of y
    41	MOVW.P 4(R2), R9
    42	MOVW.P 4(R2), R11
    43	MOVW.P 4(R2), R12
    44	ADC.S R8, R4	// four chained adds: each consumes and produces the carry
    45	ADC.S R9, R5
    46	ADC.S R11, R6
    47	ADC.S R12, R7
    48	MOVW.P R4, 4(R3)	// store the four sum words to z in order
    49	MOVW.P R5, 4(R3)
    50	MOVW.P R6, 4(R3)
    51	MOVW.P R7, 4(R3)
    52	SUB $1, R0	// one fewer group of four
    53	TEQ $0, R0; BNE loop4cont
    54loop4done:
    55	SBC R1, R1	// save carry: R1 = 0 if the carry flag is set, -1 if clear
    56	ADD $1, R1	// convert add carry: R1 = 1 if carry, 0 otherwise
    57	MOVW R1, c+36(FP)	// return value c
    58	RET
    59
    60// func subVV(z, x, y []Word) (c Word)
      //
      // subVV computes z[i] = x[i] - y[i] - borrow for i = 0 .. len(z)-1,
      // propagating the borrow from each word into the next, and stores the
      // final borrow (0 or 1) in c. Only z's length is loaded; x and y are
      // read for that same number of words.
      //
      // The running borrow lives in the CPU carry flag in ARM's inverted
      // form (carry set = no borrow), so the loop bookkeeping between the
      // SBC.S instructions (TEQ $0 and SUB without the .S suffix) is relied
      // upon to leave that flag untouched.
    61TEXT ·subVV(SB), NOSPLIT, $0
    62	MOVW z_len+4(FP), R0	// R0 = len(z)
    63	MOVW x_base+12(FP), R1	// R1 = read pointer into x
    64	MOVW y_base+24(FP), R2	// R2 = read pointer into y
    65	MOVW z_base+0(FP), R3	// R3 = write pointer into z
    66	// compute unrolled loop lengths
    67	AND $3, R0, R4	// R4 = len(z) & 3: words handled one at a time
    68	MOVW R0>>2, R0	// R0 = len(z) >> 2: groups of four words
    69	SUB.S $0, R0	// clear carry, i.e. start with no borrow pending (R0 is unchanged)
    70loop1:
    71	TEQ $0, R4; BEQ loop1done	// no leftover words: skip the 1X loop
    72loop1cont:
    73	// unroll 1X
    74	MOVW.P 4(R1), R5	// R5 = next word of x; x pointer advances by 4
    75	MOVW.P 4(R2), R6	// R6 = next word of y; y pointer advances by 4
    76	SBC.S R6, R5	// R5 = R5 - R6 - borrow; flags updated
    77	MOVW.P R5, 4(R3)	// store the difference word; z pointer advances by 4
    78	SUB $1, R4	// one fewer leftover word (no .S: flags not requested)
    79	TEQ $0, R4; BNE loop1cont
    80loop1done:
    81loop4:
    82	TEQ $0, R0; BEQ loop4done	// no groups of four: skip the 4X loop
    83loop4cont:
    84	// unroll 4X
    85	MOVW.P 4(R1), R4	// R4, R5, R6, R7 = next four words of x
    86	MOVW.P 4(R1), R5
    87	MOVW.P 4(R1), R6
    88	MOVW.P 4(R1), R7
    89	MOVW.P 4(R2), R8	// R8, R9, R11, R12 = next four words of y
    90	MOVW.P 4(R2), R9
    91	MOVW.P 4(R2), R11
    92	MOVW.P 4(R2), R12
    93	SBC.S R8, R4	// four chained subtracts: each consumes and produces the borrow
    94	SBC.S R9, R5
    95	SBC.S R11, R6
    96	SBC.S R12, R7
    97	MOVW.P R4, 4(R3)	// store the four difference words to z in order
    98	MOVW.P R5, 4(R3)
    99	MOVW.P R6, 4(R3)
   100	MOVW.P R7, 4(R3)
   101	SUB $1, R0	// one fewer group of four
   102	TEQ $0, R0; BNE loop4cont
   103loop4done:
   104	SBC R1, R1	// save carry: R1 = -1 if a borrow is pending, 0 otherwise
   105	RSB $0, R1, R1	// convert sub carry: R1 = 0 - R1, i.e. 1 if borrow, 0 otherwise
   106	MOVW R1, c+36(FP)	// return value c
   107	RET
   108
   109// func lshVU(z, x []Word, s uint) (c Word)
      //
      // lshVU shifts the len(z)-word vector x left by s bits into z.
      // With n = len(z): c = x[n-1] >> (32-s), z[i] = x[i]<<s | x[i-1]>>(32-s)
      // for i = n-1 .. 1, and z[0] = x[0]<<s. When n == 0 the function only
      // stores c = 0.
      //
      // Words are processed from the highest index down to index 0. R4 always
      // holds the already-shifted high part of the next word to be stored.
      // NOTE(review): the 32-s shift count suggests s is expected to be in
      // 1..31 — confirm against the callers.
   110TEXT ·lshVU(SB), NOSPLIT, $0
   111	MOVW z_len+4(FP), R0	// R0 = len(z)
   112	TEQ $0, R0; BEQ ret0	// empty vector: just return c = 0
   113	MOVW s+24(FP), R1	// R1 = s
   114	MOVW x_base+12(FP), R2	// R2 = x base
   115	MOVW z_base+0(FP), R3	// R3 = z base
   116	// run loop backward
   117	ADD R0<<2, R2, R2	// R2 = x base + 4*len(z): one past the last word of x
   118	ADD R0<<2, R3, R3	// R3 = z base + 4*len(z): one past the last word of z
   119	// shift first word into carry
   120	MOVW.W -4(R2), R4	// step x pointer back one word, R4 = top word of x
   121	MOVW $32, R5
   122	SUB R1, R5	// R5 = 32 - s
   123	MOVW R4>>R5, R6	// R6 = bits shifted out of the top word
   124	MOVW R4<<R1, R4	// R4 = top word << s (low bits still to be filled in)
   125	MOVW R6, c+28(FP)	// return value c
   126	// shift remaining words
   127	SUB $1, R0	// one word already consumed
   128	// compute unrolled loop lengths
   129	AND $3, R0, R6	// R6 = remaining & 3: words handled one at a time
   130	MOVW R0>>2, R0	// R0 = remaining >> 2: groups of four words
   131loop1:
   132	TEQ $0, R6; BEQ loop1done	// no leftover words: skip the 1X loop
   133loop1cont:
   134	// unroll 1X
   135	MOVW.W -4(R2), R7	// step x pointer back, R7 = next lower word of x
   136	ORR R7>>R5, R4	// fill the low s bits from the top of the lower word
   137	MOVW.W R4, -4(R3)	// step z pointer back and store the completed word
   138	MOVW R7<<R1, R4	// start the next output word
   139	SUB $1, R6
   140	TEQ $0, R6; BNE loop1cont
   141loop1done:
   142loop4:
   143	TEQ $0, R0; BEQ loop4done	// no groups of four: skip the 4X loop
   144loop4cont:
   145	// unroll 4X
   146	MOVW.W -4(R2), R6	// R6, R7, R8, R9 = next four lower words of x, highest first
   147	MOVW.W -4(R2), R7
   148	MOVW.W -4(R2), R8
   149	MOVW.W -4(R2), R9
   150	ORR R6>>R5, R4	// same combine / store / restart step as the 1X loop, four times
   151	MOVW.W R4, -4(R3)
   152	MOVW R6<<R1, R4
   153	ORR R7>>R5, R4
   154	MOVW.W R4, -4(R3)
   155	MOVW R7<<R1, R4
   156	ORR R8>>R5, R4
   157	MOVW.W R4, -4(R3)
   158	MOVW R8<<R1, R4
   159	ORR R9>>R5, R4
   160	MOVW.W R4, -4(R3)
   161	MOVW R9<<R1, R4
   162	SUB $1, R0	// one fewer group of four
   163	TEQ $0, R0; BNE loop4cont
   164loop4done:
   165	// store final shifted bits
   166	MOVW.W R4, -4(R3)	// lowest output word: last word read << s, with no lower word to merge
   167	RET
   168ret0:
   169	MOVW $0, R1
   170	MOVW R1, c+28(FP)	// c = 0 for an empty vector
   171	RET
   172
   173// func rshVU(z, x []Word, s uint) (c Word)
      //
      // rshVU shifts the len(z)-word vector x right by s bits into z.
      // With n = len(z): c = x[0] << (32-s), z[i] = x[i]>>s | x[i+1]<<(32-s)
      // for i = 0 .. n-2, and z[n-1] = x[n-1]>>s. When n == 0 the function
      // only stores c = 0.
      //
      // Words are processed from index 0 upward. R4 always holds the
      // already-shifted low part of the next word to be stored.
      // NOTE(review): the 32-s shift count suggests s is expected to be in
      // 1..31 — confirm against the callers.
   174TEXT ·rshVU(SB), NOSPLIT, $0
   175	MOVW z_len+4(FP), R0	// R0 = len(z)
   176	TEQ $0, R0; BEQ ret0	// empty vector: just return c = 0
   177	MOVW s+24(FP), R1	// R1 = s
   178	MOVW x_base+12(FP), R2	// R2 = read pointer into x
   179	MOVW z_base+0(FP), R3	// R3 = write pointer into z
   180	// shift first word into carry
   181	MOVW.P 4(R2), R4	// R4 = x[0]; x pointer advances by 4
   182	MOVW $32, R5
   183	SUB R1, R5	// R5 = 32 - s
   184	MOVW R4<<R5, R6	// R6 = bits shifted out of the bottom word
   185	MOVW R4>>R1, R4	// R4 = x[0] >> s (high bits still to be filled in)
   186	MOVW R6, c+28(FP)	// return value c
   187	// shift remaining words
   188	SUB $1, R0	// one word already consumed
   189	// compute unrolled loop lengths
   190	AND $3, R0, R6	// R6 = remaining & 3: words handled one at a time
   191	MOVW R0>>2, R0	// R0 = remaining >> 2: groups of four words
   192loop1:
   193	TEQ $0, R6; BEQ loop1done	// no leftover words: skip the 1X loop
   194loop1cont:
   195	// unroll 1X
   196	MOVW.P 4(R2), R7	// R7 = next higher word of x; x pointer advances by 4
   197	ORR R7<<R5, R4	// fill the high s bits from the bottom of the higher word
   198	MOVW.P R4, 4(R3)	// store the completed word; z pointer advances by 4
   199	MOVW R7>>R1, R4	// start the next output word
   200	SUB $1, R6
   201	TEQ $0, R6; BNE loop1cont
   202loop1done:
   203loop4:
   204	TEQ $0, R0; BEQ loop4done	// no groups of four: skip the 4X loop
   205loop4cont:
   206	// unroll 4X
   207	MOVW.P 4(R2), R6	// R6, R7, R8, R9 = next four higher words of x, lowest first
   208	MOVW.P 4(R2), R7
   209	MOVW.P 4(R2), R8
   210	MOVW.P 4(R2), R9
   211	ORR R6<<R5, R4	// same combine / store / restart step as the 1X loop, four times
   212	MOVW.P R4, 4(R3)
   213	MOVW R6>>R1, R4
   214	ORR R7<<R5, R4
   215	MOVW.P R4, 4(R3)
   216	MOVW R7>>R1, R4
   217	ORR R8<<R5, R4
   218	MOVW.P R4, 4(R3)
   219	MOVW R8>>R1, R4
   220	ORR R9<<R5, R4
   221	MOVW.P R4, 4(R3)
   222	MOVW R9>>R1, R4
   223	SUB $1, R0	// one fewer group of four
   224	TEQ $0, R0; BNE loop4cont
   225loop4done:
   226	// store final shifted bits
   227	MOVW.P R4, 4(R3)	// highest output word: last word read >> s, with no higher word to merge
   228	RET
   229ret0:
   230	MOVW $0, R1
   231	MOVW R1, c+28(FP)	// c = 0 for an empty vector
   232	RET
   233
   234// func mulAddVWW(z, x []Word, m, a Word) (c Word)
      //
      // mulAddVWW multiplies the len(z)-word vector x by the single word m
      // and adds the word a. For i = 0 .. len(z)-1 it forms the 64-bit value
      // x[i]*m + carry, stores its low word in z[i] and keeps its high word
      // as the next carry. The carry starts as a; the final carry is stored
      // in c.
      //
      // R1 holds the carry word between steps. The CPU carry flag is only
      // live inside a step, between an ADD.S/ADC.S and the ADC that follows;
      // the MULLU placed between them in the 4X loop is relied upon not to
      // change the flags.
   235TEXT ·mulAddVWW(SB), NOSPLIT, $0
   236	MOVW m+24(FP), R0	// R0 = m
   237	MOVW a+28(FP), R1	// R1 = carry word, initially a
   238	MOVW z_len+4(FP), R2	// R2 = len(z)
   239	MOVW x_base+12(FP), R3	// R3 = read pointer into x
   240	MOVW z_base+0(FP), R4	// R4 = write pointer into z
   241	// compute unrolled loop lengths
   242	AND $3, R2, R5	// R5 = len(z) & 3: words handled one at a time
   243	MOVW R2>>2, R2	// R2 = len(z) >> 2: groups of four words
   244loop1:
   245	TEQ $0, R5; BEQ loop1done	// no leftover words: skip the 1X loop
   246loop1cont:
   247	// unroll 1X
   248	MOVW.P 4(R3), R6	// R6 = next word of x; x pointer advances by 4
   249	// multiply
   250	MULLU R0, R6, (R7, R6)	// (R7:R6) = m * x word; R7 = high word, R6 = low word
   251	ADD.S R1, R6	// low word += incoming carry word; sets the carry flag
   252	ADC $0, R7, R1	// new carry word = high word + carry flag
   253	MOVW.P R6, 4(R4)	// store the low word; z pointer advances by 4
   254	SUB $1, R5
   255	TEQ $0, R5; BNE loop1cont
   256loop1done:
   257loop4:
   258	TEQ $0, R2; BEQ loop4done	// no groups of four: skip the 4X loop
   259loop4cont:
   260	// unroll 4X in batches of 2
   261	MOVW.P 4(R3), R5	// R5, R6 = next two words of x
   262	MOVW.P 4(R3), R6
   263	// multiply
   264	MULLU R0, R5, (R7, R5)	// (R7:R5) = m * first word
   265	ADD.S R1, R5	// first low word += incoming carry word; sets the carry flag
   266	MULLU R0, R6, (R8, R6)	// (R8:R6) = m * second word
   267	ADC.S R7, R6	// second low word += first high word + carry flag
   268	ADC $0, R8, R1	// new carry word = second high word + carry flag
   269	MOVW.P R5, 4(R4)	// store the two result words to z
   270	MOVW.P R6, 4(R4)
   271	MOVW.P 4(R3), R5	// second batch: same sequence on the next two words of x
   272	MOVW.P 4(R3), R6
   273	// multiply
   274	MULLU R0, R5, (R7, R5)
   275	ADD.S R1, R5
   276	MULLU R0, R6, (R8, R6)
   277	ADC.S R7, R6
   278	ADC $0, R8, R1
   279	MOVW.P R5, 4(R4)
   280	MOVW.P R6, 4(R4)
   281	SUB $1, R2	// one fewer group of four
   282	TEQ $0, R2; BNE loop4cont
   283loop4done:
   284	MOVW R1, c+32(FP)	// return value c = final carry word
   285	RET
   286
   287// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
      //
      // addMulVVWW computes z = x + y*m + a over len(z) words. For
      // i = 0 .. len(z)-1 it forms x[i] + y[i]*m + carry, stores the low word
      // in z[i] and keeps the overflow above 32 bits as the next carry. The
      // carry starts as a; the final carry is stored in c.
      //
      // R1 holds the carry word between steps. Each step runs two carry
      // chains: one folding the incoming carry into the product, and one
      // adding the x word(s); the carry flag out of each chain is added
      // into R1. The MULLU placed inside the first chain in the 4X loop is
      // relied upon not to change the flags.
   288TEXT ·addMulVVWW(SB), NOSPLIT, $0
   289	MOVW m+36(FP), R0	// R0 = m
   290	MOVW a+40(FP), R1	// R1 = carry word, initially a
   291	MOVW z_len+4(FP), R2	// R2 = len(z)
   292	MOVW x_base+12(FP), R3	// R3 = read pointer into x
   293	MOVW y_base+24(FP), R4	// R4 = read pointer into y
   294	MOVW z_base+0(FP), R5	// R5 = write pointer into z
   295	// compute unrolled loop lengths
   296	AND $3, R2, R6	// R6 = len(z) & 3: words handled one at a time
   297	MOVW R2>>2, R2	// R2 = len(z) >> 2: groups of four words
   298loop1:
   299	TEQ $0, R6; BEQ loop1done	// no leftover words: skip the 1X loop
   300loop1cont:
   301	// unroll 1X
   302	MOVW.P 4(R3), R7	// R7 = next word of x; x pointer advances by 4
   303	MOVW.P 4(R4), R8	// R8 = next word of y; y pointer advances by 4
   304	// multiply
   305	MULLU R0, R8, (R9, R8)	// (R9:R8) = m * y word; R9 = high word, R8 = low word
   306	ADD.S R1, R8	// low word += incoming carry word; sets the carry flag
   307	ADC $0, R9, R1	// carry word = high word + carry flag
   308	// add
   309	ADD.S R7, R8	// low word += x word; sets the carry flag
   310	ADC $0, R1	// fold that carry flag into the carry word
   311	MOVW.P R8, 4(R5)	// store the result word; z pointer advances by 4
   312	SUB $1, R6
   313	TEQ $0, R6; BNE loop1cont
   314loop1done:
   315loop4:
   316	TEQ $0, R2; BEQ loop4done	// no groups of four: skip the 4X loop
   317loop4cont:
   318	// unroll 4X in batches of 2
   319	MOVW.P 4(R3), R6	// R6, R7 = next two words of x
   320	MOVW.P 4(R3), R7
   321	MOVW.P 4(R4), R8	// R8, R9 = next two words of y
   322	MOVW.P 4(R4), R9
   323	// multiply
   324	MULLU R0, R8, (R11, R8)	// (R11:R8) = m * first y word
   325	ADD.S R1, R8	// first low word += incoming carry word; sets the carry flag
   326	MULLU R0, R9, (R12, R9)	// (R12:R9) = m * second y word
   327	ADC.S R11, R9	// second low word += first high word + carry flag
   328	ADC $0, R12, R1	// carry word = second high word + carry flag
   329	// add
   330	ADD.S R6, R8	// first result word += first x word; sets the carry flag
   331	ADC.S R7, R9	// second result word += second x word + carry flag
   332	ADC $0, R1	// fold the final carry flag into the carry word
   333	MOVW.P R8, 4(R5)	// store the two result words to z
   334	MOVW.P R9, 4(R5)
   335	MOVW.P 4(R3), R6	// second batch: same sequence on the next two words of x and y
   336	MOVW.P 4(R3), R7
   337	MOVW.P 4(R4), R8
   338	MOVW.P 4(R4), R9
   339	// multiply
   340	MULLU R0, R8, (R11, R8)
   341	ADD.S R1, R8
   342	MULLU R0, R9, (R12, R9)
   343	ADC.S R11, R9
   344	ADC $0, R12, R1
   345	// add
   346	ADD.S R6, R8
   347	ADC.S R7, R9
   348	ADC $0, R1
   349	MOVW.P R8, 4(R5)
   350	MOVW.P R9, 4(R5)
   351	SUB $1, R2	// one fewer group of four
   352	TEQ $0, R2; BNE loop4cont
   353loop4done:
   354	MOVW R1, c+44(FP)	// return value c = final carry word
   355	RET

View as plain text