Text file
src/math/big/arith_arm.s
1// Copyright 2025 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Code generated by 'go generate' (with ./internal/asmgen). DO NOT EDIT.
6
7//go:build !math_big_pure_go
8
9#include "textflag.h"
10
11// func addVV(z, x, y []Word) (c Word)
//
// addVV computes z[i] = x[i] + y[i] + carry for i = 0 .. len(z)-1, walking
// upward from index 0 with 4-byte words, and returns the final carry in c.
// Only z_len is read: x and y are indexed for len(z) words, so presumably
// callers guarantee both are at least that long (TODO confirm against the
// Go declaration).
//
// Register use:
//   R0          len(z) on entry, then len(z)/4 = 4X loop iterations
//   R1, R2, R3  cursors into x, y and z
//   R4          len(z)%4 = 1X loop iterations (reused for data in the 4X loop)
//   R5-R9, R11, R12  data words
//
// The carry flag is live from the ADD.S below until the SBC at loop4done.
// Loop bookkeeping uses SUB without the .S suffix plus TEQ so that the carry
// left by one ADC.S is still there for the next one.
// NOTE(review): this relies on TEQ with an immediate leaving C unchanged on
// ARM - confirm against the architecture manual.
12TEXT ·addVV(SB), NOSPLIT, $0
13 MOVW z_len+4(FP), R0 // R0 = len(z)
14 MOVW x_base+12(FP), R1 // R1 = &x[0]
15 MOVW y_base+24(FP), R2 // R2 = &y[0]
16 MOVW z_base+0(FP), R3 // R3 = &z[0]
17 // compute unrolled loop lengths
18 AND $3, R0, R4 // R4 = len(z) % 4, words done one at a time
19 MOVW R0>>2, R0 // R0 = len(z) / 4, groups of four words
20 ADD.S $0, R0 // clear carry (adding 0 cannot carry out, R0 keeps its value)
21loop1:
22 TEQ $0, R4; BEQ loop1done // no remainder words: skip the 1X loop
23loop1cont:
24 // unroll 1X
25 MOVW.P 4(R1), R5 // R5 = next x word, post-increment the x cursor
26 MOVW.P 4(R2), R6 // R6 = next y word, post-increment the y cursor
27 ADC.S R6, R5 // R5 = R5 + R6 + carry in, and record carry out
28 MOVW.P R5, 4(R3) // store to z, post-increment the z cursor
29 SUB $1, R4 // no .S here, so the carry from ADC.S is not overwritten
30 TEQ $0, R4; BNE loop1cont
31loop1done:
32loop4:
33 TEQ $0, R0; BEQ loop4done // fewer than four words left: nothing more to do
34loop4cont:
35 // unroll 4X
// Load four x words and four y words first, then run the four ADC.S
// back to back so the carry ripples through them in index order.
36 MOVW.P 4(R1), R4
37 MOVW.P 4(R1), R5
38 MOVW.P 4(R1), R6
39 MOVW.P 4(R1), R7
40 MOVW.P 4(R2), R8
41 MOVW.P 4(R2), R9
42 MOVW.P 4(R2), R11
43 MOVW.P 4(R2), R12
44 ADC.S R8, R4 // lowest-index word of the group, consumes the incoming carry
45 ADC.S R9, R5
46 ADC.S R11, R6
47 ADC.S R12, R7 // highest-index word, leaves the carry for the next group
48 MOVW.P R4, 4(R3)
49 MOVW.P R5, 4(R3)
50 MOVW.P R6, 4(R3)
51 MOVW.P R7, 4(R3)
52 SUB $1, R0 // no .S, the carry stays live across the loop test
53 TEQ $0, R0; BNE loop4cont
54loop4done:
55 SBC R1, R1 // save carry: R1 = R1 - R1 - (1 - C) = C - 1, i.e. 0 or -1
56 ADD $1, R1 // convert add carry: R1 = C, i.e. 1 if the last add carried, else 0
57 MOVW R1, c+36(FP) // return value c
58 RET
59
60// func subVV(z, x, y []Word) (c Word)
//
// subVV computes z[i] = x[i] - y[i] - borrow for i = 0 .. len(z)-1, walking
// upward from index 0 with 4-byte words, and returns the final borrow in c.
// Only z_len is read: x and y are indexed for len(z) words, so presumably
// callers guarantee both are at least that long (TODO confirm against the
// Go declaration).
//
// Register use:
//   R0          len(z) on entry, then len(z)/4 = 4X loop iterations
//   R1, R2, R3  cursors into x, y and z
//   R4          len(z)%4 = 1X loop iterations (reused for data in the 4X loop)
//   R5-R9, R11, R12  data words
//
// The borrow lives in the carry flag from the SUB.S below until the SBC at
// loop4done. On ARM a subtraction leaves C set when there was NO borrow,
// which is why the final conversion differs from the one in an add routine.
// Loop bookkeeping uses SUB without the .S suffix plus TEQ so the flag is
// still intact for the next SBC.S.
// NOTE(review): this relies on TEQ with an immediate leaving C unchanged on
// ARM - confirm against the architecture manual.
61TEXT ·subVV(SB), NOSPLIT, $0
62 MOVW z_len+4(FP), R0 // R0 = len(z)
63 MOVW x_base+12(FP), R1 // R1 = &x[0]
64 MOVW y_base+24(FP), R2 // R2 = &y[0]
65 MOVW z_base+0(FP), R3 // R3 = &z[0]
66 // compute unrolled loop lengths
67 AND $3, R0, R4 // R4 = len(z) % 4, words done one at a time
68 MOVW R0>>2, R0 // R0 = len(z) / 4, groups of four words
69 SUB.S $0, R0 // clear carry (R0 - 0 never borrows, R0 keeps its value)
70loop1:
71 TEQ $0, R4; BEQ loop1done // no remainder words: skip the 1X loop
72loop1cont:
73 // unroll 1X
74 MOVW.P 4(R1), R5 // R5 = next x word, post-increment the x cursor
75 MOVW.P 4(R2), R6 // R6 = next y word, post-increment the y cursor
76 SBC.S R6, R5 // R5 = R5 - R6 - borrow in, and record borrow out
77 MOVW.P R5, 4(R3) // store to z, post-increment the z cursor
78 SUB $1, R4 // no .S here, so the borrow from SBC.S is not overwritten
79 TEQ $0, R4; BNE loop1cont
80loop1done:
81loop4:
82 TEQ $0, R0; BEQ loop4done // fewer than four words left: nothing more to do
83loop4cont:
84 // unroll 4X
// Load four x words and four y words first, then run the four SBC.S
// back to back so the borrow ripples through them in index order.
85 MOVW.P 4(R1), R4
86 MOVW.P 4(R1), R5
87 MOVW.P 4(R1), R6
88 MOVW.P 4(R1), R7
89 MOVW.P 4(R2), R8
90 MOVW.P 4(R2), R9
91 MOVW.P 4(R2), R11
92 MOVW.P 4(R2), R12
93 SBC.S R8, R4 // lowest-index word of the group, consumes the incoming borrow
94 SBC.S R9, R5
95 SBC.S R11, R6
96 SBC.S R12, R7 // highest-index word, leaves the borrow for the next group
97 MOVW.P R4, 4(R3)
98 MOVW.P R5, 4(R3)
99 MOVW.P R6, 4(R3)
100 MOVW.P R7, 4(R3)
101 SUB $1, R0 // no .S, the borrow stays live across the loop test
102 TEQ $0, R0; BNE loop4cont
103loop4done:
104 SBC R1, R1 // save carry: R1 = R1 - R1 - borrow = 0 or -1
105 RSB $0, R1, R1 // convert sub carry: R1 = 0 - R1, i.e. 1 if the last subtract borrowed, else 0
106 MOVW R1, c+36(FP) // return value c
107 RET
108
109// func lshVU(z, x []Word, s uint) (c Word)
//
// lshVU shifts the multi-word value x left by s bits into z and returns in c
// the bits shifted out of the top word. With n = len(z) and 32-bit words:
//   c      = x[n-1] >> (32-s)
//   z[i]   = x[i] << s | x[i-1] >> (32-s)   for i = n-1 down to 1
//   z[0]   = x[0] << s
// Words are processed from the highest index down to index 0, each x word is
// read before the z word at the same index is written.
// If len(z) is 0 nothing is read or written and c is 0.
// Only z_len is read, so presumably callers guarantee len(x) >= len(z).
// NOTE(review): the code forms 32-s and uses it as a register shift amount,
// presumably callers keep 0 < s < 32 - confirm against the Go declaration.
//
// Register use:
//   R0      len(z), then len(z)-1, then (len(z)-1)/4 = 4X loop iterations
//   R1      s
//   R2, R3  cursors into x and z, starting one past the end
//   R4      pending output word: the already-shifted bits of the last x word read
//   R5      32 - s
//   R6      carry-out bits, then (len(z)-1)%4 = 1X loop iterations
//   R6-R9   x words inside the loops
110TEXT ·lshVU(SB), NOSPLIT, $0
111 MOVW z_len+4(FP), R0 // R0 = len(z)
112 TEQ $0, R0; BEQ ret0 // empty z: just return c = 0
113 MOVW s+24(FP), R1 // R1 = s
114 MOVW x_base+12(FP), R2 // R2 = &x[0]
115 MOVW z_base+0(FP), R3 // R3 = &z[0]
116 // run loop backward
117 ADD R0<<2, R2, R2 // R2 = &x[len(z)], one past the last word used
118 ADD R0<<2, R3, R3 // R3 = &z[len(z)]
119 // shift first word into carry
120 MOVW.W -4(R2), R4 // pre-decrement the x cursor, R4 = top x word
121 MOVW $32, R5
122 SUB R1, R5 // R5 = 32 - s
123 MOVW R4>>R5, R6 // R6 = bits of the top word that fall off the left end
124 MOVW R4<<R1, R4 // R4 = remaining bits, moved into position
125 MOVW R6, c+28(FP) // return value c
126 // shift remaining words
127 SUB $1, R0 // the top word has already been consumed
128 // compute unrolled loop lengths
129 AND $3, R0, R6 // R6 = remaining words % 4
130 MOVW R0>>2, R0 // R0 = remaining words / 4
131loop1:
132 TEQ $0, R6; BEQ loop1done
133loop1cont:
134 // unroll 1X
135 MOVW.W -4(R2), R7 // R7 = next lower x word
136 ORR R7>>R5, R4 // fill the low s bits of the pending word from R7
137 MOVW.W R4, -4(R3) // pre-decrement the z cursor and store the finished word
138 MOVW R7<<R1, R4 // start the next pending word
139 SUB $1, R6
140 TEQ $0, R6; BNE loop1cont
141loop1done:
142loop4:
143 TEQ $0, R0; BEQ loop4done
144loop4cont:
145 // unroll 4X
// Four x words are loaded up front, then the same OR, store, shift
// step as in the 1X loop is repeated for each of them in turn.
146 MOVW.W -4(R2), R6
147 MOVW.W -4(R2), R7
148 MOVW.W -4(R2), R8
149 MOVW.W -4(R2), R9
150 ORR R6>>R5, R4
151 MOVW.W R4, -4(R3)
152 MOVW R6<<R1, R4
153 ORR R7>>R5, R4
154 MOVW.W R4, -4(R3)
155 MOVW R7<<R1, R4
156 ORR R8>>R5, R4
157 MOVW.W R4, -4(R3)
158 MOVW R8<<R1, R4
159 ORR R9>>R5, R4
160 MOVW.W R4, -4(R3)
161 MOVW R9<<R1, R4
162 SUB $1, R0
163 TEQ $0, R0; BNE loop4cont
164loop4done:
165 // store final shifted bits
166 MOVW.W R4, -4(R3) // z[0] = x[0] << s, nothing is shifted in from below
167 RET
168ret0:
169 MOVW $0, R1
170 MOVW R1, c+28(FP) // c = 0 for empty input
171 RET
172
173// func rshVU(z, x []Word, s uint) (c Word)
//
// rshVU shifts the multi-word value x right by s bits into z and returns in c
// the bits shifted out of the bottom word. With n = len(z) and 32-bit words:
//   c        = x[0] << (32-s)
//   z[i]     = x[i] >> s | x[i+1] << (32-s)   for i = 0 up to n-2
//   z[n-1]   = x[n-1] >> s
// Words are processed from index 0 upward, each x word is read before the
// z word at the same index is written.
// If len(z) is 0 nothing is read or written and c is 0.
// Only z_len is read, so presumably callers guarantee len(x) >= len(z).
// NOTE(review): the code forms 32-s and uses it as a register shift amount,
// presumably callers keep 0 < s < 32 - confirm against the Go declaration.
//
// Register use:
//   R0      len(z), then len(z)-1, then (len(z)-1)/4 = 4X loop iterations
//   R1      s
//   R2, R3  cursors into x and z
//   R4      pending output word: the already-shifted bits of the last x word read
//   R5      32 - s
//   R6      carry-out bits, then (len(z)-1)%4 = 1X loop iterations
//   R6-R9   x words inside the loops
174TEXT ·rshVU(SB), NOSPLIT, $0
175 MOVW z_len+4(FP), R0 // R0 = len(z)
176 TEQ $0, R0; BEQ ret0 // empty z: just return c = 0
177 MOVW s+24(FP), R1 // R1 = s
178 MOVW x_base+12(FP), R2 // R2 = &x[0]
179 MOVW z_base+0(FP), R3 // R3 = &z[0]
180 // shift first word into carry
181 MOVW.P 4(R2), R4 // R4 = x[0], post-increment the x cursor
182 MOVW $32, R5
183 SUB R1, R5 // R5 = 32 - s
184 MOVW R4<<R5, R6 // R6 = bits of x[0] that fall off the right end, left-aligned
185 MOVW R4>>R1, R4 // R4 = remaining bits, moved into position
186 MOVW R6, c+28(FP) // return value c
187 // shift remaining words
188 SUB $1, R0 // x[0] has already been consumed
189 // compute unrolled loop lengths
190 AND $3, R0, R6 // R6 = remaining words % 4
191 MOVW R0>>2, R0 // R0 = remaining words / 4
192loop1:
193 TEQ $0, R6; BEQ loop1done
194loop1cont:
195 // unroll 1X
196 MOVW.P 4(R2), R7 // R7 = next higher x word
197 ORR R7<<R5, R4 // fill the high s bits of the pending word from R7
198 MOVW.P R4, 4(R3) // store the finished word, post-increment the z cursor
199 MOVW R7>>R1, R4 // start the next pending word
200 SUB $1, R6
201 TEQ $0, R6; BNE loop1cont
202loop1done:
203loop4:
204 TEQ $0, R0; BEQ loop4done
205loop4cont:
206 // unroll 4X
// Four x words are loaded up front, then the same OR, store, shift
// step as in the 1X loop is repeated for each of them in turn.
207 MOVW.P 4(R2), R6
208 MOVW.P 4(R2), R7
209 MOVW.P 4(R2), R8
210 MOVW.P 4(R2), R9
211 ORR R6<<R5, R4
212 MOVW.P R4, 4(R3)
213 MOVW R6>>R1, R4
214 ORR R7<<R5, R4
215 MOVW.P R4, 4(R3)
216 MOVW R7>>R1, R4
217 ORR R8<<R5, R4
218 MOVW.P R4, 4(R3)
219 MOVW R8>>R1, R4
220 ORR R9<<R5, R4
221 MOVW.P R4, 4(R3)
222 MOVW R9>>R1, R4
223 SUB $1, R0
224 TEQ $0, R0; BNE loop4cont
225loop4done:
226 // store final shifted bits
227 MOVW.P R4, 4(R3) // top word: x[len(z)-1] >> s, nothing is shifted in from above
228 RET
229ret0:
230 MOVW $0, R1
231 MOVW R1, c+28(FP) // c = 0 for empty input
232 RET
233
234// func mulAddVWW(z, x []Word, m, a Word) (c Word)
//
// mulAddVWW computes z = x*m + a over len(z) 32-bit words, walking upward
// from index 0, and returns the final carry word in c. For each word the
// 64-bit product x[i]*m is formed, the running carry word is added to its
// low half and stored to z[i], and its high half plus the carry bit from
// that addition becomes the running carry word for the next index. The
// running carry word starts as a.
// If len(z) is 0 both loops are skipped and c = a.
// Only z_len is read, so presumably callers guarantee len(x) >= len(z).
//
// Register use:
//   R0      m
//   R1      running carry word (a on entry, c on exit)
//   R2      len(z), then len(z)/4 = 4X loop iterations
//   R3, R4  cursors into x and z
//   R5      len(z)%4 = 1X loop iterations (reused for data in the 4X loop)
//   R5-R8   product halves
235TEXT ·mulAddVWW(SB), NOSPLIT, $0
236 MOVW m+24(FP), R0 // R0 = m
237 MOVW a+28(FP), R1 // R1 = a, the initial carry word
238 MOVW z_len+4(FP), R2 // R2 = len(z)
239 MOVW x_base+12(FP), R3 // R3 = &x[0]
240 MOVW z_base+0(FP), R4 // R4 = &z[0]
241 // compute unrolled loop lengths
242 AND $3, R2, R5 // R5 = len(z) % 4
243 MOVW R2>>2, R2 // R2 = len(z) / 4
244loop1:
245 TEQ $0, R5; BEQ loop1done
246loop1cont:
247 // unroll 1X
248 MOVW.P 4(R3), R6 // R6 = next x word, post-increment the x cursor
249 // multiply
250 MULLU R0, R6, (R7, R6) // R7:R6 = x word * m (high half in R7, low half in R6)
251 ADD.S R1, R6 // low half += carry word, sets the carry flag
252 ADC $0, R7, R1 // next carry word = high half + carry flag
253 MOVW.P R6, 4(R4) // store to z, post-increment the z cursor
254 SUB $1, R5
255 TEQ $0, R5; BNE loop1cont
256loop1done:
257loop4:
258 TEQ $0, R2; BEQ loop4done
259loop4cont:
260 // unroll 4X in batches of 2
// Each batch handles two words. The second MULLU sits between ADD.S and
// ADC.S, so the code depends on MULLU (no .S suffix) leaving the carry
// flag from ADD.S intact.
261 MOVW.P 4(R3), R5
262 MOVW.P 4(R3), R6
263 // multiply
264 MULLU R0, R5, (R7, R5) // R7:R5 = first word * m
265 ADD.S R1, R5 // low half of first += carry word
266 MULLU R0, R6, (R8, R6) // R8:R6 = second word * m
267 ADC.S R7, R6 // low half of second += high half of first + carry flag
268 ADC $0, R8, R1 // next carry word = high half of second + carry flag
269 MOVW.P R5, 4(R4)
270 MOVW.P R6, 4(R4)
271 MOVW.P 4(R3), R5 // second batch, same sequence as the first
272 MOVW.P 4(R3), R6
273 // multiply
274 MULLU R0, R5, (R7, R5)
275 ADD.S R1, R5
276 MULLU R0, R6, (R8, R6)
277 ADC.S R7, R6
278 ADC $0, R8, R1
279 MOVW.P R5, 4(R4)
280 MOVW.P R6, 4(R4)
281 SUB $1, R2
282 TEQ $0, R2; BNE loop4cont
283loop4done:
284 MOVW R1, c+32(FP) // return the final carry word
285 RET
286
287// func addMulVVWW(z, x, y []Word, m, a Word) (c Word)
//
// addMulVVWW computes z = x + y*m + a over len(z) 32-bit words, walking
// upward from index 0, and returns the final carry word in c. For each word
// the 64-bit product y[i]*m is formed, the running carry word and then x[i]
// are added to its low half (stored to z[i]), and the high half plus the
// carry bits from both additions becomes the next running carry word. The
// running carry word starts as a.
// The carry word cannot overflow: with 32-bit words the largest possible
// y[i]*m + carry + x[i] is (2^32-1)^2 + 2*(2^32-1) = 2^64-1.
// If len(z) is 0 both loops are skipped and c = a.
// Only z_len is read, so presumably callers guarantee x and y hold at least
// len(z) words each (TODO confirm against the Go declaration).
//
// Register use:
//   R0          m
//   R1          running carry word (a on entry, c on exit)
//   R2          len(z), then len(z)/4 = 4X loop iterations
//   R3, R4, R5  cursors into x, y and z
//   R6          len(z)%4 = 1X loop iterations (reused for data in the 4X loop)
//   R6, R7      x words
//   R8, R9, R11, R12  y words and product halves
288TEXT ·addMulVVWW(SB), NOSPLIT, $0
289 MOVW m+36(FP), R0 // R0 = m
290 MOVW a+40(FP), R1 // R1 = a, the initial carry word
291 MOVW z_len+4(FP), R2 // R2 = len(z)
292 MOVW x_base+12(FP), R3 // R3 = &x[0]
293 MOVW y_base+24(FP), R4 // R4 = &y[0]
294 MOVW z_base+0(FP), R5 // R5 = &z[0]
295 // compute unrolled loop lengths
296 AND $3, R2, R6 // R6 = len(z) % 4
297 MOVW R2>>2, R2 // R2 = len(z) / 4
298loop1:
299 TEQ $0, R6; BEQ loop1done
300loop1cont:
301 // unroll 1X
302 MOVW.P 4(R3), R7 // R7 = next x word, post-increment the x cursor
303 MOVW.P 4(R4), R8 // R8 = next y word, post-increment the y cursor
304 // multiply
305 MULLU R0, R8, (R9, R8) // R9:R8 = y word * m (high half in R9, low half in R8)
306 ADD.S R1, R8 // low half += carry word, sets the carry flag
307 ADC $0, R9, R1 // next carry word = high half + carry flag
308 // add
309 ADD.S R7, R8 // low half += x word, sets the carry flag
310 ADC $0, R1 // fold that carry into the carry word
311 MOVW.P R8, 4(R5) // store to z, post-increment the z cursor
312 SUB $1, R6
313 TEQ $0, R6; BNE loop1cont
314loop1done:
315loop4:
316 TEQ $0, R2; BEQ loop4done
317loop4cont:
318 // unroll 4X in batches of 2
// Each batch handles two words with two carry chains: first the products
// plus the incoming carry word, then the x words. The second MULLU sits
// between ADD.S and ADC.S, so the code depends on MULLU (no .S suffix)
// leaving the carry flag from ADD.S intact.
319 MOVW.P 4(R3), R6 // two x words
320 MOVW.P 4(R3), R7
321 MOVW.P 4(R4), R8 // two y words
322 MOVW.P 4(R4), R9
323 // multiply
324 MULLU R0, R8, (R11, R8) // R11:R8 = first y word * m
325 ADD.S R1, R8 // low half of first += carry word
326 MULLU R0, R9, (R12, R9) // R12:R9 = second y word * m
327 ADC.S R11, R9 // low half of second += high half of first + carry flag
328 ADC $0, R12, R1 // next carry word = high half of second + carry flag
329 // add
330 ADD.S R6, R8 // first result += first x word
331 ADC.S R7, R9 // second result += second x word + carry flag
332 ADC $0, R1 // fold the last carry into the carry word
333 MOVW.P R8, 4(R5)
334 MOVW.P R9, 4(R5)
335 MOVW.P 4(R3), R6 // second batch, same sequence as the first
336 MOVW.P 4(R3), R7
337 MOVW.P 4(R4), R8
338 MOVW.P 4(R4), R9
339 // multiply
340 MULLU R0, R8, (R11, R8)
341 ADD.S R1, R8
342 MULLU R0, R9, (R12, R9)
343 ADC.S R11, R9
344 ADC $0, R12, R1
345 // add
346 ADD.S R6, R8
347 ADC.S R7, R9
348 ADC $0, R1
349 MOVW.P R8, 4(R5)
350 MOVW.P R9, 4(R5)
351 SUB $1, R2
352 TEQ $0, R2; BNE loop4cont
353loop4done:
354 MOVW R1, c+44(FP) // return the final carry word
355 RET
View as plain text