Text file
src/math/big/arith_386.s
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5//go:build !math_big_pure_go
6
7#include "textflag.h"
8
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
11
// func addVV(z, x, y []Word) (c Word)
//
// Stores x[i] + y[i] + carry into z[i] for i in [0, len(z)) and returns
// the final carry (0 or 1) in c.
//
// Register roles:
//	DX = &z[0], SI = &x[0], DI = &y[0]
//	CX = n (len(z)), BX = i
//	BP = carry between iterations, held as 0 or -1
//	AX = word being computed
TEXT ·addVV(SB),NOSPLIT,$0
	MOVL z_len+4(FP), CX	// n
	MOVL z+0(FP), DX
	MOVL x+12(FP), SI
	MOVL y+24(FP), DI
	XORL BP, BP		// carry = 0
	XORL BX, BX		// i = 0
	CMPL CX, $0
	JLE addVVdone		// n <= 0: nothing to add

addVVloop:
	MOVL (SI)(BX*4), AX	// AX = x[i]
	ADDL BP, BP		// -1 + -1 sets CF, 0 + 0 clears it
	ADCL (DI)(BX*4), AX	// AX += y[i] + CF
	SBBL BP, BP		// BP = -CF (CMPL below would destroy CF)
	MOVL AX, (DX)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++
	CMPL BX, CX
	JL addVVloop		// while i < n

addVVdone:
	NEGL BP			// 0/-1 -> 0/1
	MOVL BP, c+36(FP)
	RET
35
36
// func subVV(z, x, y []Word) (c Word)
//
// Stores x[i] - y[i] - borrow into z[i] for i in [0, len(z)) and returns
// the final borrow (0 or 1) in c.
//
// Register roles:
//	DX = &z[0], SI = &x[0], DI = &y[0]
//	CX = n (len(z)), BX = i
//	BP = borrow between iterations, held as 0 or -1
//	AX = word being computed
TEXT ·subVV(SB),NOSPLIT,$0
	MOVL z_len+4(FP), CX	// n
	MOVL z+0(FP), DX
	MOVL x+12(FP), SI
	MOVL y+24(FP), DI
	XORL BP, BP		// borrow = 0
	XORL BX, BX		// i = 0
	CMPL CX, $0
	JLE subVVdone		// n <= 0: nothing to subtract

subVVloop:
	MOVL (SI)(BX*4), AX	// AX = x[i]
	ADDL BP, BP		// -1 + -1 sets CF, 0 + 0 clears it
	SBBL (DI)(BX*4), AX	// AX -= y[i] + CF
	SBBL BP, BP		// BP = -CF (CMPL below would destroy CF)
	MOVL AX, (DX)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++
	CMPL BX, CX
	JL subVVloop		// while i < n

subVVdone:
	NEGL BP			// 0/-1 -> 0/1
	MOVL BP, c+36(FP)
	RET
61
62
// func addVW(z, x []Word, y Word) (c Word)
//
// Adds the single word y to the vector x, writing the result to z, and
// returns the carry out of the top word in c. With len(z) == 0 the
// loop is skipped and c is simply y.
//
// Register roles:
//	DI = &z[0], SI = &x[0]
//	CX = n (len(z)), BX = i
//	DX = value to add into x[i]: y at first, then the previous carry
TEXT ·addVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), CX	// n
	MOVL y+24(FP), DX	// c = y
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	XORL BX, BX		// i = 0
	CMPL CX, $0
	JLE addVWdone		// n <= 0: return y unchanged

addVWloop:
	ADDL (SI)(BX*4), DX	// DX = x[i] + c, CF = carry out
	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF intact)
	SBBL DX, DX		// DX = -CF
	NEGL DX			// c = CF as 0/1
	ADDL $1, BX		// i++
	CMPL BX, CX
	JL addVWloop		// while i < n

addVWdone:
	MOVL DX, c+28(FP)
	RET
83
84
// func subVW(z, x []Word, y Word) (c Word)
//
// Subtracts the single word y from the vector x, writing the result to
// z, and returns the borrow out of the top word in c. With len(z) == 0
// the loop is skipped and c is simply y.
//
// Register roles:
//	DI = &z[0], SI = &x[0]
//	CX = n (len(z)), BX = i
//	DX = value to subtract from x[i]: y at first, then the previous borrow
//	AX = word being computed
TEXT ·subVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), CX	// n
	MOVL y+24(FP), DX	// c = y
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	XORL BX, BX		// i = 0
	CMPL CX, $0
	JLE subVWdone		// n <= 0: return y unchanged

subVWloop:
	MOVL (SI)(BX*4), AX	// AX = x[i]
	SUBL DX, AX		// AX -= c, CF = borrow out
	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF intact)
	SBBL DX, DX		// DX = -CF
	NEGL DX			// c = CF as 0/1
	ADDL $1, BX		// i++
	CMPL BX, CX
	JL subVWloop		// while i < n

subVWdone:
	MOVL DX, c+28(FP)
	RET
106
107
// func shlVU(z, x []Word, s uint) (c Word)
//
// Shifts the vector x left by s bits into z, walking from the top word
// down to word 0 so that each output word can pull its low bits from
// the next lower input word. c receives the bits shifted out of
// x[n-1]. With len(z) == 0 nothing is stored and c is 0.
//
// Register roles:
//	DI = &z[0], SI = &x[0], CX = s (shift count)
//	BP = i, counting down from n-1
//	BX = w1, the lower of the two words feeding the double shift
//	AX = w, the upper word / double-shift result
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP	// i = n
	SUBL $1, BP		// i = n-1
	JL shlVUempty		// n <= 0

	// n > 0
	MOVL s+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	MOVL (SI)(BP*4), BX	// w1 = x[n-1]
	XORL AX, AX
	SHLL CX, BX, AX		// AX = top s bits of w1 (0 shifted left, filled from w1)
	MOVL AX, c+28(FP)

	CMPL BP, $0
	JLE shlVUlast		// n == 1: only z[0] remains

	// i > 0
shlVUloop:
	MOVL BX, AX		// w = w1 (x[i])
	MOVL -4(SI)(BP*4), BX	// w1 = x[i-1]
	SHLL CX, BX, AX		// AX = w<<s, low bits filled from top of w1
	MOVL AX, (DI)(BP*4)	// z[i] = AX
	SUBL $1, BP		// i--
	JG shlVUloop		// while i > 0

	// i <= 0
shlVUlast:
	SHLL CX, BX		// w1<<s, zero filled
	MOVL BX, (DI)		// z[0] = w1<<s
	RET

shlVUempty:
	MOVL $0, c+28(FP)
	RET
141
142
// func shrVU(z, x []Word, s uint) (c Word)
//
// Shifts the vector x right by s bits into z, walking from word 0 up to
// the top word so that each output word can pull its high bits from
// the next higher input word. c receives the bits shifted out of x[0],
// positioned at the top of the word. With len(z) == 0 nothing is
// stored and c is 0.
//
// Register roles:
//	DI = &z[0], SI = &x[0], CX = s (shift count)
//	DX = n-1 (index of the last word), BP = i
//	BX = w1, the higher of the two words feeding the double shift
//	AX = w, the lower word / double-shift result
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), DX
	SUBL $1, DX		// last = n-1
	JL shrVUempty		// n <= 0

	// n > 0
	MOVL s+24(FP), CX
	MOVL x+12(FP), SI
	MOVL z+0(FP), DI
	MOVL (SI), BX		// w1 = x[0]
	XORL AX, AX
	SHRL CX, BX, AX		// AX = low s bits of w1, moved to the top of the word
	MOVL AX, c+28(FP)

	XORL BP, BP		// i = 0
	CMPL DX, $0
	JLE shrVUlast		// n == 1: only z[0] remains

	// i < n-1
shrVUloop:
	MOVL BX, AX		// w = w1 (x[i])
	MOVL 4(SI)(BP*4), BX	// w1 = x[i+1]
	SHRL CX, BX, AX		// AX = w>>s, high bits filled from bottom of w1
	MOVL AX, (DI)(BP*4)	// z[i] = AX
	ADDL $1, BP		// i++
	CMPL BP, DX
	JL shrVUloop		// while i < n-1

	// i >= n-1
shrVUlast:
	SHRL CX, BX		// w1>>s, zero filled
	MOVL BX, (DI)(DX*4)	// z[n-1] = w1>>s
	RET

shrVUempty:
	MOVL $0, c+28(FP)
	RET
178
179
// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// For each i in [0, len(z)): forms the double-word product x[i]*y, adds
// the running carry (initially r), stores the low word in z[i] and
// keeps the high word as the next carry. The last carry is returned
// in c.
//
// Both slice pointers are advanced to one-past-the-end and the index
// runs from -n up to 0.
//
// Register roles:
//	SI = &z[n], DI = &x[n], BX = i (negative, counts up to 0)
//	CX = y, BP = carry
//	AX, DX = operands/result of MULL (DX:AX = AX * CX)
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// n
	MOVL z+0(FP), SI
	MOVL x+12(FP), DI
	MOVL y+24(FP), CX
	MOVL r+28(FP), BP	// c = r
	LEAL (SI)(BX*4), SI	// SI = &z[n]
	LEAL (DI)(BX*4), DI	// DI = &x[n]
	NEGL BX			// i = -n
	CMPL BX, $0
	JGE mulAddVWWdone	// n <= 0: return r unchanged

mulAddVWWloop:
	MOVL (DI)(BX*4), AX	// AX = x[n+i]
	MULL CX			// DX:AX = x[n+i] * y
	ADDL BP, AX		// low word += c
	ADCL $0, DX		// propagate carry into high word
	MOVL AX, (SI)(BX*4)	// z[n+i] = low word
	MOVL DX, BP		// c = high word
	ADDL $1, BX		// i++
	CMPL BX, $0
	JL mulAddVWWloop	// while i < 0

mulAddVWWdone:
	MOVL BP, c+32(FP)
	RET
205
206
// func addMulVVW(z, x []Word, y Word) (c Word)
//
// For each i in [0, len(z)): forms the double-word product x[i]*y, adds
// the running carry (initially 0) and accumulates the low word into
// z[i] in place; the high word plus both carry-outs becomes the next
// carry. The last carry is returned in c.
//
// Both slice pointers are advanced to one-past-the-end and the index
// runs from -n up to 0.
//
// Register roles:
//	SI = &z[n], DI = &x[n], BX = i (negative, counts up to 0)
//	CX = y, BP = carry
//	AX, DX = operands/result of MULL (DX:AX = AX * CX)
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// n
	MOVL z+0(FP), SI
	MOVL x+12(FP), DI
	MOVL y+24(FP), CX
	LEAL (SI)(BX*4), SI	// SI = &z[n]
	LEAL (DI)(BX*4), DI	// DI = &x[n]
	NEGL BX			// i = -n
	XORL BP, BP		// c = 0
	CMPL BX, $0
	JGE addMulVVWdone	// n <= 0: return 0

addMulVVWloop:
	MOVL (DI)(BX*4), AX	// AX = x[n+i]
	MULL CX			// DX:AX = x[n+i] * y
	ADDL BP, AX		// low word += c
	ADCL $0, DX		// propagate carry into high word
	ADDL AX, (SI)(BX*4)	// z[n+i] += low word
	ADCL $0, DX		// propagate that carry as well
	MOVL DX, BP		// c = high word
	ADDL $1, BX		// i++
	CMPL BX, $0
	JL addMulVVWloop	// while i < 0

addMulVVWdone:
	MOVL BP, c+28(FP)
	RET
233
234
235
View as plain text