Text file
src/math/atan2_s390x.s
Documentation: math
1// Copyright 2017 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5#include "textflag.h"
6
7#define PosInf 0x7FF0000000000000 // float64 bit pattern of +Inf
8#define NegInf 0xFFF0000000000000 // float64 bit pattern of -Inf
9#define NegZero 0x8000000000000000 // float64 bit pattern of -0 (only the sign bit set)
10#define Pi 0x400921FB54442D18 // float64 bit pattern of Pi
11#define NegPi 0xC00921FB54442D18 // float64 bit pattern of -Pi
12#define Pi3Div4 0x4002D97C7F3321D2 // 3Pi/4
13#define NegPi3Div4 0xC002D97C7F3321D2 // -3Pi/4
14#define PiDiv4 0x3FE921FB54442D18 // Pi/4
15#define NegPiDiv4 0xBFE921FB54442D18 // -Pi/4
16
17// Minimax polynomial coefficients and other constants; atan2Asm reads them by fixed byte offset through R9.
18DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00
19DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00
20DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00
21DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00
22DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01
23DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01
24DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01
25DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01
26DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
27DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
28DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
29DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
30DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
31DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
32DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
33DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
34DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
35DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
36DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
37DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
38GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160 // 20 float64 entries (20 * 8 = 160 bytes)
39
40DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b // entry 0: +c (raw float64 bits)
41DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b // entry 1: -c (sign bit set)
42DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b // entry 2: +2c (exponent one higher)
43DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b // entry 3: -2c
44GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32 // 4 entries; atan2Asm selects one with an 8-byte-scaled index
45DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000 // multiplied with the selected atan2xpi2h entry; NOTE(review): c * this value looks like Pi/2 - confirm
46GLOBL ·atan2xpim<> + 0(SB), RODATA, $8
47
48// Atan2 returns the arc tangent of y/x, using
49// the signs of the two to determine the quadrant
50// of the return value.
51//
52// Special cases are (in order):
53// Atan2(y, NaN) = NaN
54// Atan2(NaN, x) = NaN
55// Atan2(+0, x>=0) = +0
56// Atan2(-0, x>=0) = -0
57// Atan2(+0, x<=-0) = +Pi
58// Atan2(-0, x<=-0) = -Pi
59// Atan2(y>0, 0) = +Pi/2
60// Atan2(y<0, 0) = -Pi/2
61// Atan2(+Inf, +Inf) = +Pi/4
62// Atan2(-Inf, +Inf) = -Pi/4
63// Atan2(+Inf, -Inf) = 3Pi/4
64// Atan2(-Inf, -Inf) = -3Pi/4
65// Atan2(y, +Inf) = 0
66// Atan2(y>0, -Inf) = +Pi
67// Atan2(y<0, -Inf) = -Pi
68// Atan2(+Inf, x) = +Pi/2
69// Atan2(-Inf, x) = -Pi/2
70// The algorithm used is minimax polynomial approximation
71// with coefficients determined with a Remez exchange algorithm.
72
73TEXT ·atan2Asm(SB), NOSPLIT, $0-24 // arguments at x+0(FP) and y+8(FP), result at ret+16(FP)
74 // special case: the checks below work on the raw 64-bit patterns of the arguments
75 MOVD x+0(FP), R1 // R1 = bits of the first argument (x)
76 MOVD y+8(FP), R2 // R2 = bits of the second argument (y)
77
78 // special case Atan2(NaN, y) = NaN
79 MOVD $~(1<<63), R5 // mask with every bit set except the sign bit
80 AND R1, R5 // R5 = bits of |x|
81 MOVD $PosInf, R3
82 CMPUBLT R3, R5, returnX // unsigned +Inf bits < |x| bits, i.e. x is NaN: return x
83
84 // special case Atan2(x, NaN) = NaN
85 MOVD $~(1<<63), R5
86 AND R2, R5 // R5 = bits of |y|
87 CMPUBLT R3, R5, returnY // R3 still holds PosInf; y is NaN: return y
88
89 MOVD $NegZero, R3
90 CMPUBEQ R3, R1, xIsNegZero // x == -0
91
92 MOVD $0, R3
93 CMPUBEQ R3, R1, xIsPosZero // x == +0
94
95 MOVD $PosInf, R4
96 CMPUBEQ R4, R2, yIsPosInf // y == +Inf
97
98 MOVD $NegInf, R4
99 CMPUBEQ R4, R2, yIsNegInf // y == -Inf
100 BR Normal // no special case matched
101xIsNegZero:
102 // special case Atan2(-0, y>=0) = -0
103 MOVD $0, R4
104 CMPBLE R4, R2, returnX // signed 0 <= y bits, i.e. the sign bit of y is clear
105
106 //special case Atan2(-0, y<=-0) = -Pi
107 MOVD $NegZero, R4
108 CMPBGE R4, R2, returnNegPi // NOTE(review): NegZero is the smallest signed 64-bit value, so this looks taken only for y == -0; other negative y go to Normal
109 BR Normal
110xIsPosZero:
111 //special case Atan2(0, 0) = 0
112 MOVD $0, R4
113 CMPUBEQ R4, R2, returnX // y == +0: return x, which is +0 here
114
115 //special case Atan2(0, y<=-0) = Pi
116 MOVD $NegZero, R4
117 CMPBGE R4, R2, returnPi // NOTE(review): as in xIsNegZero, this signed compare looks taken only for y == -0
118 BR Normal
119yIsNegInf:
120 //special case Atan2(+Inf, -Inf) = 3Pi/4
121 MOVD $PosInf, R3
122 CMPUBEQ R3, R1, posInfNegInf
123
124 //special case Atan2(-Inf, -Inf) = -3Pi/4
125 MOVD $NegInf, R3
126 CMPUBEQ R3, R1, negInfNegInf
127 BR Normal // x is finite: handled by the general path
128yIsPosInf:
129 //special case Atan2(+Inf, +Inf) = Pi/4
130 MOVD $PosInf, R3
131 CMPUBEQ R3, R1, posInfPosInf
132
133 //special case Atan2(-Inf, +Inf) = -Pi/4
134 MOVD $NegInf, R3
135 CMPUBEQ R3, R1, negInfPosInf
136
137 //special case Atan2(x, +Inf) = Copysign(0, x)
138 CMPBLT R1, $0, returnNegZero // x bits negative as a signed integer, i.e. sign bit of x is set
139 BR returnPosZero
140
141Normal:
142 FMOVD x+0(FP), F0 // F0 = x
143 FMOVD y+8(FP), F2 // F2 = y
144 MOVD $·atan2rodataL25<>+0(SB), R9 // R9 = base address of the coefficient table
145 LGDR F0, R2 // R2 = bits of x
146 LGDR F2, R1 // R1 = bits of y
147 RISBGNZ $32, $63, $32, R2, R2 // rotate by 32: low word of R2 now holds the high word of x
148 RISBGNZ $32, $63, $32, R1, R1 // likewise, low word of R1 = high word of y
149 WORD $0xB9170032 //llgtr %r3,%r2 : R3 = low 31 bits of R2 (high word of x with its sign bit dropped)
150 RISBGZ $63, $63, $33, R2, R5 // R5 = a single bit taken from R2; presumably the sign bit of x as 0/1 - TODO confirm rotate amount
151 WORD $0xB9170041 //llgtr %r4,%r1 : R4 = low 31 bits of R1 (high word of y with its sign bit dropped)
152 WFLCDB V0, V20 // V20 = -x, kept for the |x| computations at L22 and L24
153 MOVW R4, R6
154 MOVW R3, R7
155 CMPUBLT R6, R7, L17 // high word of |y| < high word of |x|: use the other quotient (L17)
156 WFDDB V2, V0, V3 // V3 = quotient of the two arguments; presumably x/y - TODO confirm operand order
157 ADDW $2, R5, R2 // R2 = R5 + 2: selects entry 2 or 3 of atan2xpi2h if the fix-up at L18 runs
158 MOVW R4, R6
159 MOVW R3, R7
160 CMPUBLE R6, R7, L20 // high words equal (less-than was excluded above): compare full magnitudes at L20
161L3: // polynomial evaluation on the reduced argument in V3
162 WFMDB V3, V3, V4 // V4 = V3 * V3
163 VLEG $0, 152(R9), V18
164 VLEG $0, 144(R9), V16
165 FMOVD 136(R9), F1
166 FMOVD 128(R9), F5
167 FMOVD 120(R9), F6
168 WFMADB V4, V16, V5, V16
169 WFMADB V4, V6, V1, V6
170 FMOVD 112(R9), F7
171 WFMDB V4, V4, V1 // V1 = V4 * V4
172 WFMADB V4, V7, V18, V7
173 VLEG $0, 104(R9), V18
174 WFMADB V1, V6, V16, V6
175 CMPWU R4, R3 // compares the |y| and |x| high words; result presumably consumed by the BLT/BGT after the polynomial
176 FMOVD 96(R9), F5
177 VLEG $0, 88(R9), V16
178 WFMADB V4, V5, V18, V5
179 VLEG $0, 80(R9), V18
180 VLEG $0, 72(R9), V22
181 WFMADB V4, V16, V18, V16
182 VLEG $0, 64(R9), V18
183 WFMADB V1, V7, V5, V7
184 WFMADB V4, V18, V22, V18
185 WFMDB V1, V1, V5 // V5 = V1 * V1
186 WFMADB V1, V16, V18, V16
187 VLEG $0, 56(R9), V18
188 WFMADB V5, V6, V7, V6
189 VLEG $0, 48(R9), V22
190 FMOVD 40(R9), F7
191 WFMADB V4, V7, V18, V7
192 VLEG $0, 32(R9), V18
193 WFMADB V5, V6, V16, V6
194 WFMADB V4, V18, V22, V18
195 VLEG $0, 24(R9), V16
196 WFMADB V1, V7, V18, V7
197 VLEG $0, 16(R9), V18
198 VLEG $0, 8(R9), V22
199 WFMADB V4, V18, V16, V18
200 VLEG $0, 0(R9), V16
201 WFMADB V5, V6, V7, V6
202 WFMADB V4, V16, V22, V16
203 FMUL F3, F4 // F4 = F4 * F3
204 WFMADB V1, V18, V16, V1
205 FMADD F6, F5, F1 // F1 = F6 * F5 + F1
206 WFMADB V4, V1, V3, V4 // V4 = V4 * V1 + V3; F4 is the value stored at L1
207 BLT L18 // branches on the condition code, presumably the one left by CMPWU above
208 BGT L7
209 LTDBR F2, F2 // high words were equal: test the sign of y
210 BLTU L21 // negative: replace F2 by -F2 at L21
211L8:
212 LTDBR F0, F0 // test the sign of x
213 BLTU L22 // negative: replace V0 by V20 (= -x) at L22
214L9:
215 WFCHDBS V2, V0, V0 // compare the two magnitudes and set the condition code
216 BNE L18
217L7:
218 MOVW R1, R6 // low word of R1 = high word of y, sign bit included
219 CMPBGE R6, $0, L1 // sign bit of y clear: return F4 without the fix-up
220L18: // fix-up: F4 += atan2xpim * atan2xpi2h[R2 & 7]
221 RISBGZ $58, $60, $3, R2, R2 // R2 = (R2 & 7) << 3, a byte offset into atan2xpi2h
222 MOVD $·atan2xpi2h<>+0(SB), R1 // R1 = address of the table
223 MOVD ·atan2xpim<>+0(SB), R3 // R3 = the 8-byte constant itself (loaded, not its address)
224 LDGR R3, F0 // F0 = that constant as a float64
225 WORD $0xED021000 //madb %f4,%f0,0(%r2,%r1) : F4 = F0 * (8 bytes at R1+R2) + F4
226 BYTE $0x40
227 BYTE $0x1E
228L1:
229 FMOVD F4, ret+16(FP) // store the result
230 RET
231
232L20: // tie on the high words: compare the full magnitudes
233 LTDBR F2, F2
234 BLTU L23 // y negative: F6 = -y at L23
235 FMOVD F2, F6 // F6 = |y|
236L4:
237 LTDBR F0, F0
238 BLTU L24 // x negative: V4 = V20 (= -x) at L24
239 FMOVD F0, F4 // F4 = |x|
240L5:
241 WFCHDBS V6, V4, V4 // compare |y| (V6) with |x| (V4) and set the condition code
242 BEQ L3 // keep the quotient already in V3
243L17: // other quotient: V3 = -(V2 / V0) (TODO confirm WFDDB operand order)
244 WFDDB V0, V2, V4
245 BYTE $0x18 //lr %r2,%r5 : R2 = R5, selects entry 0 or 1 of atan2xpi2h if the fix-up at L18 runs
246 BYTE $0x25
247 WORD $0xB3130034 //lcdbr %f3,%f4 : F3 = -F4
248 BR L3
249L23:
250 WORD $0xB3130062 //lcdbr %f6,%f2 : F6 = -F2
251 BR L4
252L22:
253 VLR V20, V0 // V0 = V20 (= -x)
254 BR L9
255L21:
256 WORD $0xB3130022 //lcdbr %f2,%f2 : F2 = -F2
257 BR L8
258L24:
259 VLR V20, V4 // V4 = V20 (= -x)
260 BR L5
261returnX: //the result is same as the first argument
262 MOVD R1, ret+16(FP)
263 RET
264returnY: //the result is same as the second argument
265 MOVD R2, ret+16(FP)
266 RET
267returnPi: // result = Pi
268 MOVD $Pi, R1
269 MOVD R1, ret+16(FP)
270 RET
271returnNegPi: // result = -Pi
272 MOVD $NegPi, R1
273 MOVD R1, ret+16(FP)
274 RET
275posInfNegInf: // result = 3Pi/4
276 MOVD $Pi3Div4, R1
277 MOVD R1, ret+16(FP)
278 RET
279negInfNegInf: // result = -3Pi/4
280 MOVD $NegPi3Div4, R1
281 MOVD R1, ret+16(FP)
282 RET
283posInfPosInf: // result = Pi/4
284 MOVD $PiDiv4, R1
285 MOVD R1, ret+16(FP)
286 RET
287negInfPosInf: // result = -Pi/4
288 MOVD $NegPiDiv4, R1
289 MOVD R1, ret+16(FP)
290 RET
291returnNegZero: // result = -0
292 MOVD $NegZero, R1
293 MOVD R1, ret+16(FP)
294 RET
295returnPosZero: // result = +0
296 MOVD $0, ret+16(FP)
297 RET
View as plain text