...

Text file src/math/atan2_s390x.s

Documentation: math

     1// Copyright 2017 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "textflag.h"
     6
     7#define PosInf		0x7FF0000000000000	// +Inf; like every constant in this list, the raw IEEE-754 binary64 bit pattern
     8#define NegInf		0xFFF0000000000000	// -Inf
     9#define NegZero		0x8000000000000000	// -0: only the sign bit is set
    10#define Pi		0x400921FB54442D18	// Pi
    11#define NegPi		0xC00921FB54442D18	// -Pi
    12#define Pi3Div4		0x4002D97C7F3321D2	// 3Pi/4
    13#define NegPi3Div4	0xC002D97C7F3321D2	// -3Pi/4
    14#define PiDiv4		0x3FE921FB54442D18	// Pi/4
    15#define NegPiDiv4	0xBFE921FB54442D18	// -Pi/4
    16
    17// Minimax polynomial coefficients and other constants: 20 float64 values that atan2Asm reads through R9 at byte offsets 0..152
    18DATA ·atan2rodataL25<> + 0(SB)/8, $0.199999999999554423E+00	// near 1/5
    19DATA ·atan2rodataL25<> + 8(SB)/8, $-.333333333333330928E+00	// near -1/3
    20DATA ·atan2rodataL25<> + 16(SB)/8, $0.111111110136634272E+00	// near 1/9
    21DATA ·atan2rodataL25<> + 24(SB)/8, $-.142857142828026806E+00	// near -1/7
    22DATA ·atan2rodataL25<> + 32(SB)/8, $0.769228118888682505E-01	// near 1/13
    23DATA ·atan2rodataL25<> + 40(SB)/8, $0.588059263575587687E-01	// roughly 1/17
    24DATA ·atan2rodataL25<> + 48(SB)/8, $-.909090711945939878E-01	// near -1/11
    25DATA ·atan2rodataL25<> + 56(SB)/8, $-.666641501287528609E-01	// roughly -1/15
    26DATA ·atan2rodataL25<> + 64(SB)/8, $0.472329433805024762E-01
    27DATA ·atan2rodataL25<> + 72(SB)/8, $-.525380587584426406E-01
    28DATA ·atan2rodataL25<> + 80(SB)/8, $-.422172007412067035E-01
    29DATA ·atan2rodataL25<> + 88(SB)/8, $0.366935664549587481E-01
    30DATA ·atan2rodataL25<> + 96(SB)/8, $0.220852012160300086E-01
    31DATA ·atan2rodataL25<> + 104(SB)/8, $-.299856214685512712E-01
    32DATA ·atan2rodataL25<> + 112(SB)/8, $0.726338160757602439E-02
    33DATA ·atan2rodataL25<> + 120(SB)/8, $0.134893651284712515E-04
    34DATA ·atan2rodataL25<> + 128(SB)/8, $-.291935324869629616E-02
    35DATA ·atan2rodataL25<> + 136(SB)/8, $-.154797890856877418E-03
    36DATA ·atan2rodataL25<> + 144(SB)/8, $0.843488472994227321E-03
    37DATA ·atan2rodataL25<> + 152(SB)/8, $-.139950258898989925E-01
    38GLOBL ·atan2rodataL25<> + 0(SB), RODATA, $160	// 20 entries * 8 bytes, read-only, file-local (<> suffix)
    39
    40DATA ·atan2xpi2h<> + 0(SB)/8, $0x3ff330e4e4fa7b1b	// entry 0: +h. atan2Asm picks one entry by index and multiplies it by atan2xpim to form the final additive correction
    41DATA ·atan2xpi2h<> + 8(SB)/8, $0xbff330e4e4fa7b1b	// entry 1: -h (entry 0 with the sign bit set)
    42DATA ·atan2xpi2h<> + 16(SB)/8, $0x400330e4e4fa7b1b	// entry 2: +2h (same mantissa as entry 0, exponent field one higher)
    43DATA ·atan2xpi2h<> + 24(SB)/8, $0xc00330e4e4fa7b1b	// entry 3: -2h
    44GLOBL ·atan2xpi2h<> + 0(SB), RODATA, $32	// 4 entries * 8 bytes, read-only
    45DATA ·atan2xpim<> + 0(SB)/8, $0x3ff4f42b00000000	// multiplier m; h*m is approximately Pi/2 (about 1.19944 * 1.30961), so the corrections are about +-Pi/2 and +-Pi
    46GLOBL ·atan2xpim<> + 0(SB), RODATA, $8	// single 8-byte read-only value
    47
    48// Atan2 returns the arc tangent of y/x, using
    49// the signs of the two to determine the quadrant
    50// of the return value.
    51//
    52// Special cases are (in order):
    53//      Atan2(y, NaN) = NaN
    54//      Atan2(NaN, x) = NaN
    55//      Atan2(+0, x>=0) = +0
    56//      Atan2(-0, x>=0) = -0
    57//      Atan2(+0, x<=-0) = +Pi
    58//      Atan2(-0, x<=-0) = -Pi
    59//      Atan2(y>0, 0) = +Pi/2
    60//      Atan2(y<0, 0) = -Pi/2
    61//      Atan2(+Inf, +Inf) = +Pi/4
    62//      Atan2(-Inf, +Inf) = -Pi/4
    63//      Atan2(+Inf, -Inf) = 3Pi/4
    64//      Atan2(-Inf, -Inf) = -3Pi/4
    65//      Atan2(y, +Inf) = 0
    66//      Atan2(y>0, -Inf) = +Pi
    67//      Atan2(y<0, -Inf) = -Pi
    68//      Atan2(+Inf, x) = +Pi/2
    69//      Atan2(-Inf, x) = -Pi/2
    70// The algorithm used is minimax polynomial approximation
    71// with coefficients determined with a Remez exchange algorithm.
    72
    73TEXT	·atan2Asm(SB), NOSPLIT, $0-24	// no local frame; 24 bytes of arguments and result: x at 0(FP), y at 8(FP), ret at 16(FP)
    74	// Special cases are decided first, on the raw 64-bit patterns. NOTE: comments in this routine write Atan2(x, y) with the first argument named x; the header comment writes the same call as Atan2(y, x).
    75	MOVD	x+0(FP), R1	// R1 = bits of x (first argument)
    76	MOVD	y+8(FP), R2	// R2 = bits of y (second argument)
    77
    78	// special case Atan2(NaN, y) = NaN
    79	MOVD	$~(1<<63), R5	// mask with every bit set except the sign bit
    80	AND	R1, R5		// R5 = bits of |x| (R1 itself is unchanged)
    81	MOVD	$PosInf, R3
    82	CMPUBLT	R3, R5, returnX	// unsigned: |x| bits above the +Inf pattern means x is NaN; return x itself
    83
    84	// special case Atan2(x, NaN) = NaN
    85	MOVD	$~(1<<63), R5
    86	AND	R2, R5	// R5 = bits of |y|
    87	CMPUBLT R3, R5, returnY	// R3 still holds PosInf; y is NaN, return y itself
    88
    89	MOVD	$NegZero, R3
    90	CMPUBEQ	R3, R1, xIsNegZero	// x == -0
    91
    92	MOVD	$0, R3
    93	CMPUBEQ	R3, R1, xIsPosZero	// x == +0
    94
    95	MOVD	$PosInf, R4
    96	CMPUBEQ	R4, R2, yIsPosInf	// y == +Inf
    97
    98	MOVD	$NegInf, R4
    99	CMPUBEQ	R4, R2, yIsNegInf	// y == -Inf
   100	BR	Normal	// no special case matched
   101xIsNegZero:
   102	// special case Atan2(-0, y>=0) = -0
   103	MOVD	$0, R4
   104	CMPBLE	R4, R2, returnX	// signed 0 <= bits of y, i.e. y's sign bit is clear: return x, which is -0
   105
   106	// special case Atan2(-0, -0) = -Pi; any other negative y continues at Normal
   107	MOVD	$NegZero, R4
   108	CMPBGE	R4, R2, returnNegPi	// signed compare: NegZero is the most negative 64-bit integer, so this is taken only when y's bits equal NegZero
   109	BR	Normal
   110xIsPosZero:
   111	// special case Atan2(0, 0) = 0
   112	MOVD	$0, R4
   113	CMPUBEQ	R4, R2, returnX	// y == +0: return x, which is +0
   114
   115	// special case Atan2(0, -0) = Pi; any other y continues at Normal
   116	MOVD	$NegZero, R4
   117	CMPBGE	R4, R2, returnPi	// signed compare against the most negative integer: taken only for y == -0
   118	BR Normal
   119yIsNegInf:	// y == -Inf; x is neither NaN nor a zero here
   120	//special case Atan2(+Inf, -Inf) = 3Pi/4
   121	MOVD	$PosInf, R3
   122	CMPUBEQ	R3, R1, posInfNegInf
   123
   124	//special case Atan2(-Inf, -Inf) = -3Pi/4
   125	MOVD	$NegInf, R3
   126	CMPUBEQ	R3, R1, negInfNegInf
   127	BR Normal	// finite x with y == -Inf is left to the general path
   128yIsPosInf:	// y == +Inf; x is neither NaN nor a zero here
   129	//special case Atan2(+Inf, +Inf) = Pi/4
   130	MOVD	$PosInf, R3
   131	CMPUBEQ	R3, R1, posInfPosInf
   132
   133	//special case Atan2(-Inf, +Inf) = -Pi/4
   134	MOVD	$NegInf, R3
   135	CMPUBEQ	R3, R1, negInfPosInf
   136
   137	//special case Atan2(x, +Inf) = Copysign(0, x)
   138	CMPBLT	R1, $0, returnNegZero	// bits of x negative as a signed integer, i.e. sign bit set
   139	BR returnPosZero
   140
   141Normal:	// general path: form a quotient z of the two arguments, evaluate a polynomial in z, then add a table-driven correction
   142	FMOVD	x+0(FP), F0	// F0 = x
   143	FMOVD	y+8(FP), F2	// F2 = y
   144	MOVD	$·atan2rodataL25<>+0(SB), R9	// R9 = base address of the coefficient table
   145	LGDR	F0, R2	// R2 = bits of x (register roles differ from the special-case code above)
   146	LGDR	F2, R1	// R1 = bits of y
   147	RISBGNZ	$32, $63, $32, R2, R2	// rotate left 32 and keep bits 32-63: R2 = high word of x, upper half zeroed
   148	RISBGNZ	$32, $63, $32, R1, R1	// R1 = high word of y
   149	WORD	$0xB9170032	//llgtr	%r3,%r2: R3 = low 31 bits of R2, i.e. high word of |x|
   150	RISBGZ	$63, $63, $33, R2, R5	// rotate left 33 and keep bit 63: R5 = sign bit of x (0 or 1)
   151	WORD	$0xB9170041	//llgtr	%r4,%r1: R4 = high word of |y|
   152	WFLCDB	V0, V20	// V20 = -x, kept for forming |x| later when x is negative
   153	MOVW	R4, R6
   154	MOVW	R3, R7
   155	CMPUBLT	R6, R7, L17	// high word of |y| below high word of |x|: use the alternative quotient at L17
   156	WFDDB	V2, V0, V3	// V3 = quotient of the arguments; presumably x / y (Go lists the divisor first) - NOTE(review): confirm operand order
   157	ADDW	$2, R5, R2	// R2 = 2 + sign bit of x: selects entry 2 or 3 of atan2xpi2h
   158	MOVW	R4, R6
   159	MOVW	R3, R7
   160	CMPUBLE	R6, R7, L20	// "below" was excluded above, so this is taken only when the high words are equal
   161L3:	// polynomial evaluation; on entry V3 = z (selected quotient, negated on the L17 path) and R2 = index of the correction entry
   162	WFMDB	V3, V3, V4	// V4 = z*z
   163	VLEG	$0, 152(R9), V18	// this and the following VLEG/FMOVD loads fetch coefficients from the table at R9
   164	VLEG	$0, 144(R9), V16
   165	FMOVD	136(R9), F1
   166	FMOVD	128(R9), F5
   167	FMOVD	120(R9), F6
   168	WFMADB	V4, V16, V5, V16	// fused multiply-add, read throughout as dst = op1*op2 + op3; several independent chains are interleaved
   169	WFMADB	V4, V6, V1, V6
   170	FMOVD	112(R9), F7
   171	WFMDB	V4, V4, V1	// V1 = z^4
   172	WFMADB	V4, V7, V18, V7
   173	VLEG	$0, 104(R9), V18
   174	WFMADB	V1, V6, V16, V6
   175	CMPWU	R4, R3	// unsigned 32-bit compare of the |y| and |x| high words; its condition code is consumed by the BLT/BGT after the polynomial
   176	FMOVD	96(R9), F5
   177	VLEG	$0, 88(R9), V16
   178	WFMADB	V4, V5, V18, V5
   179	VLEG	$0, 80(R9), V18
   180	VLEG	$0, 72(R9), V22
   181	WFMADB	V4, V16, V18, V16
   182	VLEG	$0, 64(R9), V18
   183	WFMADB	V1, V7, V5, V7
   184	WFMADB	V4, V18, V22, V18
   185	WFMDB	V1, V1, V5	// V5 = z^8
   186	WFMADB	V1, V16, V18, V16
   187	VLEG	$0, 56(R9), V18
   188	WFMADB	V5, V6, V7, V6
   189	VLEG	$0, 48(R9), V22
   190	FMOVD	40(R9), F7
   191	WFMADB	V4, V7, V18, V7
   192	VLEG	$0, 32(R9), V18
   193	WFMADB	V5, V6, V16, V6
   194	WFMADB	V4, V18, V22, V18
   195	VLEG	$0, 24(R9), V16
   196	WFMADB	V1, V7, V18, V7
   197	VLEG	$0, 16(R9), V18
   198	VLEG	$0, 8(R9), V22
   199	WFMADB	V4, V18, V16, V18
   200	VLEG	$0, 0(R9), V16
   201	WFMADB	V5, V6, V7, V6
   202	WFMADB	V4, V16, V22, V16
   203	FMUL	F3, F4	// F4 = z^2 * z = z^3 (F3 and F4 are used here as the scalar views of V3 and V4)
   204	WFMADB	V1, V18, V16, V1
   205	FMADD	F6, F5, F1	// F1 += F6 * F5: fold in the high-order partial sum scaled by z^8
   206	WFMADB	V4, V1, V3, V4	// V4 = z^3 * V1 + z: the polynomial result
   207	BLT	L18	// from CMPWU: high word of |y| below that of |x| - always add the table correction
   208	BGT	L7	// high word of |y| above that of |x| - correction depends only on the sign of y
   209	LTDBR	F2, F2	// high words equal: compare the full magnitudes; first test the sign of y
   210	BLTU	L21	// y negative: L21 replaces F2 with -F2
   211L8:
   212	LTDBR	F0, F0	// test the sign of x
   213	BLTU	L22	// x negative: L22 replaces V0 with V20 (-x)
   214L9:
   215	WFCHDBS	V2, V0, V0	// compare-high between |y| and |x|, setting the condition code; V0 is overwritten. NOTE(review): confirm which operand is tested as the larger
   216	BNE	L18	// comparison not satisfied: add the table correction
   217L7:
   218	MOVW	R1, R6	// sign-extend the high word of y
   219	CMPBGE	R6, $0, L1	// sign bit of y clear: return the polynomial result without correction
   220L18:	// add the correction atan2xpim * atan2xpi2h[R2]
   221	RISBGZ	$58, $60, $3, R2, R2	// R2 = (R2 & 7) << 3: byte offset of the selected table entry
   222	MOVD	$·atan2xpi2h<>+0(SB), R1	// R1 = base address of atan2xpi2h
   223	MOVD	·atan2xpim<>+0(SB), R3	// R3 = bits of the multiplier
   224	LDGR	R3, F0	// F0 = atan2xpim
   225	WORD	$0xED021000	//madb	%f4,%f0,0(%r2,%r1): F4 += F0 * entry at R1+R2; hand-encoded, completed by the two BYTE directives below
   226	BYTE	$0x40
   227	BYTE	$0x1E
   228L1:
   229	FMOVD	F4, ret+16(FP)	// store the result
   230	RET
   231
   232L20:	// high words of |x| and |y| are equal: choose the quotient by comparing the full magnitudes
   233	LTDBR	F2, F2	// test the sign of y
   234	BLTU	L23	// y negative: L23 sets F6 = -y
   235	FMOVD	F2, F6	// F6 = y
   236L4:
   237	LTDBR	F0, F0	// test the sign of x
   238	BLTU	L24	// x negative: L24 sets V4 = V20 (-x)
   239	FMOVD	F0, F4	// F4 = x
   240L5:
   241	WFCHDBS	V6, V4, V4	// compare-high between |y| (V6) and |x| (V4), setting the condition code. NOTE(review): confirm which operand is tested as the larger
   242	BEQ	L3	// comparison satisfied: keep V3 and R2 from the fall-through path
   243L17:	// alternative quotient: operands swapped relative to the WFDDB before L3
   244	WFDDB	V0, V2, V4	// V4 = the other quotient of the arguments; presumably y / x - NOTE(review): confirm operand order
   245	BYTE	$0x18	//lr	%r2,%r5: R2 = sign bit of x, selecting entry 0 or 1 of atan2xpi2h
   246	BYTE	$0x25
   247	WORD	$0xB3130034	//lcdbr	%f3,%f4: F3 = -F4, the negated quotient becomes z
   248	BR	L3
   249L23:
   250	WORD	$0xB3130062	//lcdbr	%f6,%f2: F6 = -y
   251	BR	L4
   252L22:
   253	VLR	V20, V0	// V0 = -x
   254	BR	L9
   255L21:
   256	WORD	$0xB3130022	//lcdbr	%f2,%f2: F2 = -y
   257	BR	L8
   258L24:
   259	VLR	V20, V4	// V4 = -x
   260	BR	L5
   261returnX:	// the result is the first argument, returned bit-for-bit from R1
   262	MOVD	R1, ret+16(FP)
   263	RET
   264returnY:	// the result is the second argument, returned bit-for-bit from R2
   265	MOVD	R2, ret+16(FP)
   266	RET
   267returnPi:	// result = +Pi
   268	MOVD	$Pi, R1
   269	MOVD	R1, ret+16(FP)
   270	RET
   271returnNegPi:	// result = -Pi
   272	MOVD	$NegPi, R1
   273	MOVD	R1, ret+16(FP)
   274	RET
   275posInfNegInf:	// x = +Inf, y = -Inf: result = 3Pi/4
   276	MOVD	$Pi3Div4, R1
   277	MOVD	R1, ret+16(FP)
   278	RET
   279negInfNegInf:	// x = -Inf, y = -Inf: result = -3Pi/4
   280	MOVD	$NegPi3Div4, R1
   281	MOVD	R1, ret+16(FP)
   282	RET
   283posInfPosInf:	// x = +Inf, y = +Inf: result = Pi/4
   284	MOVD	$PiDiv4, R1
   285	MOVD	R1, ret+16(FP)
   286	RET
   287negInfPosInf:	// x = -Inf, y = +Inf: result = -Pi/4
   288	MOVD	$NegPiDiv4, R1
   289	MOVD	R1, ret+16(FP)
   290	RET
   291returnNegZero:	// result = -0
   292	MOVD	$NegZero, R1
   293	MOVD	R1, ret+16(FP)
   294	RET
   295returnPosZero:	// result = +0
   296	MOVD	$0, ret+16(FP)
   297	RET

View as plain text