...

Text file src/math/exp_loong64.s

Documentation: math

     1// Copyright 2025 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5#include "textflag.h"
     6
     7#define NearZero	0x3e30000000000000	// 2**-28
     8#define PosInf		0x7ff0000000000000	// bit pattern stored as the result on the overflow paths
     9#define FracMask	0x000fffffffffffff	// low 52 bits; ANDed with the bits of y to extract the fraction
    10#define C1		0x3cb0000000000000	// 2**-52
    11
    12DATA exprodata<>+0(SB)/8, $0.0	// compared with x to choose between the add and sub rounding paths
    13DATA exprodata<>+8(SB)/8, $0.5	// rounding offset applied before truncating to k
    14DATA exprodata<>+16(SB)/8, $1.0	// used for 1 + x and for y = 1 - (...)
    15DATA exprodata<>+24(SB)/8, $2.0	// used for 2 - c
    16DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01	// Ln2Hi
    17DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10	// Ln2Lo
    18DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00	// Log2e
    19DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02	// Overflow: archExp returns PosInf for x above this
    20DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02	// Underflow: archExp returns 0 for x below this
    21DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03	// Overflow2: archExp2 returns PosInf for x above this
    22DATA exprodata<>+80(SB)/8, $-1.0740e+03			// Underflow2: archExp2 returns 0 for x below this
    23DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09	// NearZero (2**-28): archExp returns 1 + x when |x| is below this
    24GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96	// 12 entries of 8 bytes each
    25
    26DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01	// P1 (P1..P5 are consumed by the FMADDD chain under "compute c")
    27DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03	// P2
    28DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05	// P3
    29DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06	// P4
    30DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08	// P5
    31GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40	// 5 entries of 8 bytes each
    32
    33// archExp returns e**x, the base-e exponential of x.
    34// This is an assembly implementation of the method used for function Exp in file exp.go.
    35// Special cases: NaN -> NaN, x > Overflow -> PosInf, x < Underflow -> 0, |x| < NearZero -> 1 + x.
    36// func archExp(x float64) float64
    37TEXT ·archExp(SB),$0-16
    38	MOVD	x+0(FP), F0	// F0 = x
    39
    40	MOVV	$exprodata<>+0(SB), R10	// R10 = base of the scalar constant table
    41	MOVD	56(R10), F1	// Overflow
    42	MOVD	64(R10), F2	// Underflow
    43	MOVD	88(R10), F3	// NearZero
    44	MOVD	16(R10), F17	// 1.0, also the default multiplier m for the inlined Ldexp
    45
    46	CMPEQD	F0, F0, FCC0
    47	BFPF	isNaN		// x = NaN, return NaN
    48
    49	CMPGTD	F0, F1, FCC0
    50	BFPT	overflow	// x > Overflow, return PosInf
    51
    52	CMPGTD	F2, F0, FCC0
    53	BFPT	underflow	// x < Underflow, return 0
    54
    55	ABSD	F0, F5
    56	CMPGTD	F3, F5, FCC0
    57	BFPT	nearzero	// fabs(x) < NearZero, return 1 + x
    58
    59	// argument reduction, x = k*ln2 + r,  |r| <= 0.5*ln2
    60	// computed as r = hi - lo for extra precision.
    61	MOVD	0(R10), F5	// 0.0
    62	MOVD	8(R10), F3	// 0.5
    63	MOVD	48(R10), F2	// Log2e
    64	CMPGTD	F0, F5, FCC0
    65	BFPT	add		// x > 0
    66sub:
    67	FMSUBD	F3, F2, F0, F3	// Log2e*x - 0.5
    68	JMP	2(PC)		// skip the x > 0 variant
    69add:
    70	FMADDD	F3, F2, F0, F3	// Log2e*x + 0.5
    71
    72	FTINTRZVD F3, F4	// float64 -> int64
    73	MOVV	F4, R5		// R5 = int(k)
    74	FFINTDV	F4, F3		// int64 -> float64, F3 = float64(k)
    75
    76	MOVD	32(R10), F4	// Ln2Hi
    77	MOVD	40(R10), F5	// Ln2Lo
    78	FNMSUBD	F0, F3, F4, F4	// hi = x - k*Ln2Hi
    79	MULD	F3, F5, F5	// lo = k*Ln2Lo
    80	SUBD	F5, F4, F6	// r = hi - lo
    81	MULD	F6, F6, F7	// t = r*r
    82
    83	// compute c
    84	MOVV	$expmultirodata<>+0(SB), R11	// R11 = base of the P1..P5 table
    85	MOVD	32(R11), F8	// P5
    86	MOVD	24(R11), F9	// P4
    87	FMADDD	F9, F8, F7, F13	// P4 + t*P5
    88	MOVD	16(R11), F10	// P3
    89	FMADDD	F10, F13, F7, F13	// P3 + t*(P4 + ...)
    90	MOVD	8(R11), F11	// P2
    91	FMADDD	F11, F13, F7, F13	// P2 + t*(P3 + ...)
    92	MOVD	0(R11), F12	// P1
    93	FMADDD	F12, F13, F7, F13	// P1 + t*(P2 + ...)
    94	FNMSUBD	F6, F13, F7, F13	// c = r - t*(P1 + t*(P2 + ...))
    95
    96	// compute y
    97	MOVD	24(R10), F14	// 2.0
    98	SUBD	F13, F14, F14	// 2 - c
    99	MULD	F6, F13, F15	// r*c
   100	DIVD	F14, F15, F15	// r*c / (2 - c)
   101	SUBD	F15, F5, F15	// lo - r*c/(2 - c)
   102	SUBD	F4, F15, F15	// (lo - r*c/(2 - c)) - hi
   103	SUBD	F15, F17, F16	// y = 1 - ((lo - r*c/(2 - c)) - hi)
   104
   105	// inline Ldexp(y, k), benefit:
   106	// 1, no parameter pass overhead.
   107	// 2, skip unnecessary checks for Inf/NaN/Zero
   108	MOVV	F16, R4		// R4 = bits of y
   109	MOVV	$FracMask, R9
   110	AND	R9, R4, R6	// fraction
   111	SRLV	$52, R4, R7	// exponent
   112	ADDV	R5, R7		// exponent field of y * 2**k
   113	MOVV	$1, R12
   114	BGE	R7, R12, normal
   115	ADDV	$53, R7		// denormal: lift by 53 (not 52) so the rebuilt exponent field never drops to 0
   116	MOVV	$0x3ca0000000000000, R8	// 2**-53
   117	MOVV	R8, F17		// m = 2**-53 undoes the lift; the multiply below does the only rounding
   118normal:
   119	SLLV	$52, R7
   120	OR	R7, R6, R4	// reassemble exponent | fraction
   121	MOVV	R4, F0
   122	MULD	F17, F0		// return m * x
   123	MOVD	F0, ret+8(FP)
   124	RET
   125nearzero:
   126	ADDD	F17, F0, F0	// 1 + x, then fall through to the shared store
   127isNaN:
   128	MOVD	F0, ret+8(FP)
   129	RET
   130underflow:
   131	MOVV	R0, ret+8(FP)	// R0 stores an all-zero word, i.e. +0.0
   132	RET
   133overflow:
   134	MOVV	$PosInf, R4
   135	MOVV	R4, ret+8(FP)
   136	RET
   137
   138
   139// archExp2 returns 2**x, the base-2 exponential of x.
   140// This is an assembly implementation of the method used for function Exp2 in file exp.go.
   141// Special cases: NaN -> NaN, x > Overflow2 -> PosInf, x < Underflow2 -> 0.
   142// func archExp2(x float64) float64
   143TEXT ·archExp2(SB),$0-16
   144	MOVD	x+0(FP), F0	// F0 = x
   145
   146	MOVV	$exprodata<>+0(SB), R10	// R10 = base of the scalar constant table
   147	MOVD	72(R10), F1	// Overflow2
   148	MOVD	80(R10), F2	// Underflow2
   150
   151	CMPEQD	F0, F0, FCC0
   152	BFPF	isNaN		// x = NaN, return NaN
   153
   154	CMPGTD	F0, F1, FCC0
   155	BFPT	overflow	// x > Overflow2, return PosInf
   156
   157	CMPGTD	F2, F0, FCC0
   158	BFPT	underflow	// x < Underflow2, return 0
   159
   160	// argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
   161	// computed as r = hi - lo for extra precision.
   162	MOVD	0(R10), F10	// 0.0
   163	MOVD	8(R10), F2	// 0.5
   164	CMPGTD	F0, F10, FCC0
   165	BFPT	add		// x > 0
   166sub:
   167	SUBD	F2, F0, F3	// x - 0.5
   168	JMP	2(PC)		// skip the x > 0 variant
   169add:
   170	ADDD	F2, F0, F3	// x + 0.5
   171
   172	FTINTRZVD F3, F4	// float64 -> int64
   173	MOVV	F4, R5		// R5 = int(k)
   174	FFINTDV	F4, F3		// int64 -> float64, F3 = float64(k)
   175
   176	MOVD	32(R10), F4	// Ln2Hi
   177	MOVD	40(R10), F5	// Ln2Lo
   178	SUBD	F3, F0, F3	// t = x - k
   179	MULD	F3, F4		// hi = t*Ln2Hi
   180	FNMSUBD	F10, F3, F5, F5	// lo = 0 - t*Ln2Lo
   181	SUBD	F5, F4, F6	// r = hi - lo
   182	MULD	F6, F6, F7	// r*r
   183
   184	// compute c
   185	MOVV	$expmultirodata<>+0(SB), R11	// R11 = base of the P1..P5 table
   186	MOVD	32(R11), F8	// P5
   187	MOVD	24(R11), F9	// P4
   188	FMADDD	F9, F8, F7, F13	// P4 + r*r*P5
   189	MOVD	16(R11), F10	// P3 (F10 no longer holds 0.0 from here on)
   190	FMADDD	F10, F13, F7, F13	// P3 + r*r*(P4 + ...)
   191	MOVD	8(R11), F11	// P2
   192	FMADDD	F11, F13, F7, F13	// P2 + r*r*(P3 + ...)
   193	MOVD	0(R11), F12	// P1
   194	FMADDD	F12, F13, F7, F13	// P1 + r*r*(P2 + ...)
   195	FNMSUBD	F6, F13, F7, F13	// c = r - r*r*(P1 + ...)
   196
   197	// compute y
   198	MOVD	24(R10), F14	// 2.0
   199	SUBD	F13, F14, F14	// 2 - c
   200	MULD	F6, F13, F15	// r*c
   201	DIVD	F14, F15	// r*c / (2 - c)
   202
   203	MOVD	16(R10), F17	// 1.0, also the default multiplier m for the inlined Ldexp
   204	SUBD	F15, F5, F15	// lo - r*c/(2 - c)
   205	SUBD	F4, F15, F15	// (lo - r*c/(2 - c)) - hi
   206	SUBD	F15, F17, F16	// y = 1 - ((lo - r*c/(2 - c)) - hi)
   207
   208	// inline Ldexp(y, k), benefit:
   209	// 1, no parameter pass overhead.
   210	// 2, skip unnecessary checks for Inf/NaN/Zero
   211	MOVV	F16, R4		// R4 = bits of y
   212	MOVV	$FracMask, R9
   213	SRLV	$52, R4, R7	// exponent
   214	AND	R9, R4, R6	// fraction
   215	ADDV	R5, R7		// exponent field of y * 2**k
   216	MOVV	$1, R12
   217	BGE	R7, R12, normal
   218
   219	ADDV	$52, R7		// denormal
   220	MOVV	$C1, R8
   221	MOVV	R8, F17		// m = 2**-52
   222normal:
   223	SLLV	$52, R7
   224	OR	R7, R6, R4	// reassemble exponent | fraction
   225	MOVV	R4, F0
   226	MULD	F17, F0		// m * x, then fall through to the shared store
   227isNaN:
   228	MOVD	F0, ret+8(FP)
   229	RET
   230underflow:
   231	MOVV	R0, ret+8(FP)	// R0 stores an all-zero word, i.e. +0.0
   232	RET
   233overflow:
   234	MOVV	$PosInf, R4
   235	MOVV	R4, ret+8(FP)
   236	RET

View as plain text