...

Text file src/crypto/internal/fips140/sha512/sha512block_loong64.s

Documentation: crypto/internal/fips140/sha512

     1// Copyright 2024 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego
     6
     7#include "textflag.h"
     8
     9// SHA512 block routine. See sha512block.go for Go equivalent.
    10
      // Scratch registers shared by every macro in this file.
      //   REGTMP4 carries W[i] out of LOAD0/LOAD1 into SHA512T1, which then
      //   returns T1 in it.
      //   REGTMP1 receives T2 from SHA512T2.
      //   REGTMP5 is R6, the register block uses for the length of p. The
      //   sharing is safe because the length is last read before the first
      //   round macro runs.
      //   REGTMP4 has to stay R7 for as long as LOAD0 emits its byte swap as a
      //   raw WORD: that encoding names R7 directly.
    11#define REGTMP	R30
    12#define REGTMP1	R16
    13#define REGTMP2	R17
    14#define REGTMP3	R18
    15#define REGTMP4	R7
    16#define REGTMP5	R6
    17
    18// W[i] = M[i]; for 0 <= i <= 15
      // Loads the 64-bit word at byte offset index*8 from R5, reverses its byte
      // order, and stores it at the same offset from R3, i.e. slot index of W.
      // The swapped word is also left in REGTMP4 for the round that follows.
      // Clobbers REGTMP4 only.
      // The byte swap is written as a raw instruction word whose encoding names
      // R7 as both source and destination, so it is correct only while REGTMP4
      // is defined as R7.
    19#define LOAD0(index) \
    20	MOVV	(index*8)(R5), REGTMP4; \
    21	WORD	$0x3ce7; \	//REVBV	REGTMP4, REGTMP4
    22	MOVV	REGTMP4, (index*8)(R3)
    23
    24// W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 79
    25//   SIGMA0(x) = ROTR(1,x) XOR ROTR(8,x) XOR SHR(7,x)
    26//   SIGMA1(x) = ROTR(19,x) XOR ROTR(61,x) XOR SHR(6,x)
      // W is held as a ring of 16 eight-byte slots addressed from R3: every
      // index is reduced with &0xf before it is scaled, so the new W[i] is
      // written over the slot that held W[i-16].
      // Register use:
      //   REGTMP4  W[i-2], then SIGMA1(W[i-2]) + W[i-7], finally W[i]
      //   REGTMP1  W[i-15], then ROTR(8, W[i-15])
      //   REGTMP   W[i-7], reloaded later with W[i-16]
      //   REGTMP5  SIGMA0(W[i-15]) + W[i-16]
      //   REGTMP2, REGTMP3  rotate/shift temporaries
      // The result is stored to the ring and also left in REGTMP4 for the
      // round that follows. Clobbers REGTMP and REGTMP1 through REGTMP5.
    27#define LOAD1(index) \
    28	MOVV	(((index-2)&0xf)*8)(R3), REGTMP4; \
    29	MOVV	(((index-15)&0xf)*8)(R3), REGTMP1; \
    30	MOVV	(((index-7)&0xf)*8)(R3), REGTMP; \
    31	MOVV	REGTMP4, REGTMP2; \
    32	MOVV	REGTMP4, REGTMP3; \
    33	ROTRV	$19, REGTMP4; \
    34	ROTRV	$61, REGTMP2; \
    35	SRLV	$6, REGTMP3; \
    36	XOR	REGTMP2, REGTMP4; \
    37	XOR	REGTMP3, REGTMP4; \
    38	ROTRV	$1, REGTMP1, REGTMP5; \
    39	SRLV	$7, REGTMP1, REGTMP2; \
    40	ROTRV	$8, REGTMP1; \
    41	ADDV	REGTMP, REGTMP4; \
    42	MOVV	(((index-16)&0xf)*8)(R3), REGTMP; \
    43	XOR	REGTMP1, REGTMP5; \
    44	XOR	REGTMP2, REGTMP5; \
    45	ADDV	REGTMP, REGTMP5; \
    46	ADDV	REGTMP5, REGTMP4; \
    47	MOVV	REGTMP4, ((index&0xf)*8)(R3)
    48
    49// h is also used as an accumulator. Wt is passed in REGTMP4.
    50//   T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
    51//     BIGSIGMA1(x) = ROTR(14,x) XOR ROTR(18,x) XOR ROTR(41,x)
    52//     Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
    53// Calculate T1 in REGTMP4
      // const is the round constant K[i]; it and Wt are added into h first, so
      // that REGTMP4 can be reused for the rotations of e.
      // NOT e is produced by XORing e with an all-ones value loaded into REGTMP.
      // On exit:
      //   REGTMP4 = T1
      //   h       = h + K[i] + W[i] + BIGSIGMA1(e)  (a partial sum, Ch excluded)
      //   e, f, g are unchanged
      // Clobbers REGTMP, REGTMP2, REGTMP3, REGTMP4, REGTMP5. REGTMP1 is not
      // touched.
    54#define SHA512T1(const, e, f, g, h) \
    55	ADDV	$const, h; \
    56	ADDV	REGTMP4, h; \
    57	ROTRV	$14, e, REGTMP4; \
    58	ROTRV	$18, e, REGTMP; \
    59	ROTRV	$41, e, REGTMP3; \
    60	AND	f, e, REGTMP2; \
    61	XOR	REGTMP, REGTMP4; \
    62	MOVV	$0xffffffffffffffff, REGTMP; \
    63	XOR	REGTMP4, REGTMP3; \
    64	XOR	REGTMP, e, REGTMP5; \
    65	ADDV	REGTMP3, h; \
    66	AND	g, REGTMP5; \
    67	XOR	REGTMP2, REGTMP5; \
    68	ADDV	h, REGTMP5, REGTMP4
    69
    70// T2 = BIGSIGMA0(a) + Maj(a, b, c)
    71// BIGSIGMA0(x) = ROTR(28,x) XOR ROTR(34,x) XOR ROTR(39,x)
    72// Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
    73// Calculate T2 in REGTMP1
      // BIGSIGMA0(a) is built up in REGTMP5 and Maj(a, b, c) in REGTMP1; the two
      // chains are interleaved and summed by the last instruction.
      // a, b and c are only read. Clobbers REGTMP, REGTMP1, REGTMP2, REGTMP3 and
      // REGTMP5. REGTMP4 is not written, so a T1 left there by SHA512T1 is still
      // valid afterwards.
    74#define SHA512T2(a, b, c) \
    75	ROTRV	$28, a, REGTMP5; \
    76	AND	b, c, REGTMP1; \
    77	ROTRV	$34, a, REGTMP3; \
    78	AND	c, a, REGTMP; \
    79	XOR	REGTMP3, REGTMP5; \
    80	XOR	REGTMP, REGTMP1; \
    81	ROTRV	$39, a, REGTMP2; \
    82	AND	a, b, REGTMP3; \
    83	XOR	REGTMP3, REGTMP1; \
    84	XOR	REGTMP2, REGTMP5; \
    85	ADDV	REGTMP5, REGTMP1
    86
    87// Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
    88// The values for e and a are stored in d and h, ready for rotation.
      // Expects W[i] in REGTMP4 on entry (see SHA512T1). No values are moved
      // between the working registers: the next round is invoked with its
      // register arguments shifted by one position, so the register that was d
      // is passed as e and the register that was h is passed as a.
      // The two trailing ADDVs read T1 from REGTMP4 and T2 from REGTMP1.
    89#define SHA512ROUND(const, a, b, c, d, e, f, g, h) \
    90	SHA512T1(const, e, f, g, h); \
    91	SHA512T2(a, b, c); \
    92	ADDV	REGTMP4, d; \
    93	ADDV	REGTMP1, REGTMP4, h
    94
      // Round for 0 <= index <= 15: W[index] is taken from the input via LOAD0.
    95#define SHA512ROUND0(index, const, a, b, c, d, e, f, g, h) \
    96	LOAD0(index); \
    97	SHA512ROUND(const, a, b, c, d, e, f, g, h)
    98
      // Round for 16 <= index <= 79: W[index] is derived from earlier entries of
      // W via LOAD1.
    99#define SHA512ROUND1(index, const, a, b, c, d, e, f, g, h) \
   100	LOAD1(index); \
   101	SHA512ROUND(const, a, b, c, d, e, f, g, h)
   102
   103// A stack frame size of 128 bytes is required here, because
   104// the frame size used for data expansion is 128 bytes.
   105// See the definition of the macro LOAD1 above (8 bytes * 16 entries).
   106//
      // block processes p in chunks of 128 bytes. The length is rounded down to
      // a multiple of 128 first, so a trailing partial chunk is ignored, and the
      // function returns without reading or writing dig when no whole chunk is
      // present.
      //
      // Register roles inside the loop:
      //   R4        dig
      //   R5        address of the chunk being processed
      //   R25       address just past the last whole chunk
      //   R8..R15   working variables a..h
      //   R3        base of the 16-slot W buffer used by LOAD0/LOAD1
      //   R6        length of p during setup only; afterwards it is REGTMP5
      //
      // The eight 64-bit words at offsets 0..56 from dig are used as H0..H7.
      // NOTE(review): this presumes the hash state is the leading field of
      // Digest; confirm against the Go definition.
      //
   107// func block(dig *Digest, p []byte)
   108TEXT ·block(SB),NOSPLIT,$128-32
   109	MOVV	p_len+16(FP), R6
   110	MOVV	p_base+8(FP), R5
   111	AND	$~127, R6	// round the length down to a multiple of 128
   112	BEQ	R6, end	// no whole chunk: nothing to do
   113
   114	// p_len >= 128
   115	MOVV	dig+0(FP), R4
   116	ADDV	R5, R6, R25	// end pointer; R6 is free for use as REGTMP5 from here on
   117	MOVV	(0*8)(R4), R8	// a = H0
   118	MOVV	(1*8)(R4), R9	// b = H1
   119	MOVV	(2*8)(R4), R10	// c = H2
   120	MOVV	(3*8)(R4), R11	// d = H3
   121	MOVV	(4*8)(R4), R12	// e = H4
   122	MOVV	(5*8)(R4), R13	// f = H5
   123	MOVV	(6*8)(R4), R14	// g = H6
   124	MOVV	(7*8)(R4), R15	// h = H7
   125
   126loop:
      	// Rounds 0-15 take W[i] straight from the input chunk. The register
      	// arguments shift by one position per round and repeat every 8 rounds.
   127	SHA512ROUND0( 0, 0x428a2f98d728ae22, R8,  R9,  R10, R11, R12, R13, R14, R15)
   128	SHA512ROUND0( 1, 0x7137449123ef65cd, R15, R8,  R9,  R10, R11, R12, R13, R14)
   129	SHA512ROUND0( 2, 0xb5c0fbcfec4d3b2f, R14, R15, R8,  R9,  R10, R11, R12, R13)
   130	SHA512ROUND0( 3, 0xe9b5dba58189dbbc, R13, R14, R15, R8,  R9,  R10, R11, R12)
   131	SHA512ROUND0( 4, 0x3956c25bf348b538, R12, R13, R14, R15, R8,  R9,  R10, R11)
   132	SHA512ROUND0( 5, 0x59f111f1b605d019, R11, R12, R13, R14, R15, R8,  R9,  R10)
   133	SHA512ROUND0( 6, 0x923f82a4af194f9b, R10, R11, R12, R13, R14, R15, R8,  R9)
   134	SHA512ROUND0( 7, 0xab1c5ed5da6d8118, R9,  R10, R11, R12, R13, R14, R15, R8)
   135	SHA512ROUND0( 8, 0xd807aa98a3030242, R8,  R9,  R10, R11, R12, R13, R14, R15)
   136	SHA512ROUND0( 9, 0x12835b0145706fbe, R15, R8,  R9,  R10, R11, R12, R13, R14)
   137	SHA512ROUND0(10, 0x243185be4ee4b28c, R14, R15, R8,  R9,  R10, R11, R12, R13)
   138	SHA512ROUND0(11, 0x550c7dc3d5ffb4e2, R13, R14, R15, R8,  R9,  R10, R11, R12)
   139	SHA512ROUND0(12, 0x72be5d74f27b896f, R12, R13, R14, R15, R8,  R9,  R10, R11)
   140	SHA512ROUND0(13, 0x80deb1fe3b1696b1, R11, R12, R13, R14, R15, R8,  R9,  R10)
   141	SHA512ROUND0(14, 0x9bdc06a725c71235, R10, R11, R12, R13, R14, R15, R8,  R9)
   142	SHA512ROUND0(15, 0xc19bf174cf692694, R9,  R10, R11, R12, R13, R14, R15, R8)
   143
      	// Rounds 16-79 extend the schedule from the W ring buffer.
   144	SHA512ROUND1(16, 0xe49b69c19ef14ad2, R8,  R9,  R10, R11, R12, R13, R14, R15)
   145	SHA512ROUND1(17, 0xefbe4786384f25e3, R15, R8,  R9,  R10, R11, R12, R13, R14)
   146	SHA512ROUND1(18, 0x0fc19dc68b8cd5b5, R14, R15, R8,  R9,  R10, R11, R12, R13)
   147	SHA512ROUND1(19, 0x240ca1cc77ac9c65, R13, R14, R15, R8,  R9,  R10, R11, R12)
   148	SHA512ROUND1(20, 0x2de92c6f592b0275, R12, R13, R14, R15, R8,  R9,  R10, R11)
   149	SHA512ROUND1(21, 0x4a7484aa6ea6e483, R11, R12, R13, R14, R15, R8,  R9,  R10)
   150	SHA512ROUND1(22, 0x5cb0a9dcbd41fbd4, R10, R11, R12, R13, R14, R15, R8,  R9)
   151	SHA512ROUND1(23, 0x76f988da831153b5, R9,  R10, R11, R12, R13, R14, R15, R8)
   152	SHA512ROUND1(24, 0x983e5152ee66dfab, R8,  R9,  R10, R11, R12, R13, R14, R15)
   153	SHA512ROUND1(25, 0xa831c66d2db43210, R15, R8,  R9,  R10, R11, R12, R13, R14)
   154	SHA512ROUND1(26, 0xb00327c898fb213f, R14, R15, R8,  R9,  R10, R11, R12, R13)
   155	SHA512ROUND1(27, 0xbf597fc7beef0ee4, R13, R14, R15, R8,  R9,  R10, R11, R12)
   156	SHA512ROUND1(28, 0xc6e00bf33da88fc2, R12, R13, R14, R15, R8,  R9,  R10, R11)
   157	SHA512ROUND1(29, 0xd5a79147930aa725, R11, R12, R13, R14, R15, R8,  R9,  R10)
   158	SHA512ROUND1(30, 0x06ca6351e003826f, R10, R11, R12, R13, R14, R15, R8,  R9)
   159	SHA512ROUND1(31, 0x142929670a0e6e70, R9,  R10, R11, R12, R13, R14, R15, R8)
   160	SHA512ROUND1(32, 0x27b70a8546d22ffc, R8,  R9,  R10, R11, R12, R13, R14, R15)
   161	SHA512ROUND1(33, 0x2e1b21385c26c926, R15, R8,  R9,  R10, R11, R12, R13, R14)
   162	SHA512ROUND1(34, 0x4d2c6dfc5ac42aed, R14, R15, R8,  R9,  R10, R11, R12, R13)
   163	SHA512ROUND1(35, 0x53380d139d95b3df, R13, R14, R15, R8,  R9,  R10, R11, R12)
   164	SHA512ROUND1(36, 0x650a73548baf63de, R12, R13, R14, R15, R8,  R9,  R10, R11)
   165	SHA512ROUND1(37, 0x766a0abb3c77b2a8, R11, R12, R13, R14, R15, R8,  R9,  R10)
   166	SHA512ROUND1(38, 0x81c2c92e47edaee6, R10, R11, R12, R13, R14, R15, R8,  R9)
   167	SHA512ROUND1(39, 0x92722c851482353b, R9,  R10, R11, R12, R13, R14, R15, R8)
   168	SHA512ROUND1(40, 0xa2bfe8a14cf10364, R8,  R9,  R10, R11, R12, R13, R14, R15)
   169	SHA512ROUND1(41, 0xa81a664bbc423001, R15, R8,  R9,  R10, R11, R12, R13, R14)
   170	SHA512ROUND1(42, 0xc24b8b70d0f89791, R14, R15, R8,  R9,  R10, R11, R12, R13)
   171	SHA512ROUND1(43, 0xc76c51a30654be30, R13, R14, R15, R8,  R9,  R10, R11, R12)
   172	SHA512ROUND1(44, 0xd192e819d6ef5218, R12, R13, R14, R15, R8,  R9,  R10, R11)
   173	SHA512ROUND1(45, 0xd69906245565a910, R11, R12, R13, R14, R15, R8,  R9,  R10)
   174	SHA512ROUND1(46, 0xf40e35855771202a, R10, R11, R12, R13, R14, R15, R8,  R9)
   175	SHA512ROUND1(47, 0x106aa07032bbd1b8, R9,  R10, R11, R12, R13, R14, R15, R8)
   176	SHA512ROUND1(48, 0x19a4c116b8d2d0c8, R8,  R9,  R10, R11, R12, R13, R14, R15)
   177	SHA512ROUND1(49, 0x1e376c085141ab53, R15, R8,  R9,  R10, R11, R12, R13, R14)
   178	SHA512ROUND1(50, 0x2748774cdf8eeb99, R14, R15, R8,  R9,  R10, R11, R12, R13)
   179	SHA512ROUND1(51, 0x34b0bcb5e19b48a8, R13, R14, R15, R8,  R9,  R10, R11, R12)
   180	SHA512ROUND1(52, 0x391c0cb3c5c95a63, R12, R13, R14, R15, R8,  R9,  R10, R11)
   181	SHA512ROUND1(53, 0x4ed8aa4ae3418acb, R11, R12, R13, R14, R15, R8,  R9,  R10)
   182	SHA512ROUND1(54, 0x5b9cca4f7763e373, R10, R11, R12, R13, R14, R15, R8,  R9)
   183	SHA512ROUND1(55, 0x682e6ff3d6b2b8a3, R9,  R10, R11, R12, R13, R14, R15, R8)
   184	SHA512ROUND1(56, 0x748f82ee5defb2fc, R8,  R9,  R10, R11, R12, R13, R14, R15)
   185	SHA512ROUND1(57, 0x78a5636f43172f60, R15, R8,  R9,  R10, R11, R12, R13, R14)
   186	SHA512ROUND1(58, 0x84c87814a1f0ab72, R14, R15, R8,  R9,  R10, R11, R12, R13)
   187	SHA512ROUND1(59, 0x8cc702081a6439ec, R13, R14, R15, R8,  R9,  R10, R11, R12)
   188	SHA512ROUND1(60, 0x90befffa23631e28, R12, R13, R14, R15, R8,  R9,  R10, R11)
   189	SHA512ROUND1(61, 0xa4506cebde82bde9, R11, R12, R13, R14, R15, R8,  R9,  R10)
   190	SHA512ROUND1(62, 0xbef9a3f7b2c67915, R10, R11, R12, R13, R14, R15, R8,  R9)
   191	SHA512ROUND1(63, 0xc67178f2e372532b, R9,  R10, R11, R12, R13, R14, R15, R8)
   192	SHA512ROUND1(64, 0xca273eceea26619c, R8,  R9,  R10, R11, R12, R13, R14, R15)
   193	SHA512ROUND1(65, 0xd186b8c721c0c207, R15, R8,  R9,  R10, R11, R12, R13, R14)
   194	SHA512ROUND1(66, 0xeada7dd6cde0eb1e, R14, R15, R8,  R9,  R10, R11, R12, R13)
   195	SHA512ROUND1(67, 0xf57d4f7fee6ed178, R13, R14, R15, R8,  R9,  R10, R11, R12)
   196	SHA512ROUND1(68, 0x06f067aa72176fba, R12, R13, R14, R15, R8,  R9,  R10, R11)
   197	SHA512ROUND1(69, 0x0a637dc5a2c898a6, R11, R12, R13, R14, R15, R8,  R9,  R10)
   198	SHA512ROUND1(70, 0x113f9804bef90dae, R10, R11, R12, R13, R14, R15, R8,  R9)
   199	SHA512ROUND1(71, 0x1b710b35131c471b, R9,  R10, R11, R12, R13, R14, R15, R8)
   200	SHA512ROUND1(72, 0x28db77f523047d84, R8,  R9,  R10, R11, R12, R13, R14, R15)
   201	SHA512ROUND1(73, 0x32caab7b40c72493, R15, R8,  R9,  R10, R11, R12, R13, R14)
   202	SHA512ROUND1(74, 0x3c9ebe0a15c9bebc, R14, R15, R8,  R9,  R10, R11, R12, R13)
   203	SHA512ROUND1(75, 0x431d67c49c100d4c, R13, R14, R15, R8,  R9,  R10, R11, R12)
   204	SHA512ROUND1(76, 0x4cc5d4becb3e42b6, R12, R13, R14, R15, R8,  R9,  R10, R11)
   205	SHA512ROUND1(77, 0x597f299cfc657e2a, R11, R12, R13, R14, R15, R8,  R9,  R10)
   206	SHA512ROUND1(78, 0x5fcb6fab3ad6faec, R10, R11, R12, R13, R14, R15, R8,  R9)
   207	SHA512ROUND1(79, 0x6c44198c4a475817, R9,  R10, R11, R12, R13, R14, R15, R8)
   208
      	// 80 is a multiple of 8, so the argument rotation is back at its
      	// starting position and R8..R15 hold a..h in order again.
      	// Add them into the state in two groups of four, reusing the same
      	// four scratch registers for both groups.
   209	MOVV	(0*8)(R4), REGTMP
   210	MOVV	(1*8)(R4), REGTMP1
   211	MOVV	(2*8)(R4), REGTMP2
   212	MOVV	(3*8)(R4), REGTMP3
   213	ADDV	REGTMP, R8	// H0 = a + H0
   214	ADDV	REGTMP1, R9	// H1 = b + H1
   215	ADDV	REGTMP2, R10	// H2 = c + H2
   216	ADDV	REGTMP3, R11	// H3 = d + H3
   217	MOVV	R8, (0*8)(R4)
   218	MOVV	R9, (1*8)(R4)
   219	MOVV	R10, (2*8)(R4)
   220	MOVV	R11, (3*8)(R4)
   221	MOVV	(4*8)(R4), REGTMP
   222	MOVV	(5*8)(R4), REGTMP1
   223	MOVV	(6*8)(R4), REGTMP2
   224	MOVV	(7*8)(R4), REGTMP3
   225	ADDV	REGTMP, R12	// H4 = e + H4
   226	ADDV	REGTMP1, R13	// H5 = f + H5
   227	ADDV	REGTMP2, R14	// H6 = g + H6
   228	ADDV	REGTMP3, R15	// H7 = h + H7
   229	MOVV	R12, (4*8)(R4)
   230	MOVV	R13, (5*8)(R4)
   231	MOVV	R14, (6*8)(R4)
   232	MOVV	R15, (7*8)(R4)
   233
      	// R8..R15 now equal the stored H0..H7, which are exactly the initial
      	// a..h of the next chunk, so the state is not reloaded inside the loop.
   234	ADDV	$128, R5	// advance to the next 128-byte chunk
   235	BNE	R5, R25, loop	// repeat until the end pointer is reached
   236
   237end:
   238	RET

View as plain text