...

Text file src/crypto/internal/fips140/sha256/sha256block_loong64.s

Documentation: crypto/internal/fips140/sha256

     1// Copyright 2024 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego
     6
     7#include "textflag.h"
     8
     9// SHA256 block routine. See sha256block.go for Go equivalent.
    10//
    11// The algorithm is detailed in FIPS 180-4:
    12//
    13//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    14//
    15// W[i] = M[i]; for 0 <= i <= 15
    16// W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
    17//
    18// a = H0
    19// b = H1
    20// c = H2
    21// d = H3
    22// e = H4
    23// f = H5
    24// g = H6
    25// h = H7
    26//
    27// for i = 0 to 63 {
    28//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + K[i] + W[i]
    29//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    30//    h = g
    31//    g = f
    32//    f = e
    33//    e = d + T1
    34//    d = c
    35//    c = b
    36//    b = a
    37//    a = T1 + T2
    38// }
    39//
    40// H0 = a + H0
    41// H1 = b + H1
    42// H2 = c + H2
    43// H3 = d + H3
    44// H4 = e + H4
    45// H5 = f + H5
    46// H6 = g + H6
    47// H7 = h + H7
    48
    // Scratch registers used by the macros in this file.
    //   REGTMP  - general scratch (schedule words in LOAD1, ROTR(11,e) and the
    //             0xffffffff mask in SHA256T1, a AND c in SHA256T2).
    //   REGTMP1 - W[i-15] in LOAD1; holds T2 on exit from SHA256T2.
    //   REGTMP2 - general scratch.
    //   REGTMP3 - general scratch.
    //   REGTMP4 - W[i] on exit from LOAD0/LOAD1; holds T1 on exit from SHA256T1.
    //             LOAD0 also uses a raw WORD encoding that its comment describes
    //             as operating on REGTMP4, so that encoding has to be kept in
    //             sync if this define ever changes.
    //   REGTMP5 - general scratch. It is R6, which the block function also uses
    //             for p_len; p_len is only read before the round loop starts.
    49#define REGTMP	R30
    50#define REGTMP1	R16
    51#define REGTMP2	R17
    52#define REGTMP3	R18
    53#define REGTMP4	R7
    54#define REGTMP5	R6
    55
    56// W[i] = M[i]; for 0 <= i <= 15
      //
      // LOAD0(index) reads 32-bit message word `index` from the input block
      // addressed by R5, byte-reverses it (see the REVB2W note below) and stores
      // it into slot `index` of the 16-word schedule buffer addressed by R3.
      // The word is also left in REGTMP4, where SHA256T1 expects to find W[i].
      //
      // The byte reversal is emitted as a raw instruction word, so it does not
      // follow the REGTMP4 define: 0x38e7 is a fixed encoding.
    57#define LOAD0(index) \
    58	MOVW	(index*4)(R5), REGTMP4; \	// REGTMP4 = M[index] as stored in memory
    59	WORD	$0x38e7; \	// REVB2W REGTMP4, REGTMP4 to big-endian
    60	MOVW	REGTMP4, (index*4)(R3)
    61
    62// W[i] = SIGMA1(W[i-2]) + W[i-7] + SIGMA0(W[i-15]) + W[i-16]; for 16 <= i <= 63
    63//   SIGMA0(x) = ROTR(7,x) XOR ROTR(18,x) XOR SHR(3,x)
    64//   SIGMA1(x) = ROTR(17,x) XOR ROTR(19,x) XOR SHR(10,x)
      //
      // LOAD1(index) computes W[index] from the four earlier schedule words and
      // stores it back into the schedule buffer addressed by R3. The buffer holds
      // only 16 words and is used as a ring: every slot index is reduced with
      // "& 0xf", so W[index] overwrites the slot that held W[index-16].
      // The result is also left in REGTMP4, where SHA256T1 expects to find W[i].
      // Clobbers REGTMP, REGTMP1, REGTMP2, REGTMP3, REGTMP4 and REGTMP5.
    65#define LOAD1(index) \
    66	MOVW	(((index-2)&0xf)*4)(R3), REGTMP4; \	// REGTMP4 = W[i-2]
    67	MOVW	(((index-15)&0xf)*4)(R3), REGTMP1; \	// REGTMP1 = W[i-15]
    68	MOVW	(((index-7)&0xf)*4)(R3), REGTMP; \	// REGTMP = W[i-7]
    69	MOVW	REGTMP4, REGTMP2; \	// two more copies of W[i-2] for SIGMA1
    70	MOVW	REGTMP4, REGTMP3; \
    71	ROTR	$17, REGTMP4; \	// REGTMP4 = ROTR(17, W[i-2])
    72	ROTR	$19, REGTMP2; \	// REGTMP2 = ROTR(19, W[i-2])
    73	SRL	$10, REGTMP3; \	// REGTMP3 = SHR(10, W[i-2])
    74	XOR	REGTMP2, REGTMP4; \
    75	XOR	REGTMP3, REGTMP4; \	// REGTMP4 = SIGMA1(W[i-2])
    76	ROTR	$7, REGTMP1, REGTMP5; \	// REGTMP5 = ROTR(7, W[i-15])
    77	SRL	$3, REGTMP1, REGTMP3; \	// REGTMP3 = SHR(3, W[i-15])
    78	ROTR	$18, REGTMP1, REGTMP2; \	// REGTMP2 = ROTR(18, W[i-15])
    79	ADD	REGTMP, REGTMP4; \	// REGTMP4 = SIGMA1(W[i-2]) + W[i-7]
    80	MOVW	(((index-16)&0xf)*4)(R3), REGTMP; \	// REGTMP = W[i-16]
    81	XOR	REGTMP3, REGTMP5; \
    82	XOR	REGTMP2, REGTMP5; \	// REGTMP5 = SIGMA0(W[i-15])
    83	ADD	REGTMP, REGTMP5; \	// REGTMP5 = SIGMA0(W[i-15]) + W[i-16]
    84	ADD	REGTMP5, REGTMP4; \	// REGTMP4 = W[i]
    85	MOVW	REGTMP4, ((index&0xf)*4)(R3)
    86
    87// T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + K[i] + W[i]
    88// BIGSIGMA1(x) = ROTR(6,x) XOR ROTR(11,x) XOR ROTR(25,x)
    89// Ch(x, y, z) = (x AND y) XOR (NOT x AND z)
    90// Calculate T1 in REGTMP4
      //
      // On entry REGTMP4 must hold W[i] (LOAD0 and LOAD1 both leave it there) and
      // const is the round constant K[i]. e, f and g are only read. h is used as
      // an accumulator for the partial sum and therefore does not keep its value.
      // Clobbers REGTMP, REGTMP2, REGTMP3, REGTMP4, REGTMP5 and h.
    91#define SHA256T1(const, e, f, g, h) \
    92	ADDV	$const, h; \	// h += K[i]
    93	ADD	REGTMP4, h; \	// h += W[i]
    94	ROTR	$6, e, REGTMP4; \	// REGTMP4 = ROTR(6, e)
    95	ROTR	$11, e, REGTMP; \	// REGTMP = ROTR(11, e)
    96	ROTR	$25, e, REGTMP3; \	// REGTMP3 = ROTR(25, e)
    97	AND	f, e, REGTMP2; \	// REGTMP2 = e AND f
    98	XOR	REGTMP, REGTMP4; \	// REGTMP4 = ROTR(6,e) XOR ROTR(11,e)
    99	MOVV	$0xffffffff, REGTMP; \	// all-ones mask for the low 32 bits, used for NOT e
   100	XOR	REGTMP4, REGTMP3; \	// REGTMP3 = BIGSIGMA1(e)
   101	XOR	REGTMP, e, REGTMP5; \	// REGTMP5 = NOT e (low 32 bits inverted)
   102	ADD	REGTMP3, h; \	// h += BIGSIGMA1(e)
   103	AND	g, REGTMP5; \	// REGTMP5 = (NOT e) AND g
   104	XOR	REGTMP2, REGTMP5; \	// REGTMP5 = Ch(e, f, g)
   105	ADD	h, REGTMP5, REGTMP4
   106
   107// T2 = BIGSIGMA0(a) + Maj(a, b, c)
   108// BIGSIGMA0(x) = ROTR(2,x) XOR ROTR(13,x) XOR ROTR(22,x)
   109// Maj(x, y, z) = (x AND y) XOR (x AND z) XOR (y AND z)
   110// Calculate T2 in REGTMP1
      //
      // a, b and c are only read. REGTMP4 is not written, so the T1 value that
      // SHA256T1 left there is still available after this macro.
      // Clobbers REGTMP, REGTMP1, REGTMP2, REGTMP3 and REGTMP5.
   111#define SHA256T2(a, b, c) \
   112	ROTR	$2, a, REGTMP5; \	// REGTMP5 = ROTR(2, a)
   113	AND	b, c, REGTMP1; \	// REGTMP1 = b AND c
   114	ROTR	$13, a, REGTMP3; \	// REGTMP3 = ROTR(13, a)
   115	AND	c, a, REGTMP; \	// REGTMP = a AND c
   116	XOR	REGTMP3, REGTMP5; \	// REGTMP5 = ROTR(2,a) XOR ROTR(13,a)
   117	XOR	REGTMP, REGTMP1; \	// REGTMP1 = (b AND c) XOR (a AND c)
   118	ROTR	$22, a, REGTMP2; \	// REGTMP2 = ROTR(22, a)
   119	AND	a, b, REGTMP3; \	// REGTMP3 = a AND b
   120	XOR	REGTMP2, REGTMP5; \	// REGTMP5 = BIGSIGMA0(a)
   121	XOR	REGTMP3, REGTMP1; \	// REGTMP1 = Maj(a, b, c)
   122	ADD	REGTMP5, REGTMP1
   123
   124// Calculate T1 and T2, then e = d + T1 and a = T1 + T2.
   125// The values for e and a are stored in d and h, ready for rotation.
      //
      // REGTMP4 must hold W[i] on entry (see SHA256T1). No register-to-register
      // moves are done for the b..h shuffle of the reference algorithm; instead
      // the caller passes the working registers in a rotated order each round.
      // Clobbers REGTMP and REGTMP1 through REGTMP5.
   126#define SHA256ROUND(const, a, b, c, d, e, f, g, h) \
   127	SHA256T1(const, e, f, g, h); \	// REGTMP4 = T1
   128	SHA256T2(a, b, c); \	// REGTMP1 = T2
   129	ADD	REGTMP4, d; \	// d = d + T1, the next round's e
   130	ADD	REGTMP1, REGTMP4, h
   131
   132#define SHA256ROUND0(index, const, a, b, c, d, e, f, g, h) \
   133	LOAD0(index); \	// rounds 0-15: W[i] comes straight from the message block
   134	SHA256ROUND(const, a, b, c, d, e, f, g, h)
   135
   136#define SHA256ROUND1(index, const, a, b, c, d, e, f, g, h) \
   137	LOAD1(index); \	// rounds 16-63: W[i] is derived from earlier schedule words
   138	SHA256ROUND(const, a, b, c, d, e, f, g, h)
   139
   140// A stack frame size of 64 bytes is required here, because
   141// the frame size used for data expansion is 64 bytes.
   142// See the definition of the macro LOAD1 above (4 bytes * 16 entries).
   143//
      // block folds every complete 64-byte chunk of p into the eight 32-bit
      // state words read from and written back to the memory dig points to.
      // The length is rounded down to a multiple of 64; trailing bytes are
      // ignored, and nothing is read or written if fewer than 64 bytes are given.
      //
      // Register use inside the loop:
      //   R4       - dig (address of H0..H7)
      //   R5       - address of the current 64-byte chunk
      //   R25      - address one past the last complete chunk
      //   R8..R15  - working variables a..h
      //   R3       - base of the 16-word message schedule buffer (LOAD0/LOAD1)
      //   REGTMP*  - scratch, see the defines at the top of the file
      //
   144//func block(dig *Digest, p []byte)
   145TEXT ·block(SB),NOSPLIT,$64-32
   146	MOVV	p_base+8(FP), R5	// R5 = start of input
   147	MOVV	p_len+16(FP), R6	// R6 = input length in bytes
   148	AND	$~63, R6	// round the length down to a multiple of 64
   149	BEQ	R6, end	// no complete chunk: nothing to do
   150
   151	// p_len >= 64
   152	MOVV	dig+0(FP), R4
   153	ADDV	R5, R6, R25	// R25 = end of the last complete chunk; R6 is free from here on
   154	MOVW	(0*4)(R4), R8	// a = H0
   155	MOVW	(1*4)(R4), R9	// b = H1
   156	MOVW	(2*4)(R4), R10	// c = H2
   157	MOVW	(3*4)(R4), R11	// d = H3
   158	MOVW	(4*4)(R4), R12	// e = H4
   159	MOVW	(5*4)(R4), R13	// f = H5
   160	MOVW	(6*4)(R4), R14	// g = H6
   161	MOVW	(7*4)(R4), R15	// h = H7
   162
   163loop:
      	// One iteration processes one 64-byte chunk with 64 unrolled rounds.
      	// Each round writes the new e into its d argument and the new a into its
      	// h argument, so the register list is rotated by one position per round
      	// instead of moving values. After every 8 rounds the assignment is back
      	// to a..h = R8..R15, which is also the state after round 63.
   164	SHA256ROUND0(0,  0x428a2f98, R8,  R9,  R10, R11, R12, R13, R14, R15)
   165	SHA256ROUND0(1,  0x71374491, R15, R8,  R9,  R10, R11, R12, R13, R14)
   166	SHA256ROUND0(2,  0xb5c0fbcf, R14, R15, R8,  R9,  R10, R11, R12, R13)
   167	SHA256ROUND0(3,  0xe9b5dba5, R13, R14, R15, R8,  R9,  R10, R11, R12)
   168	SHA256ROUND0(4,  0x3956c25b, R12, R13, R14, R15, R8,  R9,  R10, R11)
   169	SHA256ROUND0(5,  0x59f111f1, R11, R12, R13, R14, R15, R8,  R9,  R10)
   170	SHA256ROUND0(6,  0x923f82a4, R10, R11, R12, R13, R14, R15, R8,  R9)
   171	SHA256ROUND0(7,  0xab1c5ed5, R9,  R10, R11, R12, R13, R14, R15, R8)
   172	SHA256ROUND0(8,  0xd807aa98, R8,  R9,  R10, R11, R12, R13, R14, R15)
   173	SHA256ROUND0(9,  0x12835b01, R15, R8,  R9,  R10, R11, R12, R13, R14)
   174	SHA256ROUND0(10, 0x243185be, R14, R15, R8,  R9,  R10, R11, R12, R13)
   175	SHA256ROUND0(11, 0x550c7dc3, R13, R14, R15, R8,  R9,  R10, R11, R12)
   176	SHA256ROUND0(12, 0x72be5d74, R12, R13, R14, R15, R8,  R9,  R10, R11)
   177	SHA256ROUND0(13, 0x80deb1fe, R11, R12, R13, R14, R15, R8,  R9,  R10)
   178	SHA256ROUND0(14, 0x9bdc06a7, R10, R11, R12, R13, R14, R15, R8,  R9)
   179	SHA256ROUND0(15, 0xc19bf174, R9,  R10, R11, R12, R13, R14, R15, R8)
   180
   181	SHA256ROUND1(16, 0xe49b69c1, R8,  R9,  R10, R11, R12, R13, R14, R15)
   182	SHA256ROUND1(17, 0xefbe4786, R15, R8,  R9,  R10, R11, R12, R13, R14)
   183	SHA256ROUND1(18, 0x0fc19dc6, R14, R15, R8,  R9,  R10, R11, R12, R13)
   184	SHA256ROUND1(19, 0x240ca1cc, R13, R14, R15, R8,  R9,  R10, R11, R12)
   185	SHA256ROUND1(20, 0x2de92c6f, R12, R13, R14, R15, R8,  R9,  R10, R11)
   186	SHA256ROUND1(21, 0x4a7484aa, R11, R12, R13, R14, R15, R8,  R9,  R10)
   187	SHA256ROUND1(22, 0x5cb0a9dc, R10, R11, R12, R13, R14, R15, R8,  R9)
   188	SHA256ROUND1(23, 0x76f988da, R9,  R10, R11, R12, R13, R14, R15, R8)
   189	SHA256ROUND1(24, 0x983e5152, R8,  R9,  R10, R11, R12, R13, R14, R15)
   190	SHA256ROUND1(25, 0xa831c66d, R15, R8,  R9,  R10, R11, R12, R13, R14)
   191	SHA256ROUND1(26, 0xb00327c8, R14, R15, R8,  R9,  R10, R11, R12, R13)
   192	SHA256ROUND1(27, 0xbf597fc7, R13, R14, R15, R8,  R9,  R10, R11, R12)
   193	SHA256ROUND1(28, 0xc6e00bf3, R12, R13, R14, R15, R8,  R9,  R10, R11)
   194	SHA256ROUND1(29, 0xd5a79147, R11, R12, R13, R14, R15, R8,  R9,  R10)
   195	SHA256ROUND1(30, 0x06ca6351, R10, R11, R12, R13, R14, R15, R8,  R9)
   196	SHA256ROUND1(31, 0x14292967, R9,  R10, R11, R12, R13, R14, R15, R8)
   197	SHA256ROUND1(32, 0x27b70a85, R8,  R9,  R10, R11, R12, R13, R14, R15)
   198	SHA256ROUND1(33, 0x2e1b2138, R15, R8,  R9,  R10, R11, R12, R13, R14)
   199	SHA256ROUND1(34, 0x4d2c6dfc, R14, R15, R8,  R9,  R10, R11, R12, R13)
   200	SHA256ROUND1(35, 0x53380d13, R13, R14, R15, R8,  R9,  R10, R11, R12)
   201	SHA256ROUND1(36, 0x650a7354, R12, R13, R14, R15, R8,  R9,  R10, R11)
   202	SHA256ROUND1(37, 0x766a0abb, R11, R12, R13, R14, R15, R8,  R9,  R10)
   203	SHA256ROUND1(38, 0x81c2c92e, R10, R11, R12, R13, R14, R15, R8,  R9)
   204	SHA256ROUND1(39, 0x92722c85, R9,  R10, R11, R12, R13, R14, R15, R8)
   205	SHA256ROUND1(40, 0xa2bfe8a1, R8,  R9,  R10, R11, R12, R13, R14, R15)
   206	SHA256ROUND1(41, 0xa81a664b, R15, R8,  R9,  R10, R11, R12, R13, R14)
   207	SHA256ROUND1(42, 0xc24b8b70, R14, R15, R8,  R9,  R10, R11, R12, R13)
   208	SHA256ROUND1(43, 0xc76c51a3, R13, R14, R15, R8,  R9,  R10, R11, R12)
   209	SHA256ROUND1(44, 0xd192e819, R12, R13, R14, R15, R8,  R9,  R10, R11)
   210	SHA256ROUND1(45, 0xd6990624, R11, R12, R13, R14, R15, R8,  R9,  R10)
   211	SHA256ROUND1(46, 0xf40e3585, R10, R11, R12, R13, R14, R15, R8,  R9)
   212	SHA256ROUND1(47, 0x106aa070, R9,  R10, R11, R12, R13, R14, R15, R8)
   213	SHA256ROUND1(48, 0x19a4c116, R8,  R9,  R10, R11, R12, R13, R14, R15)
   214	SHA256ROUND1(49, 0x1e376c08, R15, R8,  R9,  R10, R11, R12, R13, R14)
   215	SHA256ROUND1(50, 0x2748774c, R14, R15, R8,  R9,  R10, R11, R12, R13)
   216	SHA256ROUND1(51, 0x34b0bcb5, R13, R14, R15, R8,  R9,  R10, R11, R12)
   217	SHA256ROUND1(52, 0x391c0cb3, R12, R13, R14, R15, R8,  R9,  R10, R11)
   218	SHA256ROUND1(53, 0x4ed8aa4a, R11, R12, R13, R14, R15, R8,  R9,  R10)
   219	SHA256ROUND1(54, 0x5b9cca4f, R10, R11, R12, R13, R14, R15, R8,  R9)
   220	SHA256ROUND1(55, 0x682e6ff3, R9,  R10, R11, R12, R13, R14, R15, R8)
   221	SHA256ROUND1(56, 0x748f82ee, R8,  R9,  R10, R11, R12, R13, R14, R15)
   222	SHA256ROUND1(57, 0x78a5636f, R15, R8,  R9,  R10, R11, R12, R13, R14)
   223	SHA256ROUND1(58, 0x84c87814, R14, R15, R8,  R9,  R10, R11, R12, R13)
   224	SHA256ROUND1(59, 0x8cc70208, R13, R14, R15, R8,  R9,  R10, R11, R12)
   225	SHA256ROUND1(60, 0x90befffa, R12, R13, R14, R15, R8,  R9,  R10, R11)
   226	SHA256ROUND1(61, 0xa4506ceb, R11, R12, R13, R14, R15, R8,  R9,  R10)
   227	SHA256ROUND1(62, 0xbef9a3f7, R10, R11, R12, R13, R14, R15, R8,  R9)
   228	SHA256ROUND1(63, 0xc67178f2, R9,  R10, R11, R12, R13, R14, R15, R8)
   229
      	// Add the previous state (still in memory at R4) to the working
      	// variables and store the sums back. R8..R15 then already hold the
      	// starting a..h for the next chunk, so they are not reloaded.
   230	MOVW	(0*4)(R4), REGTMP
   231	MOVW	(1*4)(R4), REGTMP1
   232	MOVW	(2*4)(R4), REGTMP2
   233	MOVW	(3*4)(R4), REGTMP3
   234	ADD	REGTMP, R8	// H0 = a + H0
   235	ADD	REGTMP1, R9	// H1 = b + H1
   236	ADD	REGTMP2, R10	// H2 = c + H2
   237	ADD	REGTMP3, R11	// H3 = d + H3
   238	MOVW	R8, (0*4)(R4)
   239	MOVW	R9, (1*4)(R4)
   240	MOVW	R10, (2*4)(R4)
   241	MOVW	R11, (3*4)(R4)
   242	MOVW	(4*4)(R4), REGTMP
   243	MOVW	(5*4)(R4), REGTMP1
   244	MOVW	(6*4)(R4), REGTMP2
   245	MOVW	(7*4)(R4), REGTMP3
   246	ADD	REGTMP, R12	// H4 = e + H4
   247	ADD	REGTMP1, R13	// H5 = f + H5
   248	ADD	REGTMP2, R14	// H6 = g + H6
   249	ADD	REGTMP3, R15	// H7 = h + H7
   250	MOVW	R12, (4*4)(R4)
   251	MOVW	R13, (5*4)(R4)
   252	MOVW	R14, (6*4)(R4)
   253	MOVW	R15, (7*4)(R4)
   254
   255	ADDV	$64, R5	// advance to the next 64-byte chunk
   256	BNE	R5, R25, loop	// continue until the end pointer is reached
   257
   258end:
   259	RET

View as plain text