...

Text file src/crypto/sha1/sha1block_loong64.s

Documentation: crypto/sha1

     1// Copyright 2024 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego
     6
     7#include "textflag.h"
     8
     9// SHA-1 block routine. See sha1block.go for Go equivalent.
    10//
    11// There are 80 rounds of 4 types:
    12//   - rounds 0-15 are type 1 and load data (ROUND1 macro).
    13//   - rounds 16-19 are type 1 and do not load data (ROUND1x macro).
    14//   - rounds 20-39 are type 2 and do not load data (ROUND2 macro).
    15//   - rounds 40-59 are type 3 and do not load data (ROUND3 macro).
    16//   - rounds 60-79 are type 4 and do not load data (ROUND4 macro).
    17//
    18// Each round loads or shuffles the data, then computes a per-round
    19// function of b, c, d, and then mixes the result into and rotates the
    20// five registers a, b, c, d, e holding the intermediate results.
    21//
    22// The register rotation is implemented by rotating the arguments to
    23// the round macros instead of by explicit move instructions.
    24
    25#define REGTMP	R30	// scratch: w[i-3] in LOAD, b&c in FUNC3
    26#define REGTMP1	R17	// scratch in LOAD; afterwards f, the FUNCn result that MIX adds into e
    27#define REGTMP2	R18	// scratch in LOAD and FUNC3; a<<5 in MIX
    28#define REGTMP3	R19	// w[i] from LOAD1/LOAD, read by MIX; the raw WORD in LOAD1 is a literal and does not follow this define
    29
    30#define LOAD1(index) \	// w[index] = 32-bit word `index` of the block at R5, byte-reversed; the value is also left in REGTMP3
    31	MOVW	((index)*4)(R5), REGTMP3; \	// index is parenthesized so an expression argument scales as a whole
    32	WORD	$0x3a73; \	// REVB2W REGTMP3, REGTMP3   to big-endian
    33	MOVW	REGTMP3, ((index)*4)(R3)	// store into slot `index` of the w buffer addressed from R3
    34
    35#define LOAD(index) \	// w[i] = rotl1(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) over a 16-word ring at R3; the value is also left in REGTMP3
    36	MOVW	(((index)&0xf)*4)(R3), REGTMP3; \	// slot i&15 still holds w[i-16]
    37	MOVW	((((index)-3)&0xf)*4)(R3), REGTMP; \	// w[i-3]
    38	MOVW	((((index)-8)&0xf)*4)(R3), REGTMP1; \	// w[i-8]
    39	MOVW	((((index)-14)&0xf)*4)(R3), REGTMP2; \	// w[i-14]
    40	XOR	REGTMP, REGTMP3; \
    41	XOR	REGTMP1, REGTMP3; \
    42	XOR	REGTMP2, REGTMP3; \
    43	ROTR	$31, REGTMP3; \	// rotate right by 31 == rotate left by 1
    44	MOVW	REGTMP3, (((index)&0xf)*4)(R3)	// overwrite the oldest slot with w[i]
    45
    46// f = d ^ (b & (c ^ d)); the result is left in REGTMP1 (a and e are unused).
    47#define FUNC1(a, b, c, d, e) \
    48	XOR	d, c, REGTMP1; \	// REGTMP1 = c ^ d
    49	AND	REGTMP1, b, REGTMP1; \	// REGTMP1 = b & (c ^ d)
    50	XOR	REGTMP1, d, REGTMP1	// REGTMP1 = d ^ (b & (c ^ d))
    51
    52// f = b ^ c ^ d; the result is left in REGTMP1 (a and e are unused).
    53#define FUNC2(a, b, c, d, e) \
    54	XOR	c, d, REGTMP1; \	// REGTMP1 = c ^ d
    55	XOR	b, REGTMP1, REGTMP1	// REGTMP1 = b ^ c ^ d
    56
    57// f = (b & c) | ((b | c) & d); the result is left in REGTMP1, with REGTMP and REGTMP2 used as scratch.
    58#define FUNC3(a, b, c, d, e) \
    59	AND	c, b, REGTMP; \	// REGTMP = b & c
    60	OR	c, b, REGTMP2; \	// REGTMP2 = b | c
    61	AND	d, REGTMP2, REGTMP2; \	// REGTMP2 = (b | c) & d
    62	OR	REGTMP2, REGTMP, REGTMP1	// REGTMP1 = (b & c) | ((b | c) & d)
    63
    64#define FUNC4 FUNC2	// rounds of type 4 use the same function as type 2: f = b ^ c ^ d
    65
    66#define MIX(a, b, c, d, e, const) \	// e += f + w[i] + const + (a<<5), then b = b<<30; f is in REGTMP1, w[i] in REGTMP3; shifts are 32-bit rotates
    67	ROTR	$27, a, REGTMP2; \	// a << 5
    68	ADD	REGTMP3, e; \	// e = e + w[i]
    69	ADD	REGTMP1, e; \	// e = e + f
    70	ADDV	$const, e; \	// e = e + k
    71	ADD	REGTMP2, e; \	// e = e + a<<5 (kept as the last add, as in the original sequence)
    72	ROTR	$2, b	// b << 30
    73
    74#define ROUND1(a, b, c, d, e, index) \	// type 1 round that takes w[index] from the input block (rounds 0-15)
    75	LOAD1(index); \	// REGTMP3 = w[index], also stored in the w buffer
    76	FUNC1(a, b, c, d, e); \	// REGTMP1 = f
    77	MIX(a, b, c, d, e, 0x5A827999)	// fold f, w[index], the constant and a<<5 into e; rotate b
    78
    79#define ROUND1x(a, b, c, d, e, index) \	// type 1 round that derives w[index] from earlier words (rounds 16-19)
    80	LOAD(index); \	// REGTMP3 = w[index]; uses REGTMP1 as scratch, so it must precede FUNC1
    81	FUNC1(a, b, c, d, e); \	// REGTMP1 = f
    82	MIX(a, b, c, d, e, 0x5A827999)	// fold f, w[index], the constant and a<<5 into e; rotate b
    83
    84#define ROUND2(a, b, c, d, e, index) \	// type 2 round (rounds 20-39)
    85	LOAD(index); \	// REGTMP3 = w[index]; uses REGTMP1 as scratch, so it must precede FUNC2
    86	FUNC2(a, b, c, d, e); \	// REGTMP1 = f
    87	MIX(a, b, c, d, e, 0x6ED9EBA1)	// fold f, w[index], the constant and a<<5 into e; rotate b
    88
    89#define ROUND3(a, b, c, d, e, index) \	// type 3 round (rounds 40-59)
    90	LOAD(index); \	// REGTMP3 = w[index]; uses REGTMP, REGTMP1, REGTMP2 as scratch, so it must precede FUNC3
    91	FUNC3(a, b, c, d, e); \	// REGTMP1 = f
    92	MIX(a, b, c, d, e, 0x8F1BBCDC)	// fold f, w[index], the constant and a<<5 into e; rotate b
    93
    94#define ROUND4(a, b, c, d, e, index) \	// type 4 round (rounds 60-79)
    95	LOAD(index); \	// REGTMP3 = w[index]; uses REGTMP1 as scratch, so it must precede FUNC4
    96	FUNC4(a, b, c, d, e); \	// REGTMP1 = f (FUNC4 is FUNC2)
    97	MIX(a, b, c, d, e, 0xCA62C1D6)	// fold f, w[index], the constant and a<<5 into e; rotate b
    98
    99// block(dig, p) folds every whole 64-byte chunk of p into the five 32-bit state words at dig.
   100// A stack frame size of 64 bytes is required: it is the 16-word buffer w used by LOAD1/LOAD (see w in sha1block.go).
   101// R4 = dig, R5 = cursor in p, R24 = end of the whole chunks, R7-R11 = a..e, R12-R16 = state saved at chunk start.
   102// NOTE(review): w is addressed from 0(R3); confirm this does not overlap a slot the assembler reserves in the frame.
   103TEXT ·block(SB),NOSPLIT,$64-32
   104	MOVV	dig+0(FP),	R4	// R4 = dig
   105	MOVV	p_base+8(FP),	R5	// R5 = start of p
   106	MOVV	p_len+16(FP),	R6	// R6 = len(p)
   107	AND	$~63, R6	// round the length down to a multiple of 64
   108	BEQ	R6, zero	// no whole chunk: return without touching dig
   109
   110	// p_len >= 64: at least one whole chunk to process
   111	ADDV    R5, R6, R24	// R24 = address just past the last whole chunk
   112	MOVW	(0*4)(R4), R7	// a
   113	MOVW	(1*4)(R4), R8	// b
   114	MOVW	(2*4)(R4), R9	// c
   115	MOVW	(3*4)(R4), R10	// d
   116	MOVW	(4*4)(R4), R11	// e
   117
   118loop:
   119	MOVW	R7,	R12	// keep the incoming state for the add after round 79
   120	MOVW	R8,	R13
   121	MOVW	R9,	R14
   122	MOVW	R10,	R15
   123	MOVW	R11,	R16
   124
   125	ROUND1(R7,  R8,  R9,  R10, R11, 0)	// rounds 0-15: type 1, words loaded from p; arguments rotate one place per round
   126	ROUND1(R11, R7,  R8,  R9,  R10, 1)
   127	ROUND1(R10, R11, R7,  R8,  R9,  2)
   128	ROUND1(R9,  R10, R11, R7,  R8,  3)
   129	ROUND1(R8,  R9,  R10, R11, R7,  4)
   130	ROUND1(R7,  R8,  R9,  R10, R11, 5)
   131	ROUND1(R11, R7,  R8,  R9,  R10, 6)
   132	ROUND1(R10, R11, R7,  R8,  R9,  7)
   133	ROUND1(R9,  R10, R11, R7,  R8,  8)
   134	ROUND1(R8,  R9,  R10, R11, R7,  9)
   135	ROUND1(R7,  R8,  R9,  R10, R11, 10)
   136	ROUND1(R11, R7,  R8,  R9,  R10, 11)
   137	ROUND1(R10, R11, R7,  R8,  R9,  12)
   138	ROUND1(R9,  R10, R11, R7,  R8,  13)
   139	ROUND1(R8,  R9,  R10, R11, R7,  14)
   140	ROUND1(R7,  R8,  R9,  R10, R11, 15)
   141
   142	ROUND1x(R11, R7,  R8,  R9,  R10, 16)	// rounds 16-19: type 1, words derived from the w buffer
   143	ROUND1x(R10, R11, R7,  R8,  R9,  17)
   144	ROUND1x(R9,  R10, R11, R7,  R8,  18)
   145	ROUND1x(R8,  R9,  R10, R11, R7,  19)
   146
   147	ROUND2(R7,  R8,  R9,  R10, R11, 20)	// rounds 20-39: type 2
   148	ROUND2(R11, R7,  R8,  R9,  R10, 21)
   149	ROUND2(R10, R11, R7,  R8,  R9,  22)
   150	ROUND2(R9,  R10, R11, R7,  R8,  23)
   151	ROUND2(R8,  R9,  R10, R11, R7,  24)
   152	ROUND2(R7,  R8,  R9,  R10, R11, 25)
   153	ROUND2(R11, R7,  R8,  R9,  R10, 26)
   154	ROUND2(R10, R11, R7,  R8,  R9,  27)
   155	ROUND2(R9,  R10, R11, R7,  R8,  28)
   156	ROUND2(R8,  R9,  R10, R11, R7,  29)
   157	ROUND2(R7,  R8,  R9,  R10, R11, 30)
   158	ROUND2(R11, R7,  R8,  R9,  R10, 31)
   159	ROUND2(R10, R11, R7,  R8,  R9,  32)
   160	ROUND2(R9,  R10, R11, R7,  R8,  33)
   161	ROUND2(R8,  R9,  R10, R11, R7,  34)
   162	ROUND2(R7,  R8,  R9,  R10, R11, 35)
   163	ROUND2(R11, R7,  R8,  R9,  R10, 36)
   164	ROUND2(R10, R11, R7,  R8,  R9,  37)
   165	ROUND2(R9,  R10, R11, R7,  R8,  38)
   166	ROUND2(R8,  R9,  R10, R11, R7,  39)
   167
   168	ROUND3(R7,  R8,  R9,  R10, R11, 40)	// rounds 40-59: type 3
   169	ROUND3(R11, R7,  R8,  R9,  R10, 41)
   170	ROUND3(R10, R11, R7,  R8,  R9,  42)
   171	ROUND3(R9,  R10, R11, R7,  R8,  43)
   172	ROUND3(R8,  R9,  R10, R11, R7,  44)
   173	ROUND3(R7,  R8,  R9,  R10, R11, 45)
   174	ROUND3(R11, R7,  R8,  R9,  R10, 46)
   175	ROUND3(R10, R11, R7,  R8,  R9,  47)
   176	ROUND3(R9,  R10, R11, R7,  R8,  48)
   177	ROUND3(R8,  R9,  R10, R11, R7,  49)
   178	ROUND3(R7,  R8,  R9,  R10, R11, 50)
   179	ROUND3(R11, R7,  R8,  R9,  R10, 51)
   180	ROUND3(R10, R11, R7,  R8,  R9,  52)
   181	ROUND3(R9,  R10, R11, R7,  R8,  53)
   182	ROUND3(R8,  R9,  R10, R11, R7,  54)
   183	ROUND3(R7,  R8,  R9,  R10, R11, 55)
   184	ROUND3(R11, R7,  R8,  R9,  R10, 56)
   185	ROUND3(R10, R11, R7,  R8,  R9,  57)
   186	ROUND3(R9,  R10, R11, R7,  R8,  58)
   187	ROUND3(R8,  R9,  R10, R11, R7,  59)
   188
   189	ROUND4(R7,  R8,  R9,  R10, R11, 60)	// rounds 60-79: type 4
   190	ROUND4(R11, R7,  R8,  R9,  R10, 61)
   191	ROUND4(R10, R11, R7,  R8,  R9,  62)
   192	ROUND4(R9,  R10, R11, R7,  R8,  63)
   193	ROUND4(R8,  R9,  R10, R11, R7,  64)
   194	ROUND4(R7,  R8,  R9,  R10, R11, 65)
   195	ROUND4(R11, R7,  R8,  R9,  R10, 66)
   196	ROUND4(R10, R11, R7,  R8,  R9,  67)
   197	ROUND4(R9,  R10, R11, R7,  R8,  68)
   198	ROUND4(R8,  R9,  R10, R11, R7,  69)
   199	ROUND4(R7,  R8,  R9,  R10, R11, 70)
   200	ROUND4(R11, R7,  R8,  R9,  R10, 71)
   201	ROUND4(R10, R11, R7,  R8,  R9,  72)
   202	ROUND4(R9,  R10, R11, R7,  R8,  73)
   203	ROUND4(R8,  R9,  R10, R11, R7,  74)
   204	ROUND4(R7,  R8,  R9,  R10, R11, 75)
   205	ROUND4(R11, R7,  R8,  R9,  R10, 76)
   206	ROUND4(R10, R11, R7,  R8,  R9,  77)
   207	ROUND4(R9,  R10, R11, R7,  R8,  78)
   208	ROUND4(R8,  R9,  R10, R11, R7,  79)
   209
   210	ADD	R12, R7	// add the state saved at the top of the loop back in
   211	ADD	R13, R8
   212	ADD	R14, R9
   213	ADD	R15, R10
   214	ADD	R16, R11
   215
   216	ADDV	$64, R5	// advance the cursor to the next chunk
   217	BNE	R5, R24, loop	// repeat until the cursor reaches R24
   218
   219end:
   220	MOVW	R7, (0*4)(R4)	// write the updated state back to dig
   221	MOVW	R8, (1*4)(R4)
   222	MOVW	R9, (2*4)(R4)
   223	MOVW	R10, (3*4)(R4)
   224	MOVW	R11, (4*4)(R4)
   225zero:
   226	RET

View as plain text