...

Text file src/crypto/md5/md5block_loong64.s

Documentation: crypto/md5

     1// Copyright 2024 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4//
     5// Loong64 version of md5block.go
     6// derived from crypto/md5/md5block_amd64.s
     7
     8//go:build !purego
     9
    10#define REGTMP	R30
    11#define REGTMP1 R12
    12#define REGTMP2 R18
    13
    14#include "textflag.h"
    15
    16// func block(dig *digest, p []byte)
//
// Processes every complete 64-byte chunk of p into the digest state at dig.
//
// Register usage:
//   R4      = dig (pointer to the four 32-bit state words a, b, c, d)
//   R5      = current position in p; R24 = end of the full-chunk region
//   R7-R10  = working state a, b, c, d (roles rotate between rounds)
//   R14-R17 = state saved at the top of each chunk, added back at the end
//   R11     = pre-loaded next message word x[i]
//   REGTMP, REGTMP1, REGTMP2 = scratch for the round functions
    17TEXT	·block(SB),NOSPLIT,$0-32
    18	MOVV	dig+0(FP), R4
    19	MOVV	p+8(FP), R5
    20	MOVV	p_len+16(FP), R6
	// Round p_len down to a multiple of 64; if there is no complete
	// chunk, fall straight through to the return.
    21	AND	$~63, R6
    22	BEQ	R6, zero
    23
    24	// p_len >= 64
    25	ADDV	R5, R6, R24
    26	MOVW	(0*4)(R4), R7
    27	MOVW	(1*4)(R4), R8
    28	MOVW	(2*4)(R4), R9
    29	MOVW	(3*4)(R4), R10
    30
    31loop:
	// Save the incoming state; MD5 adds it back after the 64 rounds.
    32	MOVW	R7, R14
    33	MOVW	R8, R15
    34	MOVW	R9, R16
    35	MOVW	R10, R17
    36
	// Pre-load x[0] and seed REGTMP1 with d for the first ROUND1.
    37	MOVW	(0*4)(R5), R11
    38	MOVW	R10, REGTMP1
    39
    40// F = ((c ^ d) & b) ^ d
//
// Every ROUND macro below consumes the message word already sitting in R11
// and pre-loads the word for the NEXT round (the index argument) to hide
// the load latency. The last round of each group therefore pre-loads a
// don't-care word, which the explicit MOVW before the next group overwrites
// (with the same value, so the duplication is harmless).
//
// ROTR $(32-shift) implements a left-rotate by shift (no rotate-left insn).
// ADDV adds the 64-bit constant; only the low 32 bits matter, since the
// 32-bit ADD/ROTR ops and the final MOVW stores ignore the upper half.
//
// ROUND1 expects REGTMP1 to hold d on entry, and leaves c in it on exit —
// c of this round is d of the next, saving one move per round.
    41#define ROUND1(a, b, c, d, index, const, shift) \
    42	ADDV	$const, a; \
    43	ADD	R11, a; \
    44	MOVW	(index*4)(R5), R11; \
    45	XOR	c, REGTMP1; \
    46	AND	b, REGTMP1; \
    47	XOR	d, REGTMP1; \
    48	ADD	REGTMP1, a; \
    49	ROTR	$(32-shift), a; \
    50	MOVW	c, REGTMP1; \
    51	ADD	b, a
    52
    53	ROUND1(R7,  R8,  R9,  R10,  1, 0xd76aa478,  7);
    54	ROUND1(R10, R7,  R8,  R9,   2, 0xe8c7b756, 12);
    55	ROUND1(R9,  R10, R7,  R8,   3, 0x242070db, 17);
    56	ROUND1(R8,  R9,  R10, R7,   4, 0xc1bdceee, 22);
    57	ROUND1(R7,  R8,  R9,  R10,  5, 0xf57c0faf,  7);
    58	ROUND1(R10, R7,  R8,  R9,   6, 0x4787c62a, 12);
    59	ROUND1(R9,  R10, R7,  R8,   7, 0xa8304613, 17);
    60	ROUND1(R8,  R9,  R10, R7,   8, 0xfd469501, 22);
    61	ROUND1(R7,  R8,  R9,  R10,  9, 0x698098d8,  7);
    62	ROUND1(R10, R7,  R8,  R9,  10, 0x8b44f7af, 12);
    63	ROUND1(R9,  R10, R7,  R8,  11, 0xffff5bb1, 17);
    64	ROUND1(R8,  R9,  R10, R7,  12, 0x895cd7be, 22);
    65	ROUND1(R7,  R8,  R9,  R10, 13, 0x6b901122,  7);
    66	ROUND1(R10, R7,  R8,  R9,  14, 0xfd987193, 12);
    67	ROUND1(R9,  R10, R7,  R8,  15, 0xa679438e, 17);
    68	ROUND1(R8,  R9,  R10, R7,   1, 0x49b40821, 22);
    69
	// x[1] is the first word of round 2 (already pre-loaded above).
    70	MOVW	(1*4)(R5), R11
    71
    72// F = ((b ^ c) & d) ^ c
//
// ROUND2 uses only three-operand forms into REGTMP, so unlike the other
// groups it carries no scratch state from one round to the next.
    73#define ROUND2(a, b, c, d, index, const, shift) \
    74	ADDV	$const, a; \
    75	ADD	R11, a; \
    76	MOVW	(index*4)(R5), R11; \
    77	XOR	b, c, REGTMP; \
    78	AND	REGTMP, d, REGTMP; \
    79	XOR	REGTMP, c, REGTMP; \
    80	ADD	REGTMP, a; \
    81	ROTR	$(32-shift), a; \
    82	ADD	b, a
    83
    84	ROUND2(R7,  R8,  R9,  R10,  6, 0xf61e2562,  5);
    85	ROUND2(R10, R7,  R8,  R9,  11, 0xc040b340,  9);
    86	ROUND2(R9,  R10, R7,  R8,   0, 0x265e5a51, 14);
    87	ROUND2(R8,  R9,  R10, R7,   5, 0xe9b6c7aa, 20);
    88	ROUND2(R7,  R8,  R9,  R10, 10, 0xd62f105d,  5);
    89	ROUND2(R10, R7,  R8,  R9,  15,  0x2441453,  9);
    90	ROUND2(R9,  R10, R7,  R8,   4, 0xd8a1e681, 14);
    91	ROUND2(R8,  R9,  R10, R7,   9, 0xe7d3fbc8, 20);
    92	ROUND2(R7,  R8,  R9,  R10, 14, 0x21e1cde6,  5);
    93	ROUND2(R10, R7,  R8,  R9,   3, 0xc33707d6,  9);
    94	ROUND2(R9,  R10, R7,  R8,   8, 0xf4d50d87, 14);
    95	ROUND2(R8,  R9,  R10, R7,  13, 0x455a14ed, 20);
    96	ROUND2(R7,  R8,  R9,  R10,  2, 0xa9e3e905,  5);
    97	ROUND2(R10, R7,  R8,  R9,   7, 0xfcefa3f8,  9);
    98	ROUND2(R9,  R10, R7,  R8,  12, 0x676f02d9, 14);
    99	ROUND2(R8,  R9,  R10, R7,   5, 0x8d2a4c8a, 20);
   100
	// Pre-load x[5] and seed REGTMP1 with c for the first ROUND3.
   101	MOVW	(5*4)(R5), R11
   102	MOVW	R9, REGTMP1
   103
   104// F = b ^ c ^ d
//
// ROUND3 expects REGTMP1 to hold c on entry, and leaves b in it on exit —
// b of this round is c of the next round.
   105#define ROUND3(a, b, c, d, index, const, shift) \
   106	ADDV	$const, a; \
   107	ADD	R11, a; \
   108	MOVW	(index*4)(R5), R11; \
   109	XOR	d, REGTMP1; \
   110	XOR	b, REGTMP1; \
   111	ADD	REGTMP1, a; \
   112	ROTR	$(32-shift), a; \
   113	MOVW	b, REGTMP1; \
   114	ADD	b, a
   115
   116	ROUND3(R7,  R8,  R9,  R10,  8, 0xfffa3942,  4);
   117	ROUND3(R10, R7,  R8,  R9,  11, 0x8771f681, 11);
   118	ROUND3(R9,  R10, R7,  R8,  14, 0x6d9d6122, 16);
   119	ROUND3(R8,  R9,  R10, R7,   1, 0xfde5380c, 23);
   120	ROUND3(R7,  R8,  R9,  R10,  4, 0xa4beea44,  4);
   121	ROUND3(R10, R7,  R8,  R9,   7, 0x4bdecfa9, 11);
   122	ROUND3(R9,  R10, R7,  R8,  10, 0xf6bb4b60, 16);
   123	ROUND3(R8,  R9,  R10, R7,  13, 0xbebfbc70, 23);
   124	ROUND3(R7,  R8,  R9,  R10,  0, 0x289b7ec6,  4);
   125	ROUND3(R10, R7,  R8,  R9,   3, 0xeaa127fa, 11);
   126	ROUND3(R9,  R10, R7,  R8,   6, 0xd4ef3085, 16);
   127	ROUND3(R8,  R9,  R10, R7,   9,  0x4881d05, 23);
   128	ROUND3(R7,  R8,  R9,  R10, 12, 0xd9d4d039,  4);
   129	ROUND3(R10, R7,  R8,  R9,  15, 0xe6db99e5, 11);
   130	ROUND3(R9,  R10, R7,  R8,   2, 0x1fa27cf8, 16);
   131	ROUND3(R8,  R9,  R10, R7,   0, 0xc4ac5665, 23);
   132
	// Pre-load x[0] and seed REGTMP1 with ~d for the first ROUND4
	// (XOR against all-ones computes bitwise NOT).
   133	MOVW	(0*4)(R5), R11
   134	MOVV	$0xffffffff, REGTMP2
   135	XOR	R10, REGTMP2, REGTMP1	// REGTMP1 = ~d
   136
   137// F = c ^ (b | (~d))
//
// ROUND4 expects REGTMP1 to hold ~d on entry, and recomputes it as ~c on
// exit — c of this round is d of the next, so ~c is the next round's ~d.
   138#define ROUND4(a, b, c, d, index, const, shift) \
   139	ADDV	$const, a; \
   140	ADD	R11, a; \
   141	MOVW	(index*4)(R5), R11; \
   142	OR	b, REGTMP1; \
   143	XOR	c, REGTMP1; \
   144	ADD	REGTMP1, a; \
   145	ROTR	$(32-shift), a; \
   146	MOVV	$0xffffffff, REGTMP2; \
   147	XOR	c, REGTMP2, REGTMP1; \
   148	ADD	b, a
   149
   150	ROUND4(R7,  R8,  R9,  R10,  7, 0xf4292244,  6);
   151	ROUND4(R10, R7,  R8,  R9,  14, 0x432aff97, 10);
   152	ROUND4(R9,  R10, R7,  R8,   5, 0xab9423a7, 15);
   153	ROUND4(R8,  R9,  R10, R7,  12, 0xfc93a039, 21);
   154	ROUND4(R7,  R8,  R9,  R10,  3, 0x655b59c3,  6);
   155	ROUND4(R10, R7,  R8,  R9,  10, 0x8f0ccc92, 10);
   156	ROUND4(R9,  R10, R7,  R8,   1, 0xffeff47d, 15);
   157	ROUND4(R8,  R9,  R10, R7,   8, 0x85845dd1, 21);
   158	ROUND4(R7,  R8,  R9,  R10, 15, 0x6fa87e4f,  6);
   159	ROUND4(R10, R7,  R8,  R9,   6, 0xfe2ce6e0, 10);
   160	ROUND4(R9,  R10, R7,  R8,  13, 0xa3014314, 15);
   161	ROUND4(R8,  R9,  R10, R7,   4, 0x4e0811a1, 21);
   162	ROUND4(R7,  R8,  R9,  R10, 11, 0xf7537e82,  6);
   163	ROUND4(R10, R7,  R8,  R9,   2, 0xbd3af235, 10);
   164	ROUND4(R9,  R10, R7,  R8,   9, 0x2ad7d2bb, 15);
   165	ROUND4(R8,  R9,  R10, R7,   0, 0xeb86d391, 21);
   166
	// Add the state saved at the top of the chunk back into a, b, c, d.
   167	ADD	R14, R7
   168	ADD	R15, R8
   169	ADD	R16, R9
   170	ADD	R17, R10
   171
	// Advance to the next 64-byte chunk; loop until R5 reaches R24.
   172	ADDV	$64, R5
   173	BNE	R5, R24, loop
   174
	// Write the final state back to the digest.
   175	MOVW	R7, (0*4)(R4)
   176	MOVW	R8, (1*4)(R4)
   177	MOVW	R9, (2*4)(R4)
   178	MOVW	R10, (3*4)(R4)
   179zero:
   180	RET

View as plain text