Text file
src/crypto/md5/md5block_s390x.s
1// Original source:
2// http://www.zorinaq.com/papers/md5-amd64.html
3// http://www.zorinaq.com/papers/md5-amd64.tar.bz2
4//
5// MD5 adapted for s390x using Go's assembler for
6// s390x, based on md5block_amd64.s implementation by
7// the Go authors.
8//
9// Author: Marc Bevand <bevand_m (at) epita.fr>
10// Licence: I hereby disclaim the copyright on this code and place it
11// in the public domain.
12
13//go:build !purego
14
15#include "textflag.h"
16
17// func block(dig *digest, p []byte)
// block folds every complete 64-byte chunk of p into the four 32-bit MD5
// state words stored at the start of dig. Trailing bytes beyond the last
// multiple of 64 are ignored; if p holds less than one chunk the state is
// written back unchanged.
//
// Frame: $16-32 = 16 bytes of locals (tmp, a copy of the state taken at the
// top of each chunk) and 32 bytes of arguments (dig pointer + p slice header).
//
// Register roles:
//   R1      dig pointer on entry; reused as scratch in ROUND2 and in the
//           final accumulation, which is why it is reloaded at end:
//   R2-R5   working state a, b, c, d
//   R6      pointer to the current 64-byte chunk
//   R7      end pointer (p + len rounded down to a multiple of 64)
//   R8      message word for the current step, loaded one step ahead
//   R9      scratch for the round function
//
// Each ROUNDn(a, b, c, d, index, const, shift) performs one MD5 step:
//   a = b + rotl32(a + f(b,c,d) + R8 + const, shift)
// and also loads message word `index` into R8 for the step that follows.
// The last invocation of each round passes index 0 as a placeholder; the
// value it loads is replaced by the explicit MOVWBR before the next round
// (or at the top of the loop).
18TEXT ·block(SB),NOSPLIT,$16-32
19 MOVD dig+0(FP), R1
20 MOVD p+8(FP), R6
21 MOVD p_len+16(FP), R5
// Round the length down to a multiple of 64 and form the end pointer.
22 AND $-64, R5
23 LAY (R6)(R5*1), R7
24
// Load the four state words from dig into R2..R5; skip the loop entirely
// when there is no complete chunk (R6 == R7).
25 LMY 0(R1), R2, R5
26 CMPBEQ R6, R7, end
27
28loop:
// Save the incoming state so it can be added back after the 64 steps.
29 STMY R2, R5, tmp-16(SP)
30
// MOVWBR loads a 32-bit word byte-reversed (MD5 message words are
// little-endian). Preload word 0 and seed R9 with d for the first step.
31 MOVWBR 0(R6), R8
32 MOVWZ R5, R9
33
// Round 1: f(b,c,d) = ((c ^ d) & b) ^ d.
// R9 holds d on entry; on exit it holds c, which is the next step's d
// because the register arguments rotate (a,b,c,d) -> (d,a,b,c).
34#define ROUND1(a, b, c, d, index, const, shift) \
35 XOR c, R9; \
36 ADD $const, a; \
37 ADD R8, a; \
38 MOVWBR (index*4)(R6), R8; \
39 AND b, R9; \
40 XOR d, R9; \
41 ADD R9, a; \
42 RLL $shift, a; \
43 MOVWZ c, R9; \
44 ADD b, a
45
46 ROUND1(R2,R3,R4,R5, 1,0xd76aa478, 7);
47 ROUND1(R5,R2,R3,R4, 2,0xe8c7b756,12);
48 ROUND1(R4,R5,R2,R3, 3,0x242070db,17);
49 ROUND1(R3,R4,R5,R2, 4,0xc1bdceee,22);
50 ROUND1(R2,R3,R4,R5, 5,0xf57c0faf, 7);
51 ROUND1(R5,R2,R3,R4, 6,0x4787c62a,12);
52 ROUND1(R4,R5,R2,R3, 7,0xa8304613,17);
53 ROUND1(R3,R4,R5,R2, 8,0xfd469501,22);
54 ROUND1(R2,R3,R4,R5, 9,0x698098d8, 7);
55 ROUND1(R5,R2,R3,R4,10,0x8b44f7af,12);
56 ROUND1(R4,R5,R2,R3,11,0xffff5bb1,17);
57 ROUND1(R3,R4,R5,R2,12,0x895cd7be,22);
58 ROUND1(R2,R3,R4,R5,13,0x6b901122, 7);
59 ROUND1(R5,R2,R3,R4,14,0xfd987193,12);
60 ROUND1(R4,R5,R2,R3,15,0xa679438e,17);
61 ROUND1(R3,R4,R5,R2, 0,0x49b40821,22);
62
// Round 2 starts with message word 1 and needs d in both R9 and R1.
// R1 no longer holds the dig pointer from here on.
63 MOVWBR (1*4)(R6), R8
64 MOVWZ R5, R9
65 MOVWZ R5, R1
66
// Round 2: f(b,c,d) = (b & d) | (c & ~d).
// R9 and R1 both hold d on entry: R9 becomes ~d & c, R1 becomes d & b, and
// the two are OR-ed into R1. Both are then reset to c (the next step's d).
67#define ROUND2(a, b, c, d, index, const, shift) \
68 XOR $0xffffffff, R9; \ // NOTW R9
69 ADD $const, a; \
70 ADD R8, a; \
71 MOVWBR (index*4)(R6), R8; \
72 AND b, R1; \
73 AND c, R9; \
74 OR R9, R1; \
75 MOVWZ c, R9; \
76 ADD R1, a; \
77 MOVWZ c, R1; \
78 RLL $shift, a; \
79 ADD b, a
80
81 ROUND2(R2,R3,R4,R5, 6,0xf61e2562, 5);
82 ROUND2(R5,R2,R3,R4,11,0xc040b340, 9);
83 ROUND2(R4,R5,R2,R3, 0,0x265e5a51,14);
84 ROUND2(R3,R4,R5,R2, 5,0xe9b6c7aa,20);
85 ROUND2(R2,R3,R4,R5,10,0xd62f105d, 5);
86 ROUND2(R5,R2,R3,R4,15, 0x2441453, 9);
87 ROUND2(R4,R5,R2,R3, 4,0xd8a1e681,14);
88 ROUND2(R3,R4,R5,R2, 9,0xe7d3fbc8,20);
89 ROUND2(R2,R3,R4,R5,14,0x21e1cde6, 5);
90 ROUND2(R5,R2,R3,R4, 3,0xc33707d6, 9);
91 ROUND2(R4,R5,R2,R3, 8,0xf4d50d87,14);
92 ROUND2(R3,R4,R5,R2,13,0x455a14ed,20);
93 ROUND2(R2,R3,R4,R5, 2,0xa9e3e905, 5);
94 ROUND2(R5,R2,R3,R4, 7,0xfcefa3f8, 9);
95 ROUND2(R4,R5,R2,R3,12,0x676f02d9,14);
96 ROUND2(R3,R4,R5,R2, 0,0x8d2a4c8a,20);
97
// Round 3 starts with message word 5 and expects c (not d) in R9.
98 MOVWBR (5*4)(R6), R8
99 MOVWZ R4, R9
100
// Round 3: f(b,c,d) = b ^ c ^ d.
// R9 holds c on entry; on exit it holds b, which is the next step's c.
101#define ROUND3(a, b, c, d, index, const, shift) \
102 ADD $const, a; \
103 ADD R8, a; \
104 MOVWBR (index*4)(R6), R8; \
105 XOR d, R9; \
106 XOR b, R9; \
107 ADD R9, a; \
108 RLL $shift, a; \
109 MOVWZ b, R9; \
110 ADD b, a
111
112 ROUND3(R2,R3,R4,R5, 8,0xfffa3942, 4);
113 ROUND3(R5,R2,R3,R4,11,0x8771f681,11);
114 ROUND3(R4,R5,R2,R3,14,0x6d9d6122,16);
115 ROUND3(R3,R4,R5,R2, 1,0xfde5380c,23);
116 ROUND3(R2,R3,R4,R5, 4,0xa4beea44, 4);
117 ROUND3(R5,R2,R3,R4, 7,0x4bdecfa9,11);
118 ROUND3(R4,R5,R2,R3,10,0xf6bb4b60,16);
119 ROUND3(R3,R4,R5,R2,13,0xbebfbc70,23);
120 ROUND3(R2,R3,R4,R5, 0,0x289b7ec6, 4);
121 ROUND3(R5,R2,R3,R4, 3,0xeaa127fa,11);
122 ROUND3(R4,R5,R2,R3, 6,0xd4ef3085,16);
123 ROUND3(R3,R4,R5,R2, 9, 0x4881d05,23);
124 ROUND3(R2,R3,R4,R5,12,0xd9d4d039, 4);
125 ROUND3(R5,R2,R3,R4,15,0xe6db99e5,11);
126 ROUND3(R4,R5,R2,R3, 2,0x1fa27cf8,16);
127 ROUND3(R3,R4,R5,R2, 0,0xc4ac5665,23);
128
// Round 4 starts with message word 0 and expects ~d in R9
// (computed as 0xffffffff ^ d).
129 MOVWBR (0*4)(R6), R8
130 MOVWZ $0xffffffff, R9
131 XOR R5, R9
132
// Round 4: f(b,c,d) = c ^ (b | ~d).
// R9 holds ~d on entry; on exit it holds ~c, which is the next step's ~d.
133#define ROUND4(a, b, c, d, index, const, shift) \
134 ADD $const, a; \
135 ADD R8, a; \
136 MOVWBR (index*4)(R6), R8; \
137 OR b, R9; \
138 XOR c, R9; \
139 ADD R9, a; \
140 MOVWZ $0xffffffff, R9; \
141 RLL $shift, a; \
142 XOR c, R9; \
143 ADD b, a
144
145 ROUND4(R2,R3,R4,R5, 7,0xf4292244, 6);
146 ROUND4(R5,R2,R3,R4,14,0x432aff97,10);
147 ROUND4(R4,R5,R2,R3, 5,0xab9423a7,15);
148 ROUND4(R3,R4,R5,R2,12,0xfc93a039,21);
149 ROUND4(R2,R3,R4,R5, 3,0x655b59c3, 6);
150 ROUND4(R5,R2,R3,R4,10,0x8f0ccc92,10);
151 ROUND4(R4,R5,R2,R3, 1,0xffeff47d,15);
152 ROUND4(R3,R4,R5,R2, 8,0x85845dd1,21);
153 ROUND4(R2,R3,R4,R5,15,0x6fa87e4f, 6);
154 ROUND4(R5,R2,R3,R4, 6,0xfe2ce6e0,10);
155 ROUND4(R4,R5,R2,R3,13,0xa3014314,15);
156 ROUND4(R3,R4,R5,R2, 4,0x4e0811a1,21);
157 ROUND4(R2,R3,R4,R5,11,0xf7537e82, 6);
158 ROUND4(R5,R2,R3,R4, 2,0xbd3af235,10);
159 ROUND4(R4,R5,R2,R3, 9,0x2ad7d2bb,15);
160 ROUND4(R3,R4,R5,R2, 0,0xeb86d391,21);
161
// Add the state saved at the top of the loop back into a, b, c, d,
// using R1 as a temporary for each saved word.
162 MOVWZ tmp-16(SP), R1
163 ADD R1, R2
164 MOVWZ tmp-12(SP), R1
165 ADD R1, R3
166 MOVWZ tmp-8(SP), R1
167 ADD R1, R4
168 MOVWZ tmp-4(SP), R1
169 ADD R1, R5
170
// Advance to the next 64-byte chunk and repeat while R6 is below R7.
// NOTE(review): CMPBLT looks like the signed compare-and-branch form, while
// R6/R7 are addresses — confirm this is intentional for the address range
// in use (an unsigned form would be the conventional choice for pointers).
171 LA 64(R6), R6
172 CMPBLT R6, R7, loop
173
174end:
// R1 was clobbered above, so fetch the dig pointer again before storing
// the four state words back.
175 MOVD dig+0(FP), R1
176 STMY R2, R5, 0(R1)
177 RET
View as plain text