// Source file: src/crypto/md5/md5block_386.s
//
// Original source:
//	http://www.zorinaq.com/papers/md5-amd64.html
//	http://www.zorinaq.com/papers/md5-amd64.tar.bz2
//
// Translated from Perl generating GNU assembly into
// #defines generating 8a assembly, and adjusted for 386,
// by the Go Authors.

//go:build !purego

#include "textflag.h"

// MD5 optimized for AMD64.
//
// Author: Marc Bevand <bevand_m (at) epita.fr>
// Licence: I hereby disclaim the copyright on this code and place it
// in the public domain.

// ROUND1 performs one step of MD5 round 1:
//
//	a = b + ROTL32(a + F(b,c,d) + X + const, shift)
//
// where F(b,c,d) is evaluated as ((c ^ d) & b) ^ d, which selects c where
// b has a 1 bit and d where b has a 0 bit.
//
// On entry: DI = message word X for this step, BP = d.
// On exit:  DI = 32-bit word at offset index*4 from SI, i.e. the word the
//           NEXT step adds in; BP = c, which is the next step's d because
//           callers rotate the register arguments (a,b,c,d) -> (d,a,b,c).
// SI must address the current 64-byte block.
// Writes a, BP, DI and the flags; b, c and d are only read.
#define ROUND1(a, b, c, d, index, const, shift) \
	XORL	c, BP; \
	LEAL	const(a)(DI*1), a; \
	ANDL	b, BP; \
	XORL	d, BP; \
	MOVL	(index*4)(SI), DI; \
	ADDL	BP, a; \
	ROLL	$shift, a; \
	MOVL	c, BP; \
	ADDL	b, a

// ROUND2 performs one step of MD5 round 2:
//
//	a = b + ROTL32(a + G(b,c,d) + X + const, shift)
//
// where G(b,c,d) = (b & d) | (c & ^d).
//
// On entry: DI = message word X for this step. BP is not read; the macro
//           rebuilds it from d.
// On exit:  DI = 32-bit word at offset index*4 from SI (input to the next
//           step); BP = G(b,c,d) of this step.
// DI doubles as a temporary for (b & d) once X has been folded into a.
// SI must address the current 64-byte block.
// Writes a, BP, DI and the flags; b, c and d are only read.
#define ROUND2(a, b, c, d, index, const, shift) \
	LEAL	const(a)(DI*1), a; \
	MOVL	d, DI; \
	ANDL	b, DI; \
	MOVL	d, BP; \
	NOTL	BP; \
	ANDL	c, BP; \
	ORL	DI, BP; \
	MOVL	(index*4)(SI), DI; \
	ADDL	BP, a; \
	ROLL	$shift, a; \
	ADDL	b, a

// ROUND3 performs one step of MD5 round 3:
//
//	a = b + ROTL32(a + H(b,c,d) + X + const, shift)
//
// where H(b,c,d) = b ^ c ^ d.
//
// On entry: DI = message word X for this step, BP = c.
// On exit:  DI = 32-bit word at offset index*4 from SI (input to the next
//           step); BP = b, which is the next step's c because callers
//           rotate the register arguments (a,b,c,d) -> (d,a,b,c).
// The c parameter is never named in the body; its value arrives via BP.
// SI must address the current 64-byte block.
// Writes a, BP, DI and the flags; b and d are only read.
#define ROUND3(a, b, c, d, index, const, shift) \
	LEAL	const(a)(DI*1), a; \
	MOVL	(index*4)(SI), DI; \
	XORL	d, BP; \
	XORL	b, BP; \
	ADDL	BP, a; \
	ROLL	$shift, a; \
	MOVL	b, BP; \
	ADDL	b, a

// ROUND4 performs one step of MD5 round 4:
//
//	a = b + ROTL32(a + I(b,c,d) + X + const, shift)
//
// where I(b,c,d) = c ^ (b | ^d).
//
// On entry: DI = message word X for this step, BP = ^d.
// On exit:  DI = 32-bit word at offset index*4 from SI (input to the next
//           step); BP = ^c, which is the next step's ^d because callers
//           rotate the register arguments (a,b,c,d) -> (d,a,b,c).
// The d parameter is never named in the body; its complement arrives via BP.
// SI must address the current 64-byte block.
// Writes a, BP, DI and the flags; b and c are only read.
#define ROUND4(a, b, c, d, index, const, shift) \
	LEAL	const(a)(DI*1), a; \
	ORL	b, BP; \
	XORL	c, BP; \
	ADDL	BP, a; \
	MOVL	(index*4)(SI), DI; \
	MOVL	$0xffffffff, BP; \
	ROLL	$shift, a; \
	XORL	c, BP; \
	ADDL	b, a

// block folds every complete 64-byte chunk of the input into the four-word
// MD5 state addressed by dig. Trailing bytes beyond the last full chunk are
// ignored; with no full chunk the state is written back unchanged.
//
// NOTE(review): the Go-side declaration is not visible in this file. The
// argument names and the 16-byte argument size suggest
//	func block(dig *digest, p []byte)
// -- confirm against the package's Go source.
//
// Arguments:
//	dig+0(FP)    pointer to four consecutive 32-bit state words
//	p+4(FP)      address of the input bytes
//	p_len+8(FP)  input length in bytes
//
// Registers inside the loop:
//	AX, BX, CX, DX  working state a, b, c, d
//	SI              start of the chunk being processed
//	DI              message word for the upcoming step (scratch in ROUND2)
//	BP              round-function scratch (see the ROUNDn macros)
//
// Frame (24 bytes reserved):
//	0(SP)..12(SP)   state as it was on entry to the current chunk
//	16(SP)          end pointer: p + (len rounded down to a multiple of 64)
//	20(SP)          not referenced in this function
TEXT ·block(SB),NOSPLIT,$24-16
	MOVL	dig+0(FP), BP
	MOVL	p+4(FP), SI
	MOVL	p_len+8(FP), DX
	// Clear the low six bits: DX = length rounded down to a multiple of 64.
	SHRL	$6, DX
	SHLL	$6, DX

	// DI = address one past the last full chunk.
	LEAL	(SI)(DX*1), DI
	// Load the running state; DX is reused now that the length is consumed.
	MOVL	(0*4)(BP), AX
	MOVL	(1*4)(BP), BX
	MOVL	(2*4)(BP), CX
	MOVL	(3*4)(BP), DX

	// Nothing to do when the input holds no complete chunk.
	CMPL	SI, DI
	JEQ	end

	// DI is needed as scratch below, so park the end pointer on the stack.
	MOVL	DI, 16(SP)

loop:
	// Remember the state at chunk entry; it is added back after round 4.
	MOVL	AX, 0(SP)
	MOVL	BX, 4(SP)
	MOVL	CX, 8(SP)
	MOVL	DX, 12(SP)

	// Round 1 set-up: DI = word 0, BP = d.
	// Each macro's index argument names the word preloaded for the
	// following step; the index on a round's last step is a placeholder
	// because DI is reloaded explicitly before the next round.
	MOVL	(0*4)(SI), DI
	MOVL	DX, BP

	ROUND1(AX,BX,CX,DX, 1,0xd76aa478, 7);
	ROUND1(DX,AX,BX,CX, 2,0xe8c7b756,12);
	ROUND1(CX,DX,AX,BX, 3,0x242070db,17);
	ROUND1(BX,CX,DX,AX, 4,0xc1bdceee,22);
	ROUND1(AX,BX,CX,DX, 5,0xf57c0faf, 7);
	ROUND1(DX,AX,BX,CX, 6,0x4787c62a,12);
	ROUND1(CX,DX,AX,BX, 7,0xa8304613,17);
	ROUND1(BX,CX,DX,AX, 8,0xfd469501,22);
	ROUND1(AX,BX,CX,DX, 9,0x698098d8, 7);
	ROUND1(DX,AX,BX,CX,10,0x8b44f7af,12);
	ROUND1(CX,DX,AX,BX,11,0xffff5bb1,17);
	ROUND1(BX,CX,DX,AX,12,0x895cd7be,22);
	ROUND1(AX,BX,CX,DX,13,0x6b901122, 7);
	ROUND1(DX,AX,BX,CX,14,0xfd987193,12);
	ROUND1(CX,DX,AX,BX,15,0xa679438e,17);
	ROUND1(BX,CX,DX,AX, 0,0x49b40821,22);

	// Round 2 set-up: DI = word 1.
	// NOTE(review): ROUND2 overwrites BP from d before reading it, so the
	// BP preload below has no effect on the result; kept as in the original.
	MOVL	(1*4)(SI), DI
	MOVL	DX, BP

	ROUND2(AX,BX,CX,DX, 6,0xf61e2562, 5);
	ROUND2(DX,AX,BX,CX,11,0xc040b340, 9);
	ROUND2(CX,DX,AX,BX, 0,0x265e5a51,14);
	ROUND2(BX,CX,DX,AX, 5,0xe9b6c7aa,20);
	ROUND2(AX,BX,CX,DX,10,0xd62f105d, 5);
	ROUND2(DX,AX,BX,CX,15, 0x2441453, 9);
	ROUND2(CX,DX,AX,BX, 4,0xd8a1e681,14);
	ROUND2(BX,CX,DX,AX, 9,0xe7d3fbc8,20);
	ROUND2(AX,BX,CX,DX,14,0x21e1cde6, 5);
	ROUND2(DX,AX,BX,CX, 3,0xc33707d6, 9);
	ROUND2(CX,DX,AX,BX, 8,0xf4d50d87,14);
	ROUND2(BX,CX,DX,AX,13,0x455a14ed,20);
	ROUND2(AX,BX,CX,DX, 2,0xa9e3e905, 5);
	ROUND2(DX,AX,BX,CX, 7,0xfcefa3f8, 9);
	ROUND2(CX,DX,AX,BX,12,0x676f02d9,14);
	ROUND2(BX,CX,DX,AX, 0,0x8d2a4c8a,20);

	// Round 3 set-up: DI = word 5, BP = c.
	MOVL	(5*4)(SI), DI
	MOVL	CX, BP

	ROUND3(AX,BX,CX,DX, 8,0xfffa3942, 4);
	ROUND3(DX,AX,BX,CX,11,0x8771f681,11);
	ROUND3(CX,DX,AX,BX,14,0x6d9d6122,16);
	ROUND3(BX,CX,DX,AX, 1,0xfde5380c,23);
	ROUND3(AX,BX,CX,DX, 4,0xa4beea44, 4);
	ROUND3(DX,AX,BX,CX, 7,0x4bdecfa9,11);
	ROUND3(CX,DX,AX,BX,10,0xf6bb4b60,16);
	ROUND3(BX,CX,DX,AX,13,0xbebfbc70,23);
	ROUND3(AX,BX,CX,DX, 0,0x289b7ec6, 4);
	ROUND3(DX,AX,BX,CX, 3,0xeaa127fa,11);
	ROUND3(CX,DX,AX,BX, 6,0xd4ef3085,16);
	ROUND3(BX,CX,DX,AX, 9, 0x4881d05,23);
	ROUND3(AX,BX,CX,DX,12,0xd9d4d039, 4);
	ROUND3(DX,AX,BX,CX,15,0xe6db99e5,11);
	ROUND3(CX,DX,AX,BX, 2,0x1fa27cf8,16);
	ROUND3(BX,CX,DX,AX, 0,0xc4ac5665,23);

	// Round 4 set-up: DI = word 0, BP = ^d (all-ones XOR d).
	MOVL	(0*4)(SI), DI
	MOVL	$0xffffffff, BP
	XORL	DX, BP

	ROUND4(AX,BX,CX,DX, 7,0xf4292244, 6);
	ROUND4(DX,AX,BX,CX,14,0x432aff97,10);
	ROUND4(CX,DX,AX,BX, 5,0xab9423a7,15);
	ROUND4(BX,CX,DX,AX,12,0xfc93a039,21);
	ROUND4(AX,BX,CX,DX, 3,0x655b59c3, 6);
	ROUND4(DX,AX,BX,CX,10,0x8f0ccc92,10);
	ROUND4(CX,DX,AX,BX, 1,0xffeff47d,15);
	ROUND4(BX,CX,DX,AX, 8,0x85845dd1,21);
	ROUND4(AX,BX,CX,DX,15,0x6fa87e4f, 6);
	ROUND4(DX,AX,BX,CX, 6,0xfe2ce6e0,10);
	ROUND4(CX,DX,AX,BX,13,0xa3014314,15);
	ROUND4(BX,CX,DX,AX, 4,0x4e0811a1,21);
	ROUND4(AX,BX,CX,DX,11,0xf7537e82, 6);
	ROUND4(DX,AX,BX,CX, 2,0xbd3af235,10);
	ROUND4(CX,DX,AX,BX, 9,0x2ad7d2bb,15);
	ROUND4(BX,CX,DX,AX, 0,0xeb86d391,21);

	// Feed-forward: add the chunk-entry state to the transformed state.
	ADDL	0(SP), AX
	ADDL	4(SP), BX
	ADDL	8(SP), CX
	ADDL	12(SP), DX

	// Advance to the next chunk; keep going while SI is below the end pointer.
	ADDL	$64, SI
	CMPL	SI, 16(SP)
	JB	loop

end:
	// BP was used as scratch, so reload dig before storing the state back.
	MOVL	dig+0(FP), BP
	MOVL	AX, (0*4)(BP)
	MOVL	BX, (1*4)(BP)
	MOVL	CX, (2*4)(BP)
	MOVL	DX, (3*4)(BP)
	RET
View as plain text