...

Text file src/crypto/md5/md5block_arm.s

Documentation: crypto/md5

     1// Copyright 2013 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4//
     5// ARM version of md5block.go
     6
     7//go:build !purego
     8
     9#include "textflag.h"
    10
    11// Register definitions
    12#define Rtable	R0	// Pointer into the MD5 constants table; advanced 16 bytes by each MOVM.IA.W load
    13#define Rdata	R1	// Pointer to the 64-byte block being hashed (the input itself or the aligned copy in buf)
    14#define Ra	R2	// MD5 accumulator a
    15#define Rb	R3	// MD5 accumulator b
    16#define Rc	R4	// MD5 accumulator c
    17#define Rd	R5	// MD5 accumulator d
    18#define Rc0	R6	// MD5 constant, 1st of the 4 loaded per group of steps; also used as scratch outside the rounds
    19#define Rc1	R7	// MD5 constant, 2nd of the group
    20#define Rc2	R8	// MD5 constant, 3rd of the group
    21// R9, R10 are forbidden
    22// R11 is OK provided you check the assembled output to confirm that no synthetic instructions use it
    23#define Rc3	R11	// MD5 constant, 4th of the group
    24#define Rt0	R12	// temporary: round-function value and running sum inside the ROUND macros
    25#define Rt1	R14	// temporary: message word X[index] inside the ROUND macros
    26
    27// func block(dig *digest, p []byte)
    28//  0(FP) is dig (*digest)
    29//  4(FP) is p.array (struct Slice)
    30//  8(FP) is p.len
    31// 12(FP) is p.cap (not read by this routine)
    32//
    33// Stack frame: 4 (p_end) + 4 (p_data) + 64 (buf) + 12 (call parameters) = 84 bytes, the frame size given on the TEXT directive
    34#define p_end	end-4(SP)	// pointer to the end of data (p.array + p.len)
    35#define p_data	data-8(SP)	// data pointer of the block being processed, saved at the top of every loop pass
    36#define buf	buffer-(8+4*16)(SP)	// 16-word (64-byte) temporary buffer that receives a copy of an unaligned block
    37		// 3 words at 4..12(R13) for called routine parameters (to, from, n of runtime·memmove)
    38
    39TEXT	·block(SB), NOSPLIT, $84-16	// hashes p into dig one 64-byte block at a time; 84-byte frame, 16 bytes of arguments
    40	MOVW	p+4(FP), Rdata	// Rdata = p.array, pointer to the data
    41	MOVW	p_len+8(FP), Rt0	// Rt0 = p.len, number of bytes
    42	ADD	Rdata, Rt0	// Rt0 = p.array + p.len
    43	MOVW	Rt0, p_end	// pointer to end of data, reloaded for the loop test
    44
    45loop:	// one pass per 64-byte block
    46	MOVW	Rdata, p_data	// Save Rdata: it may be redirected to buf below and is restored from p_data after the rounds
    47	AND.S	$3, Rdata, Rt0	// Z flag set when Rdata is 4-byte aligned (TST $3, Rdata not working see issue 5921)
    48	BEQ	aligned			// aligned detected - skip copy
    49
    50	// Copy the unaligned source data into the aligned temporary buffer
    51	// memmove(to=4(R13), from=8(R13), n=12(R13)) - Corrupts all registers
    52	MOVW	$buf, Rtable	// to: address of the stack buffer
    53	MOVW	$64, Rc0		// n: one 64-byte block
    54	MOVM.IB	[Rtable,Rdata,Rc0], (R13)	// store to, from, n at 4(R13), 8(R13), 12(R13)
    55	BL	runtime·memmove(SB)
    56
    57	// Point to the local aligned copy of the data
    58	MOVW	$buf, Rdata
    59
    60aligned:
    61	// Point to the table of constants
    62	// A PC relative add would be cheaper than this
    63	MOVW	$·table(SB), Rtable	// reset for every block: each MOVM.IA.W below advances Rtable by 16 bytes
    64
    65	// Load up initial MD5 accumulator
    66	MOVW	dig+0(FP), Rc0	// Rc0 = dig, used as a scratch pointer before it holds a constant
    67	MOVM.IA (Rc0), [Ra,Rb,Rc,Rd]	// a, b, c, d = the four words at dig
    68
    69// Round 1 step: a += (((c^d)&b)^d) + X[index] + const
    70// a = (a<<shift | a>>(32-shift)) + b, the rotate being folded into the final ADD as Ra@>(32-shift)
    71#define ROUND1(Ra, Rb, Rc, Rd, index, shift, Rconst) \
    72	EOR	Rc, Rd, Rt0		; \
    73	AND	Rb, Rt0			; \
    74	EOR	Rd, Rt0			; \
    75	MOVW	(index<<2)(Rdata), Rt1	; \
    76	ADD	Rt1, Rt0			; \
    77	ADD	Rconst, Rt0			; \
    78	ADD	Rt0, Ra			; \
    79	ADD	Ra@>(32-shift), Rb, Ra	;
    80
    81	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// load the next 4 constants; the .W writeback advances Rtable past them
    82	ROUND1(Ra, Rb, Rc, Rd,  0,	7, Rc0)
    83	ROUND1(Rd, Ra, Rb, Rc,  1, 12, Rc1)
    84	ROUND1(Rc, Rd, Ra, Rb,  2, 17, Rc2)
    85	ROUND1(Rb, Rc, Rd, Ra,  3, 22, Rc3)
    86
    87	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    88	ROUND1(Ra, Rb, Rc, Rd,  4,	7, Rc0)
    89	ROUND1(Rd, Ra, Rb, Rc,  5, 12, Rc1)
    90	ROUND1(Rc, Rd, Ra, Rb,  6, 17, Rc2)
    91	ROUND1(Rb, Rc, Rd, Ra,  7, 22, Rc3)
    92
    93	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
    94	ROUND1(Ra, Rb, Rc, Rd,  8,	7, Rc0)
    95	ROUND1(Rd, Ra, Rb, Rc,  9, 12, Rc1)
    96	ROUND1(Rc, Rd, Ra, Rb, 10, 17, Rc2)
    97	ROUND1(Rb, Rc, Rd, Ra, 11, 22, Rc3)
    98
    99	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   100	ROUND1(Ra, Rb, Rc, Rd, 12,	7, Rc0)
   101	ROUND1(Rd, Ra, Rb, Rc, 13, 12, Rc1)
   102	ROUND1(Rc, Rd, Ra, Rb, 14, 17, Rc2)
   103	ROUND1(Rb, Rc, Rd, Ra, 15, 22, Rc3)
   104
   105// Round 2 step: a += (((b^c)&d)^c) + X[index] + const
   106// a = (a<<shift | a>>(32-shift)) + b
   107#define ROUND2(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   108	EOR	Rb, Rc, Rt0		; \
   109	AND	Rd, Rt0			; \
   110	EOR	Rc, Rt0			; \
   111	MOVW	(index<<2)(Rdata), Rt1	; \
   112	ADD	Rt1, Rt0			; \
   113	ADD	Rconst, Rt0			; \
   114	ADD	Rt0, Ra			; \
   115	ADD	Ra@>(32-shift), Rb, Ra	;
   116
   117	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// first 4 constants of round 2
   118	ROUND2(Ra, Rb, Rc, Rd,  1,	5, Rc0)
   119	ROUND2(Rd, Ra, Rb, Rc,  6,	9, Rc1)
   120	ROUND2(Rc, Rd, Ra, Rb, 11, 14, Rc2)
   121	ROUND2(Rb, Rc, Rd, Ra,  0, 20, Rc3)
   122
   123	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   124	ROUND2(Ra, Rb, Rc, Rd,  5,	5, Rc0)
   125	ROUND2(Rd, Ra, Rb, Rc, 10,	9, Rc1)
   126	ROUND2(Rc, Rd, Ra, Rb, 15, 14, Rc2)
   127	ROUND2(Rb, Rc, Rd, Ra,  4, 20, Rc3)
   128
   129	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   130	ROUND2(Ra, Rb, Rc, Rd,  9,	5, Rc0)
   131	ROUND2(Rd, Ra, Rb, Rc, 14,	9, Rc1)
   132	ROUND2(Rc, Rd, Ra, Rb,  3, 14, Rc2)
   133	ROUND2(Rb, Rc, Rd, Ra,  8, 20, Rc3)
   134
   135	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   136	ROUND2(Ra, Rb, Rc, Rd, 13,	5, Rc0)
   137	ROUND2(Rd, Ra, Rb, Rc,  2,	9, Rc1)
   138	ROUND2(Rc, Rd, Ra, Rb,  7, 14, Rc2)
   139	ROUND2(Rb, Rc, Rd, Ra, 12, 20, Rc3)
   140
   141// Round 3 step: a += (b^c^d) + X[index] + const
   142// a = (a<<shift | a>>(32-shift)) + b
   143#define ROUND3(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   144	EOR	Rb, Rc, Rt0		; \
   145	EOR	Rd, Rt0			; \
   146	MOVW	(index<<2)(Rdata), Rt1	; \
   147	ADD	Rt1, Rt0			; \
   148	ADD	Rconst, Rt0			; \
   149	ADD	Rt0, Ra			; \
   150	ADD	Ra@>(32-shift), Rb, Ra	;
   151
   152	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// first 4 constants of round 3
   153	ROUND3(Ra, Rb, Rc, Rd,  5,	4, Rc0)
   154	ROUND3(Rd, Ra, Rb, Rc,  8, 11, Rc1)
   155	ROUND3(Rc, Rd, Ra, Rb, 11, 16, Rc2)
   156	ROUND3(Rb, Rc, Rd, Ra, 14, 23, Rc3)
   157
   158	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   159	ROUND3(Ra, Rb, Rc, Rd,  1,	4, Rc0)
   160	ROUND3(Rd, Ra, Rb, Rc,  4, 11, Rc1)
   161	ROUND3(Rc, Rd, Ra, Rb,  7, 16, Rc2)
   162	ROUND3(Rb, Rc, Rd, Ra, 10, 23, Rc3)
   163
   164	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   165	ROUND3(Ra, Rb, Rc, Rd, 13,	4, Rc0)
   166	ROUND3(Rd, Ra, Rb, Rc,  0, 11, Rc1)
   167	ROUND3(Rc, Rd, Ra, Rb,  3, 16, Rc2)
   168	ROUND3(Rb, Rc, Rd, Ra,  6, 23, Rc3)
   169
   170	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   171	ROUND3(Ra, Rb, Rc, Rd,  9,	4, Rc0)
   172	ROUND3(Rd, Ra, Rb, Rc, 12, 11, Rc1)
   173	ROUND3(Rc, Rd, Ra, Rb, 15, 16, Rc2)
   174	ROUND3(Rb, Rc, Rd, Ra,  2, 23, Rc3)
   175
   176// Round 4 step: a += (c^(b|^d)) + X[index] + const, where ^d is the bitwise NOT of d (the MVN below)
   177// a = (a<<shift | a>>(32-shift)) + b
   178#define ROUND4(Ra, Rb, Rc, Rd, index, shift, Rconst) \
   179	MVN	Rd, Rt0			; \
   180	ORR	Rb, Rt0			; \
   181	EOR	Rc, Rt0			; \
   182	MOVW	(index<<2)(Rdata), Rt1	; \
   183	ADD	Rt1, Rt0			; \
   184	ADD	Rconst, Rt0			; \
   185	ADD	Rt0, Ra			; \
   186	ADD	Ra@>(32-shift), Rb, Ra	;
   187
   188	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]	// first 4 constants of round 4
   189	ROUND4(Ra, Rb, Rc, Rd,  0,	6, Rc0)
   190	ROUND4(Rd, Ra, Rb, Rc,  7, 10, Rc1)
   191	ROUND4(Rc, Rd, Ra, Rb, 14, 15, Rc2)
   192	ROUND4(Rb, Rc, Rd, Ra,  5, 21, Rc3)
   193
   194	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   195	ROUND4(Ra, Rb, Rc, Rd, 12,	6, Rc0)
   196	ROUND4(Rd, Ra, Rb, Rc,  3, 10, Rc1)
   197	ROUND4(Rc, Rd, Ra, Rb, 10, 15, Rc2)
   198	ROUND4(Rb, Rc, Rd, Ra,  1, 21, Rc3)
   199
   200	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   201	ROUND4(Ra, Rb, Rc, Rd,  8,	6, Rc0)
   202	ROUND4(Rd, Ra, Rb, Rc, 15, 10, Rc1)
   203	ROUND4(Rc, Rd, Ra, Rb,  6, 15, Rc2)
   204	ROUND4(Rb, Rc, Rd, Ra, 13, 21, Rc3)
   205
   206	MOVM.IA.W (Rtable), [Rc0,Rc1,Rc2,Rc3]
   207	ROUND4(Ra, Rb, Rc, Rd,  4,	6, Rc0)
   208	ROUND4(Rd, Ra, Rb, Rc, 11, 10, Rc1)
   209	ROUND4(Rc, Rd, Ra, Rb,  2, 15, Rc2)
   210	ROUND4(Rb, Rc, Rd, Ra,  9, 21, Rc3)
   211
   212	MOVW	dig+0(FP), Rt0	// Rt0 = dig
   213	MOVM.IA (Rt0), [Rc0,Rc1,Rc2,Rc3]	// reload the four state words as they were before this block
   214
   215	ADD	Rc0, Ra	// add the previous state into this block's a, b, c, d
   216	ADD	Rc1, Rb
   217	ADD	Rc2, Rc
   218	ADD	Rc3, Rd
   219
   220	MOVM.IA [Ra,Rb,Rc,Rd], (Rt0)	// write the updated state back to dig
   221
   222	MOVW	p_data, Rdata	// restore the real data pointer (Rdata may have pointed at buf)
   223	MOVW	p_end, Rt0
   224	ADD	$64, Rdata	// advance to the next 64-byte block
   225	CMP	Rt0, Rdata
   226	BLO	loop	// unsigned: loop while Rdata < p_end. NOTE(review): only tested after a block is hashed, so presumably callers never pass fewer than 64 bytes - confirm
   227
   228	RET
   229
   230// MD5 constants table: the 64 per-step additive constants (table T of RFC 1321),
   231// 16 per round, stored in the order the rounds consume them. block reads them four at a time through Rtable.
   232	// Round 1
   233	DATA	·table+0x00(SB)/4, $0xd76aa478
   234	DATA	·table+0x04(SB)/4, $0xe8c7b756
   235	DATA	·table+0x08(SB)/4, $0x242070db
   236	DATA	·table+0x0c(SB)/4, $0xc1bdceee
   237	DATA	·table+0x10(SB)/4, $0xf57c0faf
   238	DATA	·table+0x14(SB)/4, $0x4787c62a
   239	DATA	·table+0x18(SB)/4, $0xa8304613
   240	DATA	·table+0x1c(SB)/4, $0xfd469501
   241	DATA	·table+0x20(SB)/4, $0x698098d8
   242	DATA	·table+0x24(SB)/4, $0x8b44f7af
   243	DATA	·table+0x28(SB)/4, $0xffff5bb1
   244	DATA	·table+0x2c(SB)/4, $0x895cd7be
   245	DATA	·table+0x30(SB)/4, $0x6b901122
   246	DATA	·table+0x34(SB)/4, $0xfd987193
   247	DATA	·table+0x38(SB)/4, $0xa679438e
   248	DATA	·table+0x3c(SB)/4, $0x49b40821
   249	// Round 2
   250	DATA	·table+0x40(SB)/4, $0xf61e2562
   251	DATA	·table+0x44(SB)/4, $0xc040b340
   252	DATA	·table+0x48(SB)/4, $0x265e5a51
   253	DATA	·table+0x4c(SB)/4, $0xe9b6c7aa
   254	DATA	·table+0x50(SB)/4, $0xd62f105d
   255	DATA	·table+0x54(SB)/4, $0x02441453
   256	DATA	·table+0x58(SB)/4, $0xd8a1e681
   257	DATA	·table+0x5c(SB)/4, $0xe7d3fbc8
   258	DATA	·table+0x60(SB)/4, $0x21e1cde6
   259	DATA	·table+0x64(SB)/4, $0xc33707d6
   260	DATA	·table+0x68(SB)/4, $0xf4d50d87
   261	DATA	·table+0x6c(SB)/4, $0x455a14ed
   262	DATA	·table+0x70(SB)/4, $0xa9e3e905
   263	DATA	·table+0x74(SB)/4, $0xfcefa3f8
   264	DATA	·table+0x78(SB)/4, $0x676f02d9
   265	DATA	·table+0x7c(SB)/4, $0x8d2a4c8a
   266	// Round 3
   267	DATA	·table+0x80(SB)/4, $0xfffa3942
   268	DATA	·table+0x84(SB)/4, $0x8771f681
   269	DATA	·table+0x88(SB)/4, $0x6d9d6122
   270	DATA	·table+0x8c(SB)/4, $0xfde5380c
   271	DATA	·table+0x90(SB)/4, $0xa4beea44
   272	DATA	·table+0x94(SB)/4, $0x4bdecfa9
   273	DATA	·table+0x98(SB)/4, $0xf6bb4b60
   274	DATA	·table+0x9c(SB)/4, $0xbebfbc70
   275	DATA	·table+0xa0(SB)/4, $0x289b7ec6
   276	DATA	·table+0xa4(SB)/4, $0xeaa127fa
   277	DATA	·table+0xa8(SB)/4, $0xd4ef3085
   278	DATA	·table+0xac(SB)/4, $0x04881d05
   279	DATA	·table+0xb0(SB)/4, $0xd9d4d039
   280	DATA	·table+0xb4(SB)/4, $0xe6db99e5
   281	DATA	·table+0xb8(SB)/4, $0x1fa27cf8
   282	DATA	·table+0xbc(SB)/4, $0xc4ac5665
   283	// Round 4
   284	DATA	·table+0xc0(SB)/4, $0xf4292244
   285	DATA	·table+0xc4(SB)/4, $0x432aff97
   286	DATA	·table+0xc8(SB)/4, $0xab9423a7
   287	DATA	·table+0xcc(SB)/4, $0xfc93a039
   288	DATA	·table+0xd0(SB)/4, $0x655b59c3
   289	DATA	·table+0xd4(SB)/4, $0x8f0ccc92
   290	DATA	·table+0xd8(SB)/4, $0xffeff47d
   291	DATA	·table+0xdc(SB)/4, $0x85845dd1
   292	DATA	·table+0xe0(SB)/4, $0x6fa87e4f
   293	DATA	·table+0xe4(SB)/4, $0xfe2ce6e0
   294	DATA	·table+0xe8(SB)/4, $0xa3014314
   295	DATA	·table+0xec(SB)/4, $0x4e0811a1
   296	DATA	·table+0xf0(SB)/4, $0xf7537e82
   297	DATA	·table+0xf4(SB)/4, $0xbd3af235
   298	DATA	·table+0xf8(SB)/4, $0x2ad7d2bb
   299	DATA	·table+0xfc(SB)/4, $0xeb86d391
   300	// Global definition: 64 constants * 4 bytes = 256 bytes; RODATA (value 8 in textflag.h) replaces the bare flag 8
   301	GLOBL	·table(SB), RODATA, $256

View as plain text