// Source: src/crypto/sha512/sha512block_arm64.s
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

// Based on the Linux Kernel with the following comment:
// Algorithm based on https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fb87127bcefc17efab757606e1b1e333fd614dd0
// Originally written by Ard Biesheuvel <ard.biesheuvel@linaro.org>

#include "textflag.h"
12
// SHA512TRANS is the common preamble expanded at the start of every round
// macro. Operands follow Go assembler order (destination last).
//
//	V5 = in0 + rc0                      (per 64-bit lane)
//	V6 = VEXT $8 over the i3/i2 pair
//	V5 = V5 with its two 64-bit halves exchanged (VEXT $8 of V5 with itself)
//	V7 = VEXT $8 over the i2/i1 pair
//	i3 = i3 + V5                        (per 64-bit lane)
//
// Clobbers V5, V6 and V7 and updates i3. The i0 and i4 parameters are
// accepted but not referenced in the body.
//
// The final line deliberately carries no trailing backslash, so the
// definition ends on its own and does not depend on a blank line following it.
#define SHA512TRANS(i0, i1, i2, i3, i4, rc0, in0) \
	VADD	in0.D2, rc0.D2, V5.D2 \
	VEXT	$8, i3.B16, i2.B16, V6.B16 \
	VEXT	$8, V5.B16, V5.B16, V5.B16 \
	VEXT	$8, i2.B16, i1.B16, V7.B16 \
	VADD	V5.D2, i3.D2, i3.D2
// SHA512ROUND is one hashing step that also advances the message schedule.
// Operands follow Go assembler order (destination last).
//
//	st0..st4  working-state vector registers (st3 and st4 are written)
//	kNow      round constants consumed by this step
//	kNext     register that receives the next 16 bytes of constants from (R4);
//	          R4 is post-incremented by 16
//	m0..m4    message-schedule registers; m0 is rewritten in place by
//	          SHA512SU0/SHA512SU1 using m1, m2 and a VEXT $8 of m4/m3
//
// After the SHA512TRANS preamble, SHA512H updates st3, st4 is set to
// st1 + st3, and SHA512H2 updates st3 again.
// Clobbers V5, V6, V7 (V5 is reused as scratch for the schedule update).
#define SHA512ROUND(st0, st1, st2, st3, st4, kNow, kNext, m0, m1, m2, m3, m4) \
	VLD1.P	16(R4), [kNext.D2] \
	SHA512TRANS(st0, st1, st2, st3, st4, kNow, m0) \
	VEXT	$8, m4.B16, m3.B16, V5.B16 \
	SHA512SU0	m1.D2, m0.D2 \
	SHA512H	V7.D2, V6, st3 \
	SHA512SU1	V5.D2, m2.D2, m0.D2 \
	VADD	st3.D2, st1.D2, st4.D2 \
	SHA512H2	st0.D2, st1, st3
29
// SHA512ROUND_NO_UPDATE is a hashing step that leaves the message schedule
// untouched: it has no SHA512SU0/SHA512SU1 instructions and takes a single
// message register.
//
//	st0..st4  working-state vector registers (st3 and st4 are written)
//	kNow      round constants consumed by this step
//	kNext     register that receives the next 16 bytes of constants from (R4);
//	          R4 is post-incremented by 16
//	m0        message words added to kNow by the SHA512TRANS preamble
//
// Clobbers V5, V6, V7.
#define SHA512ROUND_NO_UPDATE(st0, st1, st2, st3, st4, kNow, kNext, m0) \
	VLD1.P	16(R4), [kNext.D2] \
	SHA512TRANS(st0, st1, st2, st3, st4, kNow, m0) \
	SHA512H	V7.D2, V6, st3 \
	VADD	st3.D2, st1.D2, st4.D2 \
	SHA512H2	st0.D2, st1, st3
36
// SHA512ROUND_LAST is a hashing step with neither a message-schedule update
// nor a round-constant load; R4 is not touched.
//
//	st0..st4  working-state vector registers (st3 and st4 are written)
//	kNow      round constants consumed by this step
//	m0        message words added to kNow by the SHA512TRANS preamble
//
// Clobbers V5, V6, V7.
#define SHA512ROUND_LAST(st0, st1, st2, st3, st4, kNow, m0) \
	SHA512TRANS(st0, st1, st2, st3, st4, kNow, m0) \
	SHA512H	V7.D2, V6, st3 \
	VADD	st3.D2, st1.D2, st4.D2 \
	SHA512H2	st0.D2, st1, st3
42
// func blockAsm(dig *digest, p []byte)
//
// blockAsm folds the 128-byte blocks of p into the hash state stored at dig,
// using the SHA512H, SHA512H2, SHA512SU0 and SHA512SU1 instructions. It does
// no CPU feature detection of its own.
//
// Only whole blocks are consumed: the length is rounded down to a multiple of
// 128, and when that leaves nothing the routine returns without reading p or
// writing dig. (Without this guard the SUB/CBNZ loop below would never see
// zero for an empty or ragged length and would run off the end of p.)
//
// Register use:
//	R0       dig; 64 bytes are loaded from and stored back to this address
//	         (assumes the eight 64-bit state words sit at the start of digest)
//	R1       read cursor into p, post-incremented by the message loads
//	R2       bytes still to hash
//	R3       first word stored at ·_K, used as the base address of the
//	         round-constant table (·_K is defined outside this file)
//	R4       cursor into the round constants, reset from R3 for every block
//	V0-V4    working state
//	V5-V7    scratch used by the round macros
//	V8-V11   running digest
//	V12-V19  message block / message schedule
//	V20-V31  round constants
TEXT ·blockAsm(SB),NOSPLIT,$0
	MOVD	dig+0(FP), R0
	MOVD	p_base+8(FP), R1
	MOVD	p_len+16(FP), R2
	MOVD	·_K+0(SB), R3

	// keep whole 128-byte blocks only; nothing to do if none remain
	AND	$~127, R2, R2
	CBZ	R2, done

	// long enough to prefetch
	PRFM	(R3), PLDL3KEEP
	// load digest
	VLD1	(R0), [V8.D2, V9.D2, V10.D2, V11.D2]
loop:
	// load digest in V0-V3 keeping original in V8-V11
	VMOV	V8.B16, V0.B16
	VMOV	V9.B16, V1.B16
	VMOV	V10.B16, V2.B16
	VMOV	V11.B16, V3.B16

	// load message data in V12-V19 (128 bytes, R1 advances past the block)
	VLD1.P	64(R1), [V12.D2, V13.D2, V14.D2, V15.D2]
	VLD1.P	64(R1), [V16.D2, V17.D2, V18.D2, V19.D2]

	// convert message into big endian format
	// (byte-reverse each 64-bit word)
	VREV64	V12.B16, V12.B16
	VREV64	V13.B16, V13.B16
	VREV64	V14.B16, V14.B16
	VREV64	V15.B16, V15.B16
	VREV64	V16.B16, V16.B16
	VREV64	V17.B16, V17.B16
	VREV64	V18.B16, V18.B16
	VREV64	V19.B16, V19.B16

	// restart the round-constant cursor for this block
	MOVD	R3, R4
	// load first 4 round consts in V20-V23
	VLD1.P	64(R4), [V20.D2, V21.D2, V22.D2, V23.D2]

	// 32 steps that also advance the message schedule. Each step loads the
	// constants that will be consumed four steps later (the 7th argument
	// here reappears as the 6th argument four lines further down).
	SHA512ROUND(V0, V1, V2, V3, V4, V20, V24, V12, V13, V19, V16, V17)
	SHA512ROUND(V3, V0, V4, V2, V1, V21, V25, V13, V14, V12, V17, V18)
	SHA512ROUND(V2, V3, V1, V4, V0, V22, V26, V14, V15, V13, V18, V19)
	SHA512ROUND(V4, V2, V0, V1, V3, V23, V27, V15, V16, V14, V19, V12)
	SHA512ROUND(V1, V4, V3, V0, V2, V24, V28, V16, V17, V15, V12, V13)

	SHA512ROUND(V0, V1, V2, V3, V4, V25, V29, V17, V18, V16, V13, V14)
	SHA512ROUND(V3, V0, V4, V2, V1, V26, V30, V18, V19, V17, V14, V15)
	SHA512ROUND(V2, V3, V1, V4, V0, V27, V31, V19, V12, V18, V15, V16)
	SHA512ROUND(V4, V2, V0, V1, V3, V28, V24, V12, V13, V19, V16, V17)
	SHA512ROUND(V1, V4, V3, V0, V2, V29, V25, V13, V14, V12, V17, V18)

	SHA512ROUND(V0, V1, V2, V3, V4, V30, V26, V14, V15, V13, V18, V19)
	SHA512ROUND(V3, V0, V4, V2, V1, V31, V27, V15, V16, V14, V19, V12)
	SHA512ROUND(V2, V3, V1, V4, V0, V24, V28, V16, V17, V15, V12, V13)
	SHA512ROUND(V4, V2, V0, V1, V3, V25, V29, V17, V18, V16, V13, V14)
	SHA512ROUND(V1, V4, V3, V0, V2, V26, V30, V18, V19, V17, V14, V15)

	SHA512ROUND(V0, V1, V2, V3, V4, V27, V31, V19, V12, V18, V15, V16)
	SHA512ROUND(V3, V0, V4, V2, V1, V28, V24, V12, V13, V19, V16, V17)
	SHA512ROUND(V2, V3, V1, V4, V0, V29, V25, V13, V14, V12, V17, V18)
	SHA512ROUND(V4, V2, V0, V1, V3, V30, V26, V14, V15, V13, V18, V19)
	SHA512ROUND(V1, V4, V3, V0, V2, V31, V27, V15, V16, V14, V19, V12)

	SHA512ROUND(V0, V1, V2, V3, V4, V24, V28, V16, V17, V15, V12, V13)
	SHA512ROUND(V3, V0, V4, V2, V1, V25, V29, V17, V18, V16, V13, V14)
	SHA512ROUND(V2, V3, V1, V4, V0, V26, V30, V18, V19, V17, V14, V15)
	SHA512ROUND(V4, V2, V0, V1, V3, V27, V31, V19, V12, V18, V15, V16)
	SHA512ROUND(V1, V4, V3, V0, V2, V28, V24, V12, V13, V19, V16, V17)

	SHA512ROUND(V0, V1, V2, V3, V4, V29, V25, V13, V14, V12, V17, V18)
	SHA512ROUND(V3, V0, V4, V2, V1, V30, V26, V14, V15, V13, V18, V19)
	SHA512ROUND(V2, V3, V1, V4, V0, V31, V27, V15, V16, V14, V19, V12)
	SHA512ROUND(V4, V2, V0, V1, V3, V24, V28, V16, V17, V15, V12, V13)
	SHA512ROUND(V1, V4, V3, V0, V2, V25, V29, V17, V18, V16, V13, V14)

	SHA512ROUND(V0, V1, V2, V3, V4, V26, V30, V18, V19, V17, V14, V15)
	SHA512ROUND(V3, V0, V4, V2, V1, V27, V31, V19, V12, V18, V15, V16)

	// 4 steps with no schedule update; these still load the constants
	// needed by the final 4 steps
	SHA512ROUND_NO_UPDATE(V2, V3, V1, V4, V0, V28, V24, V12)
	SHA512ROUND_NO_UPDATE(V4, V2, V0, V1, V3, V29, V25, V13)
	SHA512ROUND_NO_UPDATE(V1, V4, V3, V0, V2, V30, V26, V14)
	SHA512ROUND_NO_UPDATE(V0, V1, V2, V3, V4, V31, V27, V15)

	// final 4 steps: no schedule update and no further constant loads
	SHA512ROUND_LAST(V3, V0, V4, V2, V1, V24, V16)
	SHA512ROUND_LAST(V2, V3, V1, V4, V0, V25, V17)
	SHA512ROUND_LAST(V4, V2, V0, V1, V3, V26, V18)
	SHA512ROUND_LAST(V1, V4, V3, V0, V2, V27, V19)

	// add result to digest
	VADD	V0.D2, V8.D2, V8.D2
	VADD	V1.D2, V9.D2, V9.D2
	VADD	V2.D2, V10.D2, V10.D2
	VADD	V3.D2, V11.D2, V11.D2
	// one block consumed; R2 is a multiple of 128 so it reaches exactly zero
	SUB	$128, R2
	CBNZ	R2, loop

	// write the updated digest back
	VST1	[V8.D2, V9.D2, V10.D2, V11.D2], (R0)
done:
	RET
View as plain text