...

Text file src/crypto/aes/asm_arm64.s

Documentation: crypto/aes

     1// Copyright 2017 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego
     6
     7#include "textflag.h"
     8DATA rotInvSRows<>+0x00(SB)/8, $0x080f0205040b0e01
// rotInvSRows and invSRows are 16-byte TBL index vectors used only by
// expandKeyAsm (loaded into V3 and V4 respectively). They pre-permute the
// bytes of V2 so that, after the ShiftRows step performed inside AESE, the
// low 32-bit lane holds
//   rotInvSRows: input bytes 1,2,3,0 of lane 0 (the word rotated by one byte)
//   invSRows:    input bytes 0,1,2,3 of lane 0 (the word unchanged)
// which lets AESE with an all-zero key act as a plain S-box lookup on a word.
     9DATA rotInvSRows<>+0x08(SB)/8, $0x00070a0d0c030609
    10GLOBL rotInvSRows<>(SB), (NOPTR+RODATA), $16
    11DATA invSRows<>+0x00(SB)/8, $0x0b0e0104070a0d00
    12DATA invSRows<>+0x08(SB)/8, $0x0306090c0f020508
    13GLOBL invSRows<>(SB), (NOPTR+RODATA), $16
    14// func encryptBlockAsm(nr int, xk *uint32, dst, src *byte)
// Encrypts the single 16-byte block at src using the round keys at xk and
// stores the 16-byte result at dst.
//   nr  - round count; compared against 12 to choose the entry label
//   xk  - expanded key, consumed front to back as 16-byte round keys
//   dst - destination block
//   src - source block
// The labels form a fall-through chain: nr > 12 enters at enc256 (4 extra
// rounds), nr == 12 enters at enc196 (2 extra rounds), nr < 12 jumps straight
// to enc128, the 10-round tail shared by all key sizes.
// AESE XORs the round key into the state and then applies ShiftRows and
// SubBytes; AESMC applies MixColumns.
    15TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
    16	MOVD	nr+0(FP), R9 // R9 = nr
    17	MOVD	xk+8(FP), R10 // R10 = round-key cursor, advanced by the VLD1.P loads
    18	MOVD	dst+16(FP), R11 // R11 = dst
    19	MOVD	src+24(FP), R12 // R12 = src
    20
    21	VLD1	(R12), [V0.B16] // V0 = state, the 16 input bytes
    22
    23	CMP	$12, R9
    24	BLT	enc128 // nr < 12
    25	BEQ	enc196 // nr == 12; otherwise fall through to enc256
    26enc256:
    27	VLD1.P	32(R10), [V1.B16, V2.B16] // two round keys, R10 += 32
    28	AESE	V1.B16, V0.B16
    29	AESMC	V0.B16, V0.B16
    30	AESE	V2.B16, V0.B16
    31	AESMC	V0.B16, V0.B16
    32enc196:
    33	VLD1.P	32(R10), [V3.B16, V4.B16] // two round keys, R10 += 32
    34	AESE	V3.B16, V0.B16
    35	AESMC	V0.B16, V0.B16
    36	AESE	V4.B16, V0.B16
    37	AESMC	V0.B16, V0.B16
    38enc128:
    39	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16] // remaining 11 round keys -> V5..V15
    40	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
    41	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
    42	AESE	V5.B16, V0.B16
    43	AESMC	V0.B16, V0.B16
    44	AESE	V6.B16, V0.B16
    45	AESMC	V0.B16, V0.B16
    46	AESE	V7.B16, V0.B16
    47	AESMC	V0.B16, V0.B16
    48	AESE	V8.B16, V0.B16
    49	AESMC	V0.B16, V0.B16
    50	AESE	V9.B16, V0.B16
    51	AESMC	V0.B16, V0.B16
    52	AESE	V10.B16, V0.B16
    53	AESMC	V0.B16, V0.B16
    54	AESE	V11.B16, V0.B16
    55	AESMC	V0.B16, V0.B16
    56	AESE	V12.B16, V0.B16
    57	AESMC	V0.B16, V0.B16
    58	AESE	V13.B16, V0.B16
    59	AESMC	V0.B16, V0.B16
    60	AESE	V14.B16, V0.B16 // last round: no AESMC follows
    61	VEOR    V0.B16, V15.B16, V0.B16 // XOR in the final round key
    62	VST1	[V0.B16], (R11) // write the 16-byte result to dst
    63	RET
    64
    65// func decryptBlockAsm(nr int, xk *uint32, dst, src *byte)
// Decrypts the single 16-byte block at src using the round keys at xk and
// stores the 16-byte result at dst.
//   nr  - round count; compared against 12 to choose the entry label
//   xk  - decryption round keys, consumed front to back as 16-byte values
//         (expandKeyAsm writes them to its dec argument in this order)
//   dst - destination block
//   src - source block
// Same fall-through structure as encryptBlockAsm: nr > 12 enters at dec256,
// nr == 12 at dec196, nr < 12 at dec128.
// AESD XORs the round key into the state and then applies InvShiftRows and
// InvSubBytes; AESIMC applies InvMixColumns.
    66TEXT ·decryptBlockAsm(SB),NOSPLIT,$0
    67	MOVD	nr+0(FP), R9 // R9 = nr
    68	MOVD	xk+8(FP), R10 // R10 = round-key cursor, advanced by the VLD1.P loads
    69	MOVD	dst+16(FP), R11 // R11 = dst
    70	MOVD	src+24(FP), R12 // R12 = src
    71
    72	VLD1	(R12), [V0.B16] // V0 = state, the 16 input bytes
    73
    74	CMP	$12, R9
    75	BLT	dec128 // nr < 12
    76	BEQ	dec196 // nr == 12; otherwise fall through to dec256
    77dec256:
    78	VLD1.P	32(R10), [V1.B16, V2.B16] // two round keys, R10 += 32
    79	AESD	V1.B16, V0.B16
    80	AESIMC	V0.B16, V0.B16
    81	AESD	V2.B16, V0.B16
    82	AESIMC	V0.B16, V0.B16
    83dec196:
    84	VLD1.P	32(R10), [V3.B16, V4.B16] // two round keys, R10 += 32
    85	AESD	V3.B16, V0.B16
    86	AESIMC	V0.B16, V0.B16
    87	AESD	V4.B16, V0.B16
    88	AESIMC	V0.B16, V0.B16
    89dec128:
    90	VLD1.P	64(R10), [V5.B16, V6.B16, V7.B16, V8.B16] // remaining 11 round keys -> V5..V15
    91	VLD1.P	64(R10), [V9.B16, V10.B16, V11.B16, V12.B16]
    92	VLD1.P	48(R10), [V13.B16, V14.B16, V15.B16]
    93	AESD	V5.B16, V0.B16
    94	AESIMC	V0.B16, V0.B16
    95	AESD	V6.B16, V0.B16
    96	AESIMC	V0.B16, V0.B16
    97	AESD	V7.B16, V0.B16
    98	AESIMC	V0.B16, V0.B16
    99	AESD	V8.B16, V0.B16
   100	AESIMC	V0.B16, V0.B16
   101	AESD	V9.B16, V0.B16
   102	AESIMC	V0.B16, V0.B16
   103	AESD	V10.B16, V0.B16
   104	AESIMC	V0.B16, V0.B16
   105	AESD	V11.B16, V0.B16
   106	AESIMC	V0.B16, V0.B16
   107	AESD	V12.B16, V0.B16
   108	AESIMC	V0.B16, V0.B16
   109	AESD	V13.B16, V0.B16
   110	AESIMC	V0.B16, V0.B16
   111	AESD	V14.B16, V0.B16 // last round: no AESIMC follows
   112	VEOR    V0.B16, V15.B16, V0.B16 // XOR in the final round key
   113	VST1	[V0.B16], (R11) // write the 16-byte result to dst
   114	RET
   115
   116// func expandKeyAsm(nr int, key *byte, enc, dec *uint32)
   117// Note that round keys are stored in uint128 format, not uint32
// Expands the raw key at key into the encryption round keys at enc and, when
// dec is non-nil, also writes the decryption round keys to dec.
// The variant is chosen by testing bits 1 and 2 of nr: bit 1 clear -> ks192,
// bits 1 and 2 both set -> ks256, otherwise the 128-bit path that follows the
// two test-bit branches.
// NOTE(review): that dispatch only tells apart nr = 10 (0b1010), 12 (0b1100)
// and 14 (0b1110); presumably callers never pass any other value - confirm.
// Register roles shared by the three paths:
//   R8  = loop counter                R9  = key pointer
//   R10 = enc write cursor            R11 = dec write cursor
//   R13 = current round constant      V0  = all-zero round key for AESE
//   V3  = rotInvSRows index vector    V2  = scratch for the S-box step
// The S-box step places a word in V2.S[0], permutes V2 with TBL, and runs
// AESE with the zero key V0, so that lane 0 of V2 ends up holding the S-box
// of each byte of the (optionally byte-rotated) word.
// Each decryption schedule is the encryption schedule in reverse order, with
// AESIMC (InvMixColumns) applied to every round key except the first and the
// last, which are copied unchanged.
   118TEXT ·expandKeyAsm(SB),NOSPLIT,$0
   119	MOVD	nr+0(FP), R8
   120	MOVD	key+8(FP), R9
   121	MOVD	enc+16(FP), R10
   122	MOVD	dec+24(FP), R11
   123	LDP	rotInvSRows<>(SB), (R0, R1) // V3 = rotInvSRows, moved in via R0/R1
   124	VMOV	R0, V3.D[0]
   125	VMOV	R1, V3.D[1]
   126	VEOR	V0.B16, V0.B16, V0.B16 // All zeroes
   127	MOVW	$1, R13 // Rcon starts at 1
   128	TBZ	$1, R8, ks192 // bit 1 of nr clear
   129	TBNZ	$2, R8, ks256 // bit 2 of nr set
	// 128-bit key: R4..R7 hold the most recent four key words.
   130	LDPW	(R9), (R4, R5)
   131	LDPW	8(R9), (R6, R7)
   132	STPW.P	(R4, R5), 8(R10) // the first round key is the raw key itself
   133	STPW.P	(R6, R7), 8(R10)
   134	MOVW	$0x1b, R14 // value Rcon takes once the shift overflows 8 bits
   135ks128Loop:
   136		VMOV	R7, V2.S[0] // feed the last word into the S-box step
   137		WORD	$0x4E030042       // TBL V3.B16, [V2.B16], V2.B16
   138		AESE	V0.B16, V2.B16    // Use AES to compute the SBOX
   139		EORW	R13, R4 // fold the current Rcon into the first word
   140		LSLW	$1, R13           // Compute next Rcon
   141		ANDSW	$0x100, R13, ZR // did the shift carry into bit 8?
   142		CSELW	NE, R14, R13, R13 // Fake modulo: if so, Rcon = 0x1b
   143		SUBS	$1, R8 // sets the flags tested by BNE below; nothing in between alters them
   144		VMOV	V2.S[0], R0 // R0 = S-box of the rotated last word
   145		EORW	R0, R4
   146		EORW	R4, R5 // each following word is XORed with its new predecessor
   147		EORW	R5, R6
   148		EORW	R6, R7
   149		STPW.P	(R4, R5), 8(R10)
   150		STPW.P	(R6, R7), 8(R10)
   151	BNE	ks128Loop
   152	CBZ	R11, ksDone       // If dec is nil we are done
   153	SUB	$176, R10 // rewind enc cursor over the 11 round keys just written
   154	// Decryption keys are encryption keys with InverseMixColumns applied
	// V0..V3 are reused as load buffers from here on; results are placed in
	// descending registers so the stores below emit them in reverse order.
   155	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
   156	VMOV	V0.B16, V7.B16 // first encryption key is copied unchanged, stored last
   157	AESIMC	V1.B16, V6.B16
   158	AESIMC	V2.B16, V5.B16
   159	AESIMC	V3.B16, V4.B16
   160	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
   161	AESIMC	V0.B16, V11.B16
   162	AESIMC	V1.B16, V10.B16
   163	AESIMC	V2.B16, V9.B16
   164	AESIMC	V3.B16, V8.B16
   165	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
   166	AESIMC	V0.B16, V14.B16
   167	AESIMC	V1.B16, V13.B16
   168	VMOV	V2.B16, V12.B16 // last encryption key is copied unchanged, stored first
   169	VST1.P	[V12.B16, V13.B16, V14.B16], 48(R11)
   170	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
   171	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
   172	B	ksDone
   173ks192:
	// 192-bit key: R2..R7 hold the most recent six key words.
   174	LDPW	(R9), (R2, R3)
   175	LDPW	8(R9), (R4, R5)
   176	LDPW	16(R9), (R6, R7)
   177	STPW.P	(R2, R3), 8(R10)
   178	STPW.P	(R4, R5), 8(R10)
   179	SUB	$4, R8 // loop runs nr-4 times
   180ks192Loop:
   181		STPW.P	(R6, R7), 8(R10) // flush the two words still pending from the previous group
   182		VMOV	R7, V2.S[0]
   183		WORD	$0x4E030042 // TBL V3.B16, [V2.B16], V2.B16
   184		AESE	V0.B16, V2.B16
   185		EORW	R13, R2
   186		LSLW	$1, R13 // next Rcon; no 0x1b reduction on this path
   187		SUBS	$1, R8 // sets the flags tested by BNE below
   188		VMOV	V2.S[0], R0
   189		EORW	R0, R2
   190		EORW	R2, R3
   191		EORW	R3, R4
   192		EORW	R4, R5
   193		EORW	R5, R6
   194		EORW	R6, R7
   195		STPW.P	(R2, R3), 8(R10)
   196		STPW.P	(R4, R5), 8(R10)
   197	BNE	ks192Loop
   198	CBZ	R11, ksDone // dec is nil: encryption schedule only
   199	SUB	$208, R10 // rewind enc cursor over the 13 round keys just written
   200	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
   201	VMOV	V0.B16, V7.B16 // first encryption key is copied unchanged, stored last
   202	AESIMC	V1.B16, V6.B16
   203	AESIMC	V2.B16, V5.B16
   204	AESIMC	V3.B16, V4.B16
   205	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
   206	AESIMC	V0.B16, V11.B16
   207	AESIMC	V1.B16, V10.B16
   208	AESIMC	V2.B16, V9.B16
   209	AESIMC	V3.B16, V8.B16
   210	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
   211	AESIMC	V0.B16, V15.B16
   212	AESIMC	V1.B16, V14.B16
   213	AESIMC	V2.B16, V13.B16
   214	AESIMC	V3.B16, V12.B16
   215	VLD1	(R10), [V0.B16] // last encryption key, stored first without AESIMC
   216	VST1.P	[V0.B16], 16(R11)
   217	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
   218	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
   219	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
   220	B	ksDone
   221ks256:
	// 256-bit key: R0..R7 hold the most recent eight key words.
   222	LDP	invSRows<>(SB), (R0, R1) // V4 = invSRows, index vector for the unrotated S-box step
   223	VMOV	R0, V4.D[0]
   224	VMOV	R1, V4.D[1]
   225	LDPW	(R9), (R0, R1)
   226	LDPW	8(R9), (R2, R3)
   227	LDPW	16(R9), (R4, R5)
   228	LDPW	24(R9), (R6, R7) // last read through R9; it is reused as scratch in the loop
   229	STPW.P	(R0, R1), 8(R10)
   230	STPW.P	(R2, R3), 8(R10)
   231	SUB	$7, R8 // loop runs nr-7 times
   232ks256Loop:
   233		STPW.P	(R4, R5), 8(R10) // flush the upper four words of the previous group
   234		STPW.P	(R6, R7), 8(R10)
   235		VMOV	R7, V2.S[0]
   236		WORD	$0x4E030042 // TBL V3.B16, [V2.B16], V2.B16
   237		AESE	V0.B16, V2.B16
   238		EORW	R13, R0
   239		LSLW	$1, R13 // next Rcon; no 0x1b reduction on this path
   240		SUBS	$1, R8 // sets the flags tested by BNE below
   241		VMOV	V2.S[0], R9
   242		EORW	R9, R0
   243		EORW	R0, R1
   244		EORW	R1, R2
   245		EORW	R2, R3
   246		VMOV	R3, V2.S[0] // second S-box step: no rotation (V4 indices) and no Rcon
   247		WORD	$0x4E040042 // TBL V4.B16, [V2.B16], V2.B16
   248		AESE	V0.B16, V2.B16
   249		VMOV	V2.S[0], R9
   250		EORW	R9, R4
   251		EORW	R4, R5
   252		EORW	R5, R6
   253		EORW	R6, R7
   254		STPW.P	(R0, R1), 8(R10)
   255		STPW.P	(R2, R3), 8(R10)
   256	BNE	ks256Loop // on exit the freshly computed R4..R7 are not stored
   257	CBZ	R11, ksDone // dec is nil: encryption schedule only
   258	SUB	$240, R10 // rewind enc cursor over the 15 round keys just written
   259	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
   260	VMOV	V0.B16, V7.B16 // first encryption key is copied unchanged, stored last
   261	AESIMC	V1.B16, V6.B16
   262	AESIMC	V2.B16, V5.B16
   263	AESIMC	V3.B16, V4.B16
   264	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
   265	AESIMC	V0.B16, V11.B16
   266	AESIMC	V1.B16, V10.B16
   267	AESIMC	V2.B16, V9.B16
   268	AESIMC	V3.B16, V8.B16
   269	VLD1.P	64(R10), [V0.B16, V1.B16, V2.B16, V3.B16]
   270	AESIMC	V0.B16, V15.B16
   271	AESIMC	V1.B16, V14.B16
   272	AESIMC	V2.B16, V13.B16
   273	AESIMC	V3.B16, V12.B16
   274	VLD1	(R10), [V0.B16, V1.B16, V2.B16]
   275	AESIMC	V0.B16, V18.B16
   276	AESIMC	V1.B16, V17.B16
   277	VMOV	V2.B16, V16.B16 // last encryption key is copied unchanged, stored first
   278	VST1.P	[V16.B16, V17.B16, V18.B16], 48(R11)
   279	VST1.P	[V12.B16, V13.B16, V14.B16, V15.B16], 64(R11)
   280	VST1.P	[V8.B16, V9.B16, V10.B16, V11.B16], 64(R11)
   281	VST1	[V4.B16, V5.B16, V6.B16, V7.B16], (R11)
   282ksDone:
   283	RET

View as plain text