...

Text file src/crypto/internal/fips140/sha3/sha3_arm64.s

Documentation: crypto/internal/fips140/sha3

     1// Copyright 2022 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build !purego
     6
     7#include "textflag.h"
     8
     9// func keccakF1600NEON(a *[200]byte)
      //
      // keccakF1600NEON applies the 24-round Keccak-f[1600] loop below to the
      // 200-byte state that a points to and writes the result back in place.
      //
      // Register use:
      //   R0       pointer into the state (a)
      //   R1       cursor into round_consts<>, advanced by 8 bytes per round
      //   R2       rounds remaining, starts at 24
      //   V0-V24   the 25 64-bit state lanes, lane i in Vi (at loop entry/exit)
      //   V25-V31  scratch: column parities, theta D values, lanes parked
      //            during rho/pi, and the round constant
      //
      // Lanes are loaded and stored with the .D1 arrangement (low 64 bits of
      // each register); the arithmetic in the loop uses the .B16/.D2 forms.
      //
      // VEOR3, VRAX1, VXAR and VBCAX are the Arm SHA3-extension instructions
      // (EOR3, RAX1, XAR, BCAX). NOTE(review): nothing in this function checks
      // that the CPU implements them; presumably the Go caller gates on CPU
      // support — confirm.
      //
      // NOTE(review): the TEXT directive declares a 200-byte frame, but no
      // instruction in this function addresses the local frame — confirm
      // whether it is needed.
    10TEXT ·keccakF1600NEON(SB), $200-8
    11	MOVD	a+0(FP), R0
    12	MOVD	$round_consts<>(SB), R1
    13	MOVD	$24, R2 // counter for loop
    14
      	// Load the 25 lanes into V0-V24, two per instruction, post-incrementing
      	// R0 by 16 each time; the final single lane is loaded without increment.
    15	VLD1.P	16(R0), [V0.D1, V1.D1]
    16	VLD1.P	16(R0), [V2.D1, V3.D1]
    17	VLD1.P	16(R0), [V4.D1, V5.D1]
    18	VLD1.P	16(R0), [V6.D1, V7.D1]
    19	VLD1.P	16(R0), [V8.D1, V9.D1]
    20	VLD1.P	16(R0), [V10.D1, V11.D1]
    21	VLD1.P	16(R0), [V12.D1, V13.D1]
    22	VLD1.P	16(R0), [V14.D1, V15.D1]
    23	VLD1.P	16(R0), [V16.D1, V17.D1]
    24	VLD1.P	16(R0), [V18.D1, V19.D1]
    25	VLD1.P	16(R0), [V20.D1, V21.D1]
    26	VLD1.P	16(R0), [V22.D1, V23.D1]
    27	VLD1	(R0), [V24.D1]
    28
      	// Undo the twelve 16-byte post-increments so R0 points at the start of
      	// the state again for the stores after the loop.
    29	SUB	$192, R0, R0
    30
    31loop:
    32	// theta
      	// Column parities: the first five VEOR3 (three-way XOR) combine lanes
      	// i+10, i+15 and i+20 of each column i; the next five fold in lanes i
      	// and i+5. Afterwards V25..V29 hold the parities C[0]..C[4].
    33	VEOR3	 V20.B16, V15.B16, V10.B16, V25.B16
    34	VEOR3	 V21.B16, V16.B16, V11.B16, V26.B16
    35	VEOR3	 V22.B16, V17.B16, V12.B16, V27.B16
    36	VEOR3	 V23.B16, V18.B16, V13.B16, V28.B16
    37	VEOR3	 V24.B16, V19.B16, V14.B16, V29.B16
    38	VEOR3	 V25.B16, V5.B16, V0.B16, V25.B16
    39	VEOR3	 V26.B16, V6.B16, V1.B16, V26.B16
    40	VEOR3	 V27.B16, V7.B16, V2.B16, V27.B16
    41	VEOR3	 V28.B16, V8.B16, V3.B16, V28.B16
    42	VEOR3	 V29.B16, V9.B16, V4.B16, V29.B16
    43
      	// D values: VRAX1 writes (second operand) XOR (first operand rotated
      	// left by 1), i.e. D[x] = C[x-1] ^ rol(C[x+1], 1). Results:
      	// V30 = D[1], V31 = D[2], V27 = D[3], V28 = D[4], V29 = D[0].
      	// The order matters: V27, V28 and V29 are overwritten in place only
      	// once their parity value has no later reader.
    44	VRAX1	V27.D2, V25.D2, V30.D2
    45	VRAX1	V28.D2, V26.D2, V31.D2
    46	VRAX1	V29.D2, V27.D2, V27.D2
    47	VRAX1	V25.D2, V28.D2, V28.D2
    48	VRAX1	V26.D2, V29.D2, V29.D2
    49
    50	// theta and rho and Pi
      	// Lane 0 is not rotated or moved, so it only gets the XOR with D[0].
    51	VEOR	V29.B16, V0.B16, V0.B16
    52
      	// Every other lane: VXAR XORs the lane with its column's D value and
      	// rotates the result right by the immediate (equivalently left by
      	// 64 minus the immediate). Writing to a register other than the
      	// source lane performs the pi permutation. The sequence is ordered so
      	// that each destination's previous contents have already been read;
      	// the last use of each D value overwrites it in place (V28, V27, V30,
      	// V31, V29), so after this section V25-V31 hold state lanes.
    53	VXAR	$63, V30.D2, V1.D2, V25.D2
    54
    55	VXAR	$20, V30.D2, V6.D2, V1.D2
    56	VXAR	$44, V28.D2, V9.D2, V6.D2
    57	VXAR	$3, V31.D2, V22.D2, V9.D2
    58	VXAR	$25, V28.D2, V14.D2, V22.D2
    59	VXAR	$46, V29.D2, V20.D2, V14.D2
    60
    61	VXAR	$2, V31.D2, V2.D2, V26.D2
    62
    63	VXAR	$21, V31.D2, V12.D2, V2.D2
    64	VXAR	$39, V27.D2, V13.D2, V12.D2
    65	VXAR	$56, V28.D2, V19.D2, V13.D2
    66	VXAR	$8, V27.D2, V23.D2, V19.D2
    67	VXAR	$23, V29.D2, V15.D2, V23.D2
    68
    69	VXAR	$37, V28.D2, V4.D2, V15.D2
    70
    71	VXAR	$50, V28.D2, V24.D2, V28.D2
    72	VXAR	$62, V30.D2, V21.D2, V24.D2
    73	VXAR	$9, V27.D2, V8.D2, V8.D2
    74	VXAR	$19, V30.D2, V16.D2, V4.D2
    75	VXAR	$28, V29.D2, V5.D2, V16.D2
    76
    77	VXAR	$36, V27.D2, V3.D2, V5.D2
    78
    79	VXAR	$43, V27.D2, V18.D2, V27.D2
    80	VXAR	$49, V31.D2, V17.D2, V3.D2
    81	VXAR	$54, V30.D2, V11.D2, V30.D2
    82	VXAR	$58, V31.D2, V7.D2, V31.D2
    83	VXAR	$61, V29.D2, V10.D2, V29.D2
    84
    85	// chi and iota
      	// chi is applied one 5-lane row at a time. For consecutive row lanes
      	// x, y, z each VBCAX computes x ^ (z AND NOT y); the operand order is
      	// y, z, x, destination. The first two results of a row are written
      	// to registers whose contents are already dead, and the remaining
      	// three overwrite their own x input after its last read. The row
      	// order matters: later rows reuse registers (V8, V3, V4) that earlier
      	// rows have finished reading.
      	//
      	// Row producing lanes 20-24 (inputs V26, V8, V22, V23, V24).
    86	VBCAX	V8.B16, V22.B16, V26.B16, V20.B16
    87	VBCAX	V22.B16, V23.B16, V8.B16, V21.B16
    88	VBCAX	V23.B16, V24.B16, V22.B16, V22.B16
    89	VBCAX	V24.B16, V26.B16, V23.B16, V23.B16
    90	VBCAX	V26.B16, V8.B16, V24.B16, V24.B16
    91
      	// V26 was last read by the row above, so reuse it for this round's
      	// constant: load 8 bytes at R1 into both 64-bit elements and advance
      	// R1 by 8. It is consumed by the final VEOR of the round.
    92	VLD1R.P	8(R1), [V26.D2]
    93
      	// Row producing lanes 15-19 (inputs V30, V3, V19, V15, V16).
    94	VBCAX	V3.B16, V19.B16, V30.B16, V17.B16
    95	VBCAX	V19.B16, V15.B16, V3.B16, V18.B16
    96	VBCAX	V15.B16, V16.B16, V19.B16, V19.B16
    97	VBCAX	V16.B16, V30.B16, V15.B16, V15.B16
    98	VBCAX	V30.B16, V3.B16, V16.B16, V16.B16
    99
      	// Row producing lanes 10-14 (inputs V25, V31, V12, V13, V14).
   100	VBCAX	V31.B16, V12.B16, V25.B16, V10.B16
   101	VBCAX	V12.B16, V13.B16, V31.B16, V11.B16
   102	VBCAX	V13.B16, V14.B16, V12.B16, V12.B16
   103	VBCAX	V14.B16, V25.B16, V13.B16, V13.B16
   104	VBCAX	V25.B16, V31.B16, V14.B16, V14.B16
   105
      	// Row producing lanes 5-9 (inputs V29, V4, V9, V5, V6).
   106	VBCAX	V4.B16, V9.B16, V29.B16, V7.B16
   107	VBCAX	V9.B16, V5.B16, V4.B16, V8.B16
   108	VBCAX	V5.B16, V6.B16, V9.B16, V9.B16
   109	VBCAX	V6.B16, V29.B16, V5.B16, V5.B16
   110	VBCAX	V29.B16, V4.B16, V6.B16, V6.B16
   111
      	// Row producing lanes 0-4 (inputs V27, V28, V0, V1, V2). Lanes 3 and 4
      	// are written first, into V3 and V4, which the rows above are done with.
   112	VBCAX	V28.B16, V0.B16, V27.B16, V3.B16
   113	VBCAX	V0.B16, V1.B16, V28.B16, V4.B16
   114
   115	VBCAX	V1.B16, V2.B16, V0.B16, V0.B16  // iota (chi part)
   116
   117	VBCAX	V2.B16, V27.B16, V1.B16, V1.B16
   118	VBCAX	V27.B16, V28.B16, V2.B16, V2.B16
   119
      	// XOR the round constant loaded into V26 above into lane 0.
   120	VEOR	V26.B16, V0.B16, V0.B16 // iota
   121
      	// One round done; repeat until the counter reaches zero.
   122	SUB		$1, R2, R2
   123	CBNZ	R2, loop
   124
      	// Store the 25 lanes back over the input state, mirroring the loads:
      	// R0 points at the start of the state here.
   125	VST1.P	[V0.D1, V1.D1], 16(R0)
   126	VST1.P	[V2.D1, V3.D1], 16(R0)
   127	VST1.P	[V4.D1, V5.D1], 16(R0)
   128	VST1.P	[V6.D1, V7.D1], 16(R0)
   129	VST1.P	[V8.D1, V9.D1], 16(R0)
   130	VST1.P	[V10.D1, V11.D1], 16(R0)
   131	VST1.P	[V12.D1, V13.D1], 16(R0)
   132	VST1.P	[V14.D1, V15.D1], 16(R0)
   133	VST1.P	[V16.D1, V17.D1], 16(R0)
   134	VST1.P	[V18.D1, V19.D1], 16(R0)
   135	VST1.P	[V20.D1, V21.D1], 16(R0)
   136	VST1.P	[V22.D1, V23.D1], 16(R0)
   137	VST1	[V24.D1], (R0)
   138
   139	RET
   140
      // round_consts<> holds 24 64-bit constants (192 bytes in total), stored
      // in the order they are consumed: keccakF1600NEON reads one per loop
      // iteration with a post-incrementing VLD1R and XORs it into lane 0 (the
      // step its comments label iota). The <> suffix keeps the symbol local to
      // this file; NOPTR|RODATA marks it as pointer-free, read-only data.
   141DATA	round_consts<>+0x00(SB)/8, $0x0000000000000001
   142DATA	round_consts<>+0x08(SB)/8, $0x0000000000008082
   143DATA	round_consts<>+0x10(SB)/8, $0x800000000000808a
   144DATA	round_consts<>+0x18(SB)/8, $0x8000000080008000
   145DATA	round_consts<>+0x20(SB)/8, $0x000000000000808b
   146DATA	round_consts<>+0x28(SB)/8, $0x0000000080000001
   147DATA	round_consts<>+0x30(SB)/8, $0x8000000080008081
   148DATA	round_consts<>+0x38(SB)/8, $0x8000000000008009
   149DATA	round_consts<>+0x40(SB)/8, $0x000000000000008a
   150DATA	round_consts<>+0x48(SB)/8, $0x0000000000000088
   151DATA	round_consts<>+0x50(SB)/8, $0x0000000080008009
   152DATA	round_consts<>+0x58(SB)/8, $0x000000008000000a
   153DATA	round_consts<>+0x60(SB)/8, $0x000000008000808b
   154DATA	round_consts<>+0x68(SB)/8, $0x800000000000008b
   155DATA	round_consts<>+0x70(SB)/8, $0x8000000000008089
   156DATA	round_consts<>+0x78(SB)/8, $0x8000000000008003
   157DATA	round_consts<>+0x80(SB)/8, $0x8000000000008002
   158DATA	round_consts<>+0x88(SB)/8, $0x8000000000000080
   159DATA	round_consts<>+0x90(SB)/8, $0x000000000000800a
   160DATA	round_consts<>+0x98(SB)/8, $0x800000008000000a
   161DATA	round_consts<>+0xA0(SB)/8, $0x8000000080008081
   162DATA	round_consts<>+0xA8(SB)/8, $0x8000000000008080
   163DATA	round_consts<>+0xB0(SB)/8, $0x0000000080000001
   164DATA	round_consts<>+0xB8(SB)/8, $0x8000000080008008
   165GLOBL	round_consts<>(SB), NOPTR|RODATA, $192

View as plain text