...

Text file src/crypto/sha256/sha256block_ppc64x.s

Documentation: crypto/sha256

     1// Copyright 2016 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//go:build (ppc64 || ppc64le) && !purego
     6
     7// Based on CRYPTOGAMS code with the following comment:
     8// # ====================================================================
     9// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
    10// # project. The module is, however, dual licensed under OpenSSL and
    11// # CRYPTOGAMS licenses depending on where you obtain it. For further
    12// # details see http://www.openssl.org/~appro/cryptogams/.
    13// # ====================================================================
    14
    15#include "textflag.h"
    16
    17// SHA256 block routine. See sha256block.go for Go equivalent.
    18//
    19// The algorithm is detailed in FIPS 180-4:
    20//
    21//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    22//
    23// Wt = Mt; for 0 <= t <= 15
    24// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
    25//
    26// a = H0
    27// b = H1
    28// c = H2
    29// d = H3
    30// e = H4
    31// f = H5
    32// g = H6
    33// h = H7
    34//
    35// for t = 0 to 63 {
    36//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    37//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    38//    h = g
    39//    g = f
    40//    f = e
    41//    e = d + T1
    42//    d = c
    43//    c = b
    44//    b = a
    45//    a = T1 + T2
    46// }
    47//
    48// H0 = a + H0
    49// H1 = b + H1
    50// H2 = c + H2
    51// H3 = d + H3
    52// H4 = e + H4
    53// H5 = f + H5
    54// H6 = g + H6
    55// H7 = h + H7
    56
    57#define CTX	R3 // dig argument; the state is loaded from and stored to this address
    58#define INP	R4 // p_base; advanced by 0x40 per block in ·block
    59#define END	R5 // INP + LEN as computed on entry; the block loop runs while INP < END
    60#define TBL	R6 // Pointer into kcon table
    61#define LEN	R9 // p_len, rounded down to a multiple of 64 in ·block
    62#define TEMP	R12 // scratch: LVSL address on ppc64le, then the CTR loop count
    63
    64#define TBL_STRT	R7 // Pointer to start of kcon table.
    65
    // R_xNNN are index registers that ·block loads with the byte offset NNN
    // (0x010 .. 0x110). They are used as the index operand of the vector
    // loads and stores. R_x000 is R0, which ·block never loads.
    // NOTE(review): this relies on R0 reading as zero — confirm against the
    // Go ppc64 register conventions.
    66#define R_x000	R0
    67#define R_x010	R8
    68#define R_x020	R10
    69#define R_x030	R11
    70#define R_x040	R14
    71#define R_x050	R15
    72#define R_x060	R16
    73#define R_x070	R17
    74#define R_x080	R18
    75#define R_x090	R19
    76#define R_x0a0	R20
    77#define R_x0b0	R21
    78#define R_x0c0	R22
    79#define R_x0d0	R23
    80#define R_x0e0	R24
    81#define R_x0f0	R25
    82#define R_x100	R26
    83#define R_x110	R27
    84
    85
    86// V0-V7 are A-H
    87// V8-V23 are used for the message schedule
    88#define KI	V24 // round constant vector, loaded from kcon one round ahead of its use
    89#define FUNC	V25 // scratch for the VSEL/VXOR results in the round macros
    90#define S0	V26 // VSHASIGMAW result for a (BIGSIGMA0 term), later summed with FUNC
    91#define S1	V27 // VSHASIGMAW result for e (BIGSIGMA1 term)
    92#define s0	V28 // VSHASIGMAW result for xj_1 in SHA256ROUND1 (SIGMA0 term)
    93#define s1	V29 // VSHASIGMAW result for xj_14 in SHA256ROUND1 (SIGMA1 term)
    94#define LEMASK	V31 // Permutation control register for little endian
    95
    96// 4 copies of each Kt, to fill all 4 words of a vector register
    // Each Kt therefore occupies 16 bytes, so K[t] is at kcon+16*t
    // (offsets 0x000 through 0x3F0 for t = 0..63).
    97DATA  ·kcon+0x000(SB)/8, $0x428a2f98428a2f98
    98DATA  ·kcon+0x008(SB)/8, $0x428a2f98428a2f98
    99DATA  ·kcon+0x010(SB)/8, $0x7137449171374491
   100DATA  ·kcon+0x018(SB)/8, $0x7137449171374491
   101DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfb5c0fbcf
   102DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfb5c0fbcf
   103DATA  ·kcon+0x030(SB)/8, $0xe9b5dba5e9b5dba5
   104DATA  ·kcon+0x038(SB)/8, $0xe9b5dba5e9b5dba5
   105DATA  ·kcon+0x040(SB)/8, $0x3956c25b3956c25b
   106DATA  ·kcon+0x048(SB)/8, $0x3956c25b3956c25b
   107DATA  ·kcon+0x050(SB)/8, $0x59f111f159f111f1
   108DATA  ·kcon+0x058(SB)/8, $0x59f111f159f111f1
   109DATA  ·kcon+0x060(SB)/8, $0x923f82a4923f82a4
   110DATA  ·kcon+0x068(SB)/8, $0x923f82a4923f82a4
   111DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5ab1c5ed5
   112DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5ab1c5ed5
   113DATA  ·kcon+0x080(SB)/8, $0xd807aa98d807aa98
   114DATA  ·kcon+0x088(SB)/8, $0xd807aa98d807aa98
   115DATA  ·kcon+0x090(SB)/8, $0x12835b0112835b01
   116DATA  ·kcon+0x098(SB)/8, $0x12835b0112835b01
   117DATA  ·kcon+0x0A0(SB)/8, $0x243185be243185be
   118DATA  ·kcon+0x0A8(SB)/8, $0x243185be243185be
   119DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3550c7dc3
   120DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3550c7dc3
   121DATA  ·kcon+0x0C0(SB)/8, $0x72be5d7472be5d74
   122DATA  ·kcon+0x0C8(SB)/8, $0x72be5d7472be5d74
   123DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe80deb1fe
   124DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe80deb1fe
   125DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a79bdc06a7
   126DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a79bdc06a7
   127DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174c19bf174
   128DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174c19bf174
   129DATA  ·kcon+0x100(SB)/8, $0xe49b69c1e49b69c1
   130DATA  ·kcon+0x108(SB)/8, $0xe49b69c1e49b69c1
   131DATA  ·kcon+0x110(SB)/8, $0xefbe4786efbe4786
   132DATA  ·kcon+0x118(SB)/8, $0xefbe4786efbe4786
   133DATA  ·kcon+0x120(SB)/8, $0x0fc19dc60fc19dc6
   134DATA  ·kcon+0x128(SB)/8, $0x0fc19dc60fc19dc6
   135DATA  ·kcon+0x130(SB)/8, $0x240ca1cc240ca1cc
   136DATA  ·kcon+0x138(SB)/8, $0x240ca1cc240ca1cc
   137DATA  ·kcon+0x140(SB)/8, $0x2de92c6f2de92c6f
   138DATA  ·kcon+0x148(SB)/8, $0x2de92c6f2de92c6f
   139DATA  ·kcon+0x150(SB)/8, $0x4a7484aa4a7484aa
   140DATA  ·kcon+0x158(SB)/8, $0x4a7484aa4a7484aa
   141DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dc5cb0a9dc
   142DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dc5cb0a9dc
   143DATA  ·kcon+0x170(SB)/8, $0x76f988da76f988da
   144DATA  ·kcon+0x178(SB)/8, $0x76f988da76f988da
   145DATA  ·kcon+0x180(SB)/8, $0x983e5152983e5152
   146DATA  ·kcon+0x188(SB)/8, $0x983e5152983e5152
   147DATA  ·kcon+0x190(SB)/8, $0xa831c66da831c66d
   148DATA  ·kcon+0x198(SB)/8, $0xa831c66da831c66d
   149DATA  ·kcon+0x1A0(SB)/8, $0xb00327c8b00327c8
   150DATA  ·kcon+0x1A8(SB)/8, $0xb00327c8b00327c8
   151DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7bf597fc7
   152DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7bf597fc7
   153DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf3c6e00bf3
   154DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf3c6e00bf3
   155DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147d5a79147
   156DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147d5a79147
   157DATA  ·kcon+0x1E0(SB)/8, $0x06ca635106ca6351
   158DATA  ·kcon+0x1E8(SB)/8, $0x06ca635106ca6351
   159DATA  ·kcon+0x1F0(SB)/8, $0x1429296714292967
   160DATA  ·kcon+0x1F8(SB)/8, $0x1429296714292967
   161DATA  ·kcon+0x200(SB)/8, $0x27b70a8527b70a85
   162DATA  ·kcon+0x208(SB)/8, $0x27b70a8527b70a85
   163DATA  ·kcon+0x210(SB)/8, $0x2e1b21382e1b2138
   164DATA  ·kcon+0x218(SB)/8, $0x2e1b21382e1b2138
   165DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc4d2c6dfc
   166DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc4d2c6dfc
   167DATA  ·kcon+0x230(SB)/8, $0x53380d1353380d13
   168DATA  ·kcon+0x238(SB)/8, $0x53380d1353380d13
   169DATA  ·kcon+0x240(SB)/8, $0x650a7354650a7354
   170DATA  ·kcon+0x248(SB)/8, $0x650a7354650a7354
   171DATA  ·kcon+0x250(SB)/8, $0x766a0abb766a0abb
   172DATA  ·kcon+0x258(SB)/8, $0x766a0abb766a0abb
   173DATA  ·kcon+0x260(SB)/8, $0x81c2c92e81c2c92e
   174DATA  ·kcon+0x268(SB)/8, $0x81c2c92e81c2c92e
   175DATA  ·kcon+0x270(SB)/8, $0x92722c8592722c85
   176DATA  ·kcon+0x278(SB)/8, $0x92722c8592722c85
   177DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a1a2bfe8a1
   178DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a1a2bfe8a1
   179DATA  ·kcon+0x290(SB)/8, $0xa81a664ba81a664b
   180DATA  ·kcon+0x298(SB)/8, $0xa81a664ba81a664b
   181DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70c24b8b70
   182DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70c24b8b70
   183DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a3c76c51a3
   184DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a3c76c51a3
   185DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d192e819
   186DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d192e819
   187DATA  ·kcon+0x2D0(SB)/8, $0xd6990624d6990624
   188DATA  ·kcon+0x2D8(SB)/8, $0xd6990624d6990624
   189DATA  ·kcon+0x2E0(SB)/8, $0xf40e3585f40e3585
   190DATA  ·kcon+0x2E8(SB)/8, $0xf40e3585f40e3585
   191DATA  ·kcon+0x2F0(SB)/8, $0x106aa070106aa070
   192DATA  ·kcon+0x2F8(SB)/8, $0x106aa070106aa070
   193DATA  ·kcon+0x300(SB)/8, $0x19a4c11619a4c116
   194DATA  ·kcon+0x308(SB)/8, $0x19a4c11619a4c116
   195DATA  ·kcon+0x310(SB)/8, $0x1e376c081e376c08
   196DATA  ·kcon+0x318(SB)/8, $0x1e376c081e376c08
   197DATA  ·kcon+0x320(SB)/8, $0x2748774c2748774c
   198DATA  ·kcon+0x328(SB)/8, $0x2748774c2748774c
   199DATA  ·kcon+0x330(SB)/8, $0x34b0bcb534b0bcb5
   200DATA  ·kcon+0x338(SB)/8, $0x34b0bcb534b0bcb5
   201DATA  ·kcon+0x340(SB)/8, $0x391c0cb3391c0cb3
   202DATA  ·kcon+0x348(SB)/8, $0x391c0cb3391c0cb3
   203DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4a4ed8aa4a
   204DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4a4ed8aa4a
   205DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f5b9cca4f
   206DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f5b9cca4f
   207DATA  ·kcon+0x370(SB)/8, $0x682e6ff3682e6ff3
   208DATA  ·kcon+0x378(SB)/8, $0x682e6ff3682e6ff3
   209DATA  ·kcon+0x380(SB)/8, $0x748f82ee748f82ee
   210DATA  ·kcon+0x388(SB)/8, $0x748f82ee748f82ee
   211DATA  ·kcon+0x390(SB)/8, $0x78a5636f78a5636f
   212DATA  ·kcon+0x398(SB)/8, $0x78a5636f78a5636f
   213DATA  ·kcon+0x3A0(SB)/8, $0x84c8781484c87814
   214DATA  ·kcon+0x3A8(SB)/8, $0x84c8781484c87814
   215DATA  ·kcon+0x3B0(SB)/8, $0x8cc702088cc70208
   216DATA  ·kcon+0x3B8(SB)/8, $0x8cc702088cc70208
   217DATA  ·kcon+0x3C0(SB)/8, $0x90befffa90befffa
   218DATA  ·kcon+0x3C8(SB)/8, $0x90befffa90befffa
   219DATA  ·kcon+0x3D0(SB)/8, $0xa4506ceba4506ceb
   220DATA  ·kcon+0x3D8(SB)/8, $0xa4506ceba4506ceb
   221DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7bef9a3f7
   222DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7bef9a3f7
   223DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2c67178f2
   224DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2c67178f2
   // All-zero entry after K[63]. The round macros add KI (the constant for
   // the following round) to g and then reload KI, so the last round of a
   // block adds whatever sits at kcon+0x400; zero leaves the value unchanged.
   225DATA  ·kcon+0x400(SB)/8, $0x0000000000000000
   226DATA  ·kcon+0x408(SB)/8, $0x0000000000000000
   227
   // Three 16-byte VPERM control vectors at kcon+0x410, +0x420 and +0x430,
   // with separate encodings for ppc64le and ppc64. ·block ends up with them
   // in KI (loaded by the final round's LVX), V8 and V9, and uses them to
   // merge V0-V3 into V0 and V4-V7 into V4 before storing the state.
   228#ifdef GOARCH_ppc64le
   229DATA  ·kcon+0x410(SB)/8, $0x1011121310111213 // permutation control vectors
   230DATA  ·kcon+0x418(SB)/8, $0x1011121300010203
   231DATA  ·kcon+0x420(SB)/8, $0x1011121310111213
   232DATA  ·kcon+0x428(SB)/8, $0x0405060700010203
   233DATA  ·kcon+0x430(SB)/8, $0x1011121308090a0b
   234DATA  ·kcon+0x438(SB)/8, $0x0405060700010203
   235#else
   236DATA  ·kcon+0x410(SB)/8, $0x1011121300010203
   237DATA  ·kcon+0x418(SB)/8, $0x1011121310111213 // permutation control vectors
   238DATA  ·kcon+0x420(SB)/8, $0x0405060700010203
   239DATA  ·kcon+0x428(SB)/8, $0x1011121310111213
   240DATA  ·kcon+0x430(SB)/8, $0x0001020304050607
   241DATA  ·kcon+0x438(SB)/8, $0x08090a0b10111213
   242#endif
   243
   // 1088 = 0x440 bytes: 0x400 of round constants, 0x10 of zeros and 0x30 of
   // permutation control vectors.
   244GLOBL ·kcon(SB), RODATA, $1088
   245
   // SHA256ROUND0 performs one round whose message word is used as loaded
   // (no schedule update); ·block uses it for the first 15 rounds of a block.
   //
   //   a..h  vector registers currently holding the working variables
   //   xi    vector holding Wt for this round
   //   idx   index register; (TBL)(idx) is loaded into KI near the end
   //
   // On entry h must already include Kt, and KI holds the constant for the
   // following round. In terms of the algorithm in the file header:
   //
   //   FUNC = Ch(e,f,g)    (VSEL g, f, e)
   //   S1   = BIGSIGMA1(e), S0 = BIGSIGMA0(a)
   //   h   += xi + FUNC + S1            -> T1
   //   FUNC = Maj(a,b,c)   (VXOR b, a then VSEL b, c)
   //   g   += KI                        -> pre-adds K for the next round
   //   d   += h                         -> new e
   //   h   += S0 + FUNC                 -> new a = T1 + T2
   //
   // No values are moved between registers; callers rotate the a..h
   // arguments by one position on each successive invocation, so this
   // round's g is passed as h to the next one.
   // Writes FUNC, S0, S1 and KI in addition to d, g and h.
   246#define SHA256ROUND0(a, b, c, d, e, f, g, h, xi, idx) \
   247	VSEL		g, f, e, FUNC; \
   248	VSHASIGMAW	$15, e, $1, S1; \
   249	VADDUWM		xi, h, h; \
   250	VSHASIGMAW	$0, a, $1, S0; \
   251	VADDUWM		FUNC, h, h; \
   252	VXOR		b, a, FUNC; \
   253	VADDUWM		S1, h, h; \
   254	VSEL		b, c, FUNC, FUNC; \
   255	VADDUWM		KI, g, g; \
   256	VADDUWM		h, d, d; \
   257	VADDUWM		FUNC, S0, S0; \
   258	LVX		(TBL)(idx), KI; \
   259	VADDUWM		S0, h, h
   260
   // SHA256ROUND1 performs the same round computation as SHA256ROUND0 on
   // a..h with message word xi, interleaved with one step of the message
   // schedule that updates xj in place:
   //
   //   s0  = SIGMA0(xj_1)      (VSHASIGMAW $0,  xj_1,  $0)
   //   s1  = SIGMA1(xj_14)     (VSHASIGMAW $15, xj_14, $0)
   //   xj += xj_9 + s0 + s1
   //
   // With xj holding W[j], and xj_1, xj_9, xj_14 holding W[j+1], W[j+9] and
   // W[j+14], xj becomes W[j+16] as defined in the file header.
   //
   // As in SHA256ROUND0, h must already include Kt on entry, KI is added to
   // g for the following round, and KI is then reloaded from (TBL)(idx).
   // Writes FUNC, S0, S1, s0, s1 and KI in addition to d, g, h and xj.
   261#define SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14, idx) \
   262	VSHASIGMAW	$0, xj_1, $0, s0; \
   263	VSEL		g, f, e, FUNC; \
   264	VSHASIGMAW	$15, e, $1, S1; \
   265	VADDUWM		xi, h, h; \
   266	VSHASIGMAW	$0, a, $1, S0; \
   267	VSHASIGMAW	$15, xj_14, $0, s1; \
   268	VADDUWM		FUNC, h, h; \
   269	VXOR		b, a, FUNC; \
   270	VADDUWM		xj_9, xj, xj; \
   271	VADDUWM		S1, h, h; \
   272	VSEL		b, c, FUNC, FUNC; \
   273	VADDUWM		KI, g, g; \
   274	VADDUWM		h, d, d; \
   275	VADDUWM		FUNC, S0, S0; \
   276	VADDUWM		s0, xj, xj; \
   277	LVX		(TBL)(idx), KI; \
   278	VADDUWM		S0, h, h; \
   279	VADDUWM		s1, xj, xj
   280
   // VPERMLE(va, vb, vc, vt) is VPERM on ppc64le and expands to nothing on
   // ppc64. ·block applies it, with LEMASK as the control vector, to each
   // message vector right after it is loaded, so that permute only happens
   // on little endian.
   281#ifdef GOARCH_ppc64le
   282#define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
   283#else
   284#define VPERMLE(va,vb,vc,vt)
   285#endif
   286
   287// func block(dig *digest, p []byte)
   //
   // block runs the SHA-256 rounds over every complete 64-byte chunk of p.
   // The 32 bytes of state at dig are loaded as two vectors, spread across
   // V0-V7 for the rounds, and merged back into two vectors that are stored
   // to dig after the last chunk. p_len is rounded down to a multiple of 64;
   // if nothing is left the function returns without loading or storing dig.
   288TEXT ·block(SB),0,$0-32
   289	MOVD	dig+0(FP), CTX
   290	MOVD	p_base+8(FP), INP
   291	MOVD	p_len+16(FP), LEN
   292
   293	SRD	$6, LEN // LEN = (LEN >> 6) << 6: drop any partial trailing block
   294	SLD	$6, LEN
   295	ADD	INP, LEN, END // END = INP + LEN
   296
   297	CMP	INP, END
   298	BEQ	end // no complete block to process
   299
   300	MOVD	$·kcon(SB), TBL_STRT
   301	MOVD	$0x10, R_x010
   302
   303#ifdef GOARCH_ppc64le
   	// Build LEMASK, the control vector VPERMLE applies to each loaded
   	// message vector. KI is only a temporary here; it is reloaded at loop.
   304	MOVWZ	$8, TEMP
   305	LVSL	(TEMP)(R0), LEMASK
   306	VSPLTISB	$0x0F, KI
   307	VXOR	KI, LEMASK, LEMASK
   308#endif
   309
   	// V0 <- 16 bytes at dig+0x00, V4 <- 16 bytes at dig+0x10.
   310	LXVW4X	(CTX)(R_x000), V0
   311	LXVW4X	(CTX)(R_x010), V4
   312
   313	// unpack the input values into vector registers
   	// V1-V3 are V0 rotated by 4, 8 and 12 bytes; V5-V7 likewise from V4.
   314	VSLDOI	$4, V0, V0, V1
   315	VSLDOI	$8, V0, V0, V2
   316	VSLDOI	$12, V0, V0, V3
   317	VSLDOI	$4, V4, V4, V5
   318	VSLDOI	$8, V4, V4, V6
   319	VSLDOI	$12, V4, V4, V7
   320
   	// Load the remaining index registers with their byte offsets.
   321	MOVD	$0x020, R_x020
   322	MOVD	$0x030, R_x030
   323	MOVD	$0x040, R_x040
   324	MOVD	$0x050, R_x050
   325	MOVD	$0x060, R_x060
   326	MOVD	$0x070, R_x070
   327	MOVD	$0x080, R_x080
   328	MOVD	$0x090, R_x090
   329	MOVD	$0x0a0, R_x0a0
   330	MOVD	$0x0b0, R_x0b0
   331	MOVD	$0x0c0, R_x0c0
   332	MOVD	$0x0d0, R_x0d0
   333	MOVD	$0x0e0, R_x0e0
   334	MOVD	$0x0f0, R_x0f0
   335	MOVD	$0x100, R_x100
   336	MOVD	$0x110, R_x110
   337
   	// One iteration of loop processes one 64-byte block.
   338loop:
   339	MOVD	TBL_STRT, TBL // rewind to the start of kcon
   340	LVX	(TBL)(R_x000), KI // KI = entry at kcon+0x000 (K[0])
   341
   342	LXVD2X	(INP)(R_x000), V8 // load v8 in advance
   343
   344	// Offload to VSR24-31 (aka FPR24-31)
   	// These copies of the incoming state are added back after round 63.
   345	XXLOR	V0, V0, VS24
   346	XXLOR	V1, V1, VS25
   347	XXLOR	V2, V2, VS26
   348	XXLOR	V3, V3, VS27
   349	XXLOR	V4, V4, VS28
   350	XXLOR	V5, V5, VS29
   351	XXLOR	V6, V6, VS30
   352	XXLOR	V7, V7, VS31
   353
   354	VADDUWM	KI, V7, V7        // h+K[i]
   355	LVX	(TBL)(R_x010), KI // KI = entry at kcon+0x010 (K[1]), one round ahead
   356
   	// Rounds 0-14. Each 16-byte load provides four message words; the
   	// VSLDOI $4 steps make copies rotated by 4 bytes each, so that each of
   	// V8-V23 serves one round. The a..h arguments rotate by one per round.
   357	VPERMLE(V8, V8, LEMASK, V8)
   358	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8, R_x020)
   359	VSLDOI	$4, V8, V8, V9
   360	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9, R_x030)
   361	VSLDOI	$4, V9, V9, V10
   362	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10, R_x040)
   363	LXVD2X	(INP)(R_x010), V12 // load v12 in advance
   364	VSLDOI	$4, V10, V10, V11
   365	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11, R_x050)
   366	VPERMLE(V12, V12, LEMASK, V12)
   367	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12, R_x060)
   368	VSLDOI	$4, V12, V12, V13
   369	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13, R_x070)
   370	VSLDOI	$4, V13, V13, V14
   371	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14, R_x080)
   372	LXVD2X	(INP)(R_x020), V16 // load v16 in advance
   373	VSLDOI	$4, V14, V14, V15
   374	SHA256ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15, R_x090)
   375	VPERMLE(V16, V16, LEMASK, V16)
   376	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16, R_x0a0)
   377	VSLDOI	$4, V16, V16, V17
   378	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17, R_x0b0)
   379	VSLDOI	$4, V17, V17, V18
   380	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18, R_x0c0)
   381	VSLDOI	$4, V18, V18, V19
   382	LXVD2X	(INP)(R_x030), V20 // load v20 in advance
   383	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19, R_x0d0)
   384	VPERMLE(V20, V20, LEMASK, V20)
   385	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20, R_x0e0)
   386	VSLDOI	$4, V20, V20, V21
   387	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21, R_x0f0)
   388	VSLDOI	$4, V21, V21, V22
   389	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22, R_x100)
   390	VSLDOI	$4, V22, V22, V23
   	// Round 15 consumes V23 and starts the message schedule by updating V8.
   391	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x110)
   392
   	// CTR = 3: three passes of the 16-round body below cover rounds 16-63.
   393	MOVD	$3, TEMP
   394	MOVD	TEMP, CTR
   395	ADD	$0x120, TBL // KI holds the kcon+0x110 entry; the next load is from kcon+0x120
   396	ADD	$0x40, INP // all four 16-byte loads of this block have been issued
   397
   398L16_xx:
   399	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23, R_x000)
   400	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8, R_x010)
   401	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9, R_x020)
   402	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10, R_x030)
   403	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11, R_x040)
   404	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12, R_x050)
   405	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13, R_x060)
   406	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14, R_x070)
   407	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15, R_x080)
   408	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16, R_x090)
   409	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17, R_x0a0)
   410	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18, R_x0b0)
   411	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19, R_x0c0)
   412	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20, R_x0d0)
   413	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21, R_x0e0)
   414	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x0f0)
   415	ADD	$0x100, TBL // step past the 16 entries loaded in this pass
   416
   417	BDNZ	L16_xx
   418
   	// Add the state saved in VS24-VS31 at the top of the loop back into the
   	// working variables; V10-V17 are free to use as temporaries here.
   419	XXLOR	VS24, VS24, V10
   420
   421	XXLOR	VS25, VS25, V11
   422	VADDUWM	V10, V0, V0
   423	XXLOR	VS26, VS26, V12
   424	VADDUWM	V11, V1, V1
   425	XXLOR	VS27, VS27, V13
   426	VADDUWM	V12, V2, V2
   427	XXLOR	VS28, VS28, V14
   428	VADDUWM	V13, V3, V3
   429	XXLOR	VS29, VS29, V15
   430	VADDUWM	V14, V4, V4
   431	XXLOR	VS30, VS30, V16
   432	VADDUWM	V15, V5, V5
   433	XXLOR	VS31, VS31, V17
   434	VADDUWM	V16, V6, V6
   435	VADDUWM	V17, V7, V7
   436
   437	CMPU	INP, END
   438	BLT	loop // more whole blocks remain
   439
   	// After the last pass TBL is kcon+0x420 and the final round loaded KI
   	// from kcon+0x410, so KI, V8 and V9 hold the three permutation control
   	// vectors. Merge V0-V3 into V0 and V4-V7 into V4, then store both
   	// vectors back to dig.
   440	LVX	(TBL)(R_x000), V8
   441	VPERM	V0, V1, KI, V0
   442	LVX	(TBL)(R_x010), V9
   443	VPERM	V4, V5, KI, V4
   444	VPERM	V0, V2, V8, V0
   445	VPERM	V4, V6, V8, V4
   446	VPERM	V0, V3, V9, V0
   447	VPERM	V4, V7, V9, V4
   448	STXVD2X	V0, (CTX+R_x000)
   449	STXVD2X	V4, (CTX+R_x010)
   450
   451end:
   452	RET
   453

View as plain text