...

Text file src/cmd/internal/notsha256/sha256block_ppc64x.s

Documentation: cmd/internal/notsha256

     1// Copyright 2016 The Go Authors. All rights reserved.
     2// Use of this source code is governed by a BSD-style
     3// license that can be found in the LICENSE file.
     4
     5//
     6// WARNING: this file is built by the bootstrap compiler, thus
     7// it must maintain compatibility with the oldest supported
     8// bootstrap toolchain.
     9//
    10
    11//go:build !purego && (ppc64 || ppc64le)
    12
    13// Based on CRYPTOGAMS code with the following comment:
    14// # ====================================================================
    15// # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
    16// # project. The module is, however, dual licensed under OpenSSL and
    17// # CRYPTOGAMS licenses depending on where you obtain it. For further
    18// # details see http://www.openssl.org/~appro/cryptogams/.
    19// # ====================================================================
    20
    21#include "textflag.h"
    22
    23// SHA256 block routine. See sha256block.go for Go equivalent.
    24//
    25// The algorithm is detailed in FIPS 180-4:
    26//
    27//  https://csrc.nist.gov/publications/fips/fips180-4/fips-180-4.pdf
    28//
    29// Wt = Mt; for 0 <= t <= 15
    30// Wt = SIGMA1(Wt-2) + Wt-7 + SIGMA0(Wt-15) + Wt-16; for 16 <= t <= 63
    31//
    32// a = H0
    33// b = H1
    34// c = H2
    35// d = H3
    36// e = H4
    37// f = H5
    38// g = H6
    39// h = H7
    40//
    41// for t = 0 to 63 {
    42//    T1 = h + BIGSIGMA1(e) + Ch(e,f,g) + Kt + Wt
    43//    T2 = BIGSIGMA0(a) + Maj(a,b,c)
    44//    h = g
    45//    g = f
    46//    f = e
    47//    e = d + T1
    48//    d = c
    49//    c = b
    50//    b = a
    51//    a = T1 + T2
    52// }
    53//
    54// H0 = a + H0
    55// H1 = b + H1
    56// H2 = c + H2
    57// H3 = d + H3
    58// H4 = e + H4
    59// H5 = f + H5
    60// H6 = g + H6
    61// H7 = h + H7
    62
    63#define CTX	R3 // dig: pointer to the digest state, loaded from dig+0(FP)
    64#define INP	R4 // p_base: current input pointer, advanced by 0x40 per block
    65#define END	R5 // INP + LEN: end of the whole 64-byte blocks of p
    66#define TBL	R6 // Pointer into kcon table
    67#define LEN	R9 // p_len with the low 6 bits cleared (multiple of 64)
    68#define TEMP	R12 // scratch: LVSL offset on little endian, then the CTR loop count
    69
    70#define TBL_STRT	R7 // Pointer to start of kcon table.
    71// R_xNNN hold the constant byte offset 0xNNN and serve as the index register of indexed loads/stores.
    72#define R_x000	R0 // never written in this file; relied on to hold 0 (NOTE(review): presumably the Go ppc64 R0-is-zero convention - confirm)
    73#define R_x010	R8
    74#define R_x020	R10
    75#define R_x030	R11
    76#define R_x040	R14
    77#define R_x050	R15
    78#define R_x060	R16
    79#define R_x070	R17
    80#define R_x080	R18
    81#define R_x090	R19
    82#define R_x0a0	R20
    83#define R_x0b0	R21
    84#define R_x0c0	R22
    85#define R_x0d0	R23
    86#define R_x0e0	R24
    87#define R_x0f0	R25
    88#define R_x100	R26
    89#define R_x110	R27
    90
    91
    92// V0-V7 are A-H (passed, rotated by one each round, as a-h of the round macros)
    93// V8-V23 are used for the message schedule
    94#define KI	V24 // current 16-byte row of kcon, loaded with LVX from TBL
    95#define FUNC	V25 // scratch for the VSEL results (the Ch and Maj terms of a round)
    96#define S0	V26 // VSHASIGMAW result for a (the BIGSIGMA0 term)
    97#define S1	V27 // VSHASIGMAW result for e (the BIGSIGMA1 term)
    98#define s0	V28 // VSHASIGMAW result for the schedule word xj_1 (SHA256ROUND1 only)
    99#define s1	V29 // VSHASIGMAW result for the schedule word xj_14 (SHA256ROUND1 only)
   100#define LEMASK	V31 // Permutation control register for little endian
   101
   102// 4 copies of each Kt, to fill all 4 words of a vector register
      // (each 16-byte row is two 8-byte DATA words, each holding the 32-bit Kt twice).
      //
      // Layout of kcon:
      //   0x000-0x3ff  K0..K63, one 16-byte row per round
      //   0x400-0x40f  an all-zero row
      //   0x410-0x43f  three permutation control vectors (contents depend on endianness)
      // which is 0x440 = 1088 bytes in total, the size given to GLOBL below.
   103DATA  ·kcon+0x000(SB)/8, $0x428a2f98428a2f98
   104DATA  ·kcon+0x008(SB)/8, $0x428a2f98428a2f98
   105DATA  ·kcon+0x010(SB)/8, $0x7137449171374491
   106DATA  ·kcon+0x018(SB)/8, $0x7137449171374491
   107DATA  ·kcon+0x020(SB)/8, $0xb5c0fbcfb5c0fbcf
   108DATA  ·kcon+0x028(SB)/8, $0xb5c0fbcfb5c0fbcf
   109DATA  ·kcon+0x030(SB)/8, $0xe9b5dba5e9b5dba5
   110DATA  ·kcon+0x038(SB)/8, $0xe9b5dba5e9b5dba5
   111DATA  ·kcon+0x040(SB)/8, $0x3956c25b3956c25b
   112DATA  ·kcon+0x048(SB)/8, $0x3956c25b3956c25b
   113DATA  ·kcon+0x050(SB)/8, $0x59f111f159f111f1
   114DATA  ·kcon+0x058(SB)/8, $0x59f111f159f111f1
   115DATA  ·kcon+0x060(SB)/8, $0x923f82a4923f82a4
   116DATA  ·kcon+0x068(SB)/8, $0x923f82a4923f82a4
   117DATA  ·kcon+0x070(SB)/8, $0xab1c5ed5ab1c5ed5
   118DATA  ·kcon+0x078(SB)/8, $0xab1c5ed5ab1c5ed5
   119DATA  ·kcon+0x080(SB)/8, $0xd807aa98d807aa98
   120DATA  ·kcon+0x088(SB)/8, $0xd807aa98d807aa98
   121DATA  ·kcon+0x090(SB)/8, $0x12835b0112835b01
   122DATA  ·kcon+0x098(SB)/8, $0x12835b0112835b01
   123DATA  ·kcon+0x0A0(SB)/8, $0x243185be243185be
   124DATA  ·kcon+0x0A8(SB)/8, $0x243185be243185be
   125DATA  ·kcon+0x0B0(SB)/8, $0x550c7dc3550c7dc3
   126DATA  ·kcon+0x0B8(SB)/8, $0x550c7dc3550c7dc3
   127DATA  ·kcon+0x0C0(SB)/8, $0x72be5d7472be5d74
   128DATA  ·kcon+0x0C8(SB)/8, $0x72be5d7472be5d74
   129DATA  ·kcon+0x0D0(SB)/8, $0x80deb1fe80deb1fe
   130DATA  ·kcon+0x0D8(SB)/8, $0x80deb1fe80deb1fe
   131DATA  ·kcon+0x0E0(SB)/8, $0x9bdc06a79bdc06a7
   132DATA  ·kcon+0x0E8(SB)/8, $0x9bdc06a79bdc06a7
   133DATA  ·kcon+0x0F0(SB)/8, $0xc19bf174c19bf174
   134DATA  ·kcon+0x0F8(SB)/8, $0xc19bf174c19bf174
   135DATA  ·kcon+0x100(SB)/8, $0xe49b69c1e49b69c1
   136DATA  ·kcon+0x108(SB)/8, $0xe49b69c1e49b69c1
   137DATA  ·kcon+0x110(SB)/8, $0xefbe4786efbe4786
   138DATA  ·kcon+0x118(SB)/8, $0xefbe4786efbe4786
   139DATA  ·kcon+0x120(SB)/8, $0x0fc19dc60fc19dc6
   140DATA  ·kcon+0x128(SB)/8, $0x0fc19dc60fc19dc6
   141DATA  ·kcon+0x130(SB)/8, $0x240ca1cc240ca1cc
   142DATA  ·kcon+0x138(SB)/8, $0x240ca1cc240ca1cc
   143DATA  ·kcon+0x140(SB)/8, $0x2de92c6f2de92c6f
   144DATA  ·kcon+0x148(SB)/8, $0x2de92c6f2de92c6f
   145DATA  ·kcon+0x150(SB)/8, $0x4a7484aa4a7484aa
   146DATA  ·kcon+0x158(SB)/8, $0x4a7484aa4a7484aa
   147DATA  ·kcon+0x160(SB)/8, $0x5cb0a9dc5cb0a9dc
   148DATA  ·kcon+0x168(SB)/8, $0x5cb0a9dc5cb0a9dc
   149DATA  ·kcon+0x170(SB)/8, $0x76f988da76f988da
   150DATA  ·kcon+0x178(SB)/8, $0x76f988da76f988da
   151DATA  ·kcon+0x180(SB)/8, $0x983e5152983e5152
   152DATA  ·kcon+0x188(SB)/8, $0x983e5152983e5152
   153DATA  ·kcon+0x190(SB)/8, $0xa831c66da831c66d
   154DATA  ·kcon+0x198(SB)/8, $0xa831c66da831c66d
   155DATA  ·kcon+0x1A0(SB)/8, $0xb00327c8b00327c8
   156DATA  ·kcon+0x1A8(SB)/8, $0xb00327c8b00327c8
   157DATA  ·kcon+0x1B0(SB)/8, $0xbf597fc7bf597fc7
   158DATA  ·kcon+0x1B8(SB)/8, $0xbf597fc7bf597fc7
   159DATA  ·kcon+0x1C0(SB)/8, $0xc6e00bf3c6e00bf3
   160DATA  ·kcon+0x1C8(SB)/8, $0xc6e00bf3c6e00bf3
   161DATA  ·kcon+0x1D0(SB)/8, $0xd5a79147d5a79147
   162DATA  ·kcon+0x1D8(SB)/8, $0xd5a79147d5a79147
   163DATA  ·kcon+0x1E0(SB)/8, $0x06ca635106ca6351
   164DATA  ·kcon+0x1E8(SB)/8, $0x06ca635106ca6351
   165DATA  ·kcon+0x1F0(SB)/8, $0x1429296714292967
   166DATA  ·kcon+0x1F8(SB)/8, $0x1429296714292967
   167DATA  ·kcon+0x200(SB)/8, $0x27b70a8527b70a85
   168DATA  ·kcon+0x208(SB)/8, $0x27b70a8527b70a85
   169DATA  ·kcon+0x210(SB)/8, $0x2e1b21382e1b2138
   170DATA  ·kcon+0x218(SB)/8, $0x2e1b21382e1b2138
   171DATA  ·kcon+0x220(SB)/8, $0x4d2c6dfc4d2c6dfc
   172DATA  ·kcon+0x228(SB)/8, $0x4d2c6dfc4d2c6dfc
   173DATA  ·kcon+0x230(SB)/8, $0x53380d1353380d13
   174DATA  ·kcon+0x238(SB)/8, $0x53380d1353380d13
   175DATA  ·kcon+0x240(SB)/8, $0x650a7354650a7354
   176DATA  ·kcon+0x248(SB)/8, $0x650a7354650a7354
   177DATA  ·kcon+0x250(SB)/8, $0x766a0abb766a0abb
   178DATA  ·kcon+0x258(SB)/8, $0x766a0abb766a0abb
   179DATA  ·kcon+0x260(SB)/8, $0x81c2c92e81c2c92e
   180DATA  ·kcon+0x268(SB)/8, $0x81c2c92e81c2c92e
   181DATA  ·kcon+0x270(SB)/8, $0x92722c8592722c85
   182DATA  ·kcon+0x278(SB)/8, $0x92722c8592722c85
   183DATA  ·kcon+0x280(SB)/8, $0xa2bfe8a1a2bfe8a1
   184DATA  ·kcon+0x288(SB)/8, $0xa2bfe8a1a2bfe8a1
   185DATA  ·kcon+0x290(SB)/8, $0xa81a664ba81a664b
   186DATA  ·kcon+0x298(SB)/8, $0xa81a664ba81a664b
   187DATA  ·kcon+0x2A0(SB)/8, $0xc24b8b70c24b8b70
   188DATA  ·kcon+0x2A8(SB)/8, $0xc24b8b70c24b8b70
   189DATA  ·kcon+0x2B0(SB)/8, $0xc76c51a3c76c51a3
   190DATA  ·kcon+0x2B8(SB)/8, $0xc76c51a3c76c51a3
   191DATA  ·kcon+0x2C0(SB)/8, $0xd192e819d192e819
   192DATA  ·kcon+0x2C8(SB)/8, $0xd192e819d192e819
   193DATA  ·kcon+0x2D0(SB)/8, $0xd6990624d6990624
   194DATA  ·kcon+0x2D8(SB)/8, $0xd6990624d6990624
   195DATA  ·kcon+0x2E0(SB)/8, $0xf40e3585f40e3585
   196DATA  ·kcon+0x2E8(SB)/8, $0xf40e3585f40e3585
   197DATA  ·kcon+0x2F0(SB)/8, $0x106aa070106aa070
   198DATA  ·kcon+0x2F8(SB)/8, $0x106aa070106aa070
   199DATA  ·kcon+0x300(SB)/8, $0x19a4c11619a4c116
   200DATA  ·kcon+0x308(SB)/8, $0x19a4c11619a4c116
   201DATA  ·kcon+0x310(SB)/8, $0x1e376c081e376c08
   202DATA  ·kcon+0x318(SB)/8, $0x1e376c081e376c08
   203DATA  ·kcon+0x320(SB)/8, $0x2748774c2748774c
   204DATA  ·kcon+0x328(SB)/8, $0x2748774c2748774c
   205DATA  ·kcon+0x330(SB)/8, $0x34b0bcb534b0bcb5
   206DATA  ·kcon+0x338(SB)/8, $0x34b0bcb534b0bcb5
   207DATA  ·kcon+0x340(SB)/8, $0x391c0cb3391c0cb3
   208DATA  ·kcon+0x348(SB)/8, $0x391c0cb3391c0cb3
   209DATA  ·kcon+0x350(SB)/8, $0x4ed8aa4a4ed8aa4a
   210DATA  ·kcon+0x358(SB)/8, $0x4ed8aa4a4ed8aa4a
   211DATA  ·kcon+0x360(SB)/8, $0x5b9cca4f5b9cca4f
   212DATA  ·kcon+0x368(SB)/8, $0x5b9cca4f5b9cca4f
   213DATA  ·kcon+0x370(SB)/8, $0x682e6ff3682e6ff3
   214DATA  ·kcon+0x378(SB)/8, $0x682e6ff3682e6ff3
   215DATA  ·kcon+0x380(SB)/8, $0x748f82ee748f82ee
   216DATA  ·kcon+0x388(SB)/8, $0x748f82ee748f82ee
   217DATA  ·kcon+0x390(SB)/8, $0x78a5636f78a5636f
   218DATA  ·kcon+0x398(SB)/8, $0x78a5636f78a5636f
   219DATA  ·kcon+0x3A0(SB)/8, $0x84c8781484c87814
   220DATA  ·kcon+0x3A8(SB)/8, $0x84c8781484c87814
   221DATA  ·kcon+0x3B0(SB)/8, $0x8cc702088cc70208
   222DATA  ·kcon+0x3B8(SB)/8, $0x8cc702088cc70208
   223DATA  ·kcon+0x3C0(SB)/8, $0x90befffa90befffa
   224DATA  ·kcon+0x3C8(SB)/8, $0x90befffa90befffa
   225DATA  ·kcon+0x3D0(SB)/8, $0xa4506ceba4506ceb
   226DATA  ·kcon+0x3D8(SB)/8, $0xa4506ceba4506ceb
   227DATA  ·kcon+0x3E0(SB)/8, $0xbef9a3f7bef9a3f7
   228DATA  ·kcon+0x3E8(SB)/8, $0xbef9a3f7bef9a3f7
   229DATA  ·kcon+0x3F0(SB)/8, $0xc67178f2c67178f2
   230DATA  ·kcon+0x3F8(SB)/8, $0xc67178f2c67178f2
   231DATA  ·kcon+0x400(SB)/8, $0x0000000000000000 // zero row: the round macros pre-add the row after the current K,
   232DATA  ·kcon+0x408(SB)/8, $0x0000000000000000 // so the row following K63 is added by the last round and must be 0
   233// The three rows below are the VPERM controls used by block to repack the state before storing it.
   234#ifdef GOARCH_ppc64le
   235DATA  ·kcon+0x410(SB)/8, $0x1011121310111213 // permutation control vectors
   236DATA  ·kcon+0x418(SB)/8, $0x1011121300010203
   237DATA  ·kcon+0x420(SB)/8, $0x1011121310111213
   238DATA  ·kcon+0x428(SB)/8, $0x0405060700010203
   239DATA  ·kcon+0x430(SB)/8, $0x1011121308090a0b
   240DATA  ·kcon+0x438(SB)/8, $0x0405060700010203
   241#else
   242DATA  ·kcon+0x410(SB)/8, $0x1011121300010203
   243DATA  ·kcon+0x418(SB)/8, $0x1011121310111213 // permutation control vectors
   244DATA  ·kcon+0x420(SB)/8, $0x0405060700010203
   245DATA  ·kcon+0x428(SB)/8, $0x1011121310111213
   246DATA  ·kcon+0x430(SB)/8, $0x0001020304050607
   247DATA  ·kcon+0x438(SB)/8, $0x08090a0b10111213
   248#endif
   249// 1088 = 0x440: 64 K rows + 1 zero row + 3 permutation rows, 16 bytes each.
   250GLOBL ·kcon(SB), RODATA, $1088
   251// SHA256ROUND0(a, b, c, d, e, f, g, h, xi, idx) performs one round on the
      // working variables a-h with message word xi, without touching the schedule.
      // The caller must already have added this round's Kt to h. In formula terms:
      //   h += xi + Ch(e,f,g) + BIGSIGMA1(e)      (h now holds T1)
      //   d += h                                  (the new e)
      //   h += BIGSIGMA0(a) + Maj(a,b,c)          (the new a)
      // It also adds KI to g (which callers pass as h of the following round) and
      // then reloads KI from TBL+idx. Clobbers FUNC, S0, S1 and KI.
      // No comments are placed inside the body because of the line continuations.
   252#define SHA256ROUND0(a, b, c, d, e, f, g, h, xi, idx) \
   253	VSEL		g, f, e, FUNC; \
   254	VSHASIGMAW	$15, e, $1, S1; \
   255	VADDUWM		xi, h, h; \
   256	VSHASIGMAW	$0, a, $1, S0; \
   257	VADDUWM		FUNC, h, h; \
   258	VXOR		b, a, FUNC; \
   259	VADDUWM		S1, h, h; \
   260	VSEL		b, c, FUNC, FUNC; \
   261	VADDUWM		KI, g, g; \
   262	VADDUWM		h, d, d; \
   263	VADDUWM		FUNC, S0, S0; \
   264	LVX		(TBL)(idx), KI; \
   265	VADDUWM		S0, h, h
   266// SHA256ROUND1 performs the same round computation as SHA256ROUND0 on a-h
      // with message word xi, and interleaves one message-schedule step with it:
      //   xj += xj_9 + s0 + s1
      // where s0 and s1 are the VSHASIGMAW results for xj_1 and xj_14, so xj is
      // overwritten in place with a later schedule word.
      // As in SHA256ROUND0, KI is added to g and then reloaded from TBL+idx.
      // Clobbers FUNC, S0, S1, s0, s1 and KI.
   267#define SHA256ROUND1(a, b, c, d, e, f, g, h, xi, xj, xj_1, xj_9, xj_14, idx) \
   268	VSHASIGMAW	$0, xj_1, $0, s0; \
   269	VSEL		g, f, e, FUNC; \
   270	VSHASIGMAW	$15, e, $1, S1; \
   271	VADDUWM		xi, h, h; \
   272	VSHASIGMAW	$0, a, $1, S0; \
   273	VSHASIGMAW	$15, xj_14, $0, s1; \
   274	VADDUWM		FUNC, h, h; \
   275	VXOR		b, a, FUNC; \
   276	VADDUWM		xj_9, xj, xj; \
   277	VADDUWM		S1, h, h; \
   278	VSEL		b, c, FUNC, FUNC; \
   279	VADDUWM		KI, g, g; \
   280	VADDUWM		h, d, d; \
   281	VADDUWM		FUNC, S0, S0; \
   282	VADDUWM		s0, xj, xj; \
   283	LVX		(TBL)(idx), KI; \
   284	VADDUWM		S0, h, h; \
   285	VADDUWM		s1, xj, xj
   286// VPERMLE is a VPERM that exists only on little endian: on ppc64le it expands to VPERM, otherwise to nothing.
   287#ifdef GOARCH_ppc64le
   288#define VPERMLE(va,vb,vc,vt) VPERM va, vb, vc, vt
   289#else
   290#define VPERMLE(va,vb,vc,vt)
   291#endif
   292
   293// func block(dig *digest, p []byte)
      //
      // Hashes every complete 64-byte block of p into the state held in the first
      // 32 bytes at dig (eight 32-bit words, loaded and stored as two 16-byte
      // vectors). Bytes of p beyond the last multiple of 64 are ignored; if p holds
      // no complete block the function returns without reading or writing dig.
   294TEXT ·block(SB),0,$0-32
   295	MOVD	dig+0(FP), CTX
   296	MOVD	p_base+8(FP), INP
   297	MOVD	p_len+16(FP), LEN
   298	// Clear the low 6 bits of LEN, i.e. round down to whole 64-byte blocks.
   299	SRD	$6, LEN
   300	SLD	$6, LEN
   301	ADD	INP, LEN, END
   302	// Nothing to do when there is no complete block.
   303	CMP	INP, END
   304	BEQ	end
   305
   306	MOVD	$·kcon(SB), TBL_STRT
   307	MOVD	$0x10, R_x010
   308	// Little endian only: LEMASK = LVSL at offset 8 with every byte XORed with 0x0F; VPERMLE applies it to each LXVD2X message load.
   309#ifdef GOARCH_ppc64le
   310	MOVWZ	$8, TEMP
   311	LVSL	(TEMP)(R0), LEMASK
   312	VSPLTISB	$0x0F, KI
   313	VXOR	KI, LEMASK, LEMASK
   314#endif
   315	// Load the state: V0 = 16 bytes at CTX+0, V4 = 16 bytes at CTX+0x10.
   316	LXVW4X	(CTX)(R_x000), V0
   317	LXVW4X	(CTX)(R_x010), V4
   318
   319	// unpack the input values into vector registers
   320	VSLDOI	$4, V0, V0, V1 // V1 = V0 rotated left by 4 bytes
   321	VSLDOI	$8, V0, V0, V2 // V2 = V0 rotated left by 8 bytes
   322	VSLDOI	$12, V0, V0, V3 // V3 = V0 rotated left by 12 bytes
   323	VSLDOI	$4, V4, V4, V5 // likewise V5-V7 from V4
   324	VSLDOI	$8, V4, V4, V6
   325	VSLDOI	$12, V4, V4, V7
   326	// Constant byte offsets, kept in registers for use as the index of indexed loads.
   327	MOVD	$0x020, R_x020
   328	MOVD	$0x030, R_x030
   329	MOVD	$0x040, R_x040
   330	MOVD	$0x050, R_x050
   331	MOVD	$0x060, R_x060
   332	MOVD	$0x070, R_x070
   333	MOVD	$0x080, R_x080
   334	MOVD	$0x090, R_x090
   335	MOVD	$0x0a0, R_x0a0
   336	MOVD	$0x0b0, R_x0b0
   337	MOVD	$0x0c0, R_x0c0
   338	MOVD	$0x0d0, R_x0d0
   339	MOVD	$0x0e0, R_x0e0
   340	MOVD	$0x0f0, R_x0f0
   341	MOVD	$0x100, R_x100
   342	MOVD	$0x110, R_x110
   343	// Per-block loop: one iteration consumes the 64 bytes at INP.
   344loop:
   345	MOVD	TBL_STRT, TBL // rewind to the start of kcon
   346	LVX	(TBL)(R_x000), KI // KI = kcon row 0
   347
   348	LXVD2X	(INP)(R_x000), V8 // load v8 in advance
   349
   350	// Offload to VSR24-31 (aka FPR24-31); this copy of V0-V7 is added back after the 64 rounds
   351	XXLOR	V0, V0, VS24
   352	XXLOR	V1, V1, VS25
   353	XXLOR	V2, V2, VS26
   354	XXLOR	V3, V3, VS27
   355	XXLOR	V4, V4, VS28
   356	XXLOR	V5, V5, VS29
   357	XXLOR	V6, V6, VS30
   358	XXLOR	V7, V7, VS31
   359	// Prime the pipeline: add kcon row 0 to h here; every round macro then pre-adds KI to the next round's h.
   360	VADDUWM	KI, V7, V7        // h+K[i]
   361	LVX	(TBL)(R_x010), KI // KI = kcon row 1
   362	// 15 SHA256ROUND0 rounds; the a-h arguments rotate by one register per round, VSLDOI rotates the loaded words by 4 bytes for the next xi.
   363	VPERMLE(V8, V8, LEMASK, V8)
   364	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V8, R_x020)
   365	VSLDOI	$4, V8, V8, V9
   366	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V9, R_x030)
   367	VSLDOI	$4, V9, V9, V10
   368	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V10, R_x040)
   369	LXVD2X	(INP)(R_x010), V12 // load v12 in advance
   370	VSLDOI	$4, V10, V10, V11
   371	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V11, R_x050)
   372	VPERMLE(V12, V12, LEMASK, V12)
   373	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V12, R_x060)
   374	VSLDOI	$4, V12, V12, V13
   375	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V13, R_x070)
   376	VSLDOI	$4, V13, V13, V14
   377	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V14, R_x080)
   378	LXVD2X	(INP)(R_x020), V16 // load v16 in advance
   379	VSLDOI	$4, V14, V14, V15
   380	SHA256ROUND0(V1, V2, V3, V4, V5, V6, V7, V0, V15, R_x090)
   381	VPERMLE(V16, V16, LEMASK, V16)
   382	SHA256ROUND0(V0, V1, V2, V3, V4, V5, V6, V7, V16, R_x0a0)
   383	VSLDOI	$4, V16, V16, V17
   384	SHA256ROUND0(V7, V0, V1, V2, V3, V4, V5, V6, V17, R_x0b0)
   385	VSLDOI	$4, V17, V17, V18
   386	SHA256ROUND0(V6, V7, V0, V1, V2, V3, V4, V5, V18, R_x0c0)
   387	VSLDOI	$4, V18, V18, V19
   388	LXVD2X	(INP)(R_x030), V20 // load v20 in advance
   389	SHA256ROUND0(V5, V6, V7, V0, V1, V2, V3, V4, V19, R_x0d0)
   390	VPERMLE(V20, V20, LEMASK, V20)
   391	SHA256ROUND0(V4, V5, V6, V7, V0, V1, V2, V3, V20, R_x0e0)
   392	VSLDOI	$4, V20, V20, V21
   393	SHA256ROUND0(V3, V4, V5, V6, V7, V0, V1, V2, V21, R_x0f0)
   394	VSLDOI	$4, V21, V21, V22
   395	SHA256ROUND0(V2, V3, V4, V5, V6, V7, V0, V1, V22, R_x100)
   396	VSLDOI	$4, V22, V22, V23
   397	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x110)
   398	// The SHA256ROUND1 above is the 16th round and also starts the schedule by updating V8.
   399	MOVD	$3, TEMP
   400	MOVD	TEMP, CTR // 3 passes of L16_xx, 16 rounds each
   401	ADD	$0x120, TBL // skip the 18 kcon rows already loaded
   402	ADD	$0x40, INP // advance to the next input block
   403	// 16 rounds per pass; each one also overwrites one of V8-V23 with a later schedule word.
   404L16_xx:
   405	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V8, V9, V10, V18, V23, R_x000)
   406	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V9, V10, V11, V19, V8, R_x010)
   407	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V10, V11, V12, V20, V9, R_x020)
   408	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V11, V12, V13, V21, V10, R_x030)
   409	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V12, V13, V14, V22, V11, R_x040)
   410	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V13, V14, V15, V23, V12, R_x050)
   411	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V14, V15, V16, V8, V13, R_x060)
   412	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V15, V16, V17, V9, V14, R_x070)
   413	SHA256ROUND1(V0, V1, V2, V3, V4, V5, V6, V7, V16, V17, V18, V10, V15, R_x080)
   414	SHA256ROUND1(V7, V0, V1, V2, V3, V4, V5, V6, V17, V18, V19, V11, V16, R_x090)
   415	SHA256ROUND1(V6, V7, V0, V1, V2, V3, V4, V5, V18, V19, V20, V12, V17, R_x0a0)
   416	SHA256ROUND1(V5, V6, V7, V0, V1, V2, V3, V4, V19, V20, V21, V13, V18, R_x0b0)
   417	SHA256ROUND1(V4, V5, V6, V7, V0, V1, V2, V3, V20, V21, V22, V14, V19, R_x0c0)
   418	SHA256ROUND1(V3, V4, V5, V6, V7, V0, V1, V2, V21, V22, V23, V15, V20, R_x0d0)
   419	SHA256ROUND1(V2, V3, V4, V5, V6, V7, V0, V1, V22, V23, V8, V16, V21, R_x0e0)
   420	SHA256ROUND1(V1, V2, V3, V4, V5, V6, V7, V0, V23, V8, V9, V17, V22, R_x0f0)
   421	ADD	$0x100, TBL // next 16 kcon rows
   422
   423	BDNZ	L16_xx // decrement CTR and repeat while it is non-zero
   424	// Add the state saved in VS24-VS31 back into V0-V7, using V10-V17 as temporaries.
   425	XXLOR	VS24, VS24, V10
   426
   427	XXLOR	VS25, VS25, V11
   428	VADDUWM	V10, V0, V0
   429	XXLOR	VS26, VS26, V12
   430	VADDUWM	V11, V1, V1
   431	XXLOR	VS27, VS27, V13
   432	VADDUWM	V12, V2, V2
   433	XXLOR	VS28, VS28, V14
   434	VADDUWM	V13, V3, V3
   435	XXLOR	VS29, VS29, V15
   436	VADDUWM	V14, V4, V4
   437	XXLOR	VS30, VS30, V16
   438	VADDUWM	V15, V5, V5
   439	XXLOR	VS31, VS31, V17
   440	VADDUWM	V16, V6, V6
   441	VADDUWM	V17, V7, V7
   442
   443	CMPU	INP, END
   444	BLT	loop // more whole blocks remain
   445	// Repack V0-V3 into V0 and V4-V7 into V4 with the three kcon permutation rows: KI holds the one at 0x410 (loaded by the last round), TBL now points at 0x420.
   446	LVX	(TBL)(R_x000), V8
   447	VPERM	V0, V1, KI, V0
   448	LVX	(TBL)(R_x010), V9
   449	VPERM	V4, V5, KI, V4
   450	VPERM	V0, V2, V8, V0
   451	VPERM	V4, V6, V8, V4
   452	VPERM	V0, V3, V9, V0
   453	VPERM	V4, V7, V9, V4
   454	STXVD2X	V0, (CTX+R_x000) // write the updated state back to dig
   455	STXVD2X	V4, (CTX+R_x010)
   456
   457end:
   458	RET
   459

View as plain text