...

Text file src/crypto/sha1/sha1block_amd64.s

Documentation: crypto/sha1

     1// Code generated by command: go run sha1block_amd64_asm.go -out ../sha1block_amd64.s -pkg sha1. DO NOT EDIT.
     2
     3//go:build !purego
     4
     5#include "textflag.h"
     6
     7// func blockAVX2(dig *digest, p []byte)
     8// Requires: AVX, AVX2, BMI, BMI2, CMOV
     9TEXT ·blockAVX2(SB), $1408-32
    10	MOVQ        dig+0(FP), DI
    11	MOVQ        p_base+8(FP), SI
    12	MOVQ        p_len+16(FP), DX
    13	SHRQ        $0x06, DX
    14	SHLQ        $0x06, DX
    15	LEAQ        K_XMM_AR<>+0(SB), R8
    16	MOVQ        DI, R9
    17	MOVQ        SI, R10
    18	LEAQ        64(SI), R13
    19	ADDQ        SI, DX
    20	ADDQ        $0x40, DX
    21	MOVQ        DX, R11
    22	CMPQ        R13, R11
    23	CMOVQCC     R8, R13
    24	VMOVDQU     BSWAP_SHUFB_CTL<>+0(SB), Y10
    25	MOVL        (R9), CX
    26	MOVL        4(R9), SI
    27	MOVL        8(R9), DI
    28	MOVL        12(R9), AX
    29	MOVL        16(R9), DX
    30	MOVQ        SP, R14
    31	LEAQ        672(SP), R15
    32	VMOVDQU     (R10), X0
    33	VINSERTI128 $0x01, (R13), Y0, Y0
    34	VPSHUFB     Y10, Y0, Y15
    35	VPADDD      (R8), Y15, Y0
    36	VMOVDQU     Y0, (R14)
    37	VMOVDQU     16(R10), X0
    38	VINSERTI128 $0x01, 16(R13), Y0, Y0
    39	VPSHUFB     Y10, Y0, Y14
    40	VPADDD      (R8), Y14, Y0
    41	VMOVDQU     Y0, 32(R14)
    42	VMOVDQU     32(R10), X0
    43	VINSERTI128 $0x01, 32(R13), Y0, Y0
    44	VPSHUFB     Y10, Y0, Y13
    45	VPADDD      (R8), Y13, Y0
    46	VMOVDQU     Y0, 64(R14)
    47	VMOVDQU     48(R10), X0
    48	VINSERTI128 $0x01, 48(R13), Y0, Y0
    49	VPSHUFB     Y10, Y0, Y12
    50	VPADDD      (R8), Y12, Y0
    51	VMOVDQU     Y0, 96(R14)
    52	VPALIGNR    $0x08, Y15, Y14, Y8
    53	VPSRLDQ     $0x04, Y12, Y0
    54	VPXOR       Y13, Y8, Y8
    55	VPXOR       Y15, Y0, Y0
    56	VPXOR       Y0, Y8, Y8
    57	VPSLLDQ     $0x0c, Y8, Y9
    58	VPSLLD      $0x01, Y8, Y0
    59	VPSRLD      $0x1f, Y8, Y8
    60	VPOR        Y8, Y0, Y0
    61	VPSLLD      $0x02, Y9, Y8
    62	VPSRLD      $0x1e, Y9, Y9
    63	VPXOR       Y8, Y0, Y0
    64	VPXOR       Y9, Y0, Y8
    65	VPADDD      (R8), Y8, Y0
    66	VMOVDQU     Y0, 128(R14)
    67	VPALIGNR    $0x08, Y14, Y13, Y7
    68	VPSRLDQ     $0x04, Y8, Y0
    69	VPXOR       Y12, Y7, Y7
    70	VPXOR       Y14, Y0, Y0
    71	VPXOR       Y0, Y7, Y7
    72	VPSLLDQ     $0x0c, Y7, Y9
    73	VPSLLD      $0x01, Y7, Y0
    74	VPSRLD      $0x1f, Y7, Y7
    75	VPOR        Y7, Y0, Y0
    76	VPSLLD      $0x02, Y9, Y7
    77	VPSRLD      $0x1e, Y9, Y9
    78	VPXOR       Y7, Y0, Y0
    79	VPXOR       Y9, Y0, Y7
    80	VPADDD      32(R8), Y7, Y0
    81	VMOVDQU     Y0, 160(R14)
    82	VPALIGNR    $0x08, Y13, Y12, Y5
    83	VPSRLDQ     $0x04, Y7, Y0
    84	VPXOR       Y8, Y5, Y5
    85	VPXOR       Y13, Y0, Y0
    86	VPXOR       Y0, Y5, Y5
    87	VPSLLDQ     $0x0c, Y5, Y9
    88	VPSLLD      $0x01, Y5, Y0
    89	VPSRLD      $0x1f, Y5, Y5
    90	VPOR        Y5, Y0, Y0
    91	VPSLLD      $0x02, Y9, Y5
    92	VPSRLD      $0x1e, Y9, Y9
    93	VPXOR       Y5, Y0, Y0
    94	VPXOR       Y9, Y0, Y5
    95	VPADDD      32(R8), Y5, Y0
    96	VMOVDQU     Y0, 192(R14)
    97	VPALIGNR    $0x08, Y12, Y8, Y3
    98	VPSRLDQ     $0x04, Y5, Y0
    99	VPXOR       Y7, Y3, Y3
   100	VPXOR       Y12, Y0, Y0
   101	VPXOR       Y0, Y3, Y3
   102	VPSLLDQ     $0x0c, Y3, Y9
   103	VPSLLD      $0x01, Y3, Y0
   104	VPSRLD      $0x1f, Y3, Y3
   105	VPOR        Y3, Y0, Y0
   106	VPSLLD      $0x02, Y9, Y3
   107	VPSRLD      $0x1e, Y9, Y9
   108	VPXOR       Y3, Y0, Y0
   109	VPXOR       Y9, Y0, Y3
   110	VPADDD      32(R8), Y3, Y0
   111	VMOVDQU     Y0, 224(R14)
   112	VPALIGNR    $0x08, Y5, Y3, Y0
   113	VPXOR       Y14, Y15, Y15
   114	VPXOR       Y8, Y0, Y0
   115	VPXOR       Y0, Y15, Y15
   116	VPSLLD      $0x02, Y15, Y0
   117	VPSRLD      $0x1e, Y15, Y15
   118	VPOR        Y15, Y0, Y15
   119	VPADDD      32(R8), Y15, Y0
   120	VMOVDQU     Y0, 256(R14)
   121	VPALIGNR    $0x08, Y3, Y15, Y0
   122	VPXOR       Y13, Y14, Y14
   123	VPXOR       Y7, Y0, Y0
   124	VPXOR       Y0, Y14, Y14
   125	VPSLLD      $0x02, Y14, Y0
   126	VPSRLD      $0x1e, Y14, Y14
   127	VPOR        Y14, Y0, Y14
   128	VPADDD      32(R8), Y14, Y0
   129	VMOVDQU     Y0, 288(R14)
   130	VPALIGNR    $0x08, Y15, Y14, Y0
   131	VPXOR       Y12, Y13, Y13
   132	VPXOR       Y5, Y0, Y0
   133	VPXOR       Y0, Y13, Y13
   134	VPSLLD      $0x02, Y13, Y0
   135	VPSRLD      $0x1e, Y13, Y13
   136	VPOR        Y13, Y0, Y13
   137	VPADDD      64(R8), Y13, Y0
   138	VMOVDQU     Y0, 320(R14)
   139	VPALIGNR    $0x08, Y14, Y13, Y0
   140	VPXOR       Y8, Y12, Y12
   141	VPXOR       Y3, Y0, Y0
   142	VPXOR       Y0, Y12, Y12
   143	VPSLLD      $0x02, Y12, Y0
   144	VPSRLD      $0x1e, Y12, Y12
   145	VPOR        Y12, Y0, Y12
   146	VPADDD      64(R8), Y12, Y0
   147	VMOVDQU     Y0, 352(R14)
   148	VPALIGNR    $0x08, Y13, Y12, Y0
   149	VPXOR       Y7, Y8, Y8
   150	VPXOR       Y15, Y0, Y0
   151	VPXOR       Y0, Y8, Y8
   152	VPSLLD      $0x02, Y8, Y0
   153	VPSRLD      $0x1e, Y8, Y8
   154	VPOR        Y8, Y0, Y8
   155	VPADDD      64(R8), Y8, Y0
   156	VMOVDQU     Y0, 384(R14)
   157	VPALIGNR    $0x08, Y12, Y8, Y0
   158	VPXOR       Y5, Y7, Y7
   159	VPXOR       Y14, Y0, Y0
   160	VPXOR       Y0, Y7, Y7
   161	VPSLLD      $0x02, Y7, Y0
   162	VPSRLD      $0x1e, Y7, Y7
   163	VPOR        Y7, Y0, Y7
   164	VPADDD      64(R8), Y7, Y0
   165	VMOVDQU     Y0, 416(R14)
   166	VPALIGNR    $0x08, Y8, Y7, Y0
   167	VPXOR       Y3, Y5, Y5
   168	VPXOR       Y13, Y0, Y0
   169	VPXOR       Y0, Y5, Y5
   170	VPSLLD      $0x02, Y5, Y0
   171	VPSRLD      $0x1e, Y5, Y5
   172	VPOR        Y5, Y0, Y5
   173	VPADDD      64(R8), Y5, Y0
   174	VMOVDQU     Y0, 448(R14)
   175	VPALIGNR    $0x08, Y7, Y5, Y0
   176	VPXOR       Y15, Y3, Y3
   177	VPXOR       Y12, Y0, Y0
   178	VPXOR       Y0, Y3, Y3
   179	VPSLLD      $0x02, Y3, Y0
   180	VPSRLD      $0x1e, Y3, Y3
   181	VPOR        Y3, Y0, Y3
   182	VPADDD      96(R8), Y3, Y0
   183	VMOVDQU     Y0, 480(R14)
   184	VPALIGNR    $0x08, Y5, Y3, Y0
   185	VPXOR       Y14, Y15, Y15
   186	VPXOR       Y8, Y0, Y0
   187	VPXOR       Y0, Y15, Y15
   188	VPSLLD      $0x02, Y15, Y0
   189	VPSRLD      $0x1e, Y15, Y15
   190	VPOR        Y15, Y0, Y15
   191	VPADDD      96(R8), Y15, Y0
   192	VMOVDQU     Y0, 512(R14)
   193	VPALIGNR    $0x08, Y3, Y15, Y0
   194	VPXOR       Y13, Y14, Y14
   195	VPXOR       Y7, Y0, Y0
   196	VPXOR       Y0, Y14, Y14
   197	VPSLLD      $0x02, Y14, Y0
   198	VPSRLD      $0x1e, Y14, Y14
   199	VPOR        Y14, Y0, Y14
   200	VPADDD      96(R8), Y14, Y0
   201	VMOVDQU     Y0, 544(R14)
   202	VPALIGNR    $0x08, Y15, Y14, Y0
   203	VPXOR       Y12, Y13, Y13
   204	VPXOR       Y5, Y0, Y0
   205	VPXOR       Y0, Y13, Y13
   206	VPSLLD      $0x02, Y13, Y0
   207	VPSRLD      $0x1e, Y13, Y13
   208	VPOR        Y13, Y0, Y13
   209	VPADDD      96(R8), Y13, Y0
   210	VMOVDQU     Y0, 576(R14)
   211	VPALIGNR    $0x08, Y14, Y13, Y0
   212	VPXOR       Y8, Y12, Y12
   213	VPXOR       Y3, Y0, Y0
   214	VPXOR       Y0, Y12, Y12
   215	VPSLLD      $0x02, Y12, Y0
   216	VPSRLD      $0x1e, Y12, Y12
   217	VPOR        Y12, Y0, Y12
   218	VPADDD      96(R8), Y12, Y0
   219	VMOVDQU     Y0, 608(R14)
   220	XCHGQ       R15, R14
   221
   222loop:
   223	CMPQ R10, R8
   224	JNE  begin
   225	VZEROUPPER
   226	RET
   227
   228begin:
   229	MOVL        SI, BX
   230	RORXL       $0x02, SI, SI
   231	ANDNL       AX, BX, BP
   232	ANDL        DI, BX
   233	XORL        BP, BX
   234	ADDL        (R15), DX
   235	ANDNL       DI, CX, BP
   236	LEAL        (DX)(BX*1), DX
   237	RORXL       $0x1b, CX, R12
   238	RORXL       $0x02, CX, BX
   239	VMOVDQU     128(R10), X0
   240	ANDL        SI, CX
   241	XORL        BP, CX
   242	LEAL        (DX)(R12*1), DX
   243	ADDL        4(R15), AX
   244	ANDNL       SI, DX, BP
   245	LEAL        (AX)(CX*1), AX
   246	RORXL       $0x1b, DX, R12
   247	RORXL       $0x02, DX, CX
   248	VINSERTI128 $0x01, 128(R13), Y0, Y0
   249	ANDL        BX, DX
   250	XORL        BP, DX
   251	LEAL        (AX)(R12*1), AX
   252	ADDL        8(R15), DI
   253	ANDNL       BX, AX, BP
   254	LEAL        (DI)(DX*1), DI
   255	RORXL       $0x1b, AX, R12
   256	RORXL       $0x02, AX, DX
   257	VPSHUFB     Y10, Y0, Y15
   258	ANDL        CX, AX
   259	XORL        BP, AX
   260	LEAL        (DI)(R12*1), DI
   261	ADDL        12(R15), SI
   262	ANDNL       CX, DI, BP
   263	LEAL        (SI)(AX*1), SI
   264	RORXL       $0x1b, DI, R12
   265	RORXL       $0x02, DI, AX
   266	ANDL        DX, DI
   267	XORL        BP, DI
   268	LEAL        (SI)(R12*1), SI
   269	ADDL        32(R15), BX
   270	ANDNL       DX, SI, BP
   271	LEAL        (BX)(DI*1), BX
   272	RORXL       $0x1b, SI, R12
   273	RORXL       $0x02, SI, DI
   274	VPADDD      (R8), Y15, Y0
   275	ANDL        AX, SI
   276	XORL        BP, SI
   277	LEAL        (BX)(R12*1), BX
   278	ADDL        36(R15), CX
   279	ANDNL       AX, BX, BP
   280	LEAL        (CX)(SI*1), CX
   281	RORXL       $0x1b, BX, R12
   282	RORXL       $0x02, BX, SI
   283	ANDL        DI, BX
   284	XORL        BP, BX
   285	LEAL        (CX)(R12*1), CX
   286	ADDL        40(R15), DX
   287	ANDNL       DI, CX, BP
   288	LEAL        (DX)(BX*1), DX
   289	RORXL       $0x1b, CX, R12
   290	RORXL       $0x02, CX, BX
   291	ANDL        SI, CX
   292	XORL        BP, CX
   293	LEAL        (DX)(R12*1), DX
   294	ADDL        44(R15), AX
   295	ANDNL       SI, DX, BP
   296	LEAL        (AX)(CX*1), AX
   297	RORXL       $0x1b, DX, R12
   298	RORXL       $0x02, DX, CX
   299	VMOVDQU     Y0, (R14)
   300	ANDL        BX, DX
   301	XORL        BP, DX
   302	LEAL        (AX)(R12*1), AX
   303	ADDL        64(R15), DI
   304	ANDNL       BX, AX, BP
   305	LEAL        (DI)(DX*1), DI
   306	RORXL       $0x1b, AX, R12
   307	RORXL       $0x02, AX, DX
   308	VMOVDQU     144(R10), X0
   309	ANDL        CX, AX
   310	XORL        BP, AX
   311	LEAL        (DI)(R12*1), DI
   312	ADDL        68(R15), SI
   313	ANDNL       CX, DI, BP
   314	LEAL        (SI)(AX*1), SI
   315	RORXL       $0x1b, DI, R12
   316	RORXL       $0x02, DI, AX
   317	VINSERTI128 $0x01, 144(R13), Y0, Y0
   318	ANDL        DX, DI
   319	XORL        BP, DI
   320	LEAL        (SI)(R12*1), SI
   321	ADDL        72(R15), BX
   322	ANDNL       DX, SI, BP
   323	LEAL        (BX)(DI*1), BX
   324	RORXL       $0x1b, SI, R12
   325	RORXL       $0x02, SI, DI
   326	VPSHUFB     Y10, Y0, Y14
   327	ANDL        AX, SI
   328	XORL        BP, SI
   329	LEAL        (BX)(R12*1), BX
   330	ADDL        76(R15), CX
   331	ANDNL       AX, BX, BP
   332	LEAL        (CX)(SI*1), CX
   333	RORXL       $0x1b, BX, R12
   334	RORXL       $0x02, BX, SI
   335	ANDL        DI, BX
   336	XORL        BP, BX
   337	LEAL        (CX)(R12*1), CX
   338	ADDL        96(R15), DX
   339	ANDNL       DI, CX, BP
   340	LEAL        (DX)(BX*1), DX
   341	RORXL       $0x1b, CX, R12
   342	RORXL       $0x02, CX, BX
   343	VPADDD      (R8), Y14, Y0
   344	ANDL        SI, CX
   345	XORL        BP, CX
   346	LEAL        (DX)(R12*1), DX
   347	ADDL        100(R15), AX
   348	ANDNL       SI, DX, BP
   349	LEAL        (AX)(CX*1), AX
   350	RORXL       $0x1b, DX, R12
   351	RORXL       $0x02, DX, CX
   352	ANDL        BX, DX
   353	XORL        BP, DX
   354	LEAL        (AX)(R12*1), AX
   355	ADDL        104(R15), DI
   356	ANDNL       BX, AX, BP
   357	LEAL        (DI)(DX*1), DI
   358	RORXL       $0x1b, AX, R12
   359	RORXL       $0x02, AX, DX
   360	ANDL        CX, AX
   361	XORL        BP, AX
   362	LEAL        (DI)(R12*1), DI
   363	ADDL        108(R15), SI
   364	ANDNL       CX, DI, BP
   365	LEAL        (SI)(AX*1), SI
   366	RORXL       $0x1b, DI, R12
   367	RORXL       $0x02, DI, AX
   368	VMOVDQU     Y0, 32(R14)
   369	ANDL        DX, DI
   370	XORL        BP, DI
   371	LEAL        (SI)(R12*1), SI
   372	ADDL        128(R15), BX
   373	ANDNL       DX, SI, BP
   374	LEAL        (BX)(DI*1), BX
   375	RORXL       $0x1b, SI, R12
   376	RORXL       $0x02, SI, DI
   377	VMOVDQU     160(R10), X0
   378	ANDL        AX, SI
   379	XORL        BP, SI
   380	LEAL        (BX)(R12*1), BX
   381	ADDL        132(R15), CX
   382	ANDNL       AX, BX, BP
   383	LEAL        (CX)(SI*1), CX
   384	RORXL       $0x1b, BX, R12
   385	RORXL       $0x02, BX, SI
   386	VINSERTI128 $0x01, 160(R13), Y0, Y0
   387	ANDL        DI, BX
   388	XORL        BP, BX
   389	LEAL        (CX)(R12*1), CX
   390	ADDL        136(R15), DX
   391	ANDNL       DI, CX, BP
   392	LEAL        (DX)(BX*1), DX
   393	RORXL       $0x1b, CX, R12
   394	RORXL       $0x02, CX, BX
   395	VPSHUFB     Y10, Y0, Y13
   396	ANDL        SI, CX
   397	XORL        BP, CX
   398	LEAL        (DX)(R12*1), DX
   399	ADDL        140(R15), AX
   400	LEAL        (AX)(CX*1), AX
   401	RORXL       $0x1b, DX, R12
   402	RORXL       $0x02, DX, CX
   403	XORL        BX, DX
   404	ADDL        R12, AX
   405	XORL        SI, DX
   406	ADDL        160(R15), DI
   407	LEAL        (DI)(DX*1), DI
   408	RORXL       $0x1b, AX, R12
   409	RORXL       $0x02, AX, DX
   410	VPADDD      (R8), Y13, Y0
   411	XORL        CX, AX
   412	ADDL        R12, DI
   413	XORL        BX, AX
   414	ADDL        164(R15), SI
   415	LEAL        (SI)(AX*1), SI
   416	RORXL       $0x1b, DI, R12
   417	RORXL       $0x02, DI, AX
   418	XORL        DX, DI
   419	ADDL        R12, SI
   420	XORL        CX, DI
   421	ADDL        168(R15), BX
   422	LEAL        (BX)(DI*1), BX
   423	RORXL       $0x1b, SI, R12
   424	RORXL       $0x02, SI, DI
   425	XORL        AX, SI
   426	ADDL        R12, BX
   427	XORL        DX, SI
   428	ADDL        172(R15), CX
   429	LEAL        (CX)(SI*1), CX
   430	RORXL       $0x1b, BX, R12
   431	RORXL       $0x02, BX, SI
   432	VMOVDQU     Y0, 64(R14)
   433	XORL        DI, BX
   434	ADDL        R12, CX
   435	XORL        AX, BX
   436	ADDL        192(R15), DX
   437	LEAL        (DX)(BX*1), DX
   438	RORXL       $0x1b, CX, R12
   439	RORXL       $0x02, CX, BX
   440	VMOVDQU     176(R10), X0
   441	XORL        SI, CX
   442	ADDL        R12, DX
   443	XORL        DI, CX
   444	ADDL        196(R15), AX
   445	LEAL        (AX)(CX*1), AX
   446	RORXL       $0x1b, DX, R12
   447	RORXL       $0x02, DX, CX
   448	VINSERTI128 $0x01, 176(R13), Y0, Y0
   449	XORL        BX, DX
   450	ADDL        R12, AX
   451	XORL        SI, DX
   452	ADDL        200(R15), DI
   453	LEAL        (DI)(DX*1), DI
   454	RORXL       $0x1b, AX, R12
   455	RORXL       $0x02, AX, DX
   456	VPSHUFB     Y10, Y0, Y12
   457	XORL        CX, AX
   458	ADDL        R12, DI
   459	XORL        BX, AX
   460	ADDL        204(R15), SI
   461	LEAL        (SI)(AX*1), SI
   462	RORXL       $0x1b, DI, R12
   463	RORXL       $0x02, DI, AX
   464	XORL        DX, DI
   465	ADDL        R12, SI
   466	XORL        CX, DI
   467	ADDL        224(R15), BX
   468	LEAL        (BX)(DI*1), BX
   469	RORXL       $0x1b, SI, R12
   470	RORXL       $0x02, SI, DI
   471	VPADDD      (R8), Y12, Y0
   472	XORL        AX, SI
   473	ADDL        R12, BX
   474	XORL        DX, SI
   475	ADDL        228(R15), CX
   476	LEAL        (CX)(SI*1), CX
   477	RORXL       $0x1b, BX, R12
   478	RORXL       $0x02, BX, SI
   479	XORL        DI, BX
   480	ADDL        R12, CX
   481	XORL        AX, BX
   482	ADDL        232(R15), DX
   483	LEAL        (DX)(BX*1), DX
   484	RORXL       $0x1b, CX, R12
   485	RORXL       $0x02, CX, BX
   486	XORL        SI, CX
   487	ADDL        R12, DX
   488	XORL        DI, CX
   489	ADDL        236(R15), AX
   490	LEAL        (AX)(CX*1), AX
   491	RORXL       $0x1b, DX, R12
   492	RORXL       $0x02, DX, CX
   493	VMOVDQU     Y0, 96(R14)
   494	XORL        BX, DX
   495	ADDL        R12, AX
   496	XORL        SI, DX
   497	ADDL        256(R15), DI
   498	LEAL        (DI)(DX*1), DI
   499	RORXL       $0x1b, AX, R12
   500	RORXL       $0x02, AX, DX
   501	VPALIGNR    $0x08, Y15, Y14, Y8
   502	VPSRLDQ     $0x04, Y12, Y0
   503	XORL        CX, AX
   504	ADDL        R12, DI
   505	XORL        BX, AX
   506	ADDL        260(R15), SI
   507	LEAL        (SI)(AX*1), SI
   508	RORXL       $0x1b, DI, R12
   509	RORXL       $0x02, DI, AX
   510	VPXOR       Y13, Y8, Y8
   511	VPXOR       Y15, Y0, Y0
   512	XORL        DX, DI
   513	ADDL        R12, SI
   514	XORL        CX, DI
   515	ADDL        264(R15), BX
   516	LEAL        (BX)(DI*1), BX
   517	RORXL       $0x1b, SI, R12
   518	RORXL       $0x02, SI, DI
   519	VPXOR       Y0, Y8, Y8
   520	VPSLLDQ     $0x0c, Y8, Y9
   521	XORL        AX, SI
   522	ADDL        R12, BX
   523	XORL        DX, SI
   524	ADDL        268(R15), CX
   525	LEAL        (CX)(SI*1), CX
   526	RORXL       $0x1b, BX, R12
   527	RORXL       $0x02, BX, SI
   528	VPSLLD      $0x01, Y8, Y0
   529	VPSRLD      $0x1f, Y8, Y8
   530	XORL        DI, BX
   531	ADDL        R12, CX
   532	XORL        AX, BX
   533	ADDL        288(R15), DX
   534	LEAL        (DX)(BX*1), DX
   535	RORXL       $0x1b, CX, R12
   536	RORXL       $0x02, CX, BX
   537	VPOR        Y8, Y0, Y0
   538	VPSLLD      $0x02, Y9, Y8
   539	XORL        SI, CX
   540	ADDL        R12, DX
   541	XORL        DI, CX
   542	ADDL        292(R15), AX
   543	LEAL        (AX)(CX*1), AX
   544	RORXL       $0x1b, DX, R12
   545	RORXL       $0x02, DX, CX
   546	VPSRLD      $0x1e, Y9, Y9
   547	VPXOR       Y8, Y0, Y0
   548	XORL        BX, DX
   549	ADDL        R12, AX
   550	XORL        SI, DX
   551	ADDL        296(R15), DI
   552	LEAL        (DI)(DX*1), DI
   553	RORXL       $0x1b, AX, R12
   554	RORXL       $0x02, AX, DX
   555	XORL        CX, AX
   556	ADDL        R12, DI
   557	XORL        BX, AX
   558	ADDL        300(R15), SI
   559	VPXOR       Y9, Y0, Y8
   560	VPADDD      (R8), Y8, Y0
   561	VMOVDQU     Y0, 128(R14)
   562	LEAL        (SI)(AX*1), SI
   563	MOVL        DX, BP
   564	ORL         DI, BP
   565	RORXL       $0x1b, DI, R12
   566	RORXL       $0x02, DI, AX
   567	ANDL        CX, BP
   568	ANDL        DX, DI
   569	ORL         BP, DI
   570	ADDL        R12, SI
   571	ADDL        320(R15), BX
   572	VPALIGNR    $0x08, Y14, Y13, Y7
   573	VPSRLDQ     $0x04, Y8, Y0
   574	LEAL        (BX)(DI*1), BX
   575	MOVL        AX, BP
   576	ORL         SI, BP
   577	RORXL       $0x1b, SI, R12
   578	RORXL       $0x02, SI, DI
   579	ANDL        DX, BP
   580	ANDL        AX, SI
   581	ORL         BP, SI
   582	ADDL        R12, BX
   583	ADDL        324(R15), CX
   584	VPXOR       Y12, Y7, Y7
   585	VPXOR       Y14, Y0, Y0
   586	LEAL        (CX)(SI*1), CX
   587	MOVL        DI, BP
   588	ORL         BX, BP
   589	RORXL       $0x1b, BX, R12
   590	RORXL       $0x02, BX, SI
   591	ANDL        AX, BP
   592	ANDL        DI, BX
   593	ORL         BP, BX
   594	ADDL        R12, CX
   595	ADDL        328(R15), DX
   596	VPXOR       Y0, Y7, Y7
   597	VPSLLDQ     $0x0c, Y7, Y9
   598	LEAL        (DX)(BX*1), DX
   599	MOVL        SI, BP
   600	ORL         CX, BP
   601	RORXL       $0x1b, CX, R12
   602	RORXL       $0x02, CX, BX
   603	ANDL        DI, BP
   604	ANDL        SI, CX
   605	ORL         BP, CX
   606	ADDL        R12, DX
   607	ADDL        332(R15), AX
   608	VPSLLD      $0x01, Y7, Y0
   609	VPSRLD      $0x1f, Y7, Y7
   610	LEAL        (AX)(CX*1), AX
   611	MOVL        BX, BP
   612	ORL         DX, BP
   613	RORXL       $0x1b, DX, R12
   614	RORXL       $0x02, DX, CX
   615	ANDL        SI, BP
   616	ANDL        BX, DX
   617	ORL         BP, DX
   618	ADDL        R12, AX
   619	ADDL        352(R15), DI
   620	VPOR        Y7, Y0, Y0
   621	VPSLLD      $0x02, Y9, Y7
   622	LEAL        (DI)(DX*1), DI
   623	MOVL        CX, BP
   624	ORL         AX, BP
   625	RORXL       $0x1b, AX, R12
   626	RORXL       $0x02, AX, DX
   627	ANDL        BX, BP
   628	ANDL        CX, AX
   629	ORL         BP, AX
   630	ADDL        R12, DI
   631	ADDL        356(R15), SI
   632	VPSRLD      $0x1e, Y9, Y9
   633	VPXOR       Y7, Y0, Y0
   634	LEAL        (SI)(AX*1), SI
   635	MOVL        DX, BP
   636	ORL         DI, BP
   637	RORXL       $0x1b, DI, R12
   638	RORXL       $0x02, DI, AX
   639	ANDL        CX, BP
   640	ANDL        DX, DI
   641	ORL         BP, DI
   642	ADDL        R12, SI
   643	ADDL        360(R15), BX
   644	LEAL        (BX)(DI*1), BX
   645	MOVL        AX, BP
   646	ORL         SI, BP
   647	RORXL       $0x1b, SI, R12
   648	RORXL       $0x02, SI, DI
   649	ANDL        DX, BP
   650	ANDL        AX, SI
   651	ORL         BP, SI
   652	ADDL        R12, BX
   653	ADDL        364(R15), CX
   654	VPXOR       Y9, Y0, Y7
   655	VPADDD      32(R8), Y7, Y0
   656	VMOVDQU     Y0, 160(R14)
   657	LEAL        (CX)(SI*1), CX
   658	MOVL        DI, BP
   659	ORL         BX, BP
   660	RORXL       $0x1b, BX, R12
   661	RORXL       $0x02, BX, SI
   662	ANDL        AX, BP
   663	ANDL        DI, BX
   664	ORL         BP, BX
   665	ADDL        R12, CX
   666	ADDL        384(R15), DX
   667	VPALIGNR    $0x08, Y13, Y12, Y5
   668	VPSRLDQ     $0x04, Y7, Y0
   669	LEAL        (DX)(BX*1), DX
   670	MOVL        SI, BP
   671	ORL         CX, BP
   672	RORXL       $0x1b, CX, R12
   673	RORXL       $0x02, CX, BX
   674	ANDL        DI, BP
   675	ANDL        SI, CX
   676	ORL         BP, CX
   677	ADDL        R12, DX
   678	ADDL        388(R15), AX
   679	VPXOR       Y8, Y5, Y5
   680	VPXOR       Y13, Y0, Y0
   681	LEAL        (AX)(CX*1), AX
   682	MOVL        BX, BP
   683	ORL         DX, BP
   684	RORXL       $0x1b, DX, R12
   685	RORXL       $0x02, DX, CX
   686	ANDL        SI, BP
   687	ANDL        BX, DX
   688	ORL         BP, DX
   689	ADDL        R12, AX
   690	ADDL        392(R15), DI
   691	VPXOR       Y0, Y5, Y5
   692	VPSLLDQ     $0x0c, Y5, Y9
   693	LEAL        (DI)(DX*1), DI
   694	MOVL        CX, BP
   695	ORL         AX, BP
   696	RORXL       $0x1b, AX, R12
   697	RORXL       $0x02, AX, DX
   698	ANDL        BX, BP
   699	ANDL        CX, AX
   700	ORL         BP, AX
   701	ADDL        R12, DI
   702	ADDL        396(R15), SI
   703	VPSLLD      $0x01, Y5, Y0
   704	VPSRLD      $0x1f, Y5, Y5
   705	LEAL        (SI)(AX*1), SI
   706	MOVL        DX, BP
   707	ORL         DI, BP
   708	RORXL       $0x1b, DI, R12
   709	RORXL       $0x02, DI, AX
   710	ANDL        CX, BP
   711	ANDL        DX, DI
   712	ORL         BP, DI
   713	ADDL        R12, SI
   714	ADDL        416(R15), BX
   715	VPOR        Y5, Y0, Y0
   716	VPSLLD      $0x02, Y9, Y5
   717	LEAL        (BX)(DI*1), BX
   718	MOVL        AX, BP
   719	ORL         SI, BP
   720	RORXL       $0x1b, SI, R12
   721	RORXL       $0x02, SI, DI
   722	ANDL        DX, BP
   723	ANDL        AX, SI
   724	ORL         BP, SI
   725	ADDL        R12, BX
   726	ADDL        420(R15), CX
   727	VPSRLD      $0x1e, Y9, Y9
   728	VPXOR       Y5, Y0, Y0
   729	LEAL        (CX)(SI*1), CX
   730	MOVL        DI, BP
   731	ORL         BX, BP
   732	RORXL       $0x1b, BX, R12
   733	RORXL       $0x02, BX, SI
   734	ANDL        AX, BP
   735	ANDL        DI, BX
   736	ORL         BP, BX
   737	ADDL        R12, CX
   738	ADDL        424(R15), DX
   739	LEAL        (DX)(BX*1), DX
   740	MOVL        SI, BP
   741	ORL         CX, BP
   742	RORXL       $0x1b, CX, R12
   743	RORXL       $0x02, CX, BX
   744	ANDL        DI, BP
   745	ANDL        SI, CX
   746	ORL         BP, CX
   747	ADDL        R12, DX
   748	ADDL        428(R15), AX
   749	VPXOR       Y9, Y0, Y5
   750	VPADDD      32(R8), Y5, Y0
   751	VMOVDQU     Y0, 192(R14)
   752	LEAL        (AX)(CX*1), AX
   753	MOVL        BX, BP
   754	ORL         DX, BP
   755	RORXL       $0x1b, DX, R12
   756	RORXL       $0x02, DX, CX
   757	ANDL        SI, BP
   758	ANDL        BX, DX
   759	ORL         BP, DX
   760	ADDL        R12, AX
   761	ADDL        448(R15), DI
   762	VPALIGNR    $0x08, Y12, Y8, Y3
   763	VPSRLDQ     $0x04, Y5, Y0
   764	LEAL        (DI)(DX*1), DI
   765	MOVL        CX, BP
   766	ORL         AX, BP
   767	RORXL       $0x1b, AX, R12
   768	RORXL       $0x02, AX, DX
   769	ANDL        BX, BP
   770	ANDL        CX, AX
   771	ORL         BP, AX
   772	ADDL        R12, DI
   773	ADDL        452(R15), SI
   774	VPXOR       Y7, Y3, Y3
   775	VPXOR       Y12, Y0, Y0
   776	LEAL        (SI)(AX*1), SI
   777	MOVL        DX, BP
   778	ORL         DI, BP
   779	RORXL       $0x1b, DI, R12
   780	RORXL       $0x02, DI, AX
   781	ANDL        CX, BP
   782	ANDL        DX, DI
   783	ORL         BP, DI
   784	ADDL        R12, SI
   785	ADDL        456(R15), BX
   786	VPXOR       Y0, Y3, Y3
   787	VPSLLDQ     $0x0c, Y3, Y9
   788	LEAL        (BX)(DI*1), BX
   789	MOVL        AX, BP
   790	ORL         SI, BP
   791	RORXL       $0x1b, SI, R12
   792	RORXL       $0x02, SI, DI
   793	ANDL        DX, BP
   794	ANDL        AX, SI
   795	ORL         BP, SI
   796	ADDL        R12, BX
   797	ADDL        460(R15), CX
   798	LEAL        (CX)(SI*1), CX
   799	RORXL       $0x1b, BX, R12
   800	RORXL       $0x02, BX, SI
   801	VPSLLD      $0x01, Y3, Y0
   802	VPSRLD      $0x1f, Y3, Y3
   803	XORL        DI, BX
   804	ADDL        R12, CX
   805	XORL        AX, BX
   806	ADDQ        $0x80, R10
   807	CMPQ        R10, R11
   808	CMOVQCC     R8, R10
   809	ADDL        480(R15), DX
   810	LEAL        (DX)(BX*1), DX
   811	RORXL       $0x1b, CX, R12
   812	RORXL       $0x02, CX, BX
   813	VPOR        Y3, Y0, Y0
   814	VPSLLD      $0x02, Y9, Y3
   815	XORL        SI, CX
   816	ADDL        R12, DX
   817	XORL        DI, CX
   818	ADDL        484(R15), AX
   819	LEAL        (AX)(CX*1), AX
   820	RORXL       $0x1b, DX, R12
   821	RORXL       $0x02, DX, CX
   822	VPSRLD      $0x1e, Y9, Y9
   823	VPXOR       Y3, Y0, Y0
   824	XORL        BX, DX
   825	ADDL        R12, AX
   826	XORL        SI, DX
   827	ADDL        488(R15), DI
   828	LEAL        (DI)(DX*1), DI
   829	RORXL       $0x1b, AX, R12
   830	RORXL       $0x02, AX, DX
   831	XORL        CX, AX
   832	ADDL        R12, DI
   833	XORL        BX, AX
   834	ADDL        492(R15), SI
   835	LEAL        (SI)(AX*1), SI
   836	RORXL       $0x1b, DI, R12
   837	RORXL       $0x02, DI, AX
   838	VPXOR       Y9, Y0, Y3
   839	VPADDD      32(R8), Y3, Y0
   840	VMOVDQU     Y0, 224(R14)
   841	XORL        DX, DI
   842	ADDL        R12, SI
   843	XORL        CX, DI
   844	ADDL        512(R15), BX
   845	LEAL        (BX)(DI*1), BX
   846	RORXL       $0x1b, SI, R12
   847	RORXL       $0x02, SI, DI
   848	VPALIGNR    $0x08, Y5, Y3, Y0
   849	XORL        AX, SI
   850	ADDL        R12, BX
   851	XORL        DX, SI
   852	ADDL        516(R15), CX
   853	LEAL        (CX)(SI*1), CX
   854	RORXL       $0x1b, BX, R12
   855	RORXL       $0x02, BX, SI
   856	VPXOR       Y14, Y15, Y15
   857	XORL        DI, BX
   858	ADDL        R12, CX
   859	XORL        AX, BX
   860	ADDL        520(R15), DX
   861	LEAL        (DX)(BX*1), DX
   862	RORXL       $0x1b, CX, R12
   863	RORXL       $0x02, CX, BX
   864	VPXOR       Y8, Y0, Y0
   865	XORL        SI, CX
   866	ADDL        R12, DX
   867	XORL        DI, CX
   868	ADDL        524(R15), AX
   869	LEAL        (AX)(CX*1), AX
   870	RORXL       $0x1b, DX, R12
   871	RORXL       $0x02, DX, CX
   872	VPXOR       Y0, Y15, Y15
   873	XORL        BX, DX
   874	ADDL        R12, AX
   875	XORL        SI, DX
   876	ADDL        544(R15), DI
   877	LEAL        (DI)(DX*1), DI
   878	RORXL       $0x1b, AX, R12
   879	RORXL       $0x02, AX, DX
   880	VPSLLD      $0x02, Y15, Y0
   881	XORL        CX, AX
   882	ADDL        R12, DI
   883	XORL        BX, AX
   884	ADDL        548(R15), SI
   885	LEAL        (SI)(AX*1), SI
   886	RORXL       $0x1b, DI, R12
   887	RORXL       $0x02, DI, AX
   888	VPSRLD      $0x1e, Y15, Y15
   889	VPOR        Y15, Y0, Y15
   890	XORL        DX, DI
   891	ADDL        R12, SI
   892	XORL        CX, DI
   893	ADDL        552(R15), BX
   894	LEAL        (BX)(DI*1), BX
   895	RORXL       $0x1b, SI, R12
   896	RORXL       $0x02, SI, DI
   897	XORL        AX, SI
   898	ADDL        R12, BX
   899	XORL        DX, SI
   900	ADDL        556(R15), CX
   901	LEAL        (CX)(SI*1), CX
   902	RORXL       $0x1b, BX, R12
   903	RORXL       $0x02, BX, SI
   904	VPADDD      32(R8), Y15, Y0
   905	VMOVDQU     Y0, 256(R14)
   906	XORL        DI, BX
   907	ADDL        R12, CX
   908	XORL        AX, BX
   909	ADDL        576(R15), DX
   910	LEAL        (DX)(BX*1), DX
   911	RORXL       $0x1b, CX, R12
   912	RORXL       $0x02, CX, BX
   913	VPALIGNR    $0x08, Y3, Y15, Y0
   914	XORL        SI, CX
   915	ADDL        R12, DX
   916	XORL        DI, CX
   917	ADDL        580(R15), AX
   918	LEAL        (AX)(CX*1), AX
   919	RORXL       $0x1b, DX, R12
   920	RORXL       $0x02, DX, CX
   921	VPXOR       Y13, Y14, Y14
   922	XORL        BX, DX
   923	ADDL        R12, AX
   924	XORL        SI, DX
   925	ADDL        584(R15), DI
   926	LEAL        (DI)(DX*1), DI
   927	RORXL       $0x1b, AX, R12
   928	RORXL       $0x02, AX, DX
   929	VPXOR       Y7, Y0, Y0
   930	XORL        CX, AX
   931	ADDL        R12, DI
   932	XORL        BX, AX
   933	ADDL        588(R15), SI
   934	LEAL        (SI)(AX*1), SI
   935	RORXL       $0x1b, DI, R12
   936	RORXL       $0x02, DI, AX
   937	VPXOR       Y0, Y14, Y14
   938	XORL        DX, DI
   939	ADDL        R12, SI
   940	XORL        CX, DI
   941	ADDL        608(R15), BX
   942	LEAL        (BX)(DI*1), BX
   943	RORXL       $0x1b, SI, R12
   944	RORXL       $0x02, SI, DI
   945	VPSLLD      $0x02, Y14, Y0
   946	XORL        AX, SI
   947	ADDL        R12, BX
   948	XORL        DX, SI
   949	ADDL        612(R15), CX
   950	LEAL        (CX)(SI*1), CX
   951	RORXL       $0x1b, BX, R12
   952	RORXL       $0x02, BX, SI
   953	VPSRLD      $0x1e, Y14, Y14
   954	VPOR        Y14, Y0, Y14
   955	XORL        DI, BX
   956	ADDL        R12, CX
   957	XORL        AX, BX
   958	ADDL        616(R15), DX
   959	LEAL        (DX)(BX*1), DX
   960	RORXL       $0x1b, CX, R12
   961	RORXL       $0x02, CX, BX
   962	XORL        SI, CX
   963	ADDL        R12, DX
   964	XORL        DI, CX
   965	ADDL        620(R15), AX
   966	LEAL        (AX)(CX*1), AX
   967	RORXL       $0x1b, DX, R12
   968	VPADDD      32(R8), Y14, Y0
   969	VMOVDQU     Y0, 288(R14)
   970	ADDL        R12, AX
   971	ADDL        (R9), AX
   972	MOVL        AX, (R9)
   973	ADDL        4(R9), DX
   974	MOVL        DX, 4(R9)
   975	ADDL        8(R9), BX
   976	MOVL        BX, 8(R9)
   977	ADDL        12(R9), SI
   978	MOVL        SI, 12(R9)
   979	ADDL        16(R9), DI
   980	MOVL        DI, 16(R9)
   981	CMPQ        R10, R8
   982	JE          loop
   983	MOVL        DX, CX
   984	MOVL        CX, DX
   985	RORXL       $0x02, CX, CX
   986	ANDNL       SI, DX, BP
   987	ANDL        BX, DX
   988	XORL        BP, DX
   989	ADDL        16(R15), DI
   990	ANDNL       BX, AX, BP
   991	LEAL        (DI)(DX*1), DI
   992	RORXL       $0x1b, AX, R12
   993	RORXL       $0x02, AX, DX
   994	VPALIGNR    $0x08, Y15, Y14, Y0
   995	ANDL        CX, AX
   996	XORL        BP, AX
   997	LEAL        (DI)(R12*1), DI
   998	ADDL        20(R15), SI
   999	ANDNL       CX, DI, BP
  1000	LEAL        (SI)(AX*1), SI
  1001	RORXL       $0x1b, DI, R12
  1002	RORXL       $0x02, DI, AX
  1003	VPXOR       Y12, Y13, Y13
  1004	ANDL        DX, DI
  1005	XORL        BP, DI
  1006	LEAL        (SI)(R12*1), SI
  1007	ADDL        24(R15), BX
  1008	ANDNL       DX, SI, BP
  1009	LEAL        (BX)(DI*1), BX
  1010	RORXL       $0x1b, SI, R12
  1011	RORXL       $0x02, SI, DI
  1012	VPXOR       Y5, Y0, Y0
  1013	ANDL        AX, SI
  1014	XORL        BP, SI
  1015	LEAL        (BX)(R12*1), BX
  1016	ADDL        28(R15), CX
  1017	ANDNL       AX, BX, BP
  1018	LEAL        (CX)(SI*1), CX
  1019	RORXL       $0x1b, BX, R12
  1020	RORXL       $0x02, BX, SI
  1021	VPXOR       Y0, Y13, Y13
  1022	ANDL        DI, BX
  1023	XORL        BP, BX
  1024	LEAL        (CX)(R12*1), CX
  1025	ADDL        48(R15), DX
  1026	ANDNL       DI, CX, BP
  1027	LEAL        (DX)(BX*1), DX
  1028	RORXL       $0x1b, CX, R12
  1029	RORXL       $0x02, CX, BX
  1030	VPSLLD      $0x02, Y13, Y0
  1031	ANDL        SI, CX
  1032	XORL        BP, CX
  1033	LEAL        (DX)(R12*1), DX
  1034	ADDL        52(R15), AX
  1035	ANDNL       SI, DX, BP
  1036	LEAL        (AX)(CX*1), AX
  1037	RORXL       $0x1b, DX, R12
  1038	RORXL       $0x02, DX, CX
  1039	VPSRLD      $0x1e, Y13, Y13
  1040	VPOR        Y13, Y0, Y13
  1041	ANDL        BX, DX
  1042	XORL        BP, DX
  1043	LEAL        (AX)(R12*1), AX
  1044	ADDL        56(R15), DI
  1045	ANDNL       BX, AX, BP
  1046	LEAL        (DI)(DX*1), DI
  1047	RORXL       $0x1b, AX, R12
  1048	RORXL       $0x02, AX, DX
  1049	ANDL        CX, AX
  1050	XORL        BP, AX
  1051	LEAL        (DI)(R12*1), DI
  1052	ADDL        60(R15), SI
  1053	ANDNL       CX, DI, BP
  1054	LEAL        (SI)(AX*1), SI
  1055	RORXL       $0x1b, DI, R12
  1056	RORXL       $0x02, DI, AX
  1057	VPADDD      64(R8), Y13, Y0
  1058	VMOVDQU     Y0, 320(R14)
  1059	ANDL        DX, DI
  1060	XORL        BP, DI
  1061	LEAL        (SI)(R12*1), SI
  1062	ADDL        80(R15), BX
  1063	ANDNL       DX, SI, BP
  1064	LEAL        (BX)(DI*1), BX
  1065	RORXL       $0x1b, SI, R12
  1066	RORXL       $0x02, SI, DI
  1067	VPALIGNR    $0x08, Y14, Y13, Y0
  1068	ANDL        AX, SI
  1069	XORL        BP, SI
  1070	LEAL        (BX)(R12*1), BX
  1071	ADDL        84(R15), CX
  1072	ANDNL       AX, BX, BP
  1073	LEAL        (CX)(SI*1), CX
  1074	RORXL       $0x1b, BX, R12
  1075	RORXL       $0x02, BX, SI
  1076	VPXOR       Y8, Y12, Y12
  1077	ANDL        DI, BX
  1078	XORL        BP, BX
  1079	LEAL        (CX)(R12*1), CX
  1080	ADDL        88(R15), DX
  1081	ANDNL       DI, CX, BP
  1082	LEAL        (DX)(BX*1), DX
  1083	RORXL       $0x1b, CX, R12
  1084	RORXL       $0x02, CX, BX
  1085	VPXOR       Y3, Y0, Y0
  1086	ANDL        SI, CX
  1087	XORL        BP, CX
  1088	LEAL        (DX)(R12*1), DX
  1089	ADDL        92(R15), AX
  1090	ANDNL       SI, DX, BP
  1091	LEAL        (AX)(CX*1), AX
  1092	RORXL       $0x1b, DX, R12
  1093	RORXL       $0x02, DX, CX
  1094	VPXOR       Y0, Y12, Y12
  1095	ANDL        BX, DX
  1096	XORL        BP, DX
  1097	LEAL        (AX)(R12*1), AX
  1098	ADDL        112(R15), DI
  1099	ANDNL       BX, AX, BP
  1100	LEAL        (DI)(DX*1), DI
  1101	RORXL       $0x1b, AX, R12
  1102	RORXL       $0x02, AX, DX
  1103	VPSLLD      $0x02, Y12, Y0
  1104	ANDL        CX, AX
  1105	XORL        BP, AX
  1106	LEAL        (DI)(R12*1), DI
  1107	ADDL        116(R15), SI
  1108	ANDNL       CX, DI, BP
  1109	LEAL        (SI)(AX*1), SI
  1110	RORXL       $0x1b, DI, R12
  1111	RORXL       $0x02, DI, AX
  1112	VPSRLD      $0x1e, Y12, Y12
  1113	VPOR        Y12, Y0, Y12
  1114	ANDL        DX, DI
  1115	XORL        BP, DI
  1116	LEAL        (SI)(R12*1), SI
  1117	ADDL        120(R15), BX
  1118	ANDNL       DX, SI, BP
  1119	LEAL        (BX)(DI*1), BX
  1120	RORXL       $0x1b, SI, R12
  1121	RORXL       $0x02, SI, DI
  1122	ANDL        AX, SI
  1123	XORL        BP, SI
  1124	LEAL        (BX)(R12*1), BX
  1125	ADDL        124(R15), CX
  1126	ANDNL       AX, BX, BP
  1127	LEAL        (CX)(SI*1), CX
  1128	RORXL       $0x1b, BX, R12
  1129	RORXL       $0x02, BX, SI
  1130	VPADDD      64(R8), Y12, Y0
  1131	VMOVDQU     Y0, 352(R14)
  1132	ANDL        DI, BX
  1133	XORL        BP, BX
  1134	LEAL        (CX)(R12*1), CX
  1135	ADDL        144(R15), DX
  1136	ANDNL       DI, CX, BP
  1137	LEAL        (DX)(BX*1), DX
  1138	RORXL       $0x1b, CX, R12
  1139	RORXL       $0x02, CX, BX
  1140	VPALIGNR    $0x08, Y13, Y12, Y0
  1141	ANDL        SI, CX
  1142	XORL        BP, CX
  1143	LEAL        (DX)(R12*1), DX
  1144	ADDL        148(R15), AX
  1145	ANDNL       SI, DX, BP
  1146	LEAL        (AX)(CX*1), AX
  1147	RORXL       $0x1b, DX, R12
  1148	RORXL       $0x02, DX, CX
  1149	VPXOR       Y7, Y8, Y8
  1150	ANDL        BX, DX
  1151	XORL        BP, DX
  1152	LEAL        (AX)(R12*1), AX
  1153	ADDL        152(R15), DI
  1154	ANDNL       BX, AX, BP
  1155	LEAL        (DI)(DX*1), DI
  1156	RORXL       $0x1b, AX, R12
  1157	RORXL       $0x02, AX, DX
  1158	VPXOR       Y15, Y0, Y0
  1159	ANDL        CX, AX
  1160	XORL        BP, AX
  1161	LEAL        (DI)(R12*1), DI
  1162	ADDL        156(R15), SI
  1163	LEAL        (SI)(AX*1), SI
  1164	RORXL       $0x1b, DI, R12
  1165	RORXL       $0x02, DI, AX
  1166	VPXOR       Y0, Y8, Y8
  1167	XORL        DX, DI
  1168	ADDL        R12, SI
  1169	XORL        CX, DI
  1170	ADDL        176(R15), BX
  1171	LEAL        (BX)(DI*1), BX
  1172	RORXL       $0x1b, SI, R12
  1173	RORXL       $0x02, SI, DI
  1174	VPSLLD      $0x02, Y8, Y0
  1175	XORL        AX, SI
  1176	ADDL        R12, BX
  1177	XORL        DX, SI
  1178	ADDL        180(R15), CX
  1179	LEAL        (CX)(SI*1), CX
  1180	RORXL       $0x1b, BX, R12
  1181	RORXL       $0x02, BX, SI
  1182	VPSRLD      $0x1e, Y8, Y8
  1183	VPOR        Y8, Y0, Y8
  1184	XORL        DI, BX
  1185	ADDL        R12, CX
  1186	XORL        AX, BX
  1187	ADDL        184(R15), DX
  1188	LEAL        (DX)(BX*1), DX
  1189	RORXL       $0x1b, CX, R12
  1190	RORXL       $0x02, CX, BX
  1191	XORL        SI, CX
  1192	ADDL        R12, DX
  1193	XORL        DI, CX
  1194	ADDL        188(R15), AX
  1195	LEAL        (AX)(CX*1), AX
  1196	RORXL       $0x1b, DX, R12
  1197	RORXL       $0x02, DX, CX
  1198	VPADDD      64(R8), Y8, Y0
  1199	VMOVDQU     Y0, 384(R14)
  1200	XORL        BX, DX
  1201	ADDL        R12, AX
  1202	XORL        SI, DX
  1203	ADDL        208(R15), DI
  1204	LEAL        (DI)(DX*1), DI
  1205	RORXL       $0x1b, AX, R12
  1206	RORXL       $0x02, AX, DX
  1207	VPALIGNR    $0x08, Y12, Y8, Y0
  1208	XORL        CX, AX
  1209	ADDL        R12, DI
  1210	XORL        BX, AX
  1211	ADDL        212(R15), SI
  1212	LEAL        (SI)(AX*1), SI
  1213	RORXL       $0x1b, DI, R12
  1214	RORXL       $0x02, DI, AX
  1215	VPXOR       Y5, Y7, Y7
  1216	XORL        DX, DI
  1217	ADDL        R12, SI
  1218	XORL        CX, DI
  1219	ADDL        216(R15), BX
  1220	LEAL        (BX)(DI*1), BX
  1221	RORXL       $0x1b, SI, R12
  1222	RORXL       $0x02, SI, DI
  1223	VPXOR       Y14, Y0, Y0
  1224	XORL        AX, SI
  1225	ADDL        R12, BX
  1226	XORL        DX, SI
  1227	ADDL        220(R15), CX
  1228	LEAL        (CX)(SI*1), CX
  1229	RORXL       $0x1b, BX, R12
  1230	RORXL       $0x02, BX, SI
  1231	VPXOR       Y0, Y7, Y7
  1232	XORL        DI, BX
  1233	ADDL        R12, CX
  1234	XORL        AX, BX
  1235	ADDL        240(R15), DX
  1236	LEAL        (DX)(BX*1), DX
  1237	RORXL       $0x1b, CX, R12
  1238	RORXL       $0x02, CX, BX
  1239	VPSLLD      $0x02, Y7, Y0
  1240	XORL        SI, CX
  1241	ADDL        R12, DX
  1242	XORL        DI, CX
  1243	ADDL        244(R15), AX
  1244	LEAL        (AX)(CX*1), AX
  1245	RORXL       $0x1b, DX, R12
  1246	RORXL       $0x02, DX, CX
  1247	VPSRLD      $0x1e, Y7, Y7
  1248	VPOR        Y7, Y0, Y7
  1249	XORL        BX, DX
  1250	ADDL        R12, AX
  1251	XORL        SI, DX
  1252	ADDL        248(R15), DI
  1253	LEAL        (DI)(DX*1), DI
  1254	RORXL       $0x1b, AX, R12
  1255	RORXL       $0x02, AX, DX
  1256	XORL        CX, AX
  1257	ADDL        R12, DI
  1258	XORL        BX, AX
  1259	ADDL        252(R15), SI
  1260	LEAL        (SI)(AX*1), SI
  1261	RORXL       $0x1b, DI, R12
  1262	RORXL       $0x02, DI, AX
  1263	VPADDD      64(R8), Y7, Y0
  1264	VMOVDQU     Y0, 416(R14)
  1265	XORL        DX, DI
  1266	ADDL        R12, SI
  1267	XORL        CX, DI
  1268	ADDL        272(R15), BX
  1269	LEAL        (BX)(DI*1), BX
  1270	RORXL       $0x1b, SI, R12
  1271	RORXL       $0x02, SI, DI
  1272	VPALIGNR    $0x08, Y8, Y7, Y0
  1273	XORL        AX, SI
  1274	ADDL        R12, BX
  1275	XORL        DX, SI
  1276	ADDL        276(R15), CX
  1277	LEAL        (CX)(SI*1), CX
  1278	RORXL       $0x1b, BX, R12
  1279	RORXL       $0x02, BX, SI
  1280	VPXOR       Y3, Y5, Y5
  1281	XORL        DI, BX
  1282	ADDL        R12, CX
  1283	XORL        AX, BX
  1284	ADDL        280(R15), DX
  1285	LEAL        (DX)(BX*1), DX
  1286	RORXL       $0x1b, CX, R12
  1287	RORXL       $0x02, CX, BX
  1288	VPXOR       Y13, Y0, Y0
  1289	XORL        SI, CX
  1290	ADDL        R12, DX
  1291	XORL        DI, CX
  1292	ADDL        284(R15), AX
  1293	LEAL        (AX)(CX*1), AX
  1294	RORXL       $0x1b, DX, R12
  1295	RORXL       $0x02, DX, CX
  1296	VPXOR       Y0, Y5, Y5
  1297	XORL        BX, DX
  1298	ADDL        R12, AX
  1299	XORL        SI, DX
  1300	ADDL        304(R15), DI
  1301	LEAL        (DI)(DX*1), DI
  1302	RORXL       $0x1b, AX, R12
  1303	RORXL       $0x02, AX, DX
  1304	VPSLLD      $0x02, Y5, Y0
  1305	XORL        CX, AX
  1306	ADDL        R12, DI
  1307	XORL        BX, AX
  1308	ADDL        308(R15), SI
  1309	LEAL        (SI)(AX*1), SI
  1310	RORXL       $0x1b, DI, R12
  1311	RORXL       $0x02, DI, AX
  1312	VPSRLD      $0x1e, Y5, Y5
  1313	VPOR        Y5, Y0, Y5
  1314	XORL        DX, DI
  1315	ADDL        R12, SI
  1316	XORL        CX, DI
  1317	ADDL        312(R15), BX
  1318	LEAL        (BX)(DI*1), BX
  1319	RORXL       $0x1b, SI, R12
  1320	RORXL       $0x02, SI, DI
  1321	XORL        AX, SI
  1322	ADDL        R12, BX
  1323	XORL        DX, SI
  1324	ADDL        316(R15), CX
  1325	VPADDD      64(R8), Y5, Y0
  1326	VMOVDQU     Y0, 448(R14)
  1327	LEAL        (CX)(SI*1), CX
  1328	MOVL        DI, BP
  1329	ORL         BX, BP
  1330	RORXL       $0x1b, BX, R12
  1331	RORXL       $0x02, BX, SI
  1332	ANDL        AX, BP
  1333	ANDL        DI, BX
  1334	ORL         BP, BX
  1335	ADDL        R12, CX
  1336	ADDL        336(R15), DX
  1337	VPALIGNR    $0x08, Y7, Y5, Y0
  1338	LEAL        (DX)(BX*1), DX
  1339	MOVL        SI, BP
  1340	ORL         CX, BP
  1341	RORXL       $0x1b, CX, R12
  1342	RORXL       $0x02, CX, BX
  1343	ANDL        DI, BP
  1344	ANDL        SI, CX
  1345	ORL         BP, CX
  1346	ADDL        R12, DX
  1347	ADDL        340(R15), AX
  1348	VPXOR       Y15, Y3, Y3
  1349	LEAL        (AX)(CX*1), AX
  1350	MOVL        BX, BP
  1351	ORL         DX, BP
  1352	RORXL       $0x1b, DX, R12
  1353	RORXL       $0x02, DX, CX
  1354	ANDL        SI, BP
  1355	ANDL        BX, DX
  1356	ORL         BP, DX
  1357	ADDL        R12, AX
  1358	ADDL        344(R15), DI
  1359	VPXOR       Y12, Y0, Y0
  1360	LEAL        (DI)(DX*1), DI
  1361	MOVL        CX, BP
  1362	ORL         AX, BP
  1363	RORXL       $0x1b, AX, R12
  1364	RORXL       $0x02, AX, DX
  1365	ANDL        BX, BP
  1366	ANDL        CX, AX
  1367	ORL         BP, AX
  1368	ADDL        R12, DI
  1369	ADDL        348(R15), SI
  1370	VPXOR       Y0, Y3, Y3
  1371	LEAL        (SI)(AX*1), SI
  1372	MOVL        DX, BP
  1373	ORL         DI, BP
  1374	RORXL       $0x1b, DI, R12
  1375	RORXL       $0x02, DI, AX
  1376	ANDL        CX, BP
  1377	ANDL        DX, DI
  1378	ORL         BP, DI
  1379	ADDL        R12, SI
  1380	ADDL        368(R15), BX
  1381	VPSLLD      $0x02, Y3, Y0
  1382	LEAL        (BX)(DI*1), BX
  1383	MOVL        AX, BP
  1384	ORL         SI, BP
  1385	RORXL       $0x1b, SI, R12
  1386	RORXL       $0x02, SI, DI
  1387	ANDL        DX, BP
  1388	ANDL        AX, SI
  1389	ORL         BP, SI
  1390	ADDL        R12, BX
  1391	ADDL        372(R15), CX
  1392	VPSRLD      $0x1e, Y3, Y3
  1393	VPOR        Y3, Y0, Y3
  1394	LEAL        (CX)(SI*1), CX
  1395	MOVL        DI, BP
  1396	ORL         BX, BP
  1397	RORXL       $0x1b, BX, R12
  1398	RORXL       $0x02, BX, SI
  1399	ANDL        AX, BP
  1400	ANDL        DI, BX
  1401	ORL         BP, BX
  1402	ADDL        R12, CX
  1403	ADDL        376(R15), DX
  1404	LEAL        (DX)(BX*1), DX
  1405	MOVL        SI, BP
  1406	ORL         CX, BP
  1407	RORXL       $0x1b, CX, R12
  1408	RORXL       $0x02, CX, BX
  1409	ANDL        DI, BP
  1410	ANDL        SI, CX
  1411	ORL         BP, CX
  1412	ADDL        R12, DX
  1413	ADDL        380(R15), AX
  1414	VPADDD      96(R8), Y3, Y0
  1415	VMOVDQU     Y0, 480(R14)
  1416	LEAL        (AX)(CX*1), AX
  1417	MOVL        BX, BP
  1418	ORL         DX, BP
  1419	RORXL       $0x1b, DX, R12
  1420	RORXL       $0x02, DX, CX
  1421	ANDL        SI, BP
  1422	ANDL        BX, DX
  1423	ORL         BP, DX
  1424	ADDL        R12, AX
  1425	ADDL        400(R15), DI
  1426	VPALIGNR    $0x08, Y5, Y3, Y0
  1427	LEAL        (DI)(DX*1), DI
  1428	MOVL        CX, BP
  1429	ORL         AX, BP
  1430	RORXL       $0x1b, AX, R12
  1431	RORXL       $0x02, AX, DX
  1432	ANDL        BX, BP
  1433	ANDL        CX, AX
  1434	ORL         BP, AX
  1435	ADDL        R12, DI
  1436	ADDL        404(R15), SI
  1437	VPXOR       Y14, Y15, Y15
  1438	LEAL        (SI)(AX*1), SI
  1439	MOVL        DX, BP
  1440	ORL         DI, BP
  1441	RORXL       $0x1b, DI, R12
  1442	RORXL       $0x02, DI, AX
  1443	ANDL        CX, BP
  1444	ANDL        DX, DI
  1445	ORL         BP, DI
  1446	ADDL        R12, SI
  1447	ADDL        408(R15), BX
  1448	VPXOR       Y8, Y0, Y0
  1449	LEAL        (BX)(DI*1), BX
  1450	MOVL        AX, BP
  1451	ORL         SI, BP
  1452	RORXL       $0x1b, SI, R12
  1453	RORXL       $0x02, SI, DI
  1454	ANDL        DX, BP
  1455	ANDL        AX, SI
  1456	ORL         BP, SI
  1457	ADDL        R12, BX
  1458	ADDL        412(R15), CX
  1459	VPXOR       Y0, Y15, Y15
  1460	LEAL        (CX)(SI*1), CX
  1461	MOVL        DI, BP
  1462	ORL         BX, BP
  1463	RORXL       $0x1b, BX, R12
  1464	RORXL       $0x02, BX, SI
  1465	ANDL        AX, BP
  1466	ANDL        DI, BX
  1467	ORL         BP, BX
  1468	ADDL        R12, CX
  1469	ADDL        432(R15), DX
  1470	VPSLLD      $0x02, Y15, Y0
  1471	LEAL        (DX)(BX*1), DX
  1472	MOVL        SI, BP
  1473	ORL         CX, BP
  1474	RORXL       $0x1b, CX, R12
  1475	RORXL       $0x02, CX, BX
  1476	ANDL        DI, BP
  1477	ANDL        SI, CX
  1478	ORL         BP, CX
  1479	ADDL        R12, DX
  1480	ADDL        436(R15), AX
  1481	VPSRLD      $0x1e, Y15, Y15
  1482	VPOR        Y15, Y0, Y15
  1483	LEAL        (AX)(CX*1), AX
  1484	MOVL        BX, BP
  1485	ORL         DX, BP
  1486	RORXL       $0x1b, DX, R12
  1487	RORXL       $0x02, DX, CX
  1488	ANDL        SI, BP
  1489	ANDL        BX, DX
  1490	ORL         BP, DX
  1491	ADDL        R12, AX
  1492	ADDL        440(R15), DI
  1493	LEAL        (DI)(DX*1), DI
  1494	MOVL        CX, BP
  1495	ORL         AX, BP
  1496	RORXL       $0x1b, AX, R12
  1497	RORXL       $0x02, AX, DX
  1498	ANDL        BX, BP
  1499	ANDL        CX, AX
  1500	ORL         BP, AX
  1501	ADDL        R12, DI
  1502	ADDL        444(R15), SI
  1503	VPADDD      96(R8), Y15, Y0
  1504	VMOVDQU     Y0, 512(R14)
  1505	LEAL        (SI)(AX*1), SI
  1506	MOVL        DX, BP
  1507	ORL         DI, BP
  1508	RORXL       $0x1b, DI, R12
  1509	RORXL       $0x02, DI, AX
  1510	ANDL        CX, BP
  1511	ANDL        DX, DI
  1512	ORL         BP, DI
  1513	ADDL        R12, SI
  1514	ADDL        464(R15), BX
  1515	VPALIGNR    $0x08, Y3, Y15, Y0
  1516	LEAL        (BX)(DI*1), BX
  1517	MOVL        AX, BP
  1518	ORL         SI, BP
  1519	RORXL       $0x1b, SI, R12
  1520	RORXL       $0x02, SI, DI
  1521	ANDL        DX, BP
  1522	ANDL        AX, SI
  1523	ORL         BP, SI
  1524	ADDL        R12, BX
  1525	ADDL        468(R15), CX
  1526	VPXOR       Y13, Y14, Y14
  1527	LEAL        (CX)(SI*1), CX
  1528	MOVL        DI, BP
  1529	ORL         BX, BP
  1530	RORXL       $0x1b, BX, R12
  1531	RORXL       $0x02, BX, SI
  1532	ANDL        AX, BP
  1533	ANDL        DI, BX
  1534	ORL         BP, BX
  1535	ADDL        R12, CX
  1536	ADDL        472(R15), DX
  1537	VPXOR       Y7, Y0, Y0
  1538	LEAL        (DX)(BX*1), DX
  1539	MOVL        SI, BP
  1540	ORL         CX, BP
  1541	RORXL       $0x1b, CX, R12
  1542	RORXL       $0x02, CX, BX
  1543	ANDL        DI, BP
  1544	ANDL        SI, CX
  1545	ORL         BP, CX
  1546	ADDL        R12, DX
  1547	ADDL        476(R15), AX
  1548	LEAL        (AX)(CX*1), AX
  1549	RORXL       $0x1b, DX, R12
  1550	RORXL       $0x02, DX, CX
  1551	VPXOR       Y0, Y14, Y14
  1552	XORL        BX, DX
  1553	ADDL        R12, AX
  1554	XORL        SI, DX
  1555	ADDQ        $0x80, R13
  1556	CMPQ        R13, R11
  1557	CMOVQCC     R8, R10
  1558	ADDL        496(R15), DI
  1559	LEAL        (DI)(DX*1), DI
  1560	RORXL       $0x1b, AX, R12
  1561	RORXL       $0x02, AX, DX
  1562	VPSLLD      $0x02, Y14, Y0
  1563	XORL        CX, AX
  1564	ADDL        R12, DI
  1565	XORL        BX, AX
  1566	ADDL        500(R15), SI
  1567	LEAL        (SI)(AX*1), SI
  1568	RORXL       $0x1b, DI, R12
  1569	RORXL       $0x02, DI, AX
  1570	VPSRLD      $0x1e, Y14, Y14
  1571	VPOR        Y14, Y0, Y14
  1572	XORL        DX, DI
  1573	ADDL        R12, SI
  1574	XORL        CX, DI
  1575	ADDL        504(R15), BX
  1576	LEAL        (BX)(DI*1), BX
  1577	RORXL       $0x1b, SI, R12
  1578	RORXL       $0x02, SI, DI
  1579	XORL        AX, SI
  1580	ADDL        R12, BX
  1581	XORL        DX, SI
  1582	ADDL        508(R15), CX
  1583	LEAL        (CX)(SI*1), CX
  1584	RORXL       $0x1b, BX, R12
  1585	RORXL       $0x02, BX, SI
  1586	VPADDD      96(R8), Y14, Y0
  1587	VMOVDQU     Y0, 544(R14)
  1588	XORL        DI, BX
  1589	ADDL        R12, CX
  1590	XORL        AX, BX
  1591	ADDL        528(R15), DX
  1592	LEAL        (DX)(BX*1), DX
  1593	RORXL       $0x1b, CX, R12
  1594	RORXL       $0x02, CX, BX
  1595	VPALIGNR    $0x08, Y15, Y14, Y0
  1596	XORL        SI, CX
  1597	ADDL        R12, DX
  1598	XORL        DI, CX
  1599	ADDL        532(R15), AX
  1600	LEAL        (AX)(CX*1), AX
  1601	RORXL       $0x1b, DX, R12
  1602	RORXL       $0x02, DX, CX
  1603	VPXOR       Y12, Y13, Y13
  1604	XORL        BX, DX
  1605	ADDL        R12, AX
  1606	XORL        SI, DX
  1607	ADDL        536(R15), DI
  1608	LEAL        (DI)(DX*1), DI
  1609	RORXL       $0x1b, AX, R12
  1610	RORXL       $0x02, AX, DX
  1611	VPXOR       Y5, Y0, Y0
  1612	XORL        CX, AX
  1613	ADDL        R12, DI
  1614	XORL        BX, AX
  1615	ADDL        540(R15), SI
  1616	LEAL        (SI)(AX*1), SI
  1617	RORXL       $0x1b, DI, R12
  1618	RORXL       $0x02, DI, AX
  1619	VPXOR       Y0, Y13, Y13
  1620	XORL        DX, DI
  1621	ADDL        R12, SI
  1622	XORL        CX, DI
  1623	ADDL        560(R15), BX
  1624	LEAL        (BX)(DI*1), BX
  1625	RORXL       $0x1b, SI, R12
  1626	RORXL       $0x02, SI, DI
  1627	VPSLLD      $0x02, Y13, Y0
  1628	XORL        AX, SI
  1629	ADDL        R12, BX
  1630	XORL        DX, SI
  1631	ADDL        564(R15), CX
  1632	LEAL        (CX)(SI*1), CX
  1633	RORXL       $0x1b, BX, R12
  1634	RORXL       $0x02, BX, SI
  1635	VPSRLD      $0x1e, Y13, Y13
  1636	VPOR        Y13, Y0, Y13
  1637	XORL        DI, BX
  1638	ADDL        R12, CX
  1639	XORL        AX, BX
  1640	ADDL        568(R15), DX
  1641	LEAL        (DX)(BX*1), DX
  1642	RORXL       $0x1b, CX, R12
  1643	RORXL       $0x02, CX, BX
  1644	XORL        SI, CX
  1645	ADDL        R12, DX
  1646	XORL        DI, CX
  1647	ADDL        572(R15), AX
  1648	LEAL        (AX)(CX*1), AX
  1649	RORXL       $0x1b, DX, R12
  1650	RORXL       $0x02, DX, CX
  1651	VPADDD      96(R8), Y13, Y0
  1652	VMOVDQU     Y0, 576(R14)
  1653	XORL        BX, DX
  1654	ADDL        R12, AX
  1655	XORL        SI, DX
  1656	ADDL        592(R15), DI
  1657	LEAL        (DI)(DX*1), DI
  1658	RORXL       $0x1b, AX, R12
  1659	RORXL       $0x02, AX, DX
  1660	VPALIGNR    $0x08, Y14, Y13, Y0
  1661	XORL        CX, AX
  1662	ADDL        R12, DI
  1663	XORL        BX, AX
  1664	ADDL        596(R15), SI
  1665	LEAL        (SI)(AX*1), SI
  1666	RORXL       $0x1b, DI, R12
  1667	RORXL       $0x02, DI, AX
  1668	VPXOR       Y8, Y12, Y12
  1669	XORL        DX, DI
  1670	ADDL        R12, SI
  1671	XORL        CX, DI
  1672	ADDL        600(R15), BX
  1673	LEAL        (BX)(DI*1), BX
  1674	RORXL       $0x1b, SI, R12
  1675	RORXL       $0x02, SI, DI
  1676	VPXOR       Y3, Y0, Y0
  1677	XORL        AX, SI
  1678	ADDL        R12, BX
  1679	XORL        DX, SI
  1680	ADDL        604(R15), CX
  1681	LEAL        (CX)(SI*1), CX
  1682	RORXL       $0x1b, BX, R12
  1683	RORXL       $0x02, BX, SI
  1684	VPXOR       Y0, Y12, Y12
  1685	XORL        DI, BX
  1686	ADDL        R12, CX
  1687	XORL        AX, BX
  1688	ADDL        624(R15), DX
  1689	LEAL        (DX)(BX*1), DX
  1690	RORXL       $0x1b, CX, R12
  1691	RORXL       $0x02, CX, BX
  1692	VPSLLD      $0x02, Y12, Y0
  1693	XORL        SI, CX
  1694	ADDL        R12, DX
  1695	XORL        DI, CX
  1696	ADDL        628(R15), AX
  1697	LEAL        (AX)(CX*1), AX
  1698	RORXL       $0x1b, DX, R12
  1699	RORXL       $0x02, DX, CX
  1700	VPSRLD      $0x1e, Y12, Y12
  1701	VPOR        Y12, Y0, Y12
  1702	XORL        BX, DX
  1703	ADDL        R12, AX
  1704	XORL        SI, DX
  1705	ADDL        632(R15), DI
  1706	LEAL        (DI)(DX*1), DI
  1707	RORXL       $0x1b, AX, R12
  1708	RORXL       $0x02, AX, DX
  1709	XORL        CX, AX
  1710	ADDL        R12, DI
  1711	XORL        BX, AX
  1712	ADDL        636(R15), SI
  1713	LEAL        (SI)(AX*1), SI
  1714	RORXL       $0x1b, DI, R12
  1715	VPADDD      96(R8), Y12, Y0
  1716	VMOVDQU     Y0, 608(R14)
  1717	ADDL        R12, SI
  1718	ADDL        (R9), SI
  1719	MOVL        SI, (R9)
  1720	ADDL        4(R9), DI
  1721	MOVL        DI, 4(R9)
  1722	ADDL        8(R9), DX
  1723	MOVL        DX, 8(R9)
  1724	ADDL        12(R9), CX
  1725	MOVL        CX, 12(R9)
  1726	ADDL        16(R9), BX
  1727	MOVL        BX, 16(R9)
  1728	MOVL        SI, R12
  1729	MOVL        DI, SI
  1730	MOVL        DX, DI
  1731	MOVL        BX, DX
  1732	MOVL        CX, AX
  1733	MOVL        R12, CX
  1734	XCHGQ       R15, R14
  1735	JMP         loop
  1736
  1737DATA K_XMM_AR<>+0(SB)/4, $0x5a827999 // K_XMM_AR: four 32-byte groups, each one 32-bit constant repeated 8x; group 0 (bytes 0-31) = SHA-1 K for rounds 0-19
  1738DATA K_XMM_AR<>+4(SB)/4, $0x5a827999
  1739DATA K_XMM_AR<>+8(SB)/4, $0x5a827999
  1740DATA K_XMM_AR<>+12(SB)/4, $0x5a827999
  1741DATA K_XMM_AR<>+16(SB)/4, $0x5a827999
  1742DATA K_XMM_AR<>+20(SB)/4, $0x5a827999
  1743DATA K_XMM_AR<>+24(SB)/4, $0x5a827999
  1744DATA K_XMM_AR<>+28(SB)/4, $0x5a827999
  1745DATA K_XMM_AR<>+32(SB)/4, $0x6ed9eba1 // group 1 (bytes 32-63) = SHA-1 K for rounds 20-39
  1746DATA K_XMM_AR<>+36(SB)/4, $0x6ed9eba1
  1747DATA K_XMM_AR<>+40(SB)/4, $0x6ed9eba1
  1748DATA K_XMM_AR<>+44(SB)/4, $0x6ed9eba1
  1749DATA K_XMM_AR<>+48(SB)/4, $0x6ed9eba1
  1750DATA K_XMM_AR<>+52(SB)/4, $0x6ed9eba1
  1751DATA K_XMM_AR<>+56(SB)/4, $0x6ed9eba1
  1752DATA K_XMM_AR<>+60(SB)/4, $0x6ed9eba1
  1753DATA K_XMM_AR<>+64(SB)/4, $0x8f1bbcdc // group 2 (bytes 64-95) = SHA-1 K for rounds 40-59; blockAVX2 reads it as 64(R8)
  1754DATA K_XMM_AR<>+68(SB)/4, $0x8f1bbcdc
  1755DATA K_XMM_AR<>+72(SB)/4, $0x8f1bbcdc
  1756DATA K_XMM_AR<>+76(SB)/4, $0x8f1bbcdc
  1757DATA K_XMM_AR<>+80(SB)/4, $0x8f1bbcdc
  1758DATA K_XMM_AR<>+84(SB)/4, $0x8f1bbcdc
  1759DATA K_XMM_AR<>+88(SB)/4, $0x8f1bbcdc
  1760DATA K_XMM_AR<>+92(SB)/4, $0x8f1bbcdc
  1761DATA K_XMM_AR<>+96(SB)/4, $0xca62c1d6 // group 3 (bytes 96-127) = SHA-1 K for rounds 60-79; blockAVX2 reads it as 96(R8)
  1762DATA K_XMM_AR<>+100(SB)/4, $0xca62c1d6
  1763DATA K_XMM_AR<>+104(SB)/4, $0xca62c1d6
  1764DATA K_XMM_AR<>+108(SB)/4, $0xca62c1d6
  1765DATA K_XMM_AR<>+112(SB)/4, $0xca62c1d6
  1766DATA K_XMM_AR<>+116(SB)/4, $0xca62c1d6
  1767DATA K_XMM_AR<>+120(SB)/4, $0xca62c1d6
  1768DATA K_XMM_AR<>+124(SB)/4, $0xca62c1d6
  1769GLOBL K_XMM_AR<>(SB), RODATA, $128 // file-local (<>), read-only, 128 bytes; blockAVX2 takes its address into R8 and uses 32-byte groups as VPADDD memory operands
  1770
  1771DATA BSWAP_SHUFB_CTL<>+0(SB)/4, $0x00010203 // BSWAP_SHUFB_CTL: byte-shuffle control; stored little-endian this word is index bytes 3,2,1,0, i.e. reverse the bytes of 32-bit word 0
  1772DATA BSWAP_SHUFB_CTL<>+4(SB)/4, $0x04050607 // index bytes 7,6,5,4: reverse word 1
  1773DATA BSWAP_SHUFB_CTL<>+8(SB)/4, $0x08090a0b // index bytes 11,10,9,8: reverse word 2
  1774DATA BSWAP_SHUFB_CTL<>+12(SB)/4, $0x0c0d0e0f // index bytes 15,14,13,12: reverse word 3
  1775DATA BSWAP_SHUFB_CTL<>+16(SB)/4, $0x00010203 // bytes 16-31 repeat the same 16-byte pattern for the upper half of a 32-byte register
  1776DATA BSWAP_SHUFB_CTL<>+20(SB)/4, $0x04050607
  1777DATA BSWAP_SHUFB_CTL<>+24(SB)/4, $0x08090a0b
  1778DATA BSWAP_SHUFB_CTL<>+28(SB)/4, $0x0c0d0e0f
  1779GLOBL BSWAP_SHUFB_CTL<>(SB), RODATA, $32 // file-local, read-only, 32 bytes; blockAVX2 loads it into Y10 and passes it to VPSHUFB on freshly loaded input
  1780
  1781// func blockSHANI(dig *digest, p []byte)
  1782// Requires: AVX, SHA, SSE2, SSE4.1, SSSE3
      //
      // Updates the five 32-bit state words stored at dig+0..dig+19 by running
      // the 80-round sequence below (SHA1RNDS4 / SHA1NEXTE / SHA1MSG1 / SHA1MSG2)
      // over each 64-byte chunk of p. Returns immediately, without touching dig,
      // when len(p) == 0.
      //
      // Register roles:
      //   DI = dig, SI = current chunk pointer, DX = p_base + p_len (end pointer)
      //   AX = 16-byte-aligned scratch inside the frame: (AX) = saved E, 16(AX) = saved ABCD
      //   X0 = ABCD working state; X5 and X6 alternate as the E operand of SHA1RNDS4
      //   X1-X4 = message words feeding the schedule; X7 = byte-reversal mask
      //
      // Frame: $48 bytes of locals (32 bytes of saves plus up to 15 bytes of
      // alignment slack) and $32 bytes of arguments (pointer + slice header).
      //
      // NOTE(review): the loop exits only when SI == DX exactly, so len(p) is
      // presumably always a multiple of 64 - confirm against the Go caller.
  1783TEXT ·blockSHANI(SB), $48-32
  1784	MOVQ dig+0(FP), DI
  1785	MOVQ p_base+8(FP), SI
  1786	MOVQ p_len+16(FP), DX
  1787	CMPQ DX, $0x00
  1788	JEQ  done // empty input: nothing to hash
  1789	ADDQ SI, DX // DX = one past the last input byte
  1790
  1791	// Allocate space on the stack for saving ABCD and E0, and align it to 16 bytes
  1792	LEAQ 15(SP), AX
  1793	MOVQ $0x000000000000000f, CX
  1794	NOTQ CX
  1795	ANDQ CX, AX // AX = (SP + 15) &^ 15, so the VMOVDQA stores below are aligned
  1796
  1797	// Load initial hash state
  1798	PINSRD  $0x03, 16(DI), X5 // 32-bit word at dig+16 -> dword lane 3 of X5 (other lanes still undefined)
  1799	VMOVDQU (DI), X0 // 16 bytes at dig+0 -> X0
  1800	PAND    upper_mask<>+0(SB), X5 // clear lanes 0-2, keeping only lane 3
  1801	PSHUFD  $0x1b, X0, X0 // 0x1b reverses the order of the four dwords
  1802	VMOVDQA shuffle_mask<>+0(SB), X7 // X7 = 16-byte reversal control for PSHUFB
  1803
  1804loop:
  1805	// Save ABCD and E working values
  1806	VMOVDQA X5, (AX)
  1807	VMOVDQA X0, 16(AX)
  1808
  1809	// Rounds 0-3
  1810	VMOVDQU   (SI), X1 // message bytes 0-15 of this chunk
  1811	PSHUFB    X7, X1 // reverse all 16 bytes
  1812	PADDD     X1, X5 // first group only: E (lane 3) is added to the message words directly
  1813	VMOVDQA   X0, X6 // keep pre-round ABCD; the next SHA1NEXTE derives E from it
  1814	SHA1RNDS4 $0x00, X5, X0 // four rounds; immediate 0 is used for rounds 0-19
  1815
  1816	// Rounds 4-7
  1817	VMOVDQU   16(SI), X2 // message bytes 16-31
  1818	PSHUFB    X7, X2
  1819	SHA1NEXTE X2, X6 // X6 = message words + next E computed from the ABCD copy held in X6
  1820	VMOVDQA   X0, X5
  1821	SHA1RNDS4 $0x00, X6, X0
  1822	SHA1MSG1  X2, X1 // start building the schedule words for rounds 16-19 in X1
  1823
  1824	// Rounds 8-11
  1825	VMOVDQU   32(SI), X3 // message bytes 32-47
  1826	PSHUFB    X7, X3
  1827	SHA1NEXTE X3, X5
  1828	VMOVDQA   X0, X6
  1829	SHA1RNDS4 $0x00, X5, X0
  1830	SHA1MSG1  X3, X2
  1831	PXOR      X3, X1
  1832
  1833	// Rounds 12-15
  1834	VMOVDQU   48(SI), X4 // message bytes 48-63
  1835	PSHUFB    X7, X4
  1836	SHA1NEXTE X4, X6
  1837	VMOVDQA   X0, X5
  1838	SHA1MSG2  X4, X1 // finishes the SHA1MSG1 / PXOR / SHA1MSG2 chain: X1 is consumed by rounds 16-19
  1839	SHA1RNDS4 $0x00, X6, X0
  1840	SHA1MSG1  X4, X3
  1841	PXOR      X4, X2
  1842
  1843	// Rounds 16-19
      	// From here on every group repeats the same shape, with X1-X4 rotating
      	// through the schedule chain and X5/X6 swapping roles each group.
  1844	SHA1NEXTE X1, X5
  1845	VMOVDQA   X0, X6
  1846	SHA1MSG2  X1, X2
  1847	SHA1RNDS4 $0x00, X5, X0
  1848	SHA1MSG1  X1, X4
  1849	PXOR      X1, X3
  1850
  1851	// Rounds 20-23
  1852	SHA1NEXTE X2, X6
  1853	VMOVDQA   X0, X5
  1854	SHA1MSG2  X2, X3
  1855	SHA1RNDS4 $0x01, X6, X0 // immediate 1 is used for rounds 20-39
  1856	SHA1MSG1  X2, X1
  1857	PXOR      X2, X4
  1858
  1859	// Rounds 24-27
  1860	SHA1NEXTE X3, X5
  1861	VMOVDQA   X0, X6
  1862	SHA1MSG2  X3, X4
  1863	SHA1RNDS4 $0x01, X5, X0
  1864	SHA1MSG1  X3, X2
  1865	PXOR      X3, X1
  1866
  1867	// Rounds 28-31
  1868	SHA1NEXTE X4, X6
  1869	VMOVDQA   X0, X5
  1870	SHA1MSG2  X4, X1
  1871	SHA1RNDS4 $0x01, X6, X0
  1872	SHA1MSG1  X4, X3
  1873	PXOR      X4, X2
  1874
  1875	// Rounds 32-35
  1876	SHA1NEXTE X1, X5
  1877	VMOVDQA   X0, X6
  1878	SHA1MSG2  X1, X2
  1879	SHA1RNDS4 $0x01, X5, X0
  1880	SHA1MSG1  X1, X4
  1881	PXOR      X1, X3
  1882
  1883	// Rounds 36-39
  1884	SHA1NEXTE X2, X6
  1885	VMOVDQA   X0, X5
  1886	SHA1MSG2  X2, X3
  1887	SHA1RNDS4 $0x01, X6, X0
  1888	SHA1MSG1  X2, X1
  1889	PXOR      X2, X4
  1890
  1891	// Rounds 40-43
  1892	SHA1NEXTE X3, X5
  1893	VMOVDQA   X0, X6
  1894	SHA1MSG2  X3, X4
  1895	SHA1RNDS4 $0x02, X5, X0 // immediate 2 is used for rounds 40-59
  1896	SHA1MSG1  X3, X2
  1897	PXOR      X3, X1
  1898
  1899	// Rounds 44-47
  1900	SHA1NEXTE X4, X6
  1901	VMOVDQA   X0, X5
  1902	SHA1MSG2  X4, X1
  1903	SHA1RNDS4 $0x02, X6, X0
  1904	SHA1MSG1  X4, X3
  1905	PXOR      X4, X2
  1906
  1907	// Rounds 48-51
  1908	SHA1NEXTE X1, X5
  1909	VMOVDQA   X0, X6
  1910	SHA1MSG2  X1, X2
  1911	SHA1RNDS4 $0x02, X5, X0
  1912	SHA1MSG1  X1, X4
  1913	PXOR      X1, X3
  1914
  1915	// Rounds 52-55
  1916	SHA1NEXTE X2, X6
  1917	VMOVDQA   X0, X5
  1918	SHA1MSG2  X2, X3
  1919	SHA1RNDS4 $0x02, X6, X0
  1920	SHA1MSG1  X2, X1
  1921	PXOR      X2, X4
  1922
  1923	// Rounds 56-59
  1924	SHA1NEXTE X3, X5
  1925	VMOVDQA   X0, X6
  1926	SHA1MSG2  X3, X4
  1927	SHA1RNDS4 $0x02, X5, X0
  1928	SHA1MSG1  X3, X2
  1929	PXOR      X3, X1
  1930
  1931	// Rounds 60-63
  1932	SHA1NEXTE X4, X6
  1933	VMOVDQA   X0, X5
  1934	SHA1MSG2  X4, X1
  1935	SHA1RNDS4 $0x03, X6, X0 // immediate 3 is used for rounds 60-79
  1936	SHA1MSG1  X4, X3
  1937	PXOR      X4, X2
  1938
  1939	// Rounds 64-67
  1940	SHA1NEXTE X1, X5
  1941	VMOVDQA   X0, X6
  1942	SHA1MSG2  X1, X2
  1943	SHA1RNDS4 $0x03, X5, X0
  1944	SHA1MSG1  X1, X4
  1945	PXOR      X1, X3
  1946
  1947	// Rounds 68-71
  1948	SHA1NEXTE X2, X6
  1949	VMOVDQA   X0, X5
  1950	SHA1MSG2  X2, X3
  1951	SHA1RNDS4 $0x03, X6, X0
  1952	PXOR      X2, X4 // no SHA1MSG1 from this group on: no new schedule chains are started
  1953
  1954	// Rounds 72-75
  1955	SHA1NEXTE X3, X5
  1956	VMOVDQA   X0, X6
  1957	SHA1MSG2  X3, X4
  1958	SHA1RNDS4 $0x03, X5, X0
  1959
  1960	// Rounds 76-79
  1961	SHA1NEXTE X4, X6
  1962	VMOVDQA   X0, X5
  1963	SHA1RNDS4 $0x03, X6, X0
  1964
  1965	// Add saved E and ABCD
  1966	SHA1NEXTE (AX), X5 // X5 = E saved at the top of the loop + final E derived from X5
  1967	PADDD     16(AX), X0 // X0 += ABCD saved at the top of the loop
  1968
  1969	// Check if we are done, if not return to the loop
  1970	ADDQ $0x40, SI // advance one 64-byte chunk
  1971	CMPQ SI, DX
  1972	JNE  loop // equality test only: exits when SI lands exactly on the end pointer
  1973
  1974	// Write the hash state back to digest
  1975	PSHUFD  $0x1b, X0, X0 // undo the dword reversal applied when the state was loaded
  1976	VMOVDQU X0, (DI)
  1977	PEXTRD  $0x03, X5, 16(DI) // dword lane 3 of X5 -> dig+16
  1978
  1979done:
  1980	RET
  1981
  1982DATA upper_mask<>+0(SB)/8, $0x0000000000000000 // upper_mask: low 64 bits all zero
  1983DATA upper_mask<>+8(SB)/8, $0xffffffff00000000 // only bits 96-127 set, i.e. just the highest 32-bit lane
  1984GLOBL upper_mask<>(SB), RODATA, $16 // file-local, read-only, 16 bytes; blockSHANI ANDs X5 with it so that only dword lane 3 survives
  1985
  1986DATA shuffle_mask<>+0(SB)/8, $0x08090a0b0c0d0e0f // shuffle_mask: stored little-endian, bytes 0-7 hold indices 15,14,...,8
  1987DATA shuffle_mask<>+8(SB)/8, $0x0001020304050607 // bytes 8-15 hold indices 7,6,...,0, so byte i selects source byte 15-i
  1988GLOBL shuffle_mask<>(SB), RODATA, $16 // file-local, read-only, 16 bytes; blockSHANI keeps it in X7 as the PSHUFB control that reverses a whole 16-byte vector

View as plain text