...

Text file src/crypto/internal/fips140/aes/gcm/gcm_amd64.s

Documentation: crypto/internal/fips140/aes/gcm

     1// Code generated by command: go run gcm_amd64_asm.go -out ../../gcm_amd64.s -pkg aes. DO NOT EDIT.
     2
     3//go:build !purego
     4
     5#include "textflag.h"
     6
     7// func gcmAesFinish(productTable *[256]byte, tagMask *[16]byte, T *[16]byte, pLen uint64, dLen uint64)
     8// Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3
      //
      // Finalizes the value stored at T in place: XORs in a block built from
      // pLen*8 (low qword) and dLen*8 (high qword), multiplies by the table entry
      // at productTable+224 using a three-product (Karatsuba) carry-less multiply,
      // reduces with gcmPoly, reverses the byte order, XORs with tagMask and
      // writes the 16-byte result back to T.
      //
      // Registers: DI = productTable, SI = tagMask, DX = T, AX = pLen, CX = dLen,
      // X8 = accumulator / low product, X9 = high product, X10 = middle product,
      // X11 = scratch, X13 = tag mask, X14 = gcmPoly, X15 = bswapMask.
     9TEXT ·gcmAesFinish(SB), NOSPLIT, $0-40
    10	MOVQ      productTable+0(FP), DI
    11	MOVQ      tagMask+8(FP), SI
    12	MOVQ      T+16(FP), DX
    13	MOVQ      pLen+24(FP), AX
    14	MOVQ      dLen+32(FP), CX
    15	MOVOU     (DX), X8  // X8 = current accumulator read from T
    16	MOVOU     (SI), X13  // X13 = tag mask
    17	MOVOU     bswapMask<>+0(SB), X15
    18	MOVOU     gcmPoly<>+0(SB), X14
    19	SHLQ      $0x03, AX  // pLen * 8
    20	SHLQ      $0x03, CX  // dLen * 8
    21	MOVQ      AX, X0  // X0 low qword = pLen*8
    22	PINSRQ    $0x01, CX, X0  // X0 high qword = dLen*8
    23	PXOR      X8, X0  // fold the length block into the accumulator
    24	MOVOU     224(DI), X8  // multiplier: table entry at offset 224
    25	MOVOU     240(DI), X10  // its companion value at offset 240 (middle-term operand)
    26	MOVOU     X8, X9
    27	PCLMULQDQ $0x00, X0, X8  // X8 = low qword x low qword
    28	PCLMULQDQ $0x11, X0, X9  // X9 = high qword x high qword
    29	PSHUFD    $0x4e, X0, X11  // X11 = X0 with its qwords swapped
    30	PXOR      X0, X11  // X11 = high ^ low of X0 in both qwords
    31	PCLMULQDQ $0x00, X11, X10  // X10 = middle product
    32	PXOR      X8, X10
    33	PXOR      X9, X10  // X10 ^= low ^ high products -> middle term
    34	MOVOU     X10, X11
    35	PSRLDQ    $0x08, X10  // upper half of the middle term ...
    36	PSLLDQ    $0x08, X11  // ... and lower half, moved into position
    37	PXOR      X10, X9  // X9 = upper 128 bits of the 256-bit product
    38	PXOR      X11, X8  // X8 = lower 128 bits of the 256-bit product
    39	MOVOU     X14, X11  // first reduction step
    40	PCLMULQDQ $0x01, X8, X11  // low qword of X8 x high qword of gcmPoly
    41	PSHUFD    $0x4e, X8, X8
    42	PXOR      X11, X8
    43	MOVOU     X14, X11  // second reduction step, same shape as the first
    44	PCLMULQDQ $0x01, X8, X11
    45	PSHUFD    $0x4e, X8, X8
    46	PXOR      X11, X8
    47	PXOR      X9, X8  // combine with the upper half
    48	PSHUFB    X15, X8  // reverse byte order
    49	PXOR      X13, X8  // apply the tag mask
    50	MOVOU     X8, (DX)  // write the result back to T
    51	RET
    52
    53DATA bswapMask<>+0(SB)/8, $0x08090a0b0c0d0e0f  // mask bytes 0..7 = 0x0f, 0x0e, ..., 0x08
    54DATA bswapMask<>+8(SB)/8, $0x0001020304050607  // mask bytes 8..15 = 0x07, 0x06, ..., 0x00
    55GLOBL bswapMask<>(SB), RODATA|NOPTR, $16  // 16-byte read-only PSHUFB control: byte i selects source byte 15-i (full byte reversal)
    56
    57DATA gcmPoly<>+0(SB)/8, $0x0000000000000001  // low qword
    58DATA gcmPoly<>+8(SB)/8, $0xc200000000000000  // high qword: the operand selected by the PCLMULQDQ $0x01 reduction steps
    59GLOBL gcmPoly<>(SB), RODATA|NOPTR, $16  // 16-byte read-only constant; the routines in this file load it into X14
    60
    61// func gcmAesInit(productTable *[256]byte, ks []uint32)
    62// Requires: AES, PCLMULQDQ, SSE2, SSSE3
      //
      // Fills the 256-byte productTable from the AES key schedule ks.
      // The table is eight 32-byte entries; each entry is a 16-byte value followed
      // by the XOR of that value's two 64-bit halves (the operand for the middle
      // product of the three-product multiply). The entry at offset 224 is the
      // doubled hash key; every loop iteration multiplies the previous entry by it
      // and stores the result 32 bytes lower, so the last one lands at offset 0.
      //
      // Registers: DI = productTable (walks downward in the loop), SI = ks,
      // DX = ks_len/4 - 1 (compared with 12 to choose the 10/12/14-round tail),
      // AX = loop counter, X0 = doubled hash key, X1 = its half-XOR,
      // X2/X3 = most recent power and its half-XOR, X14 = gcmPoly, X15 = bswapMask.
    63TEXT ·gcmAesInit(SB), NOSPLIT, $0-32
    64	MOVQ  productTable+0(FP), DI
    65	MOVQ  ks_base+8(FP), SI
    66	MOVQ  ks_len+16(FP), DX
    67	SHRQ  $0x02, DX  // ks_len / 4 = number of 16-byte round keys
    68	DECQ  DX  // minus one
    69	MOVOU bswapMask<>+0(SB), X15
    70	MOVOU gcmPoly<>+0(SB), X14
    71
    72	// Encrypt block 0, with the AES key to generate the hash key H
    73	MOVOU  (SI), X0  // X0 = round key 0 (an all-zero block XOR round key 0)
    74	MOVOU  16(SI), X11
    75	AESENC X11, X0
    76	MOVOU  32(SI), X11
    77	AESENC X11, X0
    78	MOVOU  48(SI), X11
    79	AESENC X11, X0
    80	MOVOU  64(SI), X11
    81	AESENC X11, X0
    82	MOVOU  80(SI), X11
    83	AESENC X11, X0
    84	MOVOU  96(SI), X11
    85	AESENC X11, X0
    86	MOVOU  112(SI), X11
    87	AESENC X11, X0
    88	MOVOU  128(SI), X11
    89	AESENC X11, X0
    90	MOVOU  144(SI), X11
    91	AESENC X11, X0
    92	MOVOU  160(SI), X11
    93	CMPQ   DX, $0x0c
    94	JB     initEncLast  // DX < 12: the key at offset 160 is the final round key
    95	AESENC X11, X0
    96	MOVOU  176(SI), X11
    97	AESENC X11, X0
    98	MOVOU  192(SI), X11
    99	JE     initEncLast  // still testing the CMPQ above (AESENC/MOVOU leave flags alone): DX == 12
   100	AESENC X11, X0
   101	MOVOU  208(SI), X11
   102	AESENC X11, X0
   103	MOVOU  224(SI), X11
   104
   105initEncLast:
   106	AESENCLAST X11, X0
   107	PSHUFB     X15, X0  // reverse byte order of the encrypted block
   108
   109	// H * 2
   110	PSHUFD $0xff, X0, X11  // broadcast the top dword
   111	MOVOU  X0, X12
   112	PSRAL  $0x1f, X11  // all ones if the top bit of X0 is set, else zero
   113	PAND   X14, X11  // gcmPoly if the top bit was set, else zero
   114	PSRLL  $0x1f, X12  // top bit of each dword ...
   115	PSLLDQ $0x04, X12  // ... moved up one dword to act as the carry into the next
   116	PSLLL  $0x01, X0  // shift every dword left by one
   117	PXOR   X11, X0  // apply the conditional gcmPoly
   118	PXOR   X12, X0  // apply the cross-dword carries
   119
   120	// Karatsuba pre-computations
   121	MOVOU  X0, 224(DI)  // table entry at offset 224
   122	PSHUFD $0x4e, X0, X1  // X1 = X0 with its qwords swapped
   123	PXOR   X0, X1  // X1 = high ^ low of X0 in both qwords
   124	MOVOU  X1, 240(DI)
   125	MOVOU  X0, X2
   126	MOVOU  X1, X3
   127
   128	// Now prepare powers of H and pre-computations for them
   129	MOVQ $0x00000007, AX  // seven more entries to produce
   130
   131initLoop:
   132	MOVOU     X2, X11
   133	MOVOU     X2, X12
   134	MOVOU     X3, X13
   135	PCLMULQDQ $0x00, X0, X11  // X11 = low x low
   136	PCLMULQDQ $0x11, X0, X12  // X12 = high x high
   137	PCLMULQDQ $0x00, X1, X13  // X13 = product of the two half-XORs
   138	PXOR      X11, X13
   139	PXOR      X12, X13  // X13 = middle term
   140	MOVOU     X13, X4
   141	PSLLDQ    $0x08, X4
   142	PSRLDQ    $0x08, X13
   143	PXOR      X4, X11  // X11 = lower 128 bits of the product
   144	PXOR      X13, X12  // X12 = upper 128 bits of the product
   145	MOVOU     X14, X2  // first reduction step
   146	PCLMULQDQ $0x01, X11, X2
   147	PSHUFD    $0x4e, X11, X11
   148	PXOR      X2, X11
   149	MOVOU     X14, X2  // second reduction step
   150	PCLMULQDQ $0x01, X11, X2
   151	PSHUFD    $0x4e, X11, X11
   152	PXOR      X11, X2
   153	PXOR      X12, X2  // X2 = reduced product, the next table value
   154	MOVOU     X2, 192(DI)
   155	PSHUFD    $0x4e, X2, X3
   156	PXOR      X2, X3  // X3 = high ^ low of the new value
   157	MOVOU     X3, 208(DI)
   158	DECQ      AX
   159	LEAQ      -32(DI), DI  // step down one entry; LEAQ keeps the flags set by DECQ
   160	JNE       initLoop  // loop until AX reaches zero
   161	RET
   162
   163// func gcmAesData(productTable *[256]byte, data []byte, T *[16]byte)
   164// Requires: PCLMULQDQ, SSE2, SSE4.1, SSSE3
      //
      // Hashes data with productTable and stores the 16-byte accumulator to T.
      // The accumulator starts at zero; the previous contents of T are not read.
      // With empty data, T is set to all zero bytes.
      //
      // Paths: a 13-byte input takes a dedicated load sequence; inputs of 128
      // bytes or more are consumed 128 bytes at a time against all eight table
      // entries; remaining whole 16-byte blocks use the entry at offset 224; a
      // final partial block is loaded byte by byte into a zero-padded register.
      //
      // Registers: DI = productTable, SI = data pointer, DX = bytes remaining,
      // CX = T, X8/X9/X10 = low/high/middle accumulators, X12/X13 = table entry
      // at 224/240 (single-block path), X14 = gcmPoly, X15 = bswapMask.
   165TEXT ·gcmAesData(SB), NOSPLIT, $0-40
   166	MOVQ  productTable+0(FP), DI
   167	MOVQ  data_base+8(FP), SI
   168	MOVQ  data_len+16(FP), DX
   169	MOVQ  T+32(FP), CX
   170	PXOR  X8, X8  // accumulator = 0
   171	MOVOU bswapMask<>+0(SB), X15
   172	MOVOU gcmPoly<>+0(SB), X14
   173	TESTQ DX, DX
   174	JEQ   dataBail  // nothing to hash
   175	CMPQ  DX, $0x0d
   176	JE    dataTLS  // exactly 13 bytes
   177	CMPQ  DX, $0x80
   178	JB    startSinglesLoop
   179	JMP   dataOctaLoop
   180
   181dataTLS:
   182	MOVOU  224(DI), X12
   183	MOVOU  240(DI), X13
   184	PXOR   X0, X0
   185	MOVQ   (SI), X0  // bytes 0..7
   186	PINSRD $0x02, 8(SI), X0  // bytes 8..11
   187	PINSRB $0x0c, 12(SI), X0  // byte 12; bytes 13..15 stay zero
   188	XORQ   DX, DX  // no bytes left, so the loop after dataMul falls through to dataBail
   189	JMP    dataMul
   190
   191dataOctaLoop:
   192	CMPQ      DX, $0x80
   193	JB        startSinglesLoop
   194	SUBQ      $0x80, DX
   195	MOVOU     (SI), X0  // load eight 16-byte blocks
   196	MOVOU     16(SI), X1
   197	MOVOU     32(SI), X2
   198	MOVOU     48(SI), X3
   199	MOVOU     64(SI), X4
   200	MOVOU     80(SI), X5
   201	MOVOU     96(SI), X6
   202	MOVOU     112(SI), X7
   203	LEAQ      128(SI), SI
   204	PSHUFB    X15, X0  // reverse byte order of each block
   205	PSHUFB    X15, X1
   206	PSHUFB    X15, X2
   207	PSHUFB    X15, X3
   208	PSHUFB    X15, X4
   209	PSHUFB    X15, X5
   210	PSHUFB    X15, X6
   211	PSHUFB    X15, X7
   212	PXOR      X8, X0  // fold the accumulator into the first block
   213	MOVOU     (DI), X8  // block 0 x table entry at offset 0
   214	MOVOU     16(DI), X10
   215	MOVOU     X8, X9
   216	PSHUFD    $0x4e, X0, X12
   217	PXOR      X0, X12
   218	PCLMULQDQ $0x00, X0, X8
   219	PCLMULQDQ $0x11, X0, X9
   220	PCLMULQDQ $0x00, X12, X10
   221	MOVOU     32(DI), X12  // block 1 x table entry at offset 32, accumulated into X8/X9/X10
   222	MOVOU     X12, X13
   223	PCLMULQDQ $0x00, X1, X12
   224	PXOR      X12, X8
   225	PCLMULQDQ $0x11, X1, X13
   226	PXOR      X13, X9
   227	PSHUFD    $0x4e, X1, X12
   228	PXOR      X12, X1
   229	MOVOU     48(DI), X12
   230	PCLMULQDQ $0x00, X1, X12
   231	PXOR      X12, X10
   232	MOVOU     64(DI), X12  // block 2 x table entry at offset 64
   233	MOVOU     X12, X13
   234	PCLMULQDQ $0x00, X2, X12
   235	PXOR      X12, X8
   236	PCLMULQDQ $0x11, X2, X13
   237	PXOR      X13, X9
   238	PSHUFD    $0x4e, X2, X12
   239	PXOR      X12, X2
   240	MOVOU     80(DI), X12
   241	PCLMULQDQ $0x00, X2, X12
   242	PXOR      X12, X10
   243	MOVOU     96(DI), X12  // block 3 x table entry at offset 96
   244	MOVOU     X12, X13
   245	PCLMULQDQ $0x00, X3, X12
   246	PXOR      X12, X8
   247	PCLMULQDQ $0x11, X3, X13
   248	PXOR      X13, X9
   249	PSHUFD    $0x4e, X3, X12
   250	PXOR      X12, X3
   251	MOVOU     112(DI), X12
   252	PCLMULQDQ $0x00, X3, X12
   253	PXOR      X12, X10
   254	MOVOU     128(DI), X12  // block 4 x table entry at offset 128
   255	MOVOU     X12, X13
   256	PCLMULQDQ $0x00, X4, X12
   257	PXOR      X12, X8
   258	PCLMULQDQ $0x11, X4, X13
   259	PXOR      X13, X9
   260	PSHUFD    $0x4e, X4, X12
   261	PXOR      X12, X4
   262	MOVOU     144(DI), X12
   263	PCLMULQDQ $0x00, X4, X12
   264	PXOR      X12, X10
   265	MOVOU     160(DI), X12  // block 5 x table entry at offset 160
   266	MOVOU     X12, X13
   267	PCLMULQDQ $0x00, X5, X12
   268	PXOR      X12, X8
   269	PCLMULQDQ $0x11, X5, X13
   270	PXOR      X13, X9
   271	PSHUFD    $0x4e, X5, X12
   272	PXOR      X12, X5
   273	MOVOU     176(DI), X12
   274	PCLMULQDQ $0x00, X5, X12
   275	PXOR      X12, X10
   276	MOVOU     192(DI), X12  // block 6 x table entry at offset 192
   277	MOVOU     X12, X13
   278	PCLMULQDQ $0x00, X6, X12
   279	PXOR      X12, X8
   280	PCLMULQDQ $0x11, X6, X13
   281	PXOR      X13, X9
   282	PSHUFD    $0x4e, X6, X12
   283	PXOR      X12, X6
   284	MOVOU     208(DI), X12
   285	PCLMULQDQ $0x00, X6, X12
   286	PXOR      X12, X10
   287	MOVOU     224(DI), X12  // block 7 x table entry at offset 224
   288	MOVOU     X12, X13
   289	PCLMULQDQ $0x00, X7, X12
   290	PXOR      X12, X8
   291	PCLMULQDQ $0x11, X7, X13
   292	PXOR      X13, X9
   293	PSHUFD    $0x4e, X7, X12
   294	PXOR      X12, X7
   295	MOVOU     240(DI), X12
   296	PCLMULQDQ $0x00, X7, X12
   297	PXOR      X12, X10
   298	PXOR      X8, X10  // turn the summed middle products into the middle term
   299	PXOR      X9, X10
   300	MOVOU     X10, X11
   301	PSRLDQ    $0x08, X10
   302	PSLLDQ    $0x08, X11
   303	PXOR      X10, X9  // X9 = upper 128 bits
   304	PXOR      X11, X8  // X8 = lower 128 bits
   305	MOVOU     X14, X11  // first reduction step
   306	PCLMULQDQ $0x01, X8, X11
   307	PSHUFD    $0x4e, X8, X8
   308	PXOR      X11, X8
   309	MOVOU     X14, X11  // second reduction step
   310	PCLMULQDQ $0x01, X8, X11
   311	PSHUFD    $0x4e, X8, X8
   312	PXOR      X11, X8
   313	PXOR      X9, X8  // X8 = reduced accumulator
   314	JMP       dataOctaLoop
   315
   316startSinglesLoop:
   317	MOVOU 224(DI), X12  // the single-block path always multiplies by the entry at 224
   318	MOVOU 240(DI), X13
   319
   320dataSinglesLoop:
   321	CMPQ  DX, $0x10
   322	JB    dataEnd
   323	SUBQ  $0x10, DX
   324	MOVOU (SI), X0
   325
   326dataMul:
   327	PSHUFB    X15, X0  // reverse byte order of the block
   328	PXOR      X8, X0  // fold in the accumulator
   329	MOVOU     X12, X8
   330	MOVOU     X13, X10
   331	MOVOU     X12, X9
   332	PSHUFD    $0x4e, X0, X11
   333	PXOR      X0, X11
   334	PCLMULQDQ $0x00, X0, X8  // low x low
   335	PCLMULQDQ $0x11, X0, X9  // high x high
   336	PCLMULQDQ $0x00, X11, X10  // middle product
   337	PXOR      X8, X10
   338	PXOR      X9, X10
   339	MOVOU     X10, X11
   340	PSRLDQ    $0x08, X10
   341	PSLLDQ    $0x08, X11
   342	PXOR      X10, X9
   343	PXOR      X11, X8
   344	MOVOU     X14, X11  // first reduction step
   345	PCLMULQDQ $0x01, X8, X11
   346	PSHUFD    $0x4e, X8, X8
   347	PXOR      X11, X8
   348	MOVOU     X14, X11  // second reduction step
   349	PCLMULQDQ $0x01, X8, X11
   350	PSHUFD    $0x4e, X8, X8
   351	PXOR      X11, X8
   352	PXOR      X9, X8
   353	LEAQ      16(SI), SI
   354	JMP       dataSinglesLoop
   355
   356dataEnd:
   357	TESTQ DX, DX
   358	JEQ   dataBail
   359	PXOR  X0, X0
   360	LEAQ  -1(SI)(DX*1), SI  // point at the last remaining byte
   361
   362dataLoadLoop:
   363	PSLLDQ $0x01, X0  // walk backwards so the first remaining byte ends up in byte 0
   364	PINSRB $0x00, (SI), X0
   365	LEAQ   -1(SI), SI  // LEAQ keeps flags; JNE below tests the DECQ result
   366	DECQ   DX
   367	JNE    dataLoadLoop
   368	JMP    dataMul  // DX is now 0, so after this multiply the loop exits to dataBail
   369
   370dataBail:
   371	MOVOU X8, (CX)  // store the accumulator to T
   372	RET
   373
   374// func gcmAesEnc(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32)
   375// Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3
      //
      // Encrypts src into dst by XORing it with AES-encrypted counter blocks and
      // folds the produced ciphertext into the accumulator at T (read on entry,
      // written back at gcmAesEncDone). The block at ctr is read but never
      // written by this routine.
      //
      // Stack frame (256 bytes):
      //   0..127(SP)    eight byte-reversed ciphertext blocks waiting to be hashed
      //                 (the first one already has the accumulator XORed in)
      //   128..255(SP)  eight counter blocks, each already XORed with round key 0
      //
      // Registers: DI = productTable, DX = dst, SI = src, R9 = bytes remaining,
      // CX = ctr, R8 = T, AX = ks, R13 = ks_len/4 - 1 (compared with 12 to choose
      // the 10/12/14-round tail), R10 = byte-swapped last dword of the counter,
      // R12 = byte-swapped last dword of round key 0, R11 = scratch,
      // X8/X9/X10 = low/high/middle accumulators, X14 = gcmPoly, X15 = bswapMask.
   376TEXT ·gcmAesEnc(SB), $256-96
   377	MOVQ   productTable+0(FP), DI
   378	MOVQ   dst_base+8(FP), DX
   379	MOVQ   src_base+32(FP), SI
   380	MOVQ   src_len+40(FP), R9
   381	MOVQ   ctr+56(FP), CX
   382	MOVQ   T+64(FP), R8
   383	MOVQ   ks_base+72(FP), AX
   384	MOVQ   ks_len+80(FP), R13
   385	SHRQ   $0x02, R13  // ks_len / 4 = number of 16-byte round keys
   386	DECQ   R13  // minus one
   387	MOVOU  bswapMask<>+0(SB), X15
   388	MOVOU  gcmPoly<>+0(SB), X14
   389	MOVOU  (R8), X8  // X8 = accumulator read from T
   390	PXOR   X9, X9
   391	PXOR   X10, X10
   392	MOVOU  (CX), X0  // X0 = counter block
   393	MOVL   12(CX), R10  // last 4 bytes of the counter block
   394	MOVOU  (AX), X11  // X11 = round key 0
   395	MOVL   12(AX), R12  // last 4 bytes of round key 0
   396	BSWAPL R10
   397	BSWAPL R12
   398	PXOR   X0, X11  // X11 = counter block ^ round key 0
   399	MOVOU  X11, 128(SP)  // counter slot 0
   400	ADDL   $0x01, R10  // counter idiom, repeated throughout: bump R10, ...
   401	MOVL   R10, R11
   402	XORL   R12, R11  // ... XOR with the matching round key 0 dword, ...
   403	BSWAPL R11  // ... swap back to memory byte order, ...
   404	MOVL   R11, 140(SP)  // ... and patch the last dword of a counter slot (here slot 0)
   405	CMPQ   R9, $0x80
   406	JB     gcmAesEncSingles  // fewer than 128 bytes: one block at a time
   407	SUBQ   $0x80, R9
   408
   409	// We have at least 8 blocks to encrypt, prepare the rest of the counters
   410	MOVOU  X11, 144(SP)  // slot 1
   411	ADDL   $0x01, R10
   412	MOVL   R10, R11
   413	XORL   R12, R11
   414	BSWAPL R11
   415	MOVL   R11, 156(SP)
   416	MOVOU  X11, 160(SP)  // slot 2
   417	ADDL   $0x01, R10
   418	MOVL   R10, R11
   419	XORL   R12, R11
   420	BSWAPL R11
   421	MOVL   R11, 172(SP)
   422	MOVOU  X11, 176(SP)  // slot 3
   423	ADDL   $0x01, R10
   424	MOVL   R10, R11
   425	XORL   R12, R11
   426	BSWAPL R11
   427	MOVL   R11, 188(SP)
   428	MOVOU  X11, 192(SP)  // slot 4
   429	ADDL   $0x01, R10
   430	MOVL   R10, R11
   431	XORL   R12, R11
   432	BSWAPL R11
   433	MOVL   R11, 204(SP)
   434	MOVOU  X11, 208(SP)  // slot 5
   435	ADDL   $0x01, R10
   436	MOVL   R10, R11
   437	XORL   R12, R11
   438	BSWAPL R11
   439	MOVL   R11, 220(SP)
   440	MOVOU  X11, 224(SP)  // slot 6
   441	ADDL   $0x01, R10
   442	MOVL   R10, R11
   443	XORL   R12, R11
   444	BSWAPL R11
   445	MOVL   R11, 236(SP)
   446	MOVOU  X11, 240(SP)  // slot 7
   447	ADDL   $0x01, R10
   448	MOVL   R10, R11
   449	XORL   R12, R11
   450	BSWAPL R11
   451	MOVL   R11, 252(SP)
   452	MOVOU  128(SP), X0  // load the eight counter blocks (round key 0 already applied)
   453	MOVOU  144(SP), X1
   454	MOVOU  160(SP), X2
   455	MOVOU  176(SP), X3
   456	MOVOU  192(SP), X4
   457	MOVOU  208(SP), X5
   458	MOVOU  224(SP), X6
   459	MOVOU  240(SP), X7
   460	MOVOU  16(AX), X11  // AES round 1 on all eight blocks
   461	AESENC X11, X0
   462	AESENC X11, X1
   463	AESENC X11, X2
   464	AESENC X11, X3
   465	AESENC X11, X4
   466	AESENC X11, X5
   467	AESENC X11, X6
   468	AESENC X11, X7
   469	ADDL   $0x01, R10  // between rounds, refill slot 0 with the next batch's counter
   470	MOVL   R10, R11
   471	XORL   R12, R11
   472	BSWAPL R11
   473	MOVL   R11, 140(SP)
   474	MOVOU  32(AX), X11  // AES round 2
   475	AESENC X11, X0
   476	AESENC X11, X1
   477	AESENC X11, X2
   478	AESENC X11, X3
   479	AESENC X11, X4
   480	AESENC X11, X5
   481	AESENC X11, X6
   482	AESENC X11, X7
   483	ADDL   $0x01, R10  // refill slot 1
   484	MOVL   R10, R11
   485	XORL   R12, R11
   486	BSWAPL R11
   487	MOVL   R11, 156(SP)
   488	MOVOU  48(AX), X11  // AES round 3
   489	AESENC X11, X0
   490	AESENC X11, X1
   491	AESENC X11, X2
   492	AESENC X11, X3
   493	AESENC X11, X4
   494	AESENC X11, X5
   495	AESENC X11, X6
   496	AESENC X11, X7
   497	ADDL   $0x01, R10  // refill slot 2
   498	MOVL   R10, R11
   499	XORL   R12, R11
   500	BSWAPL R11
   501	MOVL   R11, 172(SP)
   502	MOVOU  64(AX), X11  // AES round 4
   503	AESENC X11, X0
   504	AESENC X11, X1
   505	AESENC X11, X2
   506	AESENC X11, X3
   507	AESENC X11, X4
   508	AESENC X11, X5
   509	AESENC X11, X6
   510	AESENC X11, X7
   511	ADDL   $0x01, R10  // refill slot 3
   512	MOVL   R10, R11
   513	XORL   R12, R11
   514	BSWAPL R11
   515	MOVL   R11, 188(SP)
   516	MOVOU  80(AX), X11  // AES round 5
   517	AESENC X11, X0
   518	AESENC X11, X1
   519	AESENC X11, X2
   520	AESENC X11, X3
   521	AESENC X11, X4
   522	AESENC X11, X5
   523	AESENC X11, X6
   524	AESENC X11, X7
   525	ADDL   $0x01, R10  // refill slot 4
   526	MOVL   R10, R11
   527	XORL   R12, R11
   528	BSWAPL R11
   529	MOVL   R11, 204(SP)
   530	MOVOU  96(AX), X11  // AES round 6
   531	AESENC X11, X0
   532	AESENC X11, X1
   533	AESENC X11, X2
   534	AESENC X11, X3
   535	AESENC X11, X4
   536	AESENC X11, X5
   537	AESENC X11, X6
   538	AESENC X11, X7
   539	ADDL   $0x01, R10  // refill slot 5
   540	MOVL   R10, R11
   541	XORL   R12, R11
   542	BSWAPL R11
   543	MOVL   R11, 220(SP)
   544	MOVOU  112(AX), X11  // AES round 7
   545	AESENC X11, X0
   546	AESENC X11, X1
   547	AESENC X11, X2
   548	AESENC X11, X3
   549	AESENC X11, X4
   550	AESENC X11, X5
   551	AESENC X11, X6
   552	AESENC X11, X7
   553	ADDL   $0x01, R10  // refill slot 6
   554	MOVL   R10, R11
   555	XORL   R12, R11
   556	BSWAPL R11
   557	MOVL   R11, 236(SP)
   558	MOVOU  128(AX), X11  // AES round 8
   559	AESENC X11, X0
   560	AESENC X11, X1
   561	AESENC X11, X2
   562	AESENC X11, X3
   563	AESENC X11, X4
   564	AESENC X11, X5
   565	AESENC X11, X6
   566	AESENC X11, X7
   567	ADDL   $0x01, R10  // refill slot 7
   568	MOVL   R10, R11
   569	XORL   R12, R11
   570	BSWAPL R11
   571	MOVL   R11, 252(SP)
   572	MOVOU  144(AX), X11  // AES round 9
   573	AESENC X11, X0
   574	AESENC X11, X1
   575	AESENC X11, X2
   576	AESENC X11, X3
   577	AESENC X11, X4
   578	AESENC X11, X5
   579	AESENC X11, X6
   580	AESENC X11, X7
   581	MOVOU  160(AX), X11
   582	CMPQ   R13, $0x0c
   583	JB     encLast1  // R13 < 12: the key at offset 160 is the final round key
   584	AESENC X11, X0
   585	AESENC X11, X1
   586	AESENC X11, X2
   587	AESENC X11, X3
   588	AESENC X11, X4
   589	AESENC X11, X5
   590	AESENC X11, X6
   591	AESENC X11, X7
   592	MOVOU  176(AX), X11
   593	AESENC X11, X0
   594	AESENC X11, X1
   595	AESENC X11, X2
   596	AESENC X11, X3
   597	AESENC X11, X4
   598	AESENC X11, X5
   599	AESENC X11, X6
   600	AESENC X11, X7
   601	MOVOU  192(AX), X11
   602	JE     encLast1  // still testing the CMPQ above (AESENC/MOVOU leave flags alone): R13 == 12
   603	AESENC X11, X0
   604	AESENC X11, X1
   605	AESENC X11, X2
   606	AESENC X11, X3
   607	AESENC X11, X4
   608	AESENC X11, X5
   609	AESENC X11, X6
   610	AESENC X11, X7
   611	MOVOU  208(AX), X11
   612	AESENC X11, X0
   613	AESENC X11, X1
   614	AESENC X11, X2
   615	AESENC X11, X3
   616	AESENC X11, X4
   617	AESENC X11, X5
   618	AESENC X11, X6
   619	AESENC X11, X7
   620	MOVOU  224(AX), X11
   621
   622encLast1:
   623	AESENCLAST X11, X0
   624	AESENCLAST X11, X1
   625	AESENCLAST X11, X2
   626	AESENCLAST X11, X3
   627	AESENCLAST X11, X4
   628	AESENCLAST X11, X5
   629	AESENCLAST X11, X6
   630	AESENCLAST X11, X7
   631	MOVOU      (SI), X11  // XOR the eight keystream blocks with 128 bytes of src
   632	PXOR       X11, X0
   633	MOVOU      16(SI), X11
   634	PXOR       X11, X1
   635	MOVOU      32(SI), X11
   636	PXOR       X11, X2
   637	MOVOU      48(SI), X11
   638	PXOR       X11, X3
   639	MOVOU      64(SI), X11
   640	PXOR       X11, X4
   641	MOVOU      80(SI), X11
   642	PXOR       X11, X5
   643	MOVOU      96(SI), X11
   644	PXOR       X11, X6
   645	MOVOU      112(SI), X11
   646	PXOR       X11, X7
   647	MOVOU      X0, (DX)  // write each ciphertext block to dst, then byte-reverse it for hashing
   648	PSHUFB     X15, X0
   649	PXOR       X8, X0  // fold the accumulator into the first block only
   650	MOVOU      X1, 16(DX)
   651	PSHUFB     X15, X1
   652	MOVOU      X2, 32(DX)
   653	PSHUFB     X15, X2
   654	MOVOU      X3, 48(DX)
   655	PSHUFB     X15, X3
   656	MOVOU      X4, 64(DX)
   657	PSHUFB     X15, X4
   658	MOVOU      X5, 80(DX)
   659	PSHUFB     X15, X5
   660	MOVOU      X6, 96(DX)
   661	PSHUFB     X15, X6
   662	MOVOU      X7, 112(DX)
   663	PSHUFB     X15, X7
   664	MOVOU      X0, (SP)  // park the eight blocks on the stack; they are hashed by the next pass
   665	MOVOU      X1, 16(SP)
   666	MOVOU      X2, 32(SP)
   667	MOVOU      X3, 48(SP)
   668	MOVOU      X4, 64(SP)
   669	MOVOU      X5, 80(SP)
   670	MOVOU      X6, 96(SP)
   671	MOVOU      X7, 112(SP)
   672	LEAQ       128(SI), SI
   673	LEAQ       128(DX), DX
   674
   675gcmAesEncOctetsLoop:
   676	CMPQ      R9, $0x80
   677	JB        gcmAesEncOctetsEnd
   678	SUBQ      $0x80, R9
   679	MOVOU     128(SP), X0  // next eight counter blocks
   680	MOVOU     144(SP), X1
   681	MOVOU     160(SP), X2
   682	MOVOU     176(SP), X3
   683	MOVOU     192(SP), X4
   684	MOVOU     208(SP), X5
   685	MOVOU     224(SP), X6
   686	MOVOU     240(SP), X7
   687	MOVOU     (SP), X11  // hash parked block 0 x table entry at offset 0
   688	PSHUFD    $0x4e, X11, X12
   689	PXOR      X11, X12
   690	MOVOU     (DI), X8
   691	MOVOU     16(DI), X10
   692	MOVOU     X8, X9
   693	PCLMULQDQ $0x00, X12, X10
   694	PCLMULQDQ $0x00, X11, X8
   695	PCLMULQDQ $0x11, X11, X9
   696	MOVOU     16(AX), X11  // AES round 1, interleaved with hashing parked block 1 (table offset 32)
   697	AESENC    X11, X0
   698	AESENC    X11, X1
   699	AESENC    X11, X2
   700	AESENC    X11, X3
   701	MOVOU     32(DI), X12
   702	MOVOU     X12, X13
   703	AESENC    X11, X4
   704	AESENC    X11, X5
   705	AESENC    X11, X6
   706	AESENC    X11, X7
   707	MOVOU     16(SP), X11
   708	PCLMULQDQ $0x00, X11, X12
   709	PXOR      X12, X8
   710	PSHUFD    $0x4e, X11, X12
   711	PCLMULQDQ $0x11, X11, X13
   712	PXOR      X12, X11
   713	PXOR      X13, X9
   714	MOVOU     48(DI), X13
   715	PCLMULQDQ $0x00, X13, X11
   716	PXOR      X11, X10
   717	ADDL      $0x01, R10  // refill slot 0 for the following pass
   718	MOVL      R10, R11
   719	XORL      R12, R11
   720	BSWAPL    R11
   721	MOVL      R11, 140(SP)
   722	MOVOU     32(AX), X11  // AES round 2 + parked block 2 (table offset 64)
   723	AESENC    X11, X0
   724	AESENC    X11, X1
   725	AESENC    X11, X2
   726	AESENC    X11, X3
   727	MOVOU     64(DI), X12
   728	MOVOU     X12, X13
   729	AESENC    X11, X4
   730	AESENC    X11, X5
   731	AESENC    X11, X6
   732	AESENC    X11, X7
   733	MOVOU     32(SP), X11
   734	PCLMULQDQ $0x00, X11, X12
   735	PXOR      X12, X8
   736	PSHUFD    $0x4e, X11, X12
   737	PCLMULQDQ $0x11, X11, X13
   738	PXOR      X12, X11
   739	PXOR      X13, X9
   740	MOVOU     80(DI), X13
   741	PCLMULQDQ $0x00, X13, X11
   742	PXOR      X11, X10
   743	ADDL      $0x01, R10  // refill slot 1
   744	MOVL      R10, R11
   745	XORL      R12, R11
   746	BSWAPL    R11
   747	MOVL      R11, 156(SP)
   748	MOVOU     48(AX), X11  // AES round 3 + parked block 3 (table offset 96)
   749	AESENC    X11, X0
   750	AESENC    X11, X1
   751	AESENC    X11, X2
   752	AESENC    X11, X3
   753	MOVOU     96(DI), X12
   754	MOVOU     X12, X13
   755	AESENC    X11, X4
   756	AESENC    X11, X5
   757	AESENC    X11, X6
   758	AESENC    X11, X7
   759	MOVOU     48(SP), X11
   760	PCLMULQDQ $0x00, X11, X12
   761	PXOR      X12, X8
   762	PSHUFD    $0x4e, X11, X12
   763	PCLMULQDQ $0x11, X11, X13
   764	PXOR      X12, X11
   765	PXOR      X13, X9
   766	MOVOU     112(DI), X13
   767	PCLMULQDQ $0x00, X13, X11
   768	PXOR      X11, X10
   769	ADDL      $0x01, R10  // refill slot 2
   770	MOVL      R10, R11
   771	XORL      R12, R11
   772	BSWAPL    R11
   773	MOVL      R11, 172(SP)
   774	MOVOU     64(AX), X11  // AES round 4 + parked block 4 (table offset 128)
   775	AESENC    X11, X0
   776	AESENC    X11, X1
   777	AESENC    X11, X2
   778	AESENC    X11, X3
   779	MOVOU     128(DI), X12
   780	MOVOU     X12, X13
   781	AESENC    X11, X4
   782	AESENC    X11, X5
   783	AESENC    X11, X6
   784	AESENC    X11, X7
   785	MOVOU     64(SP), X11
   786	PCLMULQDQ $0x00, X11, X12
   787	PXOR      X12, X8
   788	PSHUFD    $0x4e, X11, X12
   789	PCLMULQDQ $0x11, X11, X13
   790	PXOR      X12, X11
   791	PXOR      X13, X9
   792	MOVOU     144(DI), X13
   793	PCLMULQDQ $0x00, X13, X11
   794	PXOR      X11, X10
   795	ADDL      $0x01, R10  // refill slot 3
   796	MOVL      R10, R11
   797	XORL      R12, R11
   798	BSWAPL    R11
   799	MOVL      R11, 188(SP)
   800	MOVOU     80(AX), X11  // AES round 5 + parked block 5 (table offset 160)
   801	AESENC    X11, X0
   802	AESENC    X11, X1
   803	AESENC    X11, X2
   804	AESENC    X11, X3
   805	MOVOU     160(DI), X12
   806	MOVOU     X12, X13
   807	AESENC    X11, X4
   808	AESENC    X11, X5
   809	AESENC    X11, X6
   810	AESENC    X11, X7
   811	MOVOU     80(SP), X11
   812	PCLMULQDQ $0x00, X11, X12
   813	PXOR      X12, X8
   814	PSHUFD    $0x4e, X11, X12
   815	PCLMULQDQ $0x11, X11, X13
   816	PXOR      X12, X11
   817	PXOR      X13, X9
   818	MOVOU     176(DI), X13
   819	PCLMULQDQ $0x00, X13, X11
   820	PXOR      X11, X10
   821	ADDL      $0x01, R10  // refill slot 4
   822	MOVL      R10, R11
   823	XORL      R12, R11
   824	BSWAPL    R11
   825	MOVL      R11, 204(SP)
   826	MOVOU     96(AX), X11  // AES round 6 + parked block 6 (table offset 192)
   827	AESENC    X11, X0
   828	AESENC    X11, X1
   829	AESENC    X11, X2
   830	AESENC    X11, X3
   831	MOVOU     192(DI), X12
   832	MOVOU     X12, X13
   833	AESENC    X11, X4
   834	AESENC    X11, X5
   835	AESENC    X11, X6
   836	AESENC    X11, X7
   837	MOVOU     96(SP), X11
   838	PCLMULQDQ $0x00, X11, X12
   839	PXOR      X12, X8
   840	PSHUFD    $0x4e, X11, X12
   841	PCLMULQDQ $0x11, X11, X13
   842	PXOR      X12, X11
   843	PXOR      X13, X9
   844	MOVOU     208(DI), X13
   845	PCLMULQDQ $0x00, X13, X11
   846	PXOR      X11, X10
   847	ADDL      $0x01, R10  // refill slot 5
   848	MOVL      R10, R11
   849	XORL      R12, R11
   850	BSWAPL    R11
   851	MOVL      R11, 220(SP)
   852	MOVOU     112(AX), X11  // AES round 7 + parked block 7 (table offset 224)
   853	AESENC    X11, X0
   854	AESENC    X11, X1
   855	AESENC    X11, X2
   856	AESENC    X11, X3
   857	MOVOU     224(DI), X12
   858	MOVOU     X12, X13
   859	AESENC    X11, X4
   860	AESENC    X11, X5
   861	AESENC    X11, X6
   862	AESENC    X11, X7
   863	MOVOU     112(SP), X11
   864	PCLMULQDQ $0x00, X11, X12
   865	PXOR      X12, X8
   866	PSHUFD    $0x4e, X11, X12
   867	PCLMULQDQ $0x11, X11, X13
   868	PXOR      X12, X11
   869	PXOR      X13, X9
   870	MOVOU     240(DI), X13
   871	PCLMULQDQ $0x00, X13, X11
   872	PXOR      X11, X10
   873	ADDL      $0x01, R10  // refill slot 6
   874	MOVL      R10, R11
   875	XORL      R12, R11
   876	BSWAPL    R11
   877	MOVL      R11, 236(SP)
   878	MOVOU     128(AX), X11  // AES round 8
   879	AESENC    X11, X0
   880	AESENC    X11, X1
   881	AESENC    X11, X2
   882	AESENC    X11, X3
   883	AESENC    X11, X4
   884	AESENC    X11, X5
   885	AESENC    X11, X6
   886	AESENC    X11, X7
   887	ADDL      $0x01, R10  // refill slot 7
   888	MOVL      R10, R11
   889	XORL      R12, R11
   890	BSWAPL    R11
   891	MOVL      R11, 252(SP)
   892	PXOR      X8, X10  // turn the summed middle products into the middle term
   893	PXOR      X9, X10
   894	MOVOU     X10, X11
   895	PSRLDQ    $0x08, X10
   896	PSLLDQ    $0x08, X11
   897	PXOR      X10, X9  // X9 = upper 128 bits
   898	PXOR      X11, X8  // X8 = lower 128 bits
   899	MOVOU     X14, X11  // first reduction step
   900	PCLMULQDQ $0x01, X8, X11
   901	PSHUFD    $0x4e, X8, X8
   902	PXOR      X11, X8
   903	MOVOU     144(AX), X11  // AES round 9
   904	AESENC    X11, X0
   905	AESENC    X11, X1
   906	AESENC    X11, X2
   907	AESENC    X11, X3
   908	AESENC    X11, X4
   909	AESENC    X11, X5
   910	AESENC    X11, X6
   911	AESENC    X11, X7
   912	MOVOU     X14, X11  // second reduction step
   913	PCLMULQDQ $0x01, X8, X11
   914	PSHUFD    $0x4e, X8, X8
   915	PXOR      X11, X8
   916	PXOR      X9, X8  // X8 = accumulator covering the parked batch
   917	MOVOU     160(AX), X11
   918	CMPQ      R13, $0x0c
   919	JB        encLast2  // R13 < 12: the key at offset 160 is the final round key
   920	AESENC    X11, X0
   921	AESENC    X11, X1
   922	AESENC    X11, X2
   923	AESENC    X11, X3
   924	AESENC    X11, X4
   925	AESENC    X11, X5
   926	AESENC    X11, X6
   927	AESENC    X11, X7
   928	MOVOU     176(AX), X11
   929	AESENC    X11, X0
   930	AESENC    X11, X1
   931	AESENC    X11, X2
   932	AESENC    X11, X3
   933	AESENC    X11, X4
   934	AESENC    X11, X5
   935	AESENC    X11, X6
   936	AESENC    X11, X7
   937	MOVOU     192(AX), X11
   938	JE        encLast2  // still testing the CMPQ above: R13 == 12
   939	AESENC    X11, X0
   940	AESENC    X11, X1
   941	AESENC    X11, X2
   942	AESENC    X11, X3
   943	AESENC    X11, X4
   944	AESENC    X11, X5
   945	AESENC    X11, X6
   946	AESENC    X11, X7
   947	MOVOU     208(AX), X11
   948	AESENC    X11, X0
   949	AESENC    X11, X1
   950	AESENC    X11, X2
   951	AESENC    X11, X3
   952	AESENC    X11, X4
   953	AESENC    X11, X5
   954	AESENC    X11, X6
   955	AESENC    X11, X7
   956	MOVOU     224(AX), X11
   957
   958encLast2:
   959	AESENCLAST X11, X0
   960	AESENCLAST X11, X1
   961	AESENCLAST X11, X2
   962	AESENCLAST X11, X3
   963	AESENCLAST X11, X4
   964	AESENCLAST X11, X5
   965	AESENCLAST X11, X6
   966	AESENCLAST X11, X7
   967	MOVOU      (SI), X11  // XOR the eight keystream blocks with 128 bytes of src
   968	PXOR       X11, X0
   969	MOVOU      16(SI), X11
   970	PXOR       X11, X1
   971	MOVOU      32(SI), X11
   972	PXOR       X11, X2
   973	MOVOU      48(SI), X11
   974	PXOR       X11, X3
   975	MOVOU      64(SI), X11
   976	PXOR       X11, X4
   977	MOVOU      80(SI), X11
   978	PXOR       X11, X5
   979	MOVOU      96(SI), X11
   980	PXOR       X11, X6
   981	MOVOU      112(SI), X11
   982	PXOR       X11, X7
   983	MOVOU      X0, (DX)  // write ciphertext to dst, then byte-reverse for hashing
   984	PSHUFB     X15, X0
   985	PXOR       X8, X0  // fold the accumulator into the first block only
   986	MOVOU      X1, 16(DX)
   987	PSHUFB     X15, X1
   988	MOVOU      X2, 32(DX)
   989	PSHUFB     X15, X2
   990	MOVOU      X3, 48(DX)
   991	PSHUFB     X15, X3
   992	MOVOU      X4, 64(DX)
   993	PSHUFB     X15, X4
   994	MOVOU      X5, 80(DX)
   995	PSHUFB     X15, X5
   996	MOVOU      X6, 96(DX)
   997	PSHUFB     X15, X6
   998	MOVOU      X7, 112(DX)
   999	PSHUFB     X15, X7
  1000	MOVOU      X0, (SP)  // park this batch for the next pass (or for gcmAesEncOctetsEnd)
  1001	MOVOU      X1, 16(SP)
  1002	MOVOU      X2, 32(SP)
  1003	MOVOU      X3, 48(SP)
  1004	MOVOU      X4, 64(SP)
  1005	MOVOU      X5, 80(SP)
  1006	MOVOU      X6, 96(SP)
  1007	MOVOU      X7, 112(SP)
  1008	LEAQ       128(SI), SI
  1009	LEAQ       128(DX), DX
  1010	JMP        gcmAesEncOctetsLoop
  1011
  1012gcmAesEncOctetsEnd:
  1013	MOVOU     (SP), X11  // hash the last parked batch: block 0 x table entry at offset 0
  1014	MOVOU     (DI), X8
  1015	MOVOU     16(DI), X10
  1016	MOVOU     X8, X9
  1017	PSHUFD    $0x4e, X11, X12
  1018	PXOR      X11, X12
  1019	PCLMULQDQ $0x00, X11, X8
  1020	PCLMULQDQ $0x11, X11, X9
  1021	PCLMULQDQ $0x00, X12, X10
  1022	MOVOU     16(SP), X11  // block 1 x table entry at offset 32
  1023	MOVOU     32(DI), X12
  1024	MOVOU     X12, X13
  1025	PCLMULQDQ $0x00, X11, X12
  1026	PXOR      X12, X8
  1027	PCLMULQDQ $0x11, X11, X13
  1028	PXOR      X13, X9
  1029	PSHUFD    $0x4e, X11, X12
  1030	PXOR      X12, X11
  1031	MOVOU     48(DI), X12
  1032	PCLMULQDQ $0x00, X11, X12
  1033	PXOR      X12, X10
  1034	MOVOU     32(SP), X11  // block 2 x table entry at offset 64
  1035	MOVOU     64(DI), X12
  1036	MOVOU     X12, X13
  1037	PCLMULQDQ $0x00, X11, X12
  1038	PXOR      X12, X8
  1039	PCLMULQDQ $0x11, X11, X13
  1040	PXOR      X13, X9
  1041	PSHUFD    $0x4e, X11, X12
  1042	PXOR      X12, X11
  1043	MOVOU     80(DI), X12
  1044	PCLMULQDQ $0x00, X11, X12
  1045	PXOR      X12, X10
  1046	MOVOU     48(SP), X11  // block 3 x table entry at offset 96
  1047	MOVOU     96(DI), X12
  1048	MOVOU     X12, X13
  1049	PCLMULQDQ $0x00, X11, X12
  1050	PXOR      X12, X8
  1051	PCLMULQDQ $0x11, X11, X13
  1052	PXOR      X13, X9
  1053	PSHUFD    $0x4e, X11, X12
  1054	PXOR      X12, X11
  1055	MOVOU     112(DI), X12
  1056	PCLMULQDQ $0x00, X11, X12
  1057	PXOR      X12, X10
  1058	MOVOU     64(SP), X11  // block 4 x table entry at offset 128
  1059	MOVOU     128(DI), X12
  1060	MOVOU     X12, X13
  1061	PCLMULQDQ $0x00, X11, X12
  1062	PXOR      X12, X8
  1063	PCLMULQDQ $0x11, X11, X13
  1064	PXOR      X13, X9
  1065	PSHUFD    $0x4e, X11, X12
  1066	PXOR      X12, X11
  1067	MOVOU     144(DI), X12
  1068	PCLMULQDQ $0x00, X11, X12
  1069	PXOR      X12, X10
  1070	MOVOU     80(SP), X11  // block 5 x table entry at offset 160
  1071	MOVOU     160(DI), X12
  1072	MOVOU     X12, X13
  1073	PCLMULQDQ $0x00, X11, X12
  1074	PXOR      X12, X8
  1075	PCLMULQDQ $0x11, X11, X13
  1076	PXOR      X13, X9
  1077	PSHUFD    $0x4e, X11, X12
  1078	PXOR      X12, X11
  1079	MOVOU     176(DI), X12
  1080	PCLMULQDQ $0x00, X11, X12
  1081	PXOR      X12, X10
  1082	MOVOU     96(SP), X11  // block 6 x table entry at offset 192
  1083	MOVOU     192(DI), X12
  1084	MOVOU     X12, X13
  1085	PCLMULQDQ $0x00, X11, X12
  1086	PXOR      X12, X8
  1087	PCLMULQDQ $0x11, X11, X13
  1088	PXOR      X13, X9
  1089	PSHUFD    $0x4e, X11, X12
  1090	PXOR      X12, X11
  1091	MOVOU     208(DI), X12
  1092	PCLMULQDQ $0x00, X11, X12
  1093	PXOR      X12, X10
  1094	MOVOU     112(SP), X11  // block 7 x table entry at offset 224
  1095	MOVOU     224(DI), X12
  1096	MOVOU     X12, X13
  1097	PCLMULQDQ $0x00, X11, X12
  1098	PXOR      X12, X8
  1099	PCLMULQDQ $0x11, X11, X13
  1100	PXOR      X13, X9
  1101	PSHUFD    $0x4e, X11, X12
  1102	PXOR      X12, X11
  1103	MOVOU     240(DI), X12
  1104	PCLMULQDQ $0x00, X11, X12
  1105	PXOR      X12, X10
  1106	PXOR      X8, X10  // turn the summed middle products into the middle term
  1107	PXOR      X9, X10
  1108	MOVOU     X10, X11
  1109	PSRLDQ    $0x08, X10
  1110	PSLLDQ    $0x08, X11
  1111	PXOR      X10, X9
  1112	PXOR      X11, X8
  1113	MOVOU     X14, X11  // first reduction step
  1114	PCLMULQDQ $0x01, X8, X11
  1115	PSHUFD    $0x4e, X8, X8
  1116	PXOR      X11, X8
  1117	MOVOU     X14, X11  // second reduction step
  1118	PCLMULQDQ $0x01, X8, X11
  1119	PSHUFD    $0x4e, X8, X8
  1120	PXOR      X11, X8
  1121	PXOR      X9, X8  // X8 = accumulator covering everything encrypted so far
  1122	TESTQ     R9, R9
  1123	JE        gcmAesEncDone
  1124	SUBQ      $0x07, R10  // R10 matches the counter in slot 7; rewind to slot 0's, the only slot used below
  1125
  1126gcmAesEncSingles:
  1127	MOVOU 16(AX), X1  // keep round keys 1..7 in X1..X7 for the single-block paths
  1128	MOVOU 32(AX), X2
  1129	MOVOU 48(AX), X3
  1130	MOVOU 64(AX), X4
  1131	MOVOU 80(AX), X5
  1132	MOVOU 96(AX), X6
  1133	MOVOU 112(AX), X7
  1134	MOVOU 224(DI), X13  // single blocks are multiplied by the table entry at offset 224
  1135
  1136gcmAesEncSinglesLoop:
  1137	CMPQ   R9, $0x10
  1138	JB     gcmAesEncTail
  1139	SUBQ   $0x10, R9
  1140	MOVOU  128(SP), X0  // counter block from slot 0
  1141	ADDL   $0x01, R10  // refill slot 0 with the next counter
  1142	MOVL   R10, R11
  1143	XORL   R12, R11
  1144	BSWAPL R11
  1145	MOVL   R11, 140(SP)
  1146	AESENC X1, X0
  1147	AESENC X2, X0
  1148	AESENC X3, X0
  1149	AESENC X4, X0
  1150	AESENC X5, X0
  1151	AESENC X6, X0
  1152	AESENC X7, X0
  1153	MOVOU  128(AX), X11
  1154	AESENC X11, X0
  1155	MOVOU  144(AX), X11
  1156	AESENC X11, X0
  1157	MOVOU  160(AX), X11
  1158	CMPQ   R13, $0x0c
  1159	JB     encLast3  // R13 < 12: the key at offset 160 is the final round key
  1160	AESENC X11, X0
  1161	MOVOU  176(AX), X11
  1162	AESENC X11, X0
  1163	MOVOU  192(AX), X11
  1164	JE     encLast3  // still testing the CMPQ above: R13 == 12
  1165	AESENC X11, X0
  1166	MOVOU  208(AX), X11
  1167	AESENC X11, X0
  1168	MOVOU  224(AX), X11
  1169
  1170encLast3:
  1171	AESENCLAST X11, X0
  1172	MOVOU      (SI), X11
  1173	PXOR       X11, X0  // ciphertext = keystream ^ src
  1174	MOVOU      X0, (DX)
  1175	PSHUFB     X15, X0  // byte-reverse for hashing
  1176	PXOR       X8, X0  // fold in the accumulator
  1177	MOVOU      X13, X8
  1178	MOVOU      X13, X9
  1179	MOVOU      240(DI), X10
  1180	PSHUFD     $0x4e, X0, X11
  1181	PXOR       X0, X11
  1182	PCLMULQDQ  $0x00, X0, X8  // low x low
  1183	PCLMULQDQ  $0x11, X0, X9  // high x high
  1184	PCLMULQDQ  $0x00, X11, X10  // middle product
  1185	PXOR       X8, X10
  1186	PXOR       X9, X10
  1187	MOVOU      X10, X11
  1188	PSRLDQ     $0x08, X10
  1189	PSLLDQ     $0x08, X11
  1190	PXOR       X10, X9
  1191	PXOR       X11, X8
  1192	MOVOU      X14, X11  // first reduction step
  1193	PCLMULQDQ  $0x01, X8, X11
  1194	PSHUFD     $0x4e, X8, X8
  1195	PXOR       X11, X8
  1196	MOVOU      X14, X11  // second reduction step
  1197	PCLMULQDQ  $0x01, X8, X11
  1198	PSHUFD     $0x4e, X8, X8
  1199	PXOR       X11, X8
  1200	PXOR       X9, X8
  1201	LEAQ       16(SI), SI
  1202	LEAQ       16(DX), DX
  1203	JMP        gcmAesEncSinglesLoop
  1204
  1205gcmAesEncTail:
  1206	TESTQ  R9, R9
  1207	JE     gcmAesEncDone
  1208	MOVOU  128(SP), X0  // counter block for the final partial block
  1209	AESENC X1, X0
  1210	AESENC X2, X0
  1211	AESENC X3, X0
  1212	AESENC X4, X0
  1213	AESENC X5, X0
  1214	AESENC X6, X0
  1215	AESENC X7, X0
  1216	MOVOU  128(AX), X11
  1217	AESENC X11, X0
  1218	MOVOU  144(AX), X11
  1219	AESENC X11, X0
  1220	MOVOU  160(AX), X11
  1221	CMPQ   R13, $0x0c
  1222	JB     encLast4  // R13 < 12: the key at offset 160 is the final round key
  1223	AESENC X11, X0
  1224	MOVOU  176(AX), X11
  1225	AESENC X11, X0
  1226	MOVOU  192(AX), X11
  1227	JE     encLast4  // still testing the CMPQ above: R13 == 12
  1228	AESENC X11, X0
  1229	MOVOU  208(AX), X11
  1230	AESENC X11, X0
  1231	MOVOU  224(AX), X11
  1232
  1233encLast4:
  1234	AESENCLAST X11, X0
  1235	MOVOU      X0, X11  // X11 = keystream block
  1236	LEAQ       -1(SI)(R9*1), SI  // point at the last remaining src byte
  1237	MOVQ       R9, R11
  1238	SHLQ       $0x04, R11  // R9 * 16
  1239	LEAQ       andMask<>+0(SB), R10  // R10 is reused as a pointer; the counter is no longer needed
  1240	MOVOU      -16(R10)(R11*1), X12  // X12 = andMask entry number R9-1 (16 bytes each)
  1241	PXOR       X0, X0
  1242
  1243ptxLoadLoop:
  1244	PSLLDQ    $0x01, X0  // walk backwards so the first remaining byte ends up in byte 0
  1245	PINSRB    $0x00, (SI), X0
  1246	LEAQ      -1(SI), SI  // LEAQ keeps flags; JNE below tests the DECQ result
  1247	DECQ      R9
  1248	JNE       ptxLoadLoop
  1249	PXOR      X11, X0  // XOR with the keystream
  1250	PAND      X12, X0  // apply the selected andMask entry
  1251	MOVOU     X0, (DX)  // full 16-byte store to dst; NOTE(review): relies on dst having room past the tail - confirm with callers
  1252	PSHUFB    X15, X0  // byte-reverse for hashing
  1253	PXOR      X8, X0  // fold in the accumulator
  1254	MOVOU     X13, X8
  1255	MOVOU     X13, X9
  1256	MOVOU     240(DI), X10
  1257	PSHUFD    $0x4e, X0, X11
  1258	PXOR      X0, X11
  1259	PCLMULQDQ $0x00, X0, X8  // low x low
  1260	PCLMULQDQ $0x11, X0, X9  // high x high
  1261	PCLMULQDQ $0x00, X11, X10  // middle product
  1262	PXOR      X8, X10
  1263	PXOR      X9, X10
  1264	MOVOU     X10, X11
  1265	PSRLDQ    $0x08, X10
  1266	PSLLDQ    $0x08, X11
  1267	PXOR      X10, X9
  1268	PXOR      X11, X8
  1269	MOVOU     X14, X11  // first reduction step
  1270	PCLMULQDQ $0x01, X8, X11
  1271	PSHUFD    $0x4e, X8, X8
  1272	PXOR      X11, X8
  1273	MOVOU     X14, X11  // second reduction step
  1274	PCLMULQDQ $0x01, X8, X11
  1275	PSHUFD    $0x4e, X8, X8
  1276	PXOR      X11, X8
  1277	PXOR      X9, X8
  1278
  1279gcmAesEncDone:
  1280	MOVOU X8, (R8)  // write the accumulator back to T
  1281	RET
  1282
  1283DATA andMask<>+0(SB)/8, $0x00000000000000ff // entry 1 (offset 0): keeps the low 1 byte of a 16-byte block
  1284DATA andMask<>+8(SB)/8, $0x0000000000000000
  1285DATA andMask<>+16(SB)/8, $0x000000000000ffff // entry 2: low 2 bytes
  1286DATA andMask<>+24(SB)/8, $0x0000000000000000
  1287DATA andMask<>+32(SB)/8, $0x0000000000ffffff // entry 3: low 3 bytes
  1288DATA andMask<>+40(SB)/8, $0x0000000000000000
  1289DATA andMask<>+48(SB)/8, $0x00000000ffffffff // entry 4: low 4 bytes
  1290DATA andMask<>+56(SB)/8, $0x0000000000000000
  1291DATA andMask<>+64(SB)/8, $0x000000ffffffffff // entry 5: low 5 bytes
  1292DATA andMask<>+72(SB)/8, $0x0000000000000000
  1293DATA andMask<>+80(SB)/8, $0x0000ffffffffffff // entry 6: low 6 bytes
  1294DATA andMask<>+88(SB)/8, $0x0000000000000000
  1295DATA andMask<>+96(SB)/8, $0x00ffffffffffffff // entry 7: low 7 bytes
  1296DATA andMask<>+104(SB)/8, $0x0000000000000000
  1297DATA andMask<>+112(SB)/8, $0xffffffffffffffff // entry 8: low 8 bytes
  1298DATA andMask<>+120(SB)/8, $0x0000000000000000
  1299DATA andMask<>+128(SB)/8, $0xffffffffffffffff // entry 9: low 9 bytes
  1300DATA andMask<>+136(SB)/8, $0x00000000000000ff
  1301DATA andMask<>+144(SB)/8, $0xffffffffffffffff // entry 10: low 10 bytes
  1302DATA andMask<>+152(SB)/8, $0x000000000000ffff
  1303DATA andMask<>+160(SB)/8, $0xffffffffffffffff // entry 11: low 11 bytes
  1304DATA andMask<>+168(SB)/8, $0x0000000000ffffff
  1305DATA andMask<>+176(SB)/8, $0xffffffffffffffff // entry 12: low 12 bytes
  1306DATA andMask<>+184(SB)/8, $0x00000000ffffffff
  1307DATA andMask<>+192(SB)/8, $0xffffffffffffffff // entry 13: low 13 bytes
  1308DATA andMask<>+200(SB)/8, $0x000000ffffffffff
  1309DATA andMask<>+208(SB)/8, $0xffffffffffffffff // entry 14: low 14 bytes
  1310DATA andMask<>+216(SB)/8, $0x0000ffffffffffff
  1311DATA andMask<>+224(SB)/8, $0xffffffffffffffff // entry 15: low 15 bytes
  1312DATA andMask<>+232(SB)/8, $0x00ffffffffffffff
  1313GLOBL andMask<>(SB), RODATA|NOPTR, $240 // 15 entries x 16 bytes; the tail paths load entry n from offset 16*n - 16, n = remaining bytes (1..15)
  1314
  1315// func gcmAesDec(productTable *[256]byte, dst []byte, src []byte, ctr *[16]byte, T *[16]byte, ks []uint32)
  1316// Requires: AES, PCLMULQDQ, SSE2, SSE4.1, SSSE3
      //
      // gcmAesDec consumes src_len bytes from src. Every 16-byte input block is
      // first folded into the running hash value loaded from T (carry-less
      // multiplies against entries of productTable, reduced with gcmPoly), and is
      // then XORed with a counter block that has been run through the AES rounds
      // of ks; the result is written to dst. Input is handled 128 bytes at a
      // time while at least 128 bytes remain, then 16 bytes at a time, then one
      // final partial block of 1..15 bytes. The updated hash value is written
      // back to T before returning. The block at ctr is only read here; it is
      // never stored back.
      //
      // The 128-byte frame holds eight 16-byte counter slots at (SP)..112(SP).
      // Each slot is the counter block already XORed with the first round key at
      // (ks); only the last 32-bit word of a slot changes between blocks.
      //
      // Register roles after the prologue:
      //   DI  = productTable     SI = dst cursor     DX = src cursor
      //   R9  = bytes remaining  R8 = T              AX = ks
      //   R13 = ks_len/4 - 1     (compared with 12 to choose the round count)
      //   R10 = byte-swapped 32-bit counter word taken from ctr+12
      //   R12 = byte-swapped word taken from ks+12
      //   X8 / X9 / X10 = low / high / middle partial products of the hash
      //   X14 = gcmPoly          X15 = bswapMask (PSHUFB byte-reversal mask)
  1317TEXT ·gcmAesDec(SB), $128-96
      	// Load the arguments.
  1318	MOVQ   productTable+0(FP), DI
  1319	MOVQ   dst_base+8(FP), SI
  1320	MOVQ   src_base+32(FP), DX
  1321	MOVQ   src_len+40(FP), R9
  1322	MOVQ   ctr+56(FP), CX
  1323	MOVQ   T+64(FP), R8
  1324	MOVQ   ks_base+72(FP), AX
  1325	MOVQ   ks_len+80(FP), R13
      	// R13 = ks_len/4 - 1. Below, R13 < 12 finishes with the key at 160(AX),
      	// R13 == 12 with the key at 192(AX), anything larger with 224(AX).
  1326	SHRQ   $0x02, R13
  1327	DECQ   R13
  1328	MOVOU  bswapMask<>+0(SB), X15
  1329	MOVOU  gcmPoly<>+0(SB), X14
      	// X8 = current hash value from T; clear the other two accumulators.
  1330	MOVOU  (R8), X8
  1331	PXOR   X9, X9
  1332	PXOR   X10, X10
      	// X11 = counter block XOR first round key. R10 and R12 hold the
      	// byte-swapped counter word and key word so that every new counter
      	// value can be patched into a slot with a single 32-bit store.
  1333	MOVOU  (CX), X0
  1334	MOVL   12(CX), R10
  1335	MOVOU  (AX), X11
  1336	MOVL   12(AX), R12
  1337	BSWAPL R10
  1338	BSWAPL R12
  1339	PXOR   X0, X11
      	// Slot 0: counter + 1.
  1340	MOVOU  X11, (SP)
  1341	ADDL   $0x01, R10
  1342	MOVL   R10, R11
  1343	XORL   R12, R11
  1344	BSWAPL R11
  1345	MOVL   R11, 12(SP)
      	// With fewer than 128 bytes only slot 0 is needed.
  1346	CMPQ   R9, $0x80
  1347	JB     gcmAesDecSingles
      	// Slots 1..7: the next seven counter values.
  1348	MOVOU  X11, 16(SP)
  1349	ADDL   $0x01, R10
  1350	MOVL   R10, R11
  1351	XORL   R12, R11
  1352	BSWAPL R11
  1353	MOVL   R11, 28(SP)
  1354	MOVOU  X11, 32(SP)
  1355	ADDL   $0x01, R10
  1356	MOVL   R10, R11
  1357	XORL   R12, R11
  1358	BSWAPL R11
  1359	MOVL   R11, 44(SP)
  1360	MOVOU  X11, 48(SP)
  1361	ADDL   $0x01, R10
  1362	MOVL   R10, R11
  1363	XORL   R12, R11
  1364	BSWAPL R11
  1365	MOVL   R11, 60(SP)
  1366	MOVOU  X11, 64(SP)
  1367	ADDL   $0x01, R10
  1368	MOVL   R10, R11
  1369	XORL   R12, R11
  1370	BSWAPL R11
  1371	MOVL   R11, 76(SP)
  1372	MOVOU  X11, 80(SP)
  1373	ADDL   $0x01, R10
  1374	MOVL   R10, R11
  1375	XORL   R12, R11
  1376	BSWAPL R11
  1377	MOVL   R11, 92(SP)
  1378	MOVOU  X11, 96(SP)
  1379	ADDL   $0x01, R10
  1380	MOVL   R10, R11
  1381	XORL   R12, R11
  1382	BSWAPL R11
  1383	MOVL   R11, 108(SP)
  1384	MOVOU  X11, 112(SP)
  1385	ADDL   $0x01, R10
  1386	MOVL   R10, R11
  1387	XORL   R12, R11
  1388	BSWAPL R11
  1389	MOVL   R11, 124(SP)
  1390
      // Main loop: 128 bytes (eight blocks) per iteration. AES rounds on X0..X7
      // are interleaved with hashing of the eight input blocks, and each counter
      // slot is refreshed for the next iteration as soon as it has been loaded.
  1391gcmAesDecOctetsLoop:
  1392	CMPQ      R9, $0x80
  1393	JB        gcmAesDecEndOctets
  1394	SUBQ      $0x80, R9
      	// Load the eight prepared counter blocks.
  1395	MOVOU     (SP), X0
  1396	MOVOU     16(SP), X1
  1397	MOVOU     32(SP), X2
  1398	MOVOU     48(SP), X3
  1399	MOVOU     64(SP), X4
  1400	MOVOU     80(SP), X5
  1401	MOVOU     96(SP), X6
  1402	MOVOU     112(SP), X7
      	// Input block 0: byte-reverse, XOR in the running hash value, and start
      	// new partial products with the table entries at 0(DI) and 16(DI).
      	// X12 is the XOR of the two 64-bit halves of the block, used for the
      	// middle product.
  1403	MOVOU     (DX), X11
  1404	PSHUFB    X15, X11
  1405	PXOR      X8, X11
  1406	PSHUFD    $0x4e, X11, X12
  1407	PXOR      X11, X12
  1408	MOVOU     (DI), X8
  1409	MOVOU     16(DI), X10
  1410	MOVOU     X8, X9
  1411	PCLMULQDQ $0x00, X12, X10
  1412	PCLMULQDQ $0x00, X11, X8
  1413	PCLMULQDQ $0x11, X11, X9
      	// Round key 16(AX) on all eight blocks, interleaved with input block 1
      	// multiplied by the table entries at 32(DI)/48(DI); then refresh slot 0.
  1414	MOVOU     16(AX), X11
  1415	AESENC    X11, X0
  1416	AESENC    X11, X1
  1417	AESENC    X11, X2
  1418	AESENC    X11, X3
  1419	MOVOU     32(DI), X12
  1420	MOVOU     X12, X13
  1421	AESENC    X11, X4
  1422	AESENC    X11, X5
  1423	AESENC    X11, X6
  1424	AESENC    X11, X7
  1425	MOVOU     16(DX), X11
  1426	PSHUFB    X15, X11
  1427	PCLMULQDQ $0x00, X11, X12
  1428	PXOR      X12, X8
  1429	PSHUFD    $0x4e, X11, X12
  1430	PCLMULQDQ $0x11, X11, X13
  1431	PXOR      X12, X11
  1432	PXOR      X13, X9
  1433	MOVOU     48(DI), X13
  1434	PCLMULQDQ $0x00, X13, X11
  1435	PXOR      X11, X10
  1436	ADDL      $0x01, R10
  1437	MOVL      R10, R11
  1438	XORL      R12, R11
  1439	BSWAPL    R11
  1440	MOVL      R11, 12(SP)
      	// Round key 32(AX); input block 2 with table entries 64(DI)/80(DI);
      	// refresh slot 1.
  1441	MOVOU     32(AX), X11
  1442	AESENC    X11, X0
  1443	AESENC    X11, X1
  1444	AESENC    X11, X2
  1445	AESENC    X11, X3
  1446	MOVOU     64(DI), X12
  1447	MOVOU     X12, X13
  1448	AESENC    X11, X4
  1449	AESENC    X11, X5
  1450	AESENC    X11, X6
  1451	AESENC    X11, X7
  1452	MOVOU     32(DX), X11
  1453	PSHUFB    X15, X11
  1454	PCLMULQDQ $0x00, X11, X12
  1455	PXOR      X12, X8
  1456	PSHUFD    $0x4e, X11, X12
  1457	PCLMULQDQ $0x11, X11, X13
  1458	PXOR      X12, X11
  1459	PXOR      X13, X9
  1460	MOVOU     80(DI), X13
  1461	PCLMULQDQ $0x00, X13, X11
  1462	PXOR      X11, X10
  1463	ADDL      $0x01, R10
  1464	MOVL      R10, R11
  1465	XORL      R12, R11
  1466	BSWAPL    R11
  1467	MOVL      R11, 28(SP)
      	// Round key 48(AX); input block 3 with table entries 96(DI)/112(DI);
      	// refresh slot 2.
  1468	MOVOU     48(AX), X11
  1469	AESENC    X11, X0
  1470	AESENC    X11, X1
  1471	AESENC    X11, X2
  1472	AESENC    X11, X3
  1473	MOVOU     96(DI), X12
  1474	MOVOU     X12, X13
  1475	AESENC    X11, X4
  1476	AESENC    X11, X5
  1477	AESENC    X11, X6
  1478	AESENC    X11, X7
  1479	MOVOU     48(DX), X11
  1480	PSHUFB    X15, X11
  1481	PCLMULQDQ $0x00, X11, X12
  1482	PXOR      X12, X8
  1483	PSHUFD    $0x4e, X11, X12
  1484	PCLMULQDQ $0x11, X11, X13
  1485	PXOR      X12, X11
  1486	PXOR      X13, X9
  1487	MOVOU     112(DI), X13
  1488	PCLMULQDQ $0x00, X13, X11
  1489	PXOR      X11, X10
  1490	ADDL      $0x01, R10
  1491	MOVL      R10, R11
  1492	XORL      R12, R11
  1493	BSWAPL    R11
  1494	MOVL      R11, 44(SP)
      	// Round key 64(AX); input block 4 with table entries 128(DI)/144(DI);
      	// refresh slot 3.
  1495	MOVOU     64(AX), X11
  1496	AESENC    X11, X0
  1497	AESENC    X11, X1
  1498	AESENC    X11, X2
  1499	AESENC    X11, X3
  1500	MOVOU     128(DI), X12
  1501	MOVOU     X12, X13
  1502	AESENC    X11, X4
  1503	AESENC    X11, X5
  1504	AESENC    X11, X6
  1505	AESENC    X11, X7
  1506	MOVOU     64(DX), X11
  1507	PSHUFB    X15, X11
  1508	PCLMULQDQ $0x00, X11, X12
  1509	PXOR      X12, X8
  1510	PSHUFD    $0x4e, X11, X12
  1511	PCLMULQDQ $0x11, X11, X13
  1512	PXOR      X12, X11
  1513	PXOR      X13, X9
  1514	MOVOU     144(DI), X13
  1515	PCLMULQDQ $0x00, X13, X11
  1516	PXOR      X11, X10
  1517	ADDL      $0x01, R10
  1518	MOVL      R10, R11
  1519	XORL      R12, R11
  1520	BSWAPL    R11
  1521	MOVL      R11, 60(SP)
      	// Round key 80(AX); input block 5 with table entries 160(DI)/176(DI);
      	// refresh slot 4.
  1522	MOVOU     80(AX), X11
  1523	AESENC    X11, X0
  1524	AESENC    X11, X1
  1525	AESENC    X11, X2
  1526	AESENC    X11, X3
  1527	MOVOU     160(DI), X12
  1528	MOVOU     X12, X13
  1529	AESENC    X11, X4
  1530	AESENC    X11, X5
  1531	AESENC    X11, X6
  1532	AESENC    X11, X7
  1533	MOVOU     80(DX), X11
  1534	PSHUFB    X15, X11
  1535	PCLMULQDQ $0x00, X11, X12
  1536	PXOR      X12, X8
  1537	PSHUFD    $0x4e, X11, X12
  1538	PCLMULQDQ $0x11, X11, X13
  1539	PXOR      X12, X11
  1540	PXOR      X13, X9
  1541	MOVOU     176(DI), X13
  1542	PCLMULQDQ $0x00, X13, X11
  1543	PXOR      X11, X10
  1544	ADDL      $0x01, R10
  1545	MOVL      R10, R11
  1546	XORL      R12, R11
  1547	BSWAPL    R11
  1548	MOVL      R11, 76(SP)
      	// Round key 96(AX); input block 6 with table entries 192(DI)/208(DI);
      	// refresh slot 5.
  1549	MOVOU     96(AX), X11
  1550	AESENC    X11, X0
  1551	AESENC    X11, X1
  1552	AESENC    X11, X2
  1553	AESENC    X11, X3
  1554	MOVOU     192(DI), X12
  1555	MOVOU     X12, X13
  1556	AESENC    X11, X4
  1557	AESENC    X11, X5
  1558	AESENC    X11, X6
  1559	AESENC    X11, X7
  1560	MOVOU     96(DX), X11
  1561	PSHUFB    X15, X11
  1562	PCLMULQDQ $0x00, X11, X12
  1563	PXOR      X12, X8
  1564	PSHUFD    $0x4e, X11, X12
  1565	PCLMULQDQ $0x11, X11, X13
  1566	PXOR      X12, X11
  1567	PXOR      X13, X9
  1568	MOVOU     208(DI), X13
  1569	PCLMULQDQ $0x00, X13, X11
  1570	PXOR      X11, X10
  1571	ADDL      $0x01, R10
  1572	MOVL      R10, R11
  1573	XORL      R12, R11
  1574	BSWAPL    R11
  1575	MOVL      R11, 92(SP)
      	// Round key 112(AX); input block 7 with table entries 224(DI)/240(DI);
      	// refresh slot 6.
  1576	MOVOU     112(AX), X11
  1577	AESENC    X11, X0
  1578	AESENC    X11, X1
  1579	AESENC    X11, X2
  1580	AESENC    X11, X3
  1581	MOVOU     224(DI), X12
  1582	MOVOU     X12, X13
  1583	AESENC    X11, X4
  1584	AESENC    X11, X5
  1585	AESENC    X11, X6
  1586	AESENC    X11, X7
  1587	MOVOU     112(DX), X11
  1588	PSHUFB    X15, X11
  1589	PCLMULQDQ $0x00, X11, X12
  1590	PXOR      X12, X8
  1591	PSHUFD    $0x4e, X11, X12
  1592	PCLMULQDQ $0x11, X11, X13
  1593	PXOR      X12, X11
  1594	PXOR      X13, X9
  1595	MOVOU     240(DI), X13
  1596	PCLMULQDQ $0x00, X13, X11
  1597	PXOR      X11, X10
  1598	ADDL      $0x01, R10
  1599	MOVL      R10, R11
  1600	XORL      R12, R11
  1601	BSWAPL    R11
  1602	MOVL      R11, 108(SP)
      	// Round key 128(AX); refresh slot 7.
  1603	MOVOU     128(AX), X11
  1604	AESENC    X11, X0
  1605	AESENC    X11, X1
  1606	AESENC    X11, X2
  1607	AESENC    X11, X3
  1608	AESENC    X11, X4
  1609	AESENC    X11, X5
  1610	AESENC    X11, X6
  1611	AESENC    X11, X7
  1612	ADDL      $0x01, R10
  1613	MOVL      R10, R11
  1614	XORL      R12, R11
  1615	BSWAPL    R11
  1616	MOVL      R11, 124(SP)
      	// Fold the middle product X10 into the low (X8) and high (X9) halves.
  1617	PXOR      X8, X10
  1618	PXOR      X9, X10
  1619	MOVOU     X10, X11
  1620	PSRLDQ    $0x08, X10
  1621	PSLLDQ    $0x08, X11
  1622	PXOR      X10, X9
  1623	PXOR      X11, X8
      	// First reduction step of X8 with gcmPoly (X14).
  1624	MOVOU     X14, X11
  1625	PCLMULQDQ $0x01, X8, X11
  1626	PSHUFD    $0x4e, X8, X8
  1627	PXOR      X11, X8
      	// Round key 144(AX).
  1628	MOVOU     144(AX), X11
  1629	AESENC    X11, X0
  1630	AESENC    X11, X1
  1631	AESENC    X11, X2
  1632	AESENC    X11, X3
  1633	AESENC    X11, X4
  1634	AESENC    X11, X5
  1635	AESENC    X11, X6
  1636	AESENC    X11, X7
      	// Second reduction step, then add the high half: X8 is the new hash value.
  1637	MOVOU     X14, X11
  1638	PCLMULQDQ $0x01, X8, X11
  1639	PSHUFD    $0x4e, X8, X8
  1640	PXOR      X11, X8
  1641	PXOR      X9, X8
      	// Remaining rounds depend on R13. The flags set by this CMPQ are also
      	// consumed by the JE further down; no instruction in between writes them.
  1642	MOVOU     160(AX), X11
  1643	CMPQ      R13, $0x0c
  1644	JB        decLast1
  1645	AESENC    X11, X0
  1646	AESENC    X11, X1
  1647	AESENC    X11, X2
  1648	AESENC    X11, X3
  1649	AESENC    X11, X4
  1650	AESENC    X11, X5
  1651	AESENC    X11, X6
  1652	AESENC    X11, X7
  1653	MOVOU     176(AX), X11
  1654	AESENC    X11, X0
  1655	AESENC    X11, X1
  1656	AESENC    X11, X2
  1657	AESENC    X11, X3
  1658	AESENC    X11, X4
  1659	AESENC    X11, X5
  1660	AESENC    X11, X6
  1661	AESENC    X11, X7
  1662	MOVOU     192(AX), X11
  1663	JE        decLast1
  1664	AESENC    X11, X0
  1665	AESENC    X11, X1
  1666	AESENC    X11, X2
  1667	AESENC    X11, X3
  1668	AESENC    X11, X4
  1669	AESENC    X11, X5
  1670	AESENC    X11, X6
  1671	AESENC    X11, X7
  1672	MOVOU     208(AX), X11
  1673	AESENC    X11, X0
  1674	AESENC    X11, X1
  1675	AESENC    X11, X2
  1676	AESENC    X11, X3
  1677	AESENC    X11, X4
  1678	AESENC    X11, X5
  1679	AESENC    X11, X6
  1680	AESENC    X11, X7
  1681	MOVOU     224(AX), X11
  1682
      // Final round with the key selected above, then XOR each of the eight
      // results with the matching input block and store 128 bytes to dst.
  1683decLast1:
  1684	AESENCLAST X11, X0
  1685	AESENCLAST X11, X1
  1686	AESENCLAST X11, X2
  1687	AESENCLAST X11, X3
  1688	AESENCLAST X11, X4
  1689	AESENCLAST X11, X5
  1690	AESENCLAST X11, X6
  1691	AESENCLAST X11, X7
  1692	MOVOU      (DX), X11
  1693	PXOR       X11, X0
  1694	MOVOU      16(DX), X11
  1695	PXOR       X11, X1
  1696	MOVOU      32(DX), X11
  1697	PXOR       X11, X2
  1698	MOVOU      48(DX), X11
  1699	PXOR       X11, X3
  1700	MOVOU      64(DX), X11
  1701	PXOR       X11, X4
  1702	MOVOU      80(DX), X11
  1703	PXOR       X11, X5
  1704	MOVOU      96(DX), X11
  1705	PXOR       X11, X6
  1706	MOVOU      112(DX), X11
  1707	PXOR       X11, X7
  1708	MOVOU      X0, (SI)
  1709	MOVOU      X1, 16(SI)
  1710	MOVOU      X2, 32(SI)
  1711	MOVOU      X3, 48(SI)
  1712	MOVOU      X4, 64(SI)
  1713	MOVOU      X5, 80(SI)
  1714	MOVOU      X6, 96(SI)
  1715	MOVOU      X7, 112(SI)
  1716	LEAQ       128(SI), SI
  1717	LEAQ       128(DX), DX
  1718	JMP        gcmAesDecOctetsLoop
  1719
      // R10 matches the counter stored in slot 7, which is seven ahead of slot 0.
      // The paths below use only slot 0, so rewind R10 to slot 0's counter.
  1720gcmAesDecEndOctets:
  1721	SUBQ $0x07, R10
  1722
      // Single-block setup: keep the round keys at 16(AX)..112(AX) in X1..X7 and
      // the table entry at 224(DI) in X13 for the duration of the loop.
  1723gcmAesDecSingles:
  1724	MOVOU 16(AX), X1
  1725	MOVOU 32(AX), X2
  1726	MOVOU 48(AX), X3
  1727	MOVOU 64(AX), X4
  1728	MOVOU 80(AX), X5
  1729	MOVOU 96(AX), X6
  1730	MOVOU 112(AX), X7
  1731	MOVOU 224(DI), X13
  1732
      // One 16-byte block per iteration.
  1733gcmAesDecSinglesLoop:
  1734	CMPQ      R9, $0x10
  1735	JB        gcmAesDecTail
  1736	SUBQ      $0x10, R9
      	// Keep the raw input block in X12 for the final XOR, then hash a
      	// byte-reversed copy: (block XOR hash) times the entries at
      	// 224(DI)/240(DI), followed by the two-step reduction.
  1737	MOVOU     (DX), X0
  1738	MOVOU     X0, X12
  1739	PSHUFB    X15, X0
  1740	PXOR      X8, X0
  1741	MOVOU     X13, X8
  1742	MOVOU     X13, X9
  1743	MOVOU     240(DI), X10
  1744	PCLMULQDQ $0x00, X0, X8
  1745	PCLMULQDQ $0x11, X0, X9
  1746	PSHUFD    $0x4e, X0, X11
  1747	PXOR      X0, X11
  1748	PCLMULQDQ $0x00, X11, X10
  1749	PXOR      X8, X10
  1750	PXOR      X9, X10
  1751	MOVOU     X10, X11
  1752	PSRLDQ    $0x08, X10
  1753	PSLLDQ    $0x08, X11
  1754	PXOR      X10, X9
  1755	PXOR      X11, X8
  1756	MOVOU     X14, X11
  1757	PCLMULQDQ $0x01, X8, X11
  1758	PSHUFD    $0x4e, X8, X8
  1759	PXOR      X11, X8
  1760	MOVOU     X14, X11
  1761	PCLMULQDQ $0x01, X8, X11
  1762	PSHUFD    $0x4e, X8, X8
  1763	PXOR      X11, X8
  1764	PXOR      X9, X8
      	// Take the counter block from slot 0 and prepare the slot for the
      	// next block.
  1765	MOVOU     (SP), X0
  1766	ADDL      $0x01, R10
  1767	MOVL      R10, R11
  1768	XORL      R12, R11
  1769	BSWAPL    R11
  1770	MOVL      R11, 12(SP)
  1771	AESENC    X1, X0
  1772	AESENC    X2, X0
  1773	AESENC    X3, X0
  1774	AESENC    X4, X0
  1775	AESENC    X5, X0
  1776	AESENC    X6, X0
  1777	AESENC    X7, X0
  1778	MOVOU     128(AX), X11
  1779	AESENC    X11, X0
  1780	MOVOU     144(AX), X11
  1781	AESENC    X11, X0
  1782	MOVOU     160(AX), X11
      	// Same round-count selection as in the 8-block path; the JE below
      	// reuses the flags from this CMPQ.
  1783	CMPQ      R13, $0x0c
  1784	JB        decLast2
  1785	AESENC    X11, X0
  1786	MOVOU     176(AX), X11
  1787	AESENC    X11, X0
  1788	MOVOU     192(AX), X11
  1789	JE        decLast2
  1790	AESENC    X11, X0
  1791	MOVOU     208(AX), X11
  1792	AESENC    X11, X0
  1793	MOVOU     224(AX), X11
  1794
      // Final round, XOR with the saved input block, store 16 bytes to dst.
  1795decLast2:
  1796	AESENCLAST X11, X0
  1797	PXOR       X12, X0
  1798	MOVOU      X0, (SI)
  1799	LEAQ       16(SI), SI
  1800	LEAQ       16(DX), DX
  1801	JMP        gcmAesDecSinglesLoop
  1802
      // Final partial block: R9 is 0..15 here.
  1803gcmAesDecTail:
  1804	TESTQ     R9, R9
  1805	JE        gcmAesDecDone
      	// X12 = andMask entry for R9 bytes (offset 16*R9 - 16). R10 is reused
      	// as the table pointer from here on and no longer holds the counter.
  1806	MOVQ      R9, R11
  1807	SHLQ      $0x04, R11
  1808	LEAQ      andMask<>+0(SB), R10
  1809	MOVOU     -16(R10)(R11*1), X12
      	// NOTE(review): this loads a full 16 bytes from (DX) although fewer
      	// than 16 bytes of src remain; the bytes past the end are masked off
      	// but must still be readable. Presumably the caller guarantees that
      	// (for example trailing data after the ciphertext) - confirm.
  1810	MOVOU     (DX), X0
  1811	PAND      X12, X0
      	// Keep the masked input in X12 for the final XOR and hash a
      	// byte-reversed copy with the entries at 224(DI)/240(DI).
  1812	MOVOU     X0, X12
  1813	PSHUFB    X15, X0
  1814	PXOR      X8, X0
  1815	MOVOU     224(DI), X8
  1816	MOVOU     240(DI), X10
  1817	MOVOU     X8, X9
  1818	PCLMULQDQ $0x00, X0, X8
  1819	PCLMULQDQ $0x11, X0, X9
  1820	PSHUFD    $0x4e, X0, X11
  1821	PXOR      X0, X11
  1822	PCLMULQDQ $0x00, X11, X10
  1823	PXOR      X8, X10
  1824	PXOR      X9, X10
  1825	MOVOU     X10, X11
  1826	PSRLDQ    $0x08, X10
  1827	PSLLDQ    $0x08, X11
  1828	PXOR      X10, X9
  1829	PXOR      X11, X8
  1830	MOVOU     X14, X11
  1831	PCLMULQDQ $0x01, X8, X11
  1832	PSHUFD    $0x4e, X8, X8
  1833	PXOR      X11, X8
  1834	MOVOU     X14, X11
  1835	PCLMULQDQ $0x01, X8, X11
  1836	PSHUFD    $0x4e, X8, X8
  1837	PXOR      X11, X8
  1838	PXOR      X9, X8
      	// Counter block for the last block comes from slot 0. The store to
      	// 12(SP) that follows is derived from the mask pointer in R10, not
      	// from the counter; the slot is not read again in this function.
  1839	MOVOU     (SP), X0
  1840	ADDL      $0x01, R10
  1841	MOVL      R10, R11
  1842	XORL      R12, R11
  1843	BSWAPL    R11
  1844	MOVL      R11, 12(SP)
  1845	AESENC    X1, X0
  1846	AESENC    X2, X0
  1847	AESENC    X3, X0
  1848	AESENC    X4, X0
  1849	AESENC    X5, X0
  1850	AESENC    X6, X0
  1851	AESENC    X7, X0
  1852	MOVOU     128(AX), X11
  1853	AESENC    X11, X0
  1854	MOVOU     144(AX), X11
  1855	AESENC    X11, X0
  1856	MOVOU     160(AX), X11
  1857	CMPQ      R13, $0x0c
  1858	JB        decLast3
  1859	AESENC    X11, X0
  1860	MOVOU     176(AX), X11
  1861	AESENC    X11, X0
  1862	MOVOU     192(AX), X11
  1863	JE        decLast3
  1864	AESENC    X11, X0
  1865	MOVOU     208(AX), X11
  1866	AESENC    X11, X0
  1867	MOVOU     224(AX), X11
  1868
      // Final round, then XOR with the masked input block.
  1869decLast3:
  1870	AESENCLAST X11, X0
  1871	PXOR       X12, X0
  1872
      // Write exactly R9 bytes to dst, one byte at a time from the low end of X0.
  1873ptxStoreLoop:
  1874	PEXTRB $0x00, X0, (SI)
  1875	PSRLDQ $0x01, X0
  1876	LEAQ   1(SI), SI
  1877	DECQ   R9
  1878	JNE    ptxStoreLoop
  1879
      // Store the updated hash value back to T.
  1880gcmAesDecDone:
  1881	MOVOU X8, (R8)
  1882	RET

View as plain text