
Text file src/crypto/internal/fips140/subtle/xor_loong64.s

Documentation: crypto/internal/fips140/subtle

// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

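// SMALL_TAIL dispatches short lengths to the matching tail handler.
// Throughout this file the remaining byte count n lives in R7, and
// SGTU $k, R7, R8 sets R8 = 1 when n < k (unsigned), so the BNE that
// follows branches to the handler for chunks smaller than k.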
#define SMALL_TAIL \
	SGTU	$2, R7, R8; \
	BNE	R8, xor_1; \
	SGTU	$4, R7, R8; \
	BNE	R8, xor_2; \
	SGTU	$8, R7, R8; \
	BNE	R8, xor_4; \
	SGTU	$16, R7, R8; \
	BNE	R8, xor_8; \

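// SMALL drains the 8/4/2/1-byte tail: each block XORs one chunk,
// advances dst (R4), a (R5), and b (R6), and exits early when n
// reaches zero, otherwise falling through to the next smaller chunk.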
#define SMALL \
xor_8_check:; \
	SGTU	$8, R7, R8; \
	BNE	R8, xor_4_check; \
xor_8:; \
	SUBV	$8, R7; \
	MOVV	(R5), R10; \
	MOVV	(R6), R11; \
	XOR	R10, R11; \
	MOVV	R11, (R4); \
	ADDV	$8, R5; \
	ADDV	$8, R6; \
	ADDV	$8, R4; \
	BEQ	R7, R0, end; \
xor_4_check:; \
	SGTU	$4, R7, R8; \
	BNE	R8, xor_2_check; \
xor_4:; \
	SUBV	$4, R7; \
	MOVW	(R5), R10; \
	MOVW	(R6), R11; \
	XOR	R10, R11; \
	MOVW	R11, (R4); \
	ADDV	$4, R5; \
	ADDV	$4, R6; \
	ADDV	$4, R4; \
	BEQ	R7, R0, end; \
xor_2_check:; \
	SGTU	$2, R7, R8; \
	BNE	R8, xor_1; \
xor_2:; \
	SUBV	$2, R7; \
	MOVH	(R5), R10; \
	MOVH	(R6), R11; \
	XOR	R10, R11; \
	MOVH	R11, (R4); \
	ADDV	$2, R5; \
	ADDV	$2, R6; \
	ADDV	$2, R4; \
	BEQ	R7, R0, end; \
xor_1:; \
	MOVB	(R5), R10; \
	MOVB	(R6), R11; \
	XOR	R10, R11; \
	MOVB	R11, (R4); \

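// Each of the three routines below computes the equivalent of
//
//	for i := 0; i < n; i++ {
//		dst[i] = a[i] ^ b[i]
//	}
//
// treating dst, a, and b as n-byte buffers. They differ only in the
// widest loads and stores they use.
//
// xorBytesBasic uses general-purpose 64-bit registers only, XORing up
// to 64 bytes per loop iteration.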
// func xorBytesBasic(dst, a, b *byte, n int)
TEXT ·xorBytesBasic(SB), NOSPLIT, $0
	MOVV	dst+0(FP), R4
	MOVV	a+8(FP), R5
	MOVV	b+16(FP), R6
	MOVV	n+24(FP), R7

	SMALL_TAIL

xor_64_check:
	SGTU	$64, R7, R8
	BNE	R8, xor_32_check
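// Unrolled main loop: eight 64-bit loads from each input, then eight
// stores. BEQ with a single register operand branches when that
// register is zero, so BEQ R8, xor_64_loop repeats while n >= 64 and
// BEQ R7, end finishes when n is exactly zero.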
xor_64_loop:
	SUBV	$64, R7
	MOVV	(R5), R10
	MOVV	8(R5), R11
	MOVV	16(R5), R12
	MOVV	24(R5), R13
	MOVV	(R6), R14
	MOVV	8(R6), R15
	MOVV	16(R6), R16
	MOVV	24(R6), R17
	XOR	R10, R14
	XOR	R11, R15
	XOR	R12, R16
	XOR	R13, R17
	MOVV	R14, (R4)
	MOVV	R15, 8(R4)
	MOVV	R16, 16(R4)
	MOVV	R17, 24(R4)
	MOVV	32(R5), R10
	MOVV	40(R5), R11
	MOVV	48(R5), R12
	MOVV	56(R5), R13
	MOVV	32(R6), R14
	MOVV	40(R6), R15
	MOVV	48(R6), R16
	MOVV	56(R6), R17
	XOR	R10, R14
	XOR	R11, R15
	XOR	R12, R16
	XOR	R13, R17
	MOVV	R14, 32(R4)
	MOVV	R15, 40(R4)
	MOVV	R16, 48(R4)
	MOVV	R17, 56(R4)
	SGTU	$64, R7, R8
	ADDV	$64, R5
	ADDV	$64, R6
	ADDV	$64, R4
	BEQ	R8, xor_64_loop
	BEQ	R7, end

xor_32_check:
	SGTU	$32, R7, R8
	BNE	R8, xor_16_check
xor_32:
	SUBV	$32, R7
	MOVV	(R5), R10
	MOVV	8(R5), R11
	MOVV	16(R5), R12
	MOVV	24(R5), R13
	MOVV	(R6), R14
	MOVV	8(R6), R15
	MOVV	16(R6), R16
	MOVV	24(R6), R17
	XOR	R10, R14
	XOR	R11, R15
	XOR	R12, R16
	XOR	R13, R17
	MOVV	R14, (R4)
	MOVV	R15, 8(R4)
	MOVV	R16, 16(R4)
	MOVV	R17, 24(R4)
	ADDV	$32, R5
	ADDV	$32, R6
	ADDV	$32, R4
	BEQ	R7, R0, end

xor_16_check:
	SGTU	$16, R7, R8
	BNE	R8, xor_8_check
xor_16:
	SUBV	$16, R7
	MOVV	(R5), R10
	MOVV	8(R5), R11
	MOVV	(R6), R12
	MOVV	8(R6), R13
	XOR	R10, R12
	XOR	R11, R13
	MOVV	R12, (R4)
	MOVV	R13, 8(R4)
	ADDV	$16, R5
	ADDV	$16, R6
	ADDV	$16, R4
	BEQ	R7, R0, end

	SMALL
end:
	RET

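// xorBytesLSX additionally uses the 128-bit LSX vector registers
// (V0-V15), XORing up to 128 bytes per loop iteration before handing
// shorter chunks to the scalar SMALL tail.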
// func xorBytesLSX(dst, a, b *byte, n int)
TEXT ·xorBytesLSX(SB), NOSPLIT, $0
	MOVV	dst+0(FP), R4
	MOVV	a+8(FP), R5
	MOVV	b+16(FP), R6
	MOVV	n+24(FP), R7

	SMALL_TAIL

xor_128_lsx_check:
	SGTU	$128, R7, R8
	BNE	R8, xor_64_lsx_check
xor_128_lsx_loop:
	SUBV	$128, R7
	VMOVQ	(R5), V0
	VMOVQ	16(R5), V1
	VMOVQ	32(R5), V2
	VMOVQ	48(R5), V3
	VMOVQ	64(R5), V4
	VMOVQ	80(R5), V5
	VMOVQ	96(R5), V6
	VMOVQ	112(R5), V7
	VMOVQ	(R6), V8
	VMOVQ	16(R6), V9
	VMOVQ	32(R6), V10
	VMOVQ	48(R6), V11
	VMOVQ	64(R6), V12
	VMOVQ	80(R6), V13
	VMOVQ	96(R6), V14
	VMOVQ	112(R6), V15
	VXORV	V0, V8, V8
	VXORV	V1, V9, V9
	VXORV	V2, V10, V10
	VXORV	V3, V11, V11
	VXORV	V4, V12, V12
	VXORV	V5, V13, V13
	VXORV	V6, V14, V14
	VXORV	V7, V15, V15
	VMOVQ	V8, (R4)
	VMOVQ	V9, 16(R4)
	VMOVQ	V10, 32(R4)
	VMOVQ	V11, 48(R4)
	VMOVQ	V12, 64(R4)
	VMOVQ	V13, 80(R4)
	VMOVQ	V14, 96(R4)
	VMOVQ	V15, 112(R4)
	SGTU	$128, R7, R8
	ADDV	$128, R5
	ADDV	$128, R6
	ADDV	$128, R4
	BEQ	R8, xor_128_lsx_loop
	BEQ	R7, end

xor_64_lsx_check:
	SGTU	$64, R7, R8
	BNE	R8, xor_32_lsx_check
xor_64_lsx:
	SUBV	$64, R7
	VMOVQ	(R5), V0
	VMOVQ	16(R5), V1
	VMOVQ	32(R5), V2
	VMOVQ	48(R5), V3
	VMOVQ	(R6), V4
	VMOVQ	16(R6), V5
	VMOVQ	32(R6), V6
	VMOVQ	48(R6), V7
	VXORV	V0, V4, V4
	VXORV	V1, V5, V5
	VXORV	V2, V6, V6
	VXORV	V3, V7, V7
	VMOVQ	V4, (R4)
	VMOVQ	V5, 16(R4)
	VMOVQ	V6, 32(R4)
	VMOVQ	V7, 48(R4)
	ADDV	$64, R5
	ADDV	$64, R6
	ADDV	$64, R4
	BEQ	R7, end

xor_32_lsx_check:
	SGTU	$32, R7, R8
	BNE	R8, xor_16_lsx_check
xor_32_lsx:
	SUBV	$32, R7
	VMOVQ	(R5), V0
	VMOVQ	16(R5), V1
	VMOVQ	(R6), V2
	VMOVQ	16(R6), V3
	VXORV	V0, V2, V2
	VXORV	V1, V3, V3
	VMOVQ	V2, (R4)
	VMOVQ	V3, 16(R4)
	ADDV	$32, R5
	ADDV	$32, R6
	ADDV	$32, R4
	BEQ	R7, end

xor_16_lsx_check:
	SGTU	$16, R7, R8
	BNE	R8, xor_8_check
xor_16_lsx:
	SUBV	$16, R7
	VMOVQ	(R5), V0
	VMOVQ	(R6), V1
	VXORV	V0, V1, V1
	VMOVQ	V1, (R4)
	ADDV	$16, R5
	ADDV	$16, R6
	ADDV	$16, R4
	BEQ	R7, end

	SMALL
end:
	RET

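// xorBytesLASX uses the 256-bit LASX vector registers (X0-X15),
// XORing up to 256 bytes per loop iteration, then 128/64/32-byte
// vector blocks.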
// func xorBytesLASX(dst, a, b *byte, n int)
TEXT ·xorBytesLASX(SB), NOSPLIT, $0
	MOVV	dst+0(FP), R4
	MOVV	a+8(FP), R5
	MOVV	b+16(FP), R6
	MOVV	n+24(FP), R7

	SMALL_TAIL

xor_256_lasx_check:
	SGTU	$256, R7, R8
	BNE	R8, xor_128_lasx_check
xor_256_lasx_loop:
	SUBV	$256, R7
	XVMOVQ	(R5), X0
	XVMOVQ	32(R5), X1
	XVMOVQ	64(R5), X2
	XVMOVQ	96(R5), X3
	XVMOVQ	128(R5), X4
	XVMOVQ	160(R5), X5
	XVMOVQ	192(R5), X6
	XVMOVQ	224(R5), X7
	XVMOVQ	(R6), X8
	XVMOVQ	32(R6), X9
	XVMOVQ	64(R6), X10
	XVMOVQ	96(R6), X11
	XVMOVQ	128(R6), X12
	XVMOVQ	160(R6), X13
	XVMOVQ	192(R6), X14
	XVMOVQ	224(R6), X15
	XVXORV	X0, X8, X8
	XVXORV	X1, X9, X9
	XVXORV	X2, X10, X10
	XVXORV	X3, X11, X11
	XVXORV	X4, X12, X12
	XVXORV	X5, X13, X13
	XVXORV	X6, X14, X14
	XVXORV	X7, X15, X15
	XVMOVQ	X8, (R4)
	XVMOVQ	X9, 32(R4)
	XVMOVQ	X10, 64(R4)
	XVMOVQ	X11, 96(R4)
	XVMOVQ	X12, 128(R4)
	XVMOVQ	X13, 160(R4)
	XVMOVQ	X14, 192(R4)
	XVMOVQ	X15, 224(R4)
	SGTU	$256, R7, R8
	ADDV	$256, R5
	ADDV	$256, R6
	ADDV	$256, R4
	BEQ	R8, xor_256_lasx_loop
	BEQ	R7, end

xor_128_lasx_check:
	SGTU	$128, R7, R8
	BNE	R8, xor_64_lasx_check
xor_128_lasx:
	SUBV	$128, R7
	XVMOVQ	(R5), X0
	XVMOVQ	32(R5), X1
	XVMOVQ	64(R5), X2
	XVMOVQ	96(R5), X3
	XVMOVQ	(R6), X4
	XVMOVQ	32(R6), X5
	XVMOVQ	64(R6), X6
	XVMOVQ	96(R6), X7
	XVXORV	X0, X4, X4
	XVXORV	X1, X5, X5
	XVXORV	X2, X6, X6
	XVXORV	X3, X7, X7
	XVMOVQ	X4, (R4)
	XVMOVQ	X5, 32(R4)
	XVMOVQ	X6, 64(R4)
	XVMOVQ	X7, 96(R4)
	ADDV	$128, R5
	ADDV	$128, R6
	ADDV	$128, R4
	BEQ	R7, end

xor_64_lasx_check:
	SGTU	$64, R7, R8
	BNE	R8, xor_32_lasx_check
xor_64_lasx:
	SUBV	$64, R7
	XVMOVQ	(R5), X0
	XVMOVQ	32(R5), X1
	XVMOVQ	(R6), X2
	XVMOVQ	32(R6), X3
	XVXORV	X0, X2, X2
	XVXORV	X1, X3, X3
	XVMOVQ	X2, (R4)
	XVMOVQ	X3, 32(R4)
	ADDV	$64, R5
	ADDV	$64, R6
	ADDV	$64, R4
	BEQ	R7, end

xor_32_lasx_check:
	SGTU	$32, R7, R8
	BNE	R8, xor_16_lasx_check
xor_32_lasx:
	SUBV	$32, R7
	XVMOVQ	(R5), X0
	XVMOVQ	(R6), X1
	XVXORV	X0, X1, X1
	XVMOVQ	X1, (R4)
	ADDV	$32, R5
	ADDV	$32, R6
	ADDV	$32, R4
	BEQ	R7, end

xor_16_lasx_check:
	SGTU	$16, R7, R8
	BNE	R8, xor_8_check
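// A 16-byte chunk fits in one 128-bit register, so the LASX routine
// reuses the LSX instructions here (hardware with LASX also
// implements LSX).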
xor_16_lasx:
	SUBV	$16, R7
	VMOVQ	(R5), V0
	VMOVQ	(R6), V1
	VXORV	V0, V1, V1
	VMOVQ	V1, (R4)
	ADDV	$16, R5
	ADDV	$16, R6
	ADDV	$16, R4
	BEQ	R7, end

	SMALL
end:
	RET

