
Text file src/internal/bytealg/equal_ppc64x.s

Documentation: internal/bytealg

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
     5//go:build ppc64 || ppc64le
     7#include "go_asm.h"
     8#include "textflag.h"
    10// 4K (smallest case) page size offset mask for PPC64.
    11#define PAGE_OFFSET 4095
    13// Likewise, the BC opcode is hard to read, and no extended
    14// mnemonics are offered for these forms.
    15#define BGELR_CR6 BC  4, CR6LT, (LR)
    16#define BEQLR     BC 12, CR0EQ, (LR)
    18// memequal(a, b unsafe.Pointer, size uintptr) bool
    19TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
    20	// R3 = a
    21	// R4 = b
    22	// R5 = size
    23	BR	memeqbody<>(SB)
    25// memequal_varlen(a, b unsafe.Pointer) bool
    26TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-17
    27	// R3 = a
    28	// R4 = b
    29	CMP	R3, R4
    30	BEQ	eq
    31	MOVD	8(R11), R5    // compiler stores size at offset 8 in the closure
    32	BR	memeqbody<>(SB)
    34	MOVD	$1, R3
    35	RET
    37// Do an efficient memequal for ppc64
    38// R3 = s1
    39// R4 = s2
    40// R5 = len
    41// On exit:
    42// R3 = return value
    43TEXT memeqbody<>(SB),NOSPLIT|NOFRAME,$0-0
    44	MOVD	R3, R8		// Move s1 into R8
    45	ADD	R5, R3, R9	// &s1[len(s1)]
    46	ADD	R5, R4, R10	// &s2[len(s2)]
    47	MOVD	$1, R11
    48	CMP	R5, $16		// Use GPR checks for check for len <= 16
    49	BLE	check0_16
    50	MOVD	$0, R3		// Assume no-match in case BGELR CR6 returns
    51	CMP	R5, $32		// Use overlapping VSX loads for len <= 32
    52	BLE	check17_32	// Do a pair of overlapping VSR compares
    53	CMP	R5, $64
    54	BLE	check33_64	// Hybrid check + overlap compare.
    57	SRD	$6, R5, R6	// number of 64 byte chunks to compare
    58	MOVD	R6, CTR
    59	MOVD	$16, R14	// index for VSX loads and stores
    60	MOVD	$32, R15
    61	MOVD	$48, R16
    62	ANDCC	$0x3F, R5, R5	// len%64==0?
    64	PCALIGN $16
    66	LXVD2X	(R8+R0), V0
    67	LXVD2X	(R4+R0), V1
    68	VCMPEQUBCC V0, V1, V2	// compare, setting CR6
    69	BGELR_CR6
    70	LXVD2X	(R8+R14), V0
    71	LXVD2X	(R4+R14), V1
    72	VCMPEQUBCC	V0, V1, V2
    73	BGELR_CR6
    74	LXVD2X	(R8+R15), V0
    75	LXVD2X	(R4+R15), V1
    76	VCMPEQUBCC	V0, V1, V2
    77	BGELR_CR6
    78	LXVD2X	(R8+R16), V0
    79	LXVD2X	(R4+R16), V1
    80	VCMPEQUBCC	V0, V1, V2
    81	BGELR_CR6
    82	ADD	$64,R8		// bump up to next 64
    83	ADD	$64,R4
    84	BDNZ	loop64
    86	ISEL	CR0EQ, R11, R3, R3	// If no tail, return 1, otherwise R3 remains 0.
    87	BEQLR				// return if no tail.
    89	ADD	$-64, R9, R8
    90	ADD	$-64, R10, R4
    91	LXVD2X	(R8+R0), V0
    92	LXVD2X	(R4+R0), V1
    93	VCMPEQUBCC	V0, V1, V2
    94	BGELR_CR6
    95	LXVD2X	(R8+R14), V0
    96	LXVD2X	(R4+R14), V1
    97	VCMPEQUBCC	V0, V1, V2
    98	BGELR_CR6
    99	LXVD2X	(R8+R15), V0
   100	LXVD2X	(R4+R15), V1
   101	VCMPEQUBCC	V0, V1, V2
   102	BGELR_CR6
   103	LXVD2X	(R8+R16), V0
   104	LXVD2X	(R4+R16), V1
   105	VCMPEQUBCC	V0, V1, V2
   106	ISEL	CR6LT, R11, R0, R3
   107	RET
   110	// Bytes 0-15
   111	LXVD2X	(R8+R0), V0
   112	LXVD2X	(R4+R0), V1
   113	VCMPEQUBCC	V0, V1, V2
   114	BGELR_CR6
   115	ADD	$16, R8
   116	ADD	$16, R4
   118	// Bytes 16-31
   119	LXVD2X	(R8+R0), V0
   120	LXVD2X	(R4+R0), V1
   121	VCMPEQUBCC	V0, V1, V2
   122	BGELR_CR6
   124	// A little tricky, but point R4,R8 to &sx[len-32],
   125	// and reuse check17_32 to check the next 1-31 bytes (with some overlap)
   126	ADD	$-32, R9, R8
   127	ADD	$-32, R10, R4
   128	// Fallthrough
   131	LXVD2X	(R8+R0), V0
   132	LXVD2X	(R4+R0), V1
   133	VCMPEQUBCC	V0, V1, V2
   134	ISEL	CR6LT, R11, R0, R5
   136	// Load sX[len(sX)-16:len(sX)] and compare.
   137	ADD	$-16, R9
   138	ADD	$-16, R10
   139	LXVD2X	(R9+R0), V0
   140	LXVD2X	(R10+R0), V1
   141	VCMPEQUBCC	V0, V1, V2
   142	ISEL	CR6LT, R5, R0, R3
   143	RET
   146#ifdef GOPPC64_power10
   147	SLD	$56, R5, R7
   148	LXVL	R8, R7, V0
   149	LXVL	R4, R7, V1
   150	VCMPEQUDCC	V0, V1, V2
   151	ISEL	CR6LT, R11, R0, R3
   152	RET
   154	CMP	R5, $8
   155	BLT	check0_7
   156	// Load sX[0:7] and compare.
   157	MOVD	(R8), R6
   158	MOVD	(R4), R7
   159	CMP	R6, R7
   160	ISEL	CR0EQ, R11, R0, R5
   161	// Load sX[len(sX)-8:len(sX)] and compare.
   162	MOVD	-8(R9), R6
   163	MOVD	-8(R10), R7
   164	CMP	R6, R7
   165	ISEL	CR0EQ, R5, R0, R3
   166	RET
   169	CMP	R5,$0
   170	MOVD	$1, R3
   171	BEQLR		// return if len == 0
   173	// Check < 8B loads with a single compare, but select the load address
   174	// such that it cannot cross a page boundary. Load a few bytes from the
   175	// lower address if that does not cross the lower page. Or, load a few
   176	// extra bytes from the higher addresses. And align those values
   177	// consistently in register as either address may have differing
   178	// alignment requirements.
   181	SUBC	R5, $8, R12		// 8-len
   182	SLD	$3, R12, R14		// (8-len)*8
   183	CMPU	R6, R12, CR1		// Enough bytes lower in the page to load lower?
   184	CMPU	R9, R12, CR0
   185	SUB	R12, R8, R6		// compute lower load address
   186	SUB	R12, R4, R9
   187	ISEL	CR1LT, R8, R6, R8	// R8 = R6 < 0 ? R8 (&s1) : R6 (&s1 - (8-len))
   188	ISEL	CR0LT, R4, R9, R4	// Similar for s2
   189	MOVD	(R8), R15
   190	MOVD	(R4), R16
   191	SLD	R14, R15, R7
   192	SLD	R14, R16, R17
   193	SRD	R14, R7, R7		// Clear the upper (8-len) bytes (with 2 shifts)
   194	SRD	R14, R17, R17
   195	SRD	R14, R15, R6		// Clear the lower (8-len) bytes
   196	SRD	R14, R16, R9
   197#ifdef GOARCH_ppc64le
   198	ISEL	CR1LT, R7, R6, R8      // Choose the correct len bytes to compare based on alignment
   199	ISEL	CR0LT, R17, R9, R4
   201	ISEL	CR1LT, R6, R7, R8
   202	ISEL	CR0LT, R9, R17, R4
   204	CMP	R4, R8
   205	ISEL	CR0EQ, R11, R0, R3
   206	RET
   207#endif	// tail processing if !defined(GOPPC64_power10)

View as plain text