// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4
5#include "go_asm.h"
6#include "textflag.h"
7
// memequal(a, b unsafe.Pointer, size uintptr) bool
//
// Reports whether the size bytes at a are identical to the size bytes at b.
// Argument/result slots, as addressed through FP below:
//	a+0, b+4, size+8, ret+12 (one result byte, hence the 13-byte arg area).
TEXT runtime·memequal(SB),NOSPLIT,$0-13
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq			// identical pointers: equal without reading memory
	MOVL	size+8(FP), BX
	LEAL	ret+12(FP), AX		// memeqbody writes the result through AX
	JMP	memeqbody<>(SB)		// jump, not call: memeqbody's RET returns to our caller
eq:
	MOVB	$1, ret+12(FP)
	RET
20
// memequal_varlen(a, b unsafe.Pointer) bool
//
// Same comparison as memequal, but the byte count is not an argument:
// it is read from offset 4 of the closure addressed by DX.
// Argument/result slots, as addressed through FP below:
//	a+0, b+4, ret+8 (one result byte, hence the 9-byte arg area).
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
	MOVL	a+0(FP), SI
	MOVL	b+4(FP), DI
	CMPL	SI, DI
	JEQ	eq			// identical pointers: equal without reading memory
	MOVL	4(DX), BX		// compiler stores size at offset 4 in the closure
	LEAL	ret+8(FP), AX		// memeqbody writes the result through AX
	JMP	memeqbody<>(SB)		// jump, not call: memeqbody's RET returns to our caller
eq:
	MOVB	$1, ret+8(FP)
	RET
33
// memeqbody compares BX bytes at SI with BX bytes at DI and stores the
// outcome (1 if all bytes match, 0 otherwise) in the byte addressed by AX.
//
// Register contract on entry:
//	a in SI
//	b in DI
//	count in BX
//	address of result byte in AX
//
// SI, DI, BX, CX and DX are overwritten; the 64-byte loop also uses X0-X7.
// A count of zero yields 1.
TEXT memeqbody<>(SB),NOSPLIT,$0-0
	CMPL	BX, $4
	JB	small			// fewer than 4 bytes: handled without a loop

	// 64 bytes at a time using xmm registers
hugeloop:
	CMPL	BX, $64
	JB	bigloop
#ifdef GO386_softfloat
	// With GO386_softfloat defined the SSE code below is never reached;
	// everything is compared by bigloop instead.
	JMP	bigloop
#endif
	MOVOU	(SI), X0
	MOVOU	(DI), X1
	MOVOU	16(SI), X2
	MOVOU	16(DI), X3
	MOVOU	32(SI), X4
	MOVOU	32(DI), X5
	MOVOU	48(SI), X6
	MOVOU	48(DI), X7
	// Each PCMPEQB leaves 0xff in every byte lane that matched.
	PCMPEQB	X1, X0
	PCMPEQB	X3, X2
	PCMPEQB	X5, X4
	PCMPEQB	X7, X6
	// AND the four lane masks together: a lane stays 0xff only if it
	// matched in all four 16-byte chunks.
	PAND	X2, X0
	PAND	X6, X4
	PAND	X4, X0
	PMOVMSKB X0, DX			// one bit per lane; 0xffff means all 64 bytes matched
	ADDL	$64, SI
	ADDL	$64, DI
	SUBL	$64, BX
	CMPL	DX, $0xffff
	JEQ	hugeloop
	MOVB	$0, (AX)
	RET

	// 4 bytes at a time using 32-bit register
bigloop:
	CMPL	BX, $4
	JBE	leftover		// 4 or fewer bytes left: finish with one overlapping load
	MOVL	(SI), CX
	MOVL	(DI), DX
	ADDL	$4, SI
	ADDL	$4, DI
	SUBL	$4, BX
	CMPL	CX, DX
	JEQ	bigloop
	MOVB	$0, (AX)
	RET

	// remaining 0-4 bytes
	// SI+BX and DI+BX still point one past the end of each buffer, and this
	// path is only reached when the original count was at least 4, so the
	// loads below read the final 4 bytes of each buffer (possibly re-reading
	// bytes that were already compared).
leftover:
	MOVL	-4(SI)(BX*1), CX
	MOVL	-4(DI)(BX*1), DX
	CMPL	CX, DX
	SETEQ	(AX)
	RET

	// count is 0-3 here.
small:
	CMPL	BX, $0
	JEQ	equal			// ZF is set by this compare, so SETEQ at equal stores 1

	// CX = -(8*count). x86 shifts use only the low 5 bits of CL, so
	// shifting by CX shifts by 32 - 8*count bits.
	LEAL	0(BX*8), CX
	NEGL	CX

	MOVL	SI, DX
	CMPB	DX, $0xfc
	JA	si_high

	// load at SI won't cross a page boundary.
	// The wanted bytes land in the low count bytes; higher bytes are junk
	// that the final SHLL discards.
	MOVL	(SI), SI
	JMP	si_finish
si_high:
	// low address byte is above 0xfc. Load the 4 bytes ending at SI+count
	// instead, then shift the wanted bytes down to the low end.
	MOVL	-4(SI)(BX*1), SI
	SHRL	CX, SI
si_finish:

	// same for DI.
	MOVL	DI, DX
	CMPB	DX, $0xfc
	JA	di_high
	MOVL	(DI), DI
	JMP	di_finish
di_high:
	MOVL	-4(DI)(BX*1), DI
	SHRL	CX, DI
di_finish:

	// The low count bytes of the difference are zero exactly when the low
	// count bytes of both words match; shifting left drops everything above
	// them and leaves ZF describing the comparison.
	SUBL	SI, DI
	SHLL	CX, DI
equal:
	SETEQ	(AX)
	RET