Text file
src/runtime/memmove_ppc64x.s
Documentation: runtime
1// Copyright 2014 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5//go:build ppc64 || ppc64le
6
7#include "textflag.h"
8
9// See memmove Go doc for important implementation constraints.
10
11// func memmove(to, from unsafe.Pointer, n uintptr)
12
13// target address
14#define TGT R3
15// source address
16#define SRC R4
17// length to move
18#define LEN R5
19// number of doublewords (LEN >> 3)
20#define DWORDS R6
21// number of bytes < 8 (LEN & 7)
22#define BYTES R7
23// const 16 used as index
24#define IDX16 R8
25// temp used for copies, etc.
26#define TMP R9
27// number of 32 byte blocks (DWORDS >> 2), used by the backward copy
28#define QWORDS R10
29// index values (IDX32 = 32, IDX48 = 48) and number of 64 byte chunks (OCTWORDS = DWORDS >> 3)
30#define IDX32 R14
31#define IDX48 R15
32#define OCTWORDS R16
33
34TEXT runtime·memmove<ABIInternal>(SB), NOSPLIT|NOFRAME, $0-24
35 // R3 = TGT = to
36 // R4 = SRC = from
37 // R5 = LEN = n
38 // Copies forward unless (to - from), compared unsigned, is < n; in that case copies backward.
39 // Determine if there are doublewords to
40 // copy so a more efficient move can be done
41check:
42#ifdef GOPPC64_power10
43 CMP LEN, $16 // power10 only: n <= 16 is copied by one LXVL/STXVL pair
44 BGT mcopy
45 SLD $56, LEN, TMP // TMP = LEN << 56, the operand given to LXVL/STXVL below
46 LXVL SRC, TMP, V0
47 STXVL V0, TGT, TMP
48 RET
49#endif
50mcopy:
51 ANDCC $7, LEN, BYTES // R7: bytes to copy
52 SRD $3, LEN, DWORDS // R6: double words to copy
53 MOVFL CR0, CR3 // save CR0 from ANDCC in CR3; CR3[EQ] (BYTES == 0) is tested at checkbytes
54 CMP DWORDS, $0, CR1 // CR1[EQ] set if no double words to copy
55
56 // Determine overlap by subtracting dest - src and comparing against the
57 // length. This catches the cases where src and dest are in different types
58 // of storage such as stack and static to avoid doing backward move when not
59 // necessary.
60
61 SUB SRC, TGT, TMP // dest - src
62 CMPU TMP, LEN, CR2 // < len?
63 BC 12, 8, backward // BLT CR2 backward
64
65 // Copying forward if no overlap.
66
67 BC 12, 6, checkbytes // BEQ CR1, checkbytes
68 SRDCC $3, DWORDS, OCTWORDS // 64 byte chunks?
69 MOVD $16, IDX16
70 BEQ lt64gt8 // < 64 bytes
71
72 // Prepare for moves of 64 bytes at a time.
73
74forward64setup:
75 DCBTST (TGT) // prepare data cache
76 DCBT (SRC)
77 MOVD OCTWORDS, CTR // Number of 64 byte chunks
78 MOVD $32, IDX32
79 MOVD $48, IDX48
80 PCALIGN $16
81
82forward64:
83 LXVD2X (R0)(SRC), VS32 // load 64 bytes
84 LXVD2X (IDX16)(SRC), VS33
85 LXVD2X (IDX32)(SRC), VS34
86 LXVD2X (IDX48)(SRC), VS35
87 ADD $64, SRC
88 STXVD2X VS32, (R0)(TGT) // store 64 bytes
89 STXVD2X VS33, (IDX16)(TGT)
90 STXVD2X VS34, (IDX32)(TGT)
91 STXVD2X VS35, (IDX48)(TGT)
92 ADD $64,TGT // bump up for next set
93 BC 16, 0, forward64 // bdnz: repeat while CTR (64 byte chunks left) != 0
94 ANDCC $7, DWORDS // remaining doublewords
95 BEQ checkbytes // only bytes remain
96
97lt64gt8:
98 CMP DWORDS, $4 // at least 4 doublewords (32 bytes) left?
99 BLT lt32gt8
100 LXVD2X (R0)(SRC), VS32
101 LXVD2X (IDX16)(SRC), VS33
102 ADD $-4, DWORDS
103 STXVD2X VS32, (R0)(TGT)
104 STXVD2X VS33, (IDX16)(TGT)
105 ADD $32, SRC
106 ADD $32, TGT
107
108lt32gt8:
109 // At this point >= 8 and < 32
110 // Move 16 bytes if possible
111 CMP DWORDS, $2
112 BLT lt16
113 LXVD2X (R0)(SRC), VS32
114 ADD $-2, DWORDS
115 STXVD2X VS32, (R0)(TGT)
116 ADD $16, SRC
117 ADD $16, TGT
118
119lt16: // Move 8 bytes if possible
120 CMP DWORDS, $1
121 BLT checkbytes
122#ifdef GOPPC64_power10
123 ADD $8, BYTES // power10 only: last doubleword plus tail bytes go out in one LXVL/STXVL pair
124 SLD $56, BYTES, TMP
125 LXVL SRC, TMP, V0
126 STXVL V0, TGT, TMP
127 RET
128#endif
129
130 MOVD 0(SRC), TMP
131 ADD $8, SRC
132 MOVD TMP, 0(TGT)
133 ADD $8, TGT
134checkbytes:
135 BC 12, 14, LR // beqlr cr3: return if BYTES == 0 (CR3 copied from the ANDCC $7, LEN result)
136#ifdef GOPPC64_power10
137 SLD $56, BYTES, TMP
138 LXVL SRC, TMP, V0
139 STXVL V0, TGT, TMP
140 RET
141#endif
142lt8: // Move word if possible
143 CMP BYTES, $4
144 BLT lt4
145 MOVWZ 0(SRC), TMP
146 ADD $-4, BYTES
147 MOVW TMP, 0(TGT)
148 ADD $4, SRC
149 ADD $4, TGT
150lt4: // Move halfword if possible
151 CMP BYTES, $2
152 BLT lt2
153 MOVHZ 0(SRC), TMP
154 ADD $-2, BYTES
155 MOVH TMP, 0(TGT)
156 ADD $2, SRC
157 ADD $2, TGT
158lt2: // Move last byte if 1 left
159 CMP BYTES, $1
160 BC 12, 0, LR // bltlr: return if BYTES < 1, i.e. no byte left
161 MOVBZ 0(SRC), TMP
162 MOVBZ TMP, 0(TGT)
163 RET
164
165backward:
166 // Copying backwards proceeds by copying R7 bytes then copying R6 double words.
167 // R3 and R4 are advanced to the end of the destination/source buffers
168 // respectively and moved back as we copy.
169
170 ADD LEN, SRC, SRC // end of source
171 ADD TGT, LEN, TGT // end of dest
172
173 BEQ nobackwardtail // CR0[EQ] is still the ANDCC $7, LEN result: skip the byte loop if BYTES == 0
174
175 MOVD BYTES, CTR // bytes to move
176
177backwardtailloop:
178 MOVBZ -1(SRC), TMP // load the byte just below SRC
179 SUB $1,SRC
180 MOVBZ TMP, -1(TGT)
181 SUB $1,TGT
182 BDNZ backwardtailloop
183
184nobackwardtail:
185 BC 4, 5, LR // blelr cr1, return if DWORDS == 0
186 SRDCC $2,DWORDS,QWORDS // Compute number of 32B blocks and compare to 0
187 BNE backward32setup // If QWORDS != 0, start the 32B copy loop.
188
189backward24:
190 // DWORDS is a value between 1-3.
191 CMP DWORDS, $2 // CR0 from this compare is used by both conditional returns below
192
193 MOVD -8(SRC), TMP
194 MOVD TMP, -8(TGT)
195 BC 12, 0, LR // bltlr, return if DWORDS == 1
196
197 MOVD -16(SRC), TMP
198 MOVD TMP, -16(TGT)
199 BC 12, 2, LR // beqlr, return if DWORDS == 2
200
201 MOVD -24(SRC), TMP
202 MOVD TMP, -24(TGT)
203 RET
204
205backward32setup:
206 ANDCC $3,DWORDS // Compute remaining DWORDS and compare to 0
207 MOVD QWORDS, CTR // set up loop ctr
208 MOVD $16, IDX16 // 32 bytes at a time
209 PCALIGN $16
210
211backward32loop:
212 SUB $32, TGT
213 SUB $32, SRC
214 LXVD2X (R0)(SRC), VS32 // load 16x2 bytes
215 LXVD2X (IDX16)(SRC), VS33
216 STXVD2X VS32, (R0)(TGT) // store 16x2 bytes
217 STXVD2X VS33, (IDX16)(TGT)
218 BDNZ backward32loop
219 BC 12, 2, LR // beqlr, return if DWORDS == 0 (CR0 from the ANDCC in backward32setup)
220 BR backward24
View as plain text