Text file
src/runtime/memmove_arm.s
Documentation: runtime
1// Inferno's libkern/memmove-arm.s
2// https://bitbucket.org/inferno-os/inferno-os/src/master/libkern/memmove-arm.s
3//
4// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5// Revisions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved.
6// Portions Copyright 2009 The Go Authors. All rights reserved.
7//
8// Permission is hereby granted, free of charge, to any person obtaining a copy
9// of this software and associated documentation files (the "Software"), to deal
10// in the Software without restriction, including without limitation the rights
11// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12// copies of the Software, and to permit persons to whom the Software is
13// furnished to do so, subject to the following conditions:
14//
15// The above copyright notice and this permission notice shall be included in
16// all copies or substantial portions of the Software.
17//
18// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24// THE SOFTWARE.
25
26#include "textflag.h"
27
28// TE or TS are spilled to the stack during bulk register moves.
// TS is the destination start pointer and TE the destination end pointer
// (to + n). The aligned backward loop transfers through R0-R7, which
// includes TS, and the aligned forward loop transfers through R1-R8, which
// includes TE, so the pointer that would be overwritten is saved in the
// function's 4-byte frame slot and reloaded when the loop finishes.
29#define TS R0
30#define TE R8
31
32// Warning: the linker will use R11 to synthesize certain instructions. Please
33// take care and double check with objdump.
// FROM is the source pointer. N (the byte count) and TMP (scratch) share
// R12: N is last read by the "CMP $4, N" test at the top of each direction,
// and TMP is only written after that point. TMP1 is the data register of
// the aligned word-at-a-time loops.
34#define FROM R11
35#define N R12
36#define TMP R12 /* N and TMP don't overlap */
37#define TMP1 R5
38
// Parameters of the unaligned paths. RSHIFT and LSHIFT are the shift
// amounts used to splice two adjacent source words into one destination
// word (they always sum to 32). OFFSET is the byte correction applied to
// FROM after the 16-byte loop to undo the word alignment of the source.
// RSHIFT shares R5 with TMP1; TMP1 is used only in the aligned word loops
// and RSHIFT only in the unaligned paths.
39#define RSHIFT R5
40#define LSHIFT R6
41#define OFFSET R7
42
// Backward unaligned loop: BR0-BR3 (R0-R3) receive the four source words
// and BW0-BW3 (R1-R4) hold the four spliced words that are stored. BWn
// shares a register with BR(n+1), so each BR register must be consumed
// before the BW value that aliases it is produced.
43#define BR0 R0 /* shared with TS */
44#define BW0 R1
45#define BR1 R1
46#define BW1 R2
47#define BR2 R2
48#define BW2 R3
49#define BR3 R3
50#define BW3 R4
51
// Forward unaligned loop: FR0-FR3 (R2, R3, R4, R8) receive the four source
// words and FW0-FW3 (R1-R4) hold the four spliced words that are stored.
// FW(n+1) shares a register with FRn, so each FR register must be consumed
// before the FW value that aliases it is produced.
52#define FW0 R1
53#define FR0 R2
54#define FW1 R2
55#define FR1 R3
56#define FW2 R3
57#define FR2 R4
58#define FW3 R4
59#define FR3 R8 /* shared with TE */
60
61// See memmove Go doc for important implementation constraints.
62
63// func memmove(to, from unsafe.Pointer, n uintptr)
// memmove copies n bytes from from to to.
//
// Frame layout ($4-12): 12 bytes of arguments (to, from, n) and 4 bytes of
// local frame. The local slot at -4(SP) holds the spilled TS (savedts) or TE
// (savedte) while a bulk register loop is using that register for data.
//
// Operand order reminder: "CMP a, b" sets the flags for b - a, so
// "CMP $4, N; BLT x" branches to x when N < 4.
//
// Overall structure:
//   - The copy runs forward when to <= from (unsigned compare) and backward,
//     starting from the end of both buffers, otherwise. This is the usual
//     way to keep an overlapping move safe.
//   - In each direction, copies shorter than 4 bytes go straight to a byte
//     loop. Otherwise bytes are copied until the destination pointer is
//     4-byte aligned.
//   - If the source is then also 4-byte aligned: 32-byte blocks, then single
//     words, then single bytes.
//   - If the source is not aligned: 16-byte blocks built by shifting and
//     OR-ing adjacent aligned source words, then single bytes.
//
// On return R0 holds the original to argument; the Go signature above
// declares no result.
64TEXT runtime·memmove(SB), NOSPLIT, $4-12
65_memmove:
66 MOVW to+0(FP), TS
67 MOVW from+4(FP), FROM
68 MOVW n+8(FP), N
69
70 ADD N, TS, TE /* to end pointer */
71
72 CMP FROM, TS /* flags for to - from */
73 BLS _forward /* to <= from (unsigned): copy forward */
74
// Backward copy: FROM and TE both point one past the end and are walked
// down towards the start; TS is the stop address.
75_back:
76 ADD N, FROM /* from end pointer */
77 CMP $4, N /* need at least 4 bytes to copy */
78 BLT _b1tail
79
80_b4align: /* align destination on 4 */
81 AND.S $3, TE, TMP /* TMP = TE & 3, Z set when TE is aligned */
82 BEQ _b4aligned
83
84 MOVBU.W -1(FROM), TMP /* pre-indexed */
85 MOVBU.W TMP, -1(TE) /* pre-indexed */
86 B _b4align
87
88_b4aligned: /* is source now aligned? */
89 AND.S $3, FROM, TMP /* TMP = FROM & 3; kept for _bunaligned */
90 BNE _bunaligned
91
92 ADD $31, TS, TMP /* do 32-byte chunks if possible */
93 MOVW TS, savedts-4(SP) /* TS is R0, which the block move overwrites */
94_b32loop:
95 CMP TMP, TE /* stop once fewer than 32 bytes remain (TE <= TS+31) */
96 BLS _b4tail
97
98 MOVM.DB.W (FROM), [R0-R7] /* load 8 words below FROM, FROM -= 32 */
99 MOVM.DB.W [R0-R7], (TE) /* store 8 words below TE, TE -= 32 */
100 B _b32loop
101
102_b4tail: /* do remaining words if possible */
103 MOVW savedts-4(SP), TS /* recover TS after R0 was used for data */
104 ADD $3, TS, TMP
105_b4loop:
106 CMP TMP, TE /* stop once fewer than 4 bytes remain (TE <= TS+3) */
107 BLS _b1tail
108
109 MOVW.W -4(FROM), TMP1 /* pre-indexed */
110 MOVW.W TMP1, -4(TE) /* pre-indexed */
111 B _b4loop
112
113_b1tail: /* remaining bytes */
114 CMP TE, TS /* done when the end pointer has reached the start */
115 BEQ _return
116
117 MOVBU.W -1(FROM), TMP /* pre-indexed */
118 MOVBU.W TMP, -1(TE) /* pre-indexed */
119 B _b1tail
120
// Forward copy: FROM and TS are walked up towards the end; TE is the stop
// address.
121_forward:
122 CMP $4, N /* need at least 4 bytes to copy */
123 BLT _f1tail
124
125_f4align: /* align destination on 4 */
126 AND.S $3, TS, TMP /* TMP = TS & 3, Z set when TS is aligned */
127 BEQ _f4aligned
128
129 MOVBU.P 1(FROM), TMP /* implicit write back */
130 MOVBU.P TMP, 1(TS) /* implicit write back */
131 B _f4align
132
133_f4aligned: /* is source now aligned? */
134 AND.S $3, FROM, TMP /* TMP = FROM & 3; kept for _funaligned */
135 BNE _funaligned
136
137 SUB $31, TE, TMP /* do 32-byte chunks if possible */
138 MOVW TE, savedte-4(SP) /* TE is R8, which the block move overwrites */
139_f32loop:
140 CMP TMP, TS /* stop once fewer than 32 bytes remain (TS >= TE-31) */
141 BHS _f4tail
142
143 MOVM.IA.W (FROM), [R1-R8] /* load 8 words at FROM, FROM += 32 */
144 MOVM.IA.W [R1-R8], (TS) /* store 8 words at TS, TS += 32 */
145 B _f32loop
146
147_f4tail:
148 MOVW savedte-4(SP), TE /* recover TE after R8 was used for data */
149 SUB $3, TE, TMP /* do remaining words if possible */
150_f4loop:
151 CMP TMP, TS /* stop once fewer than 4 bytes remain (TS >= TE-3) */
152 BHS _f1tail
153
154 MOVW.P 4(FROM), TMP1 /* implicit write back */
155 MOVW.P TMP1, 4(TS) /* implicit write back */
156 B _f4loop
157
158_f1tail:
159 CMP TS, TE /* done when the start pointer has reached the end */
160 BEQ _return
161
162 MOVBU.P 1(FROM), TMP /* implicit write back */
163 MOVBU.P TMP, 1(TS) /* implicit write back */
164 B _f1tail
165
166_return:
167 MOVW to+0(FP), R0 /* R0 = original to argument (R0/TS was modified above) */
168 RET
169
// Backward copy with a 4-byte aligned destination and an unaligned source.
// On entry TMP = FROM & 3, which is 1, 2 or 3. The flags of the single CMP
// below select exactly one of the three groups of conditional moves:
//   TMP == 1 (LT): RSHIFT = 8,  LSHIFT = 24, OFFSET = 1
//   TMP == 2 (EQ): RSHIFT = 16, LSHIFT = 16, OFFSET = 2
//   TMP == 3 (GT): RSHIFT = 24, LSHIFT = 8,  OFFSET = 3
// OFFSET equals the number of bytes that BIC removes from FROM below.
170_bunaligned:
171 CMP $2, TMP /* is TMP < 2 ? */
172
173 MOVW.LT $8, RSHIFT /* (R(n)<<24)|(R(n-1)>>8) */
174 MOVW.LT $24, LSHIFT
175 MOVW.LT $1, OFFSET
176
177 MOVW.EQ $16, RSHIFT /* (R(n)<<16)|(R(n-1)>>16) */
178 MOVW.EQ $16, LSHIFT
179 MOVW.EQ $2, OFFSET
180
181 MOVW.GT $24, RSHIFT /* (R(n)<<8)|(R(n-1)>>24) */
182 MOVW.GT $8, LSHIFT
183 MOVW.GT $3, OFFSET
184
185 ADD $16, TS, TMP /* do 16-byte chunks if possible */
186 CMP TMP, TE /* need more than 16 bytes (TE > TS+16) */
187 BLS _b1tail /* FROM is still unmodified here, so the byte loop can take over */
188
189 BIC $3, FROM /* align source */
190 MOVW TS, savedts-4(SP) /* TS is R0, which is BR0 in the loop */
191 MOVW (FROM), BR0 /* prime first block register */
192
// Each iteration produces four destination words. BR0 carries the lowest
// source word of the previous iteration (or the primed word) into this one.
// The statement order matters: every BWn aliases BR(n+1), so BR(n+1) is
// consumed by the ORR immediately before the next MOVW overwrites it.
193_bu16loop:
194 CMP TMP, TE /* stop once 16 or fewer bytes remain (TE <= TS+16) */
195 BLS _bu1tail
196
197 MOVW BR0<<LSHIFT, BW3 /* high part of the top word, from the carried-over word */
198 MOVM.DB.W (FROM), [BR0-BR3] /* load 4 words below FROM, FROM -= 16 */
199 ORR BR3>>RSHIFT, BW3
200
201 MOVW BR3<<LSHIFT, BW2 /* BW2 is BR3's register: overwritten in place */
202 ORR BR2>>RSHIFT, BW2
203
204 MOVW BR2<<LSHIFT, BW1 /* BW1 is BR2's register: overwritten in place */
205 ORR BR1>>RSHIFT, BW1
206
207 MOVW BR1<<LSHIFT, BW0 /* BW0 is BR1's register: overwritten in place */
208 ORR BR0>>RSHIFT, BW0 /* BR0 itself stays intact for the next iteration */
209
210 MOVM.DB.W [BW0-BW3], (TE) /* store 4 words below TE, TE -= 16 */
211 B _bu16loop
212
213_bu1tail:
214 MOVW savedts-4(SP), TS /* recover TS after R0 was used as BR0 */
215 ADD OFFSET, FROM /* undo the BIC: back to the true unaligned source position */
216 B _b1tail
217
// Forward copy with a 4-byte aligned destination and an unaligned source.
// On entry TMP = FROM & 3, which is 1, 2 or 3. The flags of the single CMP
// below select exactly one of the three groups of conditional moves:
//   TMP == 1 (LT): RSHIFT = 8,  LSHIFT = 24, OFFSET = 3
//   TMP == 2 (EQ): RSHIFT = 16, LSHIFT = 16, OFFSET = 2
//   TMP == 3 (GT): RSHIFT = 24, LSHIFT = 8,  OFFSET = 1
// OFFSET is 4 - TMP: the priming load below advances the aligned FROM by a
// whole word, which overshoots the true source position by that many bytes.
218_funaligned:
219 CMP $2, TMP
220
221 MOVW.LT $8, RSHIFT /* (R(n+1)<<24)|(R(n)>>8) */
222 MOVW.LT $24, LSHIFT
223 MOVW.LT $3, OFFSET
224
225 MOVW.EQ $16, RSHIFT /* (R(n+1)<<16)|(R(n)>>16) */
226 MOVW.EQ $16, LSHIFT
227 MOVW.EQ $2, OFFSET
228
229 MOVW.GT $24, RSHIFT /* (R(n+1)<<8)|(R(n)>>24) */
230 MOVW.GT $8, LSHIFT
231 MOVW.GT $1, OFFSET
232
233 SUB $16, TE, TMP /* do 16-byte chunks if possible */
234 CMP TMP, TS /* need more than 16 bytes (TS < TE-16) */
235 BHS _f1tail /* FROM is still unmodified here, so the byte loop can take over */
236
237 BIC $3, FROM /* align source */
238 MOVW TE, savedte-4(SP) /* TE is R8, which is FR3 in the loop */
239 MOVW.P 4(FROM), FR3 /* prime last block register, implicit write back */
240
// Each iteration produces four destination words. FR3 carries the highest
// source word of the previous iteration (or the primed word) into this one.
// The statement order matters: every FW(n+1) aliases FRn, so FRn is consumed
// by the ORR immediately before the next MOVW overwrites it.
241_fu16loop:
242 CMP TMP, TS /* stop once 16 or fewer bytes remain (TS >= TE-16) */
243 BHS _fu1tail
244
245 MOVW FR3>>RSHIFT, FW0 /* low part of the first word, from the carried-over word */
246 MOVM.IA.W (FROM), [FR0,FR1,FR2,FR3] /* load 4 words at FROM, FROM += 16 */
247 ORR FR0<<LSHIFT, FW0
248
249 MOVW FR0>>RSHIFT, FW1 /* FW1 is FR0's register: overwritten in place */
250 ORR FR1<<LSHIFT, FW1
251
252 MOVW FR1>>RSHIFT, FW2 /* FW2 is FR1's register: overwritten in place */
253 ORR FR2<<LSHIFT, FW2
254
255 MOVW FR2>>RSHIFT, FW3 /* FW3 is FR2's register: overwritten in place */
256 ORR FR3<<LSHIFT, FW3 /* FR3 itself stays intact for the next iteration */
257
258 MOVM.IA.W [FW0,FW1,FW2,FW3], (TS) /* store 4 words at TS, TS += 16 */
259 B _fu16loop
260
261_fu1tail:
262 MOVW savedte-4(SP), TE /* recover TE after R8 was used as FR3 */
263 SUB OFFSET, FROM /* back to the true unaligned source position */
264 B _f1tail
View as plain text