/*	$OpenBSD: _memcpy.S,v 1.6 2016/08/06 19:16:09 guenther Exp $	*/
/*	$NetBSD: _memcpy.S,v 1.4 2003/04/05 23:08:52 bjh21 Exp $	*/

/*-
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
32
33#include <private/bionic_asm.h>
34
35 .syntax unified
36
/*
 * This is one fun bit of code ...
 * Some easy listening music is suggested while trying to understand this
 * code e.g. Iron Maiden
 *
 * For anyone attempting to understand it :
 *
 * The core code is implemented here with simple stubs for memcpy()
 * memmove() and bcopy().
 *
 * All local labels are prefixed with Lmemcpy_
 * Following the prefix a label starting f is used in the forward copy code
 * while a label using b is used in the backwards copy code
 * The source and destination addresses determine whether a forward or
 * backward copy is performed.
 * Separate bits of code are used to deal with the following situations
 * for both the forward and backwards copy.
 *	unaligned source address
 *	unaligned destination address
 * Separate copy routines are used to produce an optimised result for each
 * of these cases.
 * The copy code will use LDM/STM instructions to copy up to 32 bytes at
 * a time where possible.
 *
 * Note: r12 (aka ip) can be trashed during the function along with
 * r0-r3 although r0-r2 have defined uses i.e. src, dest, len throughout.
 * Additional registers are preserved prior to use i.e. r4, r5 & lr
 *
 * Apologies for the state of the comments ;-)
 */
67
/*
 * void* bsd_safe_memcpy(void* dst, const void* src, size_t len)
 *
 * Overlap-safe copy core (the guts of memmove).
 * In:    r0 = dst, r1 = src, r2 = len
 * Out:   r0 = dst (restored from the stack on the forward path; on the
 *        backward path the caller's wrapper preserves/restores r0)
 * Clobbers: r3, r12 (ip), flags; r4, r5 and lr are saved before use.
 *
 * If src >= dst we copy forwards; if src < dst (regions could overlap
 * destructively for a forward copy) we copy backwards from the ends.
 */
ENTRY_PRIVATE(bsd_safe_memcpy)
	/* Determine copy direction: C clear means src < dst (unsigned) */
	cmp	r1, r0
	bcc	.Lmemcpy_backwards

	/* eq here is from the cmp above: src == dst, so the copy is a
	 * no-op — return immediately.  (The historical upstream comment
	 * said "len=0", but the flags come from the address compare.) */
	moveq	r0, #0			/* Quick abort for len=0 */
	moveq	pc, lr

	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemcpy_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_fsrcul		/* oh unaligned source addr */

.Lmemcpy_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_floop32

	/* r2 is now negative; cmn (compare negative) tests r2 >= -16,
	 * i.e. at least 16 bytes of the 32-byte chunk remain */
	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_fl32:
	adds	r2, r2, #0x14		/* undo the 0x14 bias; flags feed the ge below */

	/* blat 12 bytes at a time */
.Lmemcpy_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_floop12

.Lmemcpy_fl12:
	adds	r2, r2, #8		/* undo the 8 bias from .Lmemcpy_ft8 */
	blt	.Lmemcpy_fl4

	/* 4..11 bytes left (biased by 4): copy one word if <8, two if >=8 */
	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* undo the initial -4 bias; eq means 0 left */
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time: 1 always, 2nd if >=2, 3rd if >2 */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}		/* restore dst into r0 and return */

	/* erg - unaligned destination */
.Lmemcpy_fdestul:
	rsb	r12, r12, #4		/* r12 = 4 - (dst & 3) = bytes to align dst */
	cmp	r12, #2

	/* align destination with byte copies (1, 2 or 3 bytes) */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_fl4		/* less the 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ...
	 * The source is word-aligned down (bic) and whole words are read;
	 * each output word is assembled from two input words with shifts.
	 * r12 = src & 3 selects one of three shift variants below.
	 * NOTE: these shift amounts assume a little-endian layout. */
.Lmemcpy_fsrcul:
	bic	r1, r1, #3		/* round src down to a word boundary */
	ldr	lr, [r1], #4		/* prime lr with the first partial word */
	cmp	r12, #2
	bgt	.Lmemcpy_fsrcul3
	beq	.Lmemcpy_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

	/* src was 1 byte past a word boundary: out = in0>>8 | in1<<24 */
.Lmemcpy_fsrcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul1l4

.Lmemcpy_fsrcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul1loop4

.Lmemcpy_fsrcul1l4:
	sub	r1, r1, #3		/* point src back at the true next byte */
	b	.Lmemcpy_fl4

	/* src was 2 bytes past a word boundary: out = in0>>16 | in1<<16 */
.Lmemcpy_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul2l4

.Lmemcpy_fsrcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul2loop4

.Lmemcpy_fsrcul2l4:
	sub	r1, r1, #2		/* point src back at the true next byte */
	b	.Lmemcpy_fl4

	/* src was 3 bytes past a word boundary: out = in0>>24 | in1<<8 */
.Lmemcpy_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_fsrcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_fsrcul3l4

.Lmemcpy_fsrcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_fsrcul3loop4

.Lmemcpy_fsrcul3l4:
	sub	r1, r1, #1		/* point src back at the true next byte */
	b	.Lmemcpy_fl4

	/* src < dst: copy backwards from the ends so an overlap is safe */
.Lmemcpy_backwards:
	add	r1, r1, r2		/* src = one past the last source byte */
	add	r0, r0, r2		/* dst = one past the last dest byte */
	subs	r2, r2, #4
	blt	.Lmemcpy_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_bsrcul		/* oh unaligned source addr */

.Lmemcpy_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemcpy_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_bloop32

.Lmemcpy_bl32:
	cmn	r2, #0x10		/* r2 is negative: ge means >= 16 left */
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14		/* undo the 0x14 bias */
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemcpy_bl12:
	adds	r2, r2, #8		/* undo the 8 bias from .Lmemcpy_bt8 */
	blt	.Lmemcpy_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4		/* undo the initial -4 bias; eq means done */
	moveq	pc, lr			/* done */

	/* copy the crud byte at a time: 1 always, 2nd if >=2, 3rd if >2 */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	mov	pc, lr

	/* erg - unaligned destination */
.Lmemcpy_bdestul:
	cmp	r12, #2			/* r12 = dst & 3 = bytes to align dst (downward) */

	/* align destination with byte copies (1, 2 or 3 bytes) */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemcpy_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemcpy_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ...
	 * Mirror image of the forward unaligned-source code: words are
	 * read below the rounded-down src and recombined with opposite
	 * shifts.  r12 = src & 3 selects the variant. */
.Lmemcpy_bsrcul:
	bic	r1, r1, #3		/* round src down to a word boundary */
	ldr	r3, [r1, #0]		/* prime r3 with the first partial word */
	cmp	r12, #2
	blt	.Lmemcpy_bsrcul1
	beq	.Lmemcpy_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* src & 3 == 3: out = in1<<8 | in0>>24 */
.Lmemcpy_bsrcul3loop16:
	mov	lr, r3, lsl #8
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul3l4

.Lmemcpy_bsrcul3loop4:
	mov	r12, r3, lsl #8
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #24
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul3loop4

.Lmemcpy_bsrcul3l4:
	add	r1, r1, #3		/* point src back at the true next byte */
	b	.Lmemcpy_bl4

	/* src & 3 == 2: out = in1<<16 | in0>>16 */
.Lmemcpy_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemcpy_bsrcul2loop16:
	mov	lr, r3, lsl #16
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul2l4

.Lmemcpy_bsrcul2loop4:
	mov	r12, r3, lsl #16
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #16
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul2loop4

.Lmemcpy_bsrcul2l4:
	add	r1, r1, #2		/* point src back at the true next byte */
	b	.Lmemcpy_bl4

	/* src & 3 == 1: out = in1<<24 | in0>>8 */
.Lmemcpy_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemcpy_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

	/* (label says loop32 but it moves 16 bytes per iteration,
	 * matching the other loop16 variants) */
.Lmemcpy_bsrcul1loop32:
	mov	lr, r3, lsl #24
	ldmdb	r1!, {r3-r5, r12}
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_bsrcul1l4

.Lmemcpy_bsrcul1loop4:
	mov	r12, r3, lsl #24
	ldr	r3, [r1, #-4]!
	orr	r12, r12, r3, lsr #8
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemcpy_bsrcul1loop4

.Lmemcpy_bsrcul1l4:
	add	r1, r1, #1		/* point src back at the true next byte */
	b	.Lmemcpy_bl4
END(bsd_safe_memcpy)
466
/*
 * void* memmove_generic(void* dst, const void* src, size_t n)
 *
 * Thin public wrapper around the overlap-safe copy core.  The core may
 * clobber r0, so dst is saved across the call and restored as the
 * return value, per the memmove() contract.
 */
ENTRY(memmove_generic)
	push	{r0, lr}		/* keep dst (and return addr) */
	bl	bsd_safe_memcpy		/* direction-aware copy does the work */
	pop	{r0, pc}		/* r0 = dst; return */
END(memmove_generic)