/* Copyright (c) 2012-2013, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
26
/*
 * Copyright (c) 2015 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
54
/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 *
 */
60
61#include <private/bionic_asm.h>
62
/* Register roles for the copy routine in this file.

   dstin/src/count are the three incoming arguments (x0, x1, x2).
   dst, srcend and dstend are derived pointers.  A..D are 16-byte data
   pairs (the *_lw/*_hw names are 32-bit views of the same registers).  */
#define dstin	x0
#define src	x1
#define count	x2
#define dst	x3
#define srcend	x4
#define dstend	x5
#define A_l	x6
#define A_lw	w6
#define A_h	x7
#define A_hw	w7
#define B_l	x8
#define B_lw	w8
#define B_h	x9
#define C_l	x10
#define C_h	x11
#define D_l	x12
#define D_h	x13

/* E and F deliberately reuse the pointer/count registers as data
   registers.  Loading into them destroys src/count/srcend/dst, so they
   may only be used once those values are no longer needed.  */
#define E_l	src
#define E_h	count
#define F_l	srcend
#define F_h	dst

/* tmp1 shares x9 with B_h: tmp1 must be dead before B_h is loaded.  */
#define tmp1	x9

/* Build an assembler-local label name by pasting a ".L" prefix.  */
#define L(l) .L ## l
87
/* Copies are split into 3 main cases: small copies of up to 16 bytes,
   medium copies of 17..96 bytes which are fully unrolled, and large
   copies of more than 96 bytes which align the destination and use an
   unrolled loop processing 64 bytes per iteration.
   Small and medium copies read all data before writing, allowing any
   kind of overlap, and memmove tailcalls memcpy for these cases as
   well as non-overlapping copies.
*/
96
// Prototype: void *memcpy (void *dst, const void *src, size_t count).
98
Bernhard Rosenkraenzer7e4fa562014-03-05 11:40:57 +010099#include <private/bionic_asm.h>
100
/* __memcpy: copy `count` bytes from `src` to `dstin`.
 *
 * In:       x0 = dstin, x1 = src, x2 = count.
 * Out:      x0 = dstin (x0 is never written by this routine).
 * Clobbers: x1-x13 and the condition flags.  The stack is not used.
 *
 * Dispatch on count:
 *    0..16  -> L(copy16)     overlapping head/tail accesses
 *   17..64  -> inline        16 or 32 bytes from each end
 *   65..96  -> L(copy96)     64 bytes from the start, 32 from the end
 *   > 96    -> L(copy_long)  16-byte aligned stores, 64 bytes per iteration
 *
 * In the 0..96 byte paths every load is issued before the first store.
 */
ENTRY(__memcpy)
	prfm	PLDL1KEEP, [src]
	add	srcend, src, count
	add	dstend, dstin, count
	cmp	count, 16
	b.ls	L(copy16)
	cmp	count, 96
	b.hi	L(copy_long)

	/* Medium copies: 17..96 bytes.
	   tmp1 = count - 1 is in 16..95 here, so bit 6 set means
	   count is 65..96 and bit 5 clear means count is 17..32.  */
	sub	tmp1, count, 1
	ldp	A_l, A_h, [src]
	tbnz	tmp1, 6, L(copy96)
	ldp	D_l, D_h, [srcend, -16]
	tbz	tmp1, 5, 1f
	/* 33..64 bytes.  B_h shares x9 with tmp1, which is dead from here.  */
	ldp	B_l, B_h, [src, 16]
	ldp	C_l, C_h, [srcend, -32]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstend, -32]
1:
	/* First and last 16 bytes; the two stores overlap when count < 32.  */
	stp	A_l, A_h, [dstin]
	stp	D_l, D_h, [dstend, -16]
	ret

	.p2align 4

	/* Small copies: 0..16 bytes.  */
L(copy16):
	cmp	count, 8
	b.lo	1f
	/* 8..16 bytes: first and last 8 bytes, possibly overlapping.  */
	ldr	A_l, [src]
	ldr	A_h, [srcend, -8]
	str	A_l, [dstin]
	str	A_h, [dstend, -8]
	ret
	.p2align 4
1:
	/* count < 8 here, so bit 2 set means 4..7 bytes.  */
	tbz	count, 2, 1f
	ldr	A_lw, [src]
	ldr	A_hw, [srcend, -4]
	str	A_lw, [dstin]
	str	A_hw, [dstend, -4]
	ret

	/* Copy 0..3 bytes.  Use a branchless sequence that copies the same
	   byte 3 times if count==1, or the 2nd byte twice if count==2.  */
1:
	cbz	count, 2f
	lsr	tmp1, count, 1		/* Middle index: 0 for count==1, else 1.  */
	ldrb	A_lw, [src]
	ldrb	A_hw, [srcend, -1]
	ldrb	B_lw, [src, tmp1]
	strb	A_lw, [dstin]
	strb	B_lw, [dstin, tmp1]
	strb	A_hw, [dstend, -1]
2:	ret

	.p2align 4
	/* Copy 65..96 bytes.  Copy 64 bytes from the start and
	   32 bytes from the end.  A_l/A_h were already loaded by the
	   caller path.  E and F reuse src/count and srcend/dst, so those
	   values are destroyed by the last two loads.  */
L(copy96):
	ldp	B_l, B_h, [src, 16]
	ldp	C_l, C_h, [src, 32]
	ldp	D_l, D_h, [src, 48]
	ldp	E_l, E_h, [srcend, -32]
	ldp	F_l, F_h, [srcend, -16]
	stp	A_l, A_h, [dstin]
	stp	B_l, B_h, [dstin, 16]
	stp	C_l, C_h, [dstin, 32]
	stp	D_l, D_h, [dstin, 48]
	stp	E_l, E_h, [dstend, -32]
	stp	F_l, F_h, [dstend, -16]
	ret

	/* Align DST to 16 byte alignment so that we don't cross cache line
	   boundaries on both loads and stores.  There are more than 96 bytes
	   to copy, so copy 16 bytes unaligned and then align.  The loop
	   copies 64 bytes per iteration and issues its loads one iteration
	   ahead of the matching stores.  */

	.p2align 4
L(copy_long):
	and	tmp1, dstin, 15		/* tmp1 = misalignment of dstin.  */
	bic	dst, dstin, 15		/* dst = dstin rounded down to 16.  */
	ldp	D_l, D_h, [src]
	sub	src, src, tmp1		/* Move src back by the same amount.  */
	add	count, count, tmp1	/* Count is now 16 too large.  */
	ldp	A_l, A_h, [src, 16]
	stp	D_l, D_h, [dstin]	/* Unaligned store of the first 16 bytes.  */
	ldp	B_l, B_h, [src, 32]
	ldp	C_l, C_h, [src, 48]
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 128 + 16	/* Test and readjust count.  */
	b.ls	2f
1:
	/* Store the 64 bytes loaded previously while loading the next 64.
	   src and dst each advance by 64 through the writeback forms.  */
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [src, 16]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [src, 32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [src, 48]
	stp	D_l, D_h, [dst, 64]!
	ldp	D_l, D_h, [src, 64]!
	subs	count, count, 64
	b.hi	1b

	/* Write the last full set of 64 bytes.  The remainder is at most 64
	   bytes, so it is safe to always copy 64 bytes from the end even if
	   there is just 1 byte left.  E reuses src/count, which are no
	   longer needed at this point.  */
2:
	ldp	E_l, E_h, [srcend, -64]
	stp	A_l, A_h, [dst, 16]
	ldp	A_l, A_h, [srcend, -48]
	stp	B_l, B_h, [dst, 32]
	ldp	B_l, B_h, [srcend, -32]
	stp	C_l, C_h, [dst, 48]
	ldp	C_l, C_h, [srcend, -16]
	stp	D_l, D_h, [dst, 64]
	stp	E_l, E_h, [dstend, -64]
	stp	A_l, A_h, [dstend, -48]
	stp	B_l, B_h, [dstend, -32]
	stp	C_l, C_h, [dstend, -16]
	ret

END(__memcpy)
Tamas Petzf5bdee72020-08-31 15:09:40 +0200225
/* NOTE_GNU_PROPERTY is not defined in this file; presumably it comes from
   <private/bionic_asm.h> and emits the object's GNU property note --
   TODO confirm against that header.  */
NOTE_GNU_PROPERTY()