/*
 * Copyright (C) 2014 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/* Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

/*
 * This one source file is assembled twice, once per entry point; the build
 * must define exactly one of STPCPY or STRCPY to choose which function is
 * emitted.
 */
#if !defined(STPCPY) && !defined(STRCPY)
#error "Either STPCPY or STRCPY must be defined."
#endif

#include <private/bionic_asm.h>

/*
 * Arguments and results.
 *
 * stpcpy returns a pointer into the destination, so x0 doubles as the
 * running write pointer (dst).  strcpy returns its first argument, so x0
 * is only named dstin and the running write pointer lives in x2 instead
 * (see "Locals and temporaries").
 */
#if defined(STPCPY)
#define dst         x0
#elif defined(STRCPY)
#define dstin       x0
#endif
#define src         x1

/*
 * Locals and temporaries.
 *
 * The *_w names are the 32-bit views of the same registers.
 */
#if defined(STRCPY)
#define dst         x2
#endif
#define data1       x3      /* first loaded word */
#define data1_w     w3
#define data2       x4      /* second loaded word */
#define data2_w     w4
#define has_nul1    x5      /* non-zero iff data1 contains a zero byte */
#define has_nul1_w  w5
#define has_nul2    x6      /* non-zero iff data2 contains a zero byte */
#define tmp1        x7
#define tmp2        x8
#define tmp3        x9
#define tmp4        x10
#define zeroones    x11     /* holds REP8_01 */
#define zeroones_w  w11
#define pos         x12     /* bit offset of the first zero byte */

/* One byte value replicated across all eight bytes of a 64-bit word. */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/*
 * char* strcpy(char* dst, const char* src)    -- built when STRCPY is defined
 * char* stpcpy(char* dst, const char* src)    -- built when STPCPY is defined
 *
 * Copies the NUL-terminated string at src, terminator included, to dst.
 *
 * In:       x0 = destination, x1 = source.
 * Out:      x0 = strcpy: the destination pointer as passed in (x0 is never
 *                written; the copy advances x2 instead).
 *                stpcpy: the address of the NUL terminator stored in the
 *                destination (x0 itself is the running write pointer).
 * Clobbers: x1, x3-x12, the condition flags, and x2 in the strcpy build.
 * Stack:    none; no calls are made.
 *
 * The word-at-a-time code treats the lowest-addressed byte of a load as the
 * least significant byte of the register, i.e. it relies on little-endian
 * data accesses.  Only the source pointer is aligned; stores to dst are
 * issued at whatever alignment the caller supplied.
 */
#if defined(STPCPY)
ENTRY(stpcpy)
#elif defined(STRCPY)
ENTRY(strcpy)
#endif
    mov     zeroones, #REP8_01
#if defined(STRCPY)
    mov     dst, dstin
#endif
    // The main loop only ever runs with src 16-byte aligned; anything else
    // is brought up to alignment in .Lmisaligned first.
    // NOTE(review): presumably this is what makes it safe for the 16-byte
    // loads to read beyond the terminator -- confirm.
    ands    tmp1, src, #15
    b.ne    .Lmisaligned
    // NUL detection works on the principle that (X - 1) & (~X) & 0x80
    // (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
    // can be done in parallel across the entire word.
    // Bytes above the first zero byte may be flagged spuriously because of
    // the borrow; that is harmless, only the lowest flagged byte is used.
    // The inner loop deals with two Dwords at a time.  This has a
    // slightly higher start-up cost, but we should win quite quickly,
    // especially on cores with a high number of issue slots per
    // cycle, as we get much better parallelism out of the operations.
.Lloop:
    ldp     data1, data2, [src], #16
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bic     has_nul1, tmp1, tmp2
    cbnz    has_nul1, .Lnul_in_data1
    sub     tmp3, data2, zeroones
    orr     tmp4, data2, #REP8_7f
    bic     has_nul2, tmp3, tmp4
    cbnz    has_nul2, .Lnul_in_data2
    // No NUL in either register, copy it in a single instruction.
    stp     data1, data2, [dst], #16
    b       .Lloop

    // The string ends inside data1, none of which has been stored yet.
    // Also entered from .Lmisaligned after a 4-byte or 8-byte load; in the
    // 4-byte case the upper halves of data1 and has_nul1 are zero.
.Lnul_in_data1:
    // Byte-reversing moves the flag of the lowest-addressed zero byte to
    // the top, so clz yields pos = 8 * (index of the first NUL byte).
    rev     has_nul1, has_nul1
    clz     pos, has_nul1
    // tmp1 = number of bits still to copy, terminator included (8..64).
    // Bits 6, 5, 4 and 3 of tmp1 select stores of 8, 4, 2 and 1 bytes.
    add     tmp1, pos, #0x8

    tbz     tmp1, #6, 1f
    // The NUL is the last of the eight bytes: store the whole word.
#if defined(STPCPY)
    // Post-index by 7 so that dst is left pointing at the NUL.
    str     data1, [dst], #7
#elif defined(STRCPY)
    str     data1, [dst]
#endif
    ret
1:
    tbz     tmp1, #5, 1f
    str     data1_w, [dst], #4
    lsr     data1, data1, #32           // Bring the next bytes to the bottom.
1:
    tbz     tmp1, #4, 1f
    strh    data1_w, [dst], #2
    lsr     data1, data1, #16
1:
    tbz     tmp1, #3, 1f
    // Final single byte, which is the NUL.  No post-index, so for stpcpy
    // dst already points at the terminator.
    strb    data1_w, [dst]
#if defined(STPCPY)
    ret
#endif
1:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret

    // The string ends inside data2.  data1 is NUL-free and still unstored,
    // so write it out first, then finish exactly as in .Lnul_in_data1 but
    // on data2 / has_nul2.
.Lnul_in_data2:
    str     data1, [dst], #8
    rev     has_nul2, has_nul2
    clz     pos, has_nul2
    // tmp1 = number of bits of data2 to copy, terminator included (8..64).
    add     tmp1, pos, #0x8

    tbz     tmp1, #6, 1f
#if defined(STPCPY)
    // Post-index by 7 so that dst is left pointing at the NUL.
    str     data2, [dst], #7
#elif defined(STRCPY)
    str     data2, [dst]
#endif
    ret
1:
    tbz     tmp1, #5, 1f
    str     data2_w, [dst], #4
    lsr     data2, data2, #32
1:
    tbz     tmp1, #4, 1f
    strh    data2_w, [dst], #2
    lsr     data2, data2, #16
1:
    tbz     tmp1, #3, 1f
    strb    data2_w, [dst]
#if defined(STPCPY)
    ret
#endif
1:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret

    // src is not 16-byte aligned.  Copy 1, 2, 4 and then 8 bytes, each step
    // taken only if the corresponding bit of the (updated) src is set, so
    // that src is 16-byte aligned when the main loop is entered.  Every
    // step checks for the terminator before moving on.
.Lmisaligned:
    tbz     src, #0, 1f
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbnz    data1_w, 1f
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret
1:
    tbz     src, #1, 1f
    // Two bytes, copied one at a time so each can be tested for NUL.
    ldrb    data1_w, [src], #1
    strb    data1_w, [dst], #1
    cbz     data1_w, .Ldone
    ldrb    data2_w, [src], #1
    strb    data2_w, [dst], #1
    cbnz    data2_w, 1f
.Ldone:
#if defined(STPCPY)
    // Back up one so that dst points to the '\0' string terminator.
    sub     dst, dst, #1
#endif
    ret
1:
    tbz     src, #2, 1f
    ldr     data1_w, [src], #4
    // Check for a zero.
    // 32-bit form of the NUL test.  Writing the w registers clears the
    // upper halves, which .Lnul_in_data1 relies on.
    sub     has_nul1_w, data1_w, zeroones_w
    bic     has_nul1_w, has_nul1_w, data1_w
    ands    has_nul1_w, has_nul1_w, #0x80808080
    b.ne    .Lnul_in_data1
    str     data1_w, [dst], #4
1:
    tbz     src, #3, .Lloop
    ldr     data1, [src], #8
    // Check for a zero.
    sub     tmp1, data1, zeroones
    orr     tmp2, data1, #REP8_7f
    bics    has_nul1, tmp1, tmp2
    b.ne    .Lnul_in_data1
    str     data1, [dst], #8
    b       .Lloop
#if defined(STPCPY)
END(stpcpy)
#elif defined(STRCPY)
END(strcpy)
#endif
243#endif