/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <private/bionic_asm.h>


/*
 * Cache line size used to space the pld prefetches in memcmp below:
 * 32 bytes when HAVE_32_BYTE_CACHE_LINE is defined, 64 bytes otherwise.
 */
#ifdef HAVE_32_BYTE_CACHE_LINE
#define CACHE_LINE_SIZE     32
#else
#define CACHE_LINE_SIZE     64
#endif

/*
 * Optimized memcmp() for Cortex-A9.
 */

.syntax unified

/*
 * int memcmp(const void *lhs, const void *rhs, size_t n)
 *
 * In:    r0 = lhs, r1 = rhs, r2 = n (byte count)
 * Out:   r0 = 0 when the first n bytes are equal, otherwise
 *             lhs[i] - rhs[i] (bytes read unsigned) for the first
 *             differing index i
 * Uses:  r3, ip and the flags as scratch; d0-d7 when built with
 *        __ARM_NEON__; r4-r7 and lr are pushed on the stack and restored
 *        before returning.
 *
 * Words are only compared for equality (eors); whenever a word differs
 * the code rewinds and re-compares those 4 bytes one at a time, so the
 * returned difference always comes from the byte loop.
 */
ENTRY(memcmp)
        pld         [r0, #(CACHE_LINE_SIZE * 0)]
        pld         [r0, #(CACHE_LINE_SIZE * 1)]

        /* take care of the case where the buffers are the same
         * (a zero length is handled by the short path at label 10)
         */
        cmp         r0, r1
        moveq       r0, #0
        bxeq        lr

        pld         [r1, #(CACHE_LINE_SIZE * 0)]
        pld         [r1, #(CACHE_LINE_SIZE * 1)]

        /* make sure we have at least 8+4 bytes, this simplifies things below
         * and avoids some overhead for small blocks
         */
        cmp         r2, #(8+4)
        bmi         10f
/*
 * Neon optimization
 * Comparing 32 bytes at a time
 */
#if defined(__ARM_NEON__)
        /* from here until label 3, r2 = (bytes remaining) - 32 */
        subs        r2, r2, #32
        blo         3f

        /* preload all the cache lines we need. */
        pld         [r0, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

1:      /* The main loop compares 32 bytes at a time */
        vld1.8      {d0 - d3}, [r0]!
        pld         [r0, #(CACHE_LINE_SIZE * 2)]
        vld1.8      {d4 - d7}, [r1]!
        pld         [r1, #(CACHE_LINE_SIZE * 2)]

        /* Start subtracting the values and merge results */
        vsub.i8     q0, q2
        vsub.i8     q1, q3
        vorr        q2, q0, q1
        vorr        d4, d5
        vmov        r3, ip, d4
        /* Check if there are any differences among the 32 bytes */
        orrs        r3, ip
        bne         2f
        subs        r2, r2, #32
        bhs         1b
        b           3f
2:
        /* Check if the difference was in the first or last 16 bytes */
        sub         r0, #32
        vorr        d0, d1
        sub         r1, #32
        vmov        r3, ip, d0
        orrs        r3, ip
        /* if the first 16 bytes are equal, we only have to rewind 16 bytes */
        ittt        eq
        subeq       r2, #16
        addeq       r0, #16
        addeq       r1, #16

3:      /* fix-up the remaining count */
        add         r2, r2, #32

        cmp         r2, #(8+4)
        bmi         10f
#endif

        /* save registers */
        stmfd       sp!, {r4, lr}
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r4, 0
        .cfi_rel_offset lr, 4

        /* since r0 holds the result, move the first source
         * pointer somewhere else
         */
        mov         r4, r0

        /* align first pointer to word boundary
         * offset = -src & 3
         */
        rsb         r3, r4, #0
        ands        r3, r3, #3
        beq         0f

        /* align first pointer */
        sub         r2, r2, r3
1:      ldrb        r0, [r4], #1
        ldrb        ip, [r1], #1
        subs        r0, r0, ip
        bne         9f
        subs        r3, r3, #1
        bne         1b


0:      /* here the first pointer is aligned, and we have at least 4 bytes
         * to process.
         */

        /* see if the pointers are congruent */
        eor         r0, r4, r1
        ands        r0, r0, #3
        bne         5f

        /* congruent case, 32 bytes per iteration
         * We need to make sure there are at least 32+4 bytes left
         * because we effectively read ahead one word, and we could
         * read past the buffer (and segfault) if we're not careful.
         */

        ldr         ip, [r1]
        subs        r2, r2, #(32 + 4)
        bmi         1f

        /* ip/lr alternate as "current rhs word" and "read-ahead rhs word";
         * each ldreq/eorseq group is skipped once a difference is found.
         */
0:      pld         [r4, #(CACHE_LINE_SIZE * 2)]
        pld         [r1, #(CACHE_LINE_SIZE * 2)]
        ldr         r0, [r4], #4
        ldr         lr, [r1, #4]!
        eors        r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        ldreq       r0, [r4], #4
        ldreq       lr, [r1, #4]!
        eorseq      r0, r0, ip
        ldreq       r0, [r4], #4
        ldreq       ip, [r1, #4]!
        eorseq      r0, r0, lr
        bne         2f
        subs        r2, r2, #32
        bhs         0b

        /* do we have at least 4 bytes left? */
1:      adds        r2, r2, #(32 - 4 + 4)
        bmi         4f

        /* finish off 4 bytes at a time */
3:      ldr         r0, [r4], #4
        ldr         ip, [r1], #4
        eors        r0, r0, ip
        bne         2f
        subs        r2, r2, #4
        bhs         3b

        /* are we done? */
4:      adds        r2, r2, #4
        moveq       r0, #0
        beq         9f

        /* finish off the remaining bytes */
        b           8f

2:      /* the last 4 bytes are different, restart them */
        sub         r4, r4, #4
        sub         r1, r1, #4
        mov         r2, #4

        /* process the last few bytes (r2 must be non-zero on entry) */
8:      ldrb        r0, [r4], #1
        ldrb        ip, [r1], #1
        // stall
        subs        r0, r0, ip
        bne         9f
        subs        r2, r2, #1
        bne         8b

9:      /* restore registers and return */
        ldmfd       sp!, {r4, pc}

        /* The short path below is only reached through `10f` branches
         * taken before anything was pushed, so describe an empty frame.
         */
        .cfi_def_cfa_offset 0
        .cfi_restore r4
        .cfi_restore lr

10:     /* process less than 12 bytes */
        cmp         r2, #0
        moveq       r0, #0
        bxeq        lr
        mov         r3, r0
11:
        ldrb        r0, [r3], #1
        ldrb        ip, [r1], #1
        subs        r0, ip
        bxne        lr
        subs        r2, r2, #1
        bne         11b
        bx          lr

        /* Everything from here on runs with {r4, lr} still pushed. */
        .cfi_def_cfa_offset 8
        .cfi_rel_offset r4, 0
        .cfi_rel_offset lr, 4

5:      /*************** non-congruent case ***************/
        and         r0, r1, #3
        cmp         r0, #2
        bne         4f

        /* here, offset is 2 (16-bits aligned, special cased) */

        /* make sure we have at least 16 bytes to process */
        subs        r2, r2, #16
        addmi       r2, r2, #16
        bmi         8b

        /* align the unaligned pointer */
        bic         r1, r1, #3
        ldr         lr, [r1], #4

        /* each group rebuilds one rhs word from the upper half of the
         * previous aligned word and the lower half of the next one
         */
6:      pld         [r1, #(CACHE_LINE_SIZE * 2)]
        pld         [r4, #(CACHE_LINE_SIZE * 2)]
        mov         ip, lr, lsr #16
        ldr         lr, [r1], #4
        ldr         r0, [r4], #4
        orr         ip, ip, lr, lsl #16
        eors        r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        moveq       ip, lr, lsr #16
        ldreq       lr, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, lr, lsl #16
        eorseq      r0, r0, ip
        bne         7f
        subs        r2, r2, #16
        bhs         6b
        /* undo the read-ahead: r1 is one aligned word past, minus offset 2 */
        sub         r1, r1, #2
        /* are we done? */
        adds        r2, r2, #16
        moveq       r0, #0
        beq         9b
        /* finish off the remaining bytes */
        b           8b

7:      /* fix up the 2 pointers and fallthrough... */
        sub         r1, r1, #(4+2)
        sub         r4, r4, #4
        mov         r2, #4
        b           8b


4:      /*************** offset is 1 or 3 (less optimized) ***************/

        stmfd       sp!, {r5, r6, r7}
        .cfi_adjust_cfa_offset 12
        .cfi_rel_offset r5, 0
        .cfi_rel_offset r6, 4
        .cfi_rel_offset r7, 8

        // r5 = right-shift amount in bits: 8 * (rhs & 3)
        // r6 = left-shift amount in bits:  32 - r5
        // r7 = most recently loaded aligned rhs word

        mov         r5, r0, lsl #3      /* r5 = right shift */
        rsb         r6, r5, #32         /* r6 = left shift */

        /* align the unaligned pointer */
        bic         r1, r1, #3
        ldr         r7, [r1], #4
        sub         r2, r2, #8

6:      mov         ip, r7, lsr r5
        ldr         r7, [r1], #4
        ldr         r0, [r4], #4
        orr         ip, ip, r7, lsl r6
        eors        r0, r0, ip
        moveq       ip, r7, lsr r5
        ldreq       r7, [r1], #4
        ldreq       r0, [r4], #4
        orreq       ip, ip, r7, lsl r6
        eorseq      r0, r0, ip
        bne         7f
        subs        r2, r2, #8
        bhs         6b

        /* undo the read-ahead: r6 >> 3 == 4 - (original rhs & 3) */
        sub         r1, r1, r6, lsr #3
        /* label 7 below is entered with r5-r7 still pushed, so remember
         * that frame description before popping them here
         */
        .cfi_remember_state
        ldmfd       sp!, {r5, r6, r7}
        .cfi_adjust_cfa_offset -12
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7

        /* are we done? */
        adds        r2, r2, #8
        moveq       r0, #0
        beq         9b

        /* finish off the remaining bytes */
        b           8b

        .cfi_restore_state
7:      /* fix up the 2 pointers and fallthrough... */
        sub         r1, r1, #4
        sub         r1, r1, r6, lsr #3
        sub         r4, r4, #4
        mov         r2, #4
        ldmfd       sp!, {r5, r6, r7}
        .cfi_adjust_cfa_offset -12
        .cfi_restore r5
        .cfi_restore r6
        .cfi_restore r7
        b           8b
END(memcmp)