/*
 * Copyright (c) 2017 Imagination Technologies.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer
 *        in the documentation and/or other materials provided with
 *        the distribution.
 *      * Neither the name of Imagination Technologies nor the names of its
 *        contributors may be used to endorse or promote products derived
 *        from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef __ANDROID__
# include <private/bionic_asm.h>
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

#if __mips64
# define NSIZE 8
# define LW ld
# define LWR ldr
# define LWL ldl
# define EXT dext
# define SRL dsrl
# define SUBU dsubu
#else
# define NSIZE 4
# define LW lw
# define LWR lwr
# define LWL lwl
# define EXT ext
# define SRL srl
# define SUBU subu
#endif

/* Technically strcmp should not read past the end of the strings being
   compared.  We will read a full word that may contain excess bits beyond
   the NULL string terminator but unless ENABLE_READAHEAD is set, we will not
   read the next word after the end of string.  Setting ENABLE_READAHEAD will
   improve performance but is technically illegal based on the definition of
   strcmp.  */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ nop
#endif

/* Testing on a little endian machine showed using CLZ was a
   performance loss, so we are not turning it on by default.  */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1) && (!__mips64)
# define USE_CLZ
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
#  define PTR_ADDIU addiu
# else
#  define PTR_ADDIU daddiu
# endif
#endif

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).  */
#define BYTECMP01(OFFSET) \
        lbu v0, OFFSET(a0); \
        lbu v1, OFFSET(a1); \
        beq v0, zero, L(bexit01); \
        nop; \
        bne v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
        lbu t8, OFFSET(a0); \
        lbu t9, OFFSET(a1); \
        beq t8, zero, L(bexit89); \
        nop; \
        bne t8, t9, L(bexit89)

/* Allow the routine to be named something else if desired.  */
#ifndef STRNCMP_NAME
# define STRNCMP_NAME strncmp
#endif

#ifdef __ANDROID__
LEAF(STRNCMP_NAME, 0)
#else
LEAF(STRNCMP_NAME)
#endif
        .set nomips16
        .set noreorder

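/* Arguments per the MIPS calling convention: a0 = s1, a1 = s2, a2 = n.
   If fewer than 2 * NSIZE bytes are to be compared, go straight to the
   byte loop; otherwise align a1 to a word boundary first, then compare
   a word (NSIZE bytes) at a time.  */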
        srl t0, a2, (2 + NSIZE / 4)
        beqz t0, L(byteloop)            #process by bytes if less than (2 * NSIZE)
        andi t1, a1, (NSIZE - 1)
        beqz t1, L(exitalign)
        or t0, zero, NSIZE
        SUBU t1, t0, t1                 #process (NSIZE - 1) bytes at max
        SUBU a2, a2, t1                 #dec count by t1

L(alignloop):                           #do by bytes until a1 aligned
        BYTECMP01(0)
        SUBU t1, t1, 0x1
        PTR_ADDIU a0, a0, 0x1
        bne t1, zero, L(alignloop)
        PTR_ADDIU a1, a1, 0x1

L(exitalign):

/* string a1 is NSIZE byte aligned at this point. */
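/* Load the magic constants used for word-at-a-time NUL detection:
   t8 = 0x0101...01 and t9 = 0x7f7f...7f.  A word w contains a zero byte
   iff (w - t8) & ~w & 0x8080...80 is non-zero; the word-compare macros
   below compute exactly that with their SUBU/nor/and sequence.  */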
#ifndef __mips1
        lui t8, 0x0101
        ori t8, 0x0101
        lui t9, 0x7f7f
        ori t9, 0x7f7f
#if __mips64
        dsll t0, t8, 32
        or t8, t0
        dsll t1, t9, 32
        or t9, t1
#endif
#endif

/* Unaligned word access is not supported here for MIPS1 (neither in hardware
   nor in software), so it falls back to the byte loop.  R6 and later handle
   unaligned accesses in hardware.  The remaining architectures use the
   unaligned load path (lwl/lwr) below.  */

#if __mips1
        andi t0, a0, (NSIZE - 1)
        bne t0, zero, L(byteloop)
#elif __mips_isa_rev < 6
        andi t0, a0, (NSIZE - 1)
        bne t0, zero, L(uwordloop)
#endif

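/* Compare one aligned word from each string.  Branch to L(worddiff) when the
   words differ, or to L(returnzero) when the word from a0 contains the NUL
   terminator (the strings are equal through the terminator).  */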
#define STRCMPW(OFFSET) \
        LW v0, (OFFSET)(a0); \
        LW v1, (OFFSET)(a1); \
        SUBU t0, v0, t8; \
        bne v0, v1, L(worddiff); \
        nor t1, v0, t9; \
        and t0, t0, t1; \
        bne t0, zero, L(returnzero);\

L(wordloop):
        SUBU t1, a2, (8 * NSIZE)
        bltz t1, L(onewords)
        STRCMPW(0 * NSIZE)
        DELAY_READ
        STRCMPW(1 * NSIZE)
        DELAY_READ
        STRCMPW(2 * NSIZE)
        DELAY_READ
        STRCMPW(3 * NSIZE)
        DELAY_READ
        STRCMPW(4 * NSIZE)
        DELAY_READ
        STRCMPW(5 * NSIZE)
        DELAY_READ
        STRCMPW(6 * NSIZE)
        DELAY_READ
        STRCMPW(7 * NSIZE)
        SUBU a2, a2, (8 * NSIZE)
        PTR_ADDIU a0, a0, (8 * NSIZE)
        b L(wordloop)
        PTR_ADDIU a1, a1, (8 * NSIZE)

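/* Fewer than 8 * NSIZE bytes remain: compare the remaining whole words one
   at a time, then finish the tail in the byte loop.  */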
L(onewords):
        SUBU t1, a2, NSIZE
        bltz t1, L(byteloop)
        STRCMPW(0)
        SUBU a2, a2, NSIZE
        PTR_ADDIU a0, a0, NSIZE
        b L(onewords)
        PTR_ADDIU a1, a1, NSIZE

#if __mips_isa_rev < 6 && !__mips1
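/* Unaligned variant of the word loop: a1 is word aligned but a0 is not, so
   the word from a0 is assembled with the LWR/LWL pair.  These unaligned load
   instructions were removed in R6, hence the guard above.  */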
#define USTRCMPW(OFFSET) \
        LWR v0, (OFFSET)(a0); \
        LWL v0, (OFFSET + NSIZE - 1)(a0); \
        LW v1, (OFFSET)(a1); \
        SUBU t0, v0, t8; \
        bne v0, v1, L(worddiff); \
        nor t1, v0, t9; \
        and t0, t0, t1; \
        bne t0, zero, L(returnzero);\

L(uwordloop):
        SUBU t1, a2, (8 * NSIZE)
        bltz t1, L(uonewords)
        USTRCMPW(0 * NSIZE)
        DELAY_READ
        USTRCMPW(1 * NSIZE)
        DELAY_READ
        USTRCMPW(2 * NSIZE)
        DELAY_READ
        USTRCMPW(3 * NSIZE)
        DELAY_READ
        USTRCMPW(4 * NSIZE)
        DELAY_READ
        USTRCMPW(5 * NSIZE)
        DELAY_READ
        USTRCMPW(6 * NSIZE)
        DELAY_READ
        USTRCMPW(7 * NSIZE)
        SUBU a2, a2, (8 * NSIZE)
        PTR_ADDIU a0, a0, (8 * NSIZE)
        b L(uwordloop)
        PTR_ADDIU a1, a1, (8 * NSIZE)

L(uonewords):
        SUBU t1, a2, NSIZE
        bltz t1, L(byteloop)
        USTRCMPW(0)
        SUBU a2, a2, NSIZE
        PTR_ADDIU a0, a0, NSIZE
        b L(uonewords)
        PTR_ADDIU a1, a1, NSIZE

#endif

L(returnzero):
        j ra
        move v0, zero

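/* Helpers for L(worddiff): extract the byte at bit position POS from each
   word and compare, exiting when a NUL or a difference is found.  R2 and
   later use the EXT bit-field extract; older ISAs shift and mask instead.  */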
#if __mips_isa_rev > 1
#define EXT_COMPARE01(POS) \
        EXT t0, v0, POS, 8; \
        beq t0, zero, L(wexit01); \
        EXT t1, v1, POS, 8; \
        bne t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
        EXT t8, v0, POS, 8; \
        beq t8, zero, L(wexit89); \
        EXT t9, v1, POS, 8; \
        bne t8, t9, L(wexit89)
#else
#define EXT_COMPARE01(POS) \
        SRL t0, v0, POS; \
        SRL t1, v1, POS; \
        andi t0, t0, 0xff; \
        beq t0, zero, L(wexit01); \
        andi t1, t1, 0xff; \
        bne t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
        SRL t8, v0, POS; \
        SRL t9, v1, POS; \
        andi t8, t8, 0xff; \
        beq t8, zero, L(wexit89); \
        andi t9, t9, 0xff; \
        bne t8, t9, L(wexit89)
#endif

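/* The two words differ.  Find the first byte (in string order) that differs
   or is a NUL and return the signed difference of those two bytes.  The CLZ
   path locates that byte with a count-leading-zeros on a combined
   difference/NUL mask; the fallback checks bytes starting from the
   lowest-addressed one.  */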
L(worddiff):
#ifdef USE_CLZ
        SUBU t0, v0, t8
        nor t1, v0, t9
        and t1, t0, t1
        xor t0, v0, v1
        or t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        wsbh t0, t0
        rotr t0, t0, 16
# endif
        clz t1, t0
        and t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        neg t1
        addu t1, 24
# endif
        rotrv v0, v0, t1
        rotrv v1, v1, t1
        and v0, v0, 0xff
        and v1, v1, 0xff
        j ra
        SUBU v0, v0, v1
#else /* USE_CLZ */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        andi t0, v0, 0xff
        beq t0, zero, L(wexit01)
        andi t1, v1, 0xff
        bne t0, t1, L(wexit01)
        EXT_COMPARE89(8)
        EXT_COMPARE01(16)
#ifndef __mips64
        SRL t8, v0, 24
        SRL t9, v1, 24
#else
        EXT_COMPARE89(24)
        EXT_COMPARE01(32)
        EXT_COMPARE89(40)
        EXT_COMPARE01(48)
        SRL t8, v0, 56
        SRL t9, v1, 56
#endif

# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
#ifdef __mips64
        SRL t0, v0, 56
        beq t0, zero, L(wexit01)
        SRL t1, v1, 56
        bne t0, t1, L(wexit01)
        EXT_COMPARE89(48)
        EXT_COMPARE01(40)
        EXT_COMPARE89(32)
        EXT_COMPARE01(24)
#else
        SRL t0, v0, 24
        beq t0, zero, L(wexit01)
        SRL t1, v1, 24
        bne t0, t1, L(wexit01)
#endif
        EXT_COMPARE89(16)
        EXT_COMPARE01(8)

        andi t8, v0, 0xff
        andi t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

L(wexit89):
        j ra
        SUBU v0, t8, t9
L(wexit01):
        j ra
        SUBU v0, t0, t1
#endif /* USE_CLZ */

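/* Byte-at-a-time tail: used when the count is small, when fewer than NSIZE
   bytes remain, or on MIPS1 when a0 is not word aligned.  Each compare
   decrements the remaining count a2 and returns zero once it is exhausted.  */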
L(byteloop):
        beq a2, zero, L(returnzero)
        SUBU a2, a2, 1
        BYTECMP01(0)
        nop
        beq a2, zero, L(returnzero)
        SUBU a2, a2, 1
        BYTECMP89(1)
        nop
        beq a2, zero, L(returnzero)
        SUBU a2, a2, 1
        BYTECMP01(2)
        nop
        beq a2, zero, L(returnzero)
        SUBU a2, a2, 1
        BYTECMP89(3)
        PTR_ADDIU a0, a0, 4
        b L(byteloop)
        PTR_ADDIU a1, a1, 4

L(bexit01):
        j ra
        SUBU v0, v0, v1
L(bexit89):
        j ra
        SUBU v0, t8, t9

        .set at
        .set reorder

END(STRNCMP_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (STRNCMP_NAME)
# endif
#endif