/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef L
# define L(label) .L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc .cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc .cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg) .cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state .cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state .cfi_restore_state
#endif

#ifndef ENTRY
# define ENTRY(name) \
	.type name, @function; \
	.globl name; \
	.p2align 4; \
name: \
	cfi_startproc
#endif

#ifndef END
# define END(name) \
	cfi_endproc; \
	.size name, .-name
#endif

#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

#define PUSH(REG) pushl REG; CFI_PUSH (REG)
#define POP(REG) popl REG; CFI_POP (REG)

#ifndef USE_AS_STRNCMP
# define STR1 4
# define STR2 STR1+4
# define RETURN ret

# define UPDATE_STRNCMP_COUNTER
#else
# define STR1 8
# define STR2 STR1+4
# define CNT STR2+4
# define RETURN POP (%ebp); ret; CFI_PUSH (%ebp)

# define UPDATE_STRNCMP_COUNTER \
	/* calculate the number of bytes left to compare */ \
	mov $16, %esi; \
	sub %ecx, %esi; \
	cmpl %esi, %ebp; \
	jbe L(more8byteseq); \
	sub %esi, %ebp
#endif
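
/*
 * Note on the layout implied by the definitions above: the routine is
 * cdecl, so on entry str1 lives at 4(%esp) and str2 at 8(%esp).  In the
 * strncmp build %ebp is pushed first to hold the byte count, which
 * shifts every argument up by 4 (hence STR1 becomes 8, with the count
 * at CNT = 16(%esp)) and obliges RETURN to pop %ebp on every exit path.
 */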

#ifndef STRCMP
# define STRCMP strcmp
#endif

	.section .text.ssse3,"ax",@progbits
ENTRY (STRCMP)
#ifdef USE_AS_STRNCMP
	PUSH (%ebp)
	cfi_remember_state
#endif
	movl STR1(%esp), %edx
	movl STR2(%esp), %eax
#ifdef USE_AS_STRNCMP
	movl CNT(%esp), %ebp
	cmpl $16, %ebp
	jb L(less16bytes_sncmp)
	jmp L(more16bytes)
#endif

	movzbl (%eax), %ecx
	cmpb %cl, (%edx)
	jne L(neq)
	cmpl $0, %ecx
	je L(eq)

	movzbl 1(%eax), %ecx
	cmpb %cl, 1(%edx)
	jne L(neq)
	cmpl $0, %ecx
	je L(eq)

	movzbl 2(%eax), %ecx
	cmpb %cl, 2(%edx)
	jne L(neq)
	cmpl $0, %ecx
	je L(eq)

	movzbl 3(%eax), %ecx
	cmpb %cl, 3(%edx)
	jne L(neq)
	cmpl $0, %ecx
	je L(eq)

	movzbl 4(%eax), %ecx
	cmpb %cl, 4(%edx)
	jne L(neq)
	cmpl $0, %ecx
	je L(eq)

	movzbl 5(%eax), %ecx
	cmpb %cl, 5(%edx)
	jne L(neq)
	cmpl $0, %ecx
	je L(eq)

	movzbl 6(%eax), %ecx
	cmpb %cl, 6(%edx)
	jne L(neq)
	cmpl $0, %ecx
	je L(eq)

	movzbl 7(%eax), %ecx
	cmpb %cl, 7(%edx)
	jne L(neq)
	cmpl $0, %ecx
	je L(eq)

	add $8, %edx
	add $8, %eax
#ifdef USE_AS_STRNCMP
	cmpl $8, %ebp
	lea -8(%ebp), %ebp
	je L(eq)
L(more16bytes):
#endif
	movl %edx, %ecx
	and $0xfff, %ecx
	cmpl $0xff0, %ecx
	ja L(crosspage)
	mov %eax, %ecx
	and $0xfff, %ecx
	cmpl $0xff0, %ecx
	ja L(crosspage)
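
/*
 * The pcmpeqb/psubb/pmovmskb sequence below is the workhorse test used
 * throughout this file.  %xmm0 starts as zero, so the first pcmpeqb
 * marks the NUL bytes of the %eax block; the second marks the positions
 * where the two blocks match.  After psubb, a byte is 0xff (mask bit
 * set) exactly when the strings match there and the byte is not NUL,
 * so subtracting 0xffff leaves zero only for 16 equal, non-NUL bytes.
 * A rough C sketch of the test (illustrative only, not part of the
 * original source):
 *
 *	unsigned mask = 0;
 *	for (int i = 0; i < 16; i++)
 *		if (a[i] == b[i] && a[i] != '\0')
 *			mask |= 1u << i;
 *	if (mask != 0xffff)
 *		goto less16bytes;  // lowest clear bit = first diff or NUL
 */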
	pxor %xmm0, %xmm0
	movlpd (%eax), %xmm1
	movlpd (%edx), %xmm2
	movhpd 8(%eax), %xmm1
	movhpd 8(%edx), %xmm2
	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %ecx
	sub $0xffff, %ecx
	jnz L(less16bytes)
#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(eq)
#endif
	add $16, %eax
	add $16, %edx

L(crosspage):

	PUSH (%ebx)
	PUSH (%edi)
	PUSH (%esi)

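/*
 * Align both pointers down to 16 bytes and remember their offsets:
 * %ecx gets the low four bits of the %eax pointer, %edi those of the
 * %edx pointer.  %ebx collects state for the epilogue: its low five
 * bits will hold the shift used by the palignr loops, and bit 0x20
 * records that the operands were swapped so that %ecx >= %edi, which
 * the exit code undoes before computing the return value.
 */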
	movl %edx, %edi
	movl %eax, %ecx
	and $0xf, %ecx
	and $0xf, %edi
	xor %ecx, %eax
	xor %edi, %edx
	xor %ebx, %ebx
	cmpl %edi, %ecx
	je L(ashr_0)
	ja L(bigger)
	or $0x20, %ebx
	xchg %edx, %eax
	xchg %ecx, %edi
L(bigger):
	lea 15(%edi), %edi
	sub %ecx, %edi
	cmpl $8, %edi
	jle L(ashr_less_8)
	cmpl $14, %edi
	je L(ashr_15)
	cmpl $13, %edi
	je L(ashr_14)
	cmpl $12, %edi
	je L(ashr_13)
	cmpl $11, %edi
	je L(ashr_12)
	cmpl $10, %edi
	je L(ashr_11)
	cmpl $9, %edi
	je L(ashr_10)
L(ashr_less_8):
	je L(ashr_9)
	cmpl $7, %edi
	je L(ashr_8)
	cmpl $6, %edi
	je L(ashr_7)
	cmpl $5, %edi
	je L(ashr_6)
	cmpl $4, %edi
	je L(ashr_5)
	cmpl $3, %edi
	je L(ashr_4)
	cmpl $2, %edi
	je L(ashr_3)
	cmpl $1, %edi
	je L(ashr_2)
	cmpl $0, %edi
	je L(ashr_1)
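
/*
 * Dispatch: at this point %edi = 15 - (%ecx - %edi) lies in 0..14 and
 * selects one of the fifteen shifted-compare loops.  L(ashr_N) handles
 * an offset difference of 16 - N; in each of those loops the %edx
 * stream is read 16-byte aligned and shifted into place with
 * palignr $N, while the %eax stream is read at its own alignment.  The
 * case tables below document this mapping.
 */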

/*
 * The following cases will be handled by ashr_0
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(0~15)               n(0~15)          15(15 + n - n)         ashr_0
 */
	.p2align 4
L(ashr_0):
	mov $0xffff, %esi
	movdqa (%eax), %xmm1
	pxor %xmm0, %xmm0
	pcmpeqb %xmm1, %xmm0
	pcmpeqb (%edx), %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	mov %ecx, %edi
	jne L(less32bytes)
	UPDATE_STRNCMP_COUNTER
	mov $0x10, %ebx
	mov $0x10, %ecx
	pxor %xmm0, %xmm0
	.p2align 4
L(loop_ashr_0):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)
#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	jmp L(loop_ashr_0)

/*
 * The following cases will be handled by ashr_1
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(15)                 n - 15           0(15 +(n-15) - n)      ashr_1
 */
	.p2align 4
L(ashr_1):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $15, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -15(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $1, %ebx
	lea 1(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

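/*
 * Main loop shape (the same for every shift): %xmm3 carries the
 * previous aligned 16 bytes of the %edx stream, so palignr $1 below
 * picks bytes 1..16 out of the 32-byte concatenation of the current
 * block (%xmm2, saved in %xmm4) on top of the previous one (%xmm3) --
 * exactly the unaligned window that lines up with the %eax stream.
 * %edi holds ((%edx + 1) & 0xfff) - 0x1000 and is bumped by 16 each
 * iteration; when it goes positive the shifted source has reached a
 * page boundary, so before touching the next page L(nibble_ashr_1)
 * checks whether the string already terminates in the bytes in hand.
 * The body is unrolled twice.  The fourteen loops that follow
 * (ashr_2 .. ashr_15) repeat this pattern with the shift changed.
 */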
	.p2align 4
L(loop_ashr_1):
	add $16, %edi
	jg L(nibble_ashr_1)

L(gobble_ashr_1):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $1, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_1)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $1, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_1)

	.p2align 4
L(nibble_ashr_1):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xfffe, %esi
	jnz L(ashr_1_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $15, %ebp
	jbe L(ashr_1_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_1)

	.p2align 4
L(ashr_1_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $1, %xmm0
	psrldq $1, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_2
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(14~15)              n - 14           1(15 +(n-14) - n)      ashr_2
 */
	.p2align 4
L(ashr_2):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $14, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -14(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $2, %ebx
	lea 2(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_2):
	add $16, %edi
	jg L(nibble_ashr_2)

L(gobble_ashr_2):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $2, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_2)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $2, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_2)

	.p2align 4
L(nibble_ashr_2):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xfffc, %esi
	jnz L(ashr_2_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $14, %ebp
	jbe L(ashr_2_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_2)

	.p2align 4
L(ashr_2_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $2, %xmm0
	psrldq $2, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_3
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(13~15)              n - 13           2(15 +(n-13) - n)      ashr_3
 */
	.p2align 4
L(ashr_3):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $13, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -13(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $3, %ebx
	lea 3(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_3):
	add $16, %edi
	jg L(nibble_ashr_3)

L(gobble_ashr_3):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $3, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_3)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $3, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_3)

	.p2align 4
L(nibble_ashr_3):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xfff8, %esi
	jnz L(ashr_3_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $13, %ebp
	jbe L(ashr_3_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_3)

	.p2align 4
L(ashr_3_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $3, %xmm0
	psrldq $3, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_4
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(12~15)              n - 12           3(15 +(n-12) - n)      ashr_4
 */
	.p2align 4
L(ashr_4):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $12, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -12(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $4, %ebx
	lea 4(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_4):
	add $16, %edi
	jg L(nibble_ashr_4)

L(gobble_ashr_4):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $4, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_4)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $4, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_4)

	.p2align 4
L(nibble_ashr_4):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xfff0, %esi
	jnz L(ashr_4_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $12, %ebp
	jbe L(ashr_4_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_4)

	.p2align 4
L(ashr_4_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $4, %xmm0
	psrldq $4, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_5
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(11~15)              n - 11           4(15 +(n-11) - n)      ashr_5
 */
	.p2align 4
L(ashr_5):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $11, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -11(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $5, %ebx
	lea 5(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_5):
	add $16, %edi
	jg L(nibble_ashr_5)

L(gobble_ashr_5):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $5, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_5)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $5, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_5)

	.p2align 4
L(nibble_ashr_5):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xffe0, %esi
	jnz L(ashr_5_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $11, %ebp
	jbe L(ashr_5_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_5)

	.p2align 4
L(ashr_5_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $5, %xmm0
	psrldq $5, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_6
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(10~15)              n - 10           5(15 +(n-10) - n)      ashr_6
 */
	.p2align 4
L(ashr_6):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $10, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -10(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $6, %ebx
	lea 6(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_6):
	add $16, %edi
	jg L(nibble_ashr_6)

L(gobble_ashr_6):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $6, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_6)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $6, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_6)

	.p2align 4
L(nibble_ashr_6):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xffc0, %esi
	jnz L(ashr_6_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $10, %ebp
	jbe L(ashr_6_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_6)

	.p2align 4
L(ashr_6_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $6, %xmm0
	psrldq $6, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_7
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(9~15)               n - 9            6(15 +(n-9) - n)       ashr_7
 */
	.p2align 4
L(ashr_7):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $9, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -9(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $7, %ebx
	lea 8(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_7):
	add $16, %edi
	jg L(nibble_ashr_7)

L(gobble_ashr_7):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $7, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_7)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $7, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_7)

	.p2align 4
L(nibble_ashr_7):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xff80, %esi
	jnz L(ashr_7_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $9, %ebp
	jbe L(ashr_7_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_7)

	.p2align 4
L(ashr_7_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $7, %xmm0
	psrldq $7, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_8
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(8~15)               n - 8            7(15 +(n-8) - n)       ashr_8
 */
	.p2align 4
L(ashr_8):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $8, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -8(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $8, %ebx
	lea 8(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_8):
	add $16, %edi
	jg L(nibble_ashr_8)

L(gobble_ashr_8):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $8, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_8)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $8, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_8)

	.p2align 4
L(nibble_ashr_8):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xff00, %esi
	jnz L(ashr_8_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $8, %ebp
	jbe L(ashr_8_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_8)

	.p2align 4
L(ashr_8_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $8, %xmm0
	psrldq $8, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_9
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(7~15)               n - 7            8(15 +(n-7) - n)       ashr_9
 */
	.p2align 4
L(ashr_9):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $7, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -7(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $9, %ebx
	lea 9(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_9):
	add $16, %edi
	jg L(nibble_ashr_9)

L(gobble_ashr_9):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $9, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_9)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $9, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_9)

	.p2align 4
L(nibble_ashr_9):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xfe00, %esi
	jnz L(ashr_9_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $7, %ebp
	jbe L(ashr_9_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_9)

	.p2align 4
L(ashr_9_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $9, %xmm0
	psrldq $9, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_10
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(6~15)               n - 6            9(15 +(n-6) - n)       ashr_10
 */
	.p2align 4
L(ashr_10):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $6, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -6(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $10, %ebx
	lea 10(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_10):
	add $16, %edi
	jg L(nibble_ashr_10)

L(gobble_ashr_10):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $10, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_10)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $10, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_10)

	.p2align 4
L(nibble_ashr_10):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xfc00, %esi
	jnz L(ashr_10_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $6, %ebp
	jbe L(ashr_10_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_10)

	.p2align 4
L(ashr_10_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $10, %xmm0
	psrldq $10, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_11
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(5~15)               n - 5            10(15 +(n-5) - n)      ashr_11
 */
	.p2align 4
L(ashr_11):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $5, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -5(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $11, %ebx
	lea 11(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_11):
	add $16, %edi
	jg L(nibble_ashr_11)

L(gobble_ashr_11):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $11, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_11)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $11, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_11)

	.p2align 4
L(nibble_ashr_11):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xf800, %esi
	jnz L(ashr_11_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $5, %ebp
	jbe L(ashr_11_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_11)

	.p2align 4
L(ashr_11_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $11, %xmm0
	psrldq $11, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_12
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(4~15)               n - 4            11(15 +(n-4) - n)      ashr_12
 */
	.p2align 4
L(ashr_12):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $4, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -4(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $12, %ebx
	lea 12(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_12):
	add $16, %edi
	jg L(nibble_ashr_12)

L(gobble_ashr_12):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $12, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_12)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $12, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_12)

	.p2align 4
L(nibble_ashr_12):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xf000, %esi
	jnz L(ashr_12_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $4, %ebp
	jbe L(ashr_12_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_12)

	.p2align 4
L(ashr_12_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $12, %xmm0
	psrldq $12, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_13
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(3~15)               n - 3            12(15 +(n-3) - n)      ashr_13
 */
	.p2align 4
L(ashr_13):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $3, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -3(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $13, %ebx
	lea 13(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_13):
	add $16, %edi
	jg L(nibble_ashr_13)

L(gobble_ashr_13):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $13, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_13)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $13, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_13)

	.p2align 4
L(nibble_ashr_13):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xe000, %esi
	jnz L(ashr_13_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $3, %ebp
	jbe L(ashr_13_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_13)

	.p2align 4
L(ashr_13_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $13, %xmm0
	psrldq $13, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_14
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(2~15)               n - 2            13(15 +(n-2) - n)      ashr_14
 */
	.p2align 4
L(ashr_14):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $2, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -2(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $14, %ebx
	lea 14(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_14):
	add $16, %edi
	jg L(nibble_ashr_14)

L(gobble_ashr_14):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $14, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_14)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $14, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_14)

	.p2align 4
L(nibble_ashr_14):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0xc000, %esi
	jnz L(ashr_14_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $2, %ebp
	jbe L(ashr_14_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_14)

	.p2align 4
L(ashr_14_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $14, %xmm0
	psrldq $14, %xmm3
	jmp L(aftertail)

/*
 * The following cases will be handled by ashr_15
 * ecx(offset of %eax)   edi(offset of %edx)   relative offset   corresponding case
 *      n(1~15)               n - 1            14(15 +(n-1) - n)      ashr_15
 */
	.p2align 4
L(ashr_15):
	mov $0xffff, %esi
	pxor %xmm0, %xmm0
	movdqa (%edx), %xmm2
	movdqa (%eax), %xmm1
	pcmpeqb %xmm1, %xmm0
	pslldq $1, %xmm2
	pcmpeqb %xmm1, %xmm2
	psubb %xmm0, %xmm2
	pmovmskb %xmm2, %edi
	shr %cl, %esi
	shr %cl, %edi
	sub %edi, %esi
	lea -1(%ecx), %edi
	jnz L(less32bytes)

	UPDATE_STRNCMP_COUNTER

	movdqa (%edx), %xmm3
	pxor %xmm0, %xmm0
	mov $16, %ecx
	or $15, %ebx
	lea 15(%edx), %edi
	and $0xfff, %edi
	sub $0x1000, %edi

	.p2align 4
L(loop_ashr_15):
	add $16, %edi
	jg L(nibble_ashr_15)

L(gobble_ashr_15):
	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $15, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3

	add $16, %edi
	jg L(nibble_ashr_15)

	movdqa (%eax, %ecx), %xmm1
	movdqa (%edx, %ecx), %xmm2
	movdqa %xmm2, %xmm4

	palignr $15, %xmm3, %xmm2

	pcmpeqb %xmm1, %xmm0
	pcmpeqb %xmm2, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	sub $0xffff, %esi
	jnz L(exit)

#ifdef USE_AS_STRNCMP
	cmpl $16, %ebp
	lea -16(%ebp), %ebp
	jbe L(more8byteseq)
#endif
	add $16, %ecx
	movdqa %xmm4, %xmm3
	jmp L(loop_ashr_15)

	.p2align 4
L(nibble_ashr_15):
	pcmpeqb %xmm3, %xmm0
	pmovmskb %xmm0, %esi
	test $0x8000, %esi
	jnz L(ashr_15_exittail)

#ifdef USE_AS_STRNCMP
	cmpl $1, %ebp
	jbe L(ashr_15_exittail)
#endif
	pxor %xmm0, %xmm0
	sub $0x1000, %edi
	jmp L(gobble_ashr_15)

	.p2align 4
L(ashr_15_exittail):
	movdqa (%eax, %ecx), %xmm1
	psrldq $15, %xmm0
	psrldq $15, %xmm3
	jmp L(aftertail)

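/*
 * Common tail for all shifted loops.  L(aftertail) finishes the last
 * palignr'd comparison; after the not, set bits in %esi mark the
 * positions that differ or hit the terminating NUL, and the byte
 * dispatch below picks the lowest.  L(exit) recovers the window's byte
 * offset from the shift stashed in %ebx, and L(less32bytes) adds the
 * offsets back onto the aligned pointers, swapping them again if bit
 * 0x20 of %ebx says they were exchanged, so the result keeps the right
 * sign.
 */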
	.p2align 4
L(aftertail):
	pcmpeqb %xmm3, %xmm1
	psubb %xmm0, %xmm1
	pmovmskb %xmm1, %esi
	not %esi
L(exit):
	mov %ebx, %edi
	and $0x1f, %edi
	lea -16(%edi, %ecx), %edi
L(less32bytes):
	add %edi, %edx
	add %ecx, %eax
	test $0x20, %ebx
	jz L(ret2)
	xchg %eax, %edx

	.p2align 4
L(ret2):
	mov %esi, %ecx
	POP (%esi)
	POP (%edi)
	POP (%ebx)
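
/*
 * %ecx now holds a 16-bit mask whose lowest set bit indexes the first
 * byte pair that differs or contains the terminating NUL (the loop
 * exits deliver the equal-bytes mask plus one, whose lowest set bit is
 * the first clear bit of the original).  The chain of single-bit tests
 * below is a hand-rolled bit scan: low byte first, then
 * L(2next_8_bytes) advances both pointers by 8 and repeats the same
 * tests on %ch.
 */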
L(less16bytes):
	test %cl, %cl
	jz L(2next_8_bytes)

	test $0x01, %cl
	jnz L(Byte0)

	test $0x02, %cl
	jnz L(Byte1)

	test $0x04, %cl
	jnz L(Byte2)

	test $0x08, %cl
	jnz L(Byte3)

	test $0x10, %cl
	jnz L(Byte4)

	test $0x20, %cl
	jnz L(Byte5)

	test $0x40, %cl
	jnz L(Byte6)
#ifdef USE_AS_STRNCMP
	cmpl $7, %ebp
	jbe L(eq)
#endif

	movzbl 7(%eax), %ecx
	movzbl 7(%edx), %eax

	sub %ecx, %eax
	RETURN

	.p2align 4
L(Byte0):
#ifdef USE_AS_STRNCMP
	cmpl $0, %ebp
	jbe L(eq)
#endif
	movzbl (%eax), %ecx
	movzbl (%edx), %eax

	sub %ecx, %eax
	RETURN

	.p2align 4
L(Byte1):
#ifdef USE_AS_STRNCMP
	cmpl $1, %ebp
	jbe L(eq)
#endif
	movzbl 1(%eax), %ecx
	movzbl 1(%edx), %eax

	sub %ecx, %eax
	RETURN

	.p2align 4
L(Byte2):
#ifdef USE_AS_STRNCMP
	cmpl $2, %ebp
	jbe L(eq)
#endif
	movzbl 2(%eax), %ecx
	movzbl 2(%edx), %eax

	sub %ecx, %eax
	RETURN

	.p2align 4
L(Byte3):
#ifdef USE_AS_STRNCMP
	cmpl $3, %ebp
	jbe L(eq)
#endif
	movzbl 3(%eax), %ecx
	movzbl 3(%edx), %eax

	sub %ecx, %eax
	RETURN

	.p2align 4
L(Byte4):
#ifdef USE_AS_STRNCMP
	cmpl $4, %ebp
	jbe L(eq)
#endif
	movzbl 4(%eax), %ecx
	movzbl 4(%edx), %eax

	sub %ecx, %eax
	RETURN

	.p2align 4
L(Byte5):
#ifdef USE_AS_STRNCMP
	cmpl $5, %ebp
	jbe L(eq)
#endif
	movzbl 5(%eax), %ecx
	movzbl 5(%edx), %eax

	sub %ecx, %eax
	RETURN

	.p2align 4
L(Byte6):
#ifdef USE_AS_STRNCMP
	cmpl $6, %ebp
	jbe L(eq)
#endif
	movzbl 6(%eax), %ecx
	movzbl 6(%edx), %eax

	sub %ecx, %eax
	RETURN

	.p2align 4
L(2next_8_bytes):
	add $8, %eax
	add $8, %edx
#ifdef USE_AS_STRNCMP
	cmpl $8, %ebp
	lea -8(%ebp), %ebp
	jbe L(eq)
#endif

	test $0x01, %ch
	jnz L(Byte0)

	test $0x02, %ch
	jnz L(Byte1)

	test $0x04, %ch
	jnz L(Byte2)

	test $0x08, %ch
	jnz L(Byte3)

	test $0x10, %ch
	jnz L(Byte4)

	test $0x20, %ch
	jnz L(Byte5)

	test $0x40, %ch
	jnz L(Byte6)

#ifdef USE_AS_STRNCMP
	cmpl $7, %ebp
	jbe L(eq)
#endif
	movzbl 7(%eax), %ecx
	movzbl 7(%edx), %eax

	sub %ecx, %eax
	RETURN

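/*
 * L(neq) is entered straight from a cmpb whose flags are still live
 * (mov does not touch EFLAGS), so the ja below still sees the unsigned
 * comparison of the two differing bytes and picks +1 or -1 accordingly.
 */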
	.p2align 4
L(neq):
	mov $1, %eax
	ja L(neq_bigger)
	neg %eax
L(neq_bigger):
	RETURN

#ifdef USE_AS_STRNCMP
	.p2align 4
L(more8byteseq):
	POP (%esi)
	POP (%edi)
	POP (%ebx)
#endif

L(eq):
#ifdef USE_AS_STRNCMP
	POP (%ebp)
#endif
	xorl %eax, %eax
	ret

#ifdef USE_AS_STRNCMP
	cfi_restore_state
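/*
 * strncmp-only slow path for counts below 16: compare byte by byte,
 * checking the count before each step.  The cfi_restore_state above
 * rewinds the unwind information to the state remembered just after
 * the PUSH (%ebp) in the prologue, since this code is reached before
 * the other registers are pushed.
 */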
	.p2align 4
L(less16bytes_sncmp):
	test %ebp, %ebp
	jz L(eq)

	movzbl (%eax), %ecx
	cmpb %cl, (%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $1, %ebp
	je L(eq)

	movzbl 1(%eax), %ecx
	cmpb %cl, 1(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $2, %ebp
	je L(eq)

	movzbl 2(%eax), %ecx
	cmpb %cl, 2(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $3, %ebp
	je L(eq)

	movzbl 3(%eax), %ecx
	cmpb %cl, 3(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $4, %ebp
	je L(eq)

	movzbl 4(%eax), %ecx
	cmpb %cl, 4(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $5, %ebp
	je L(eq)

	movzbl 5(%eax), %ecx
	cmpb %cl, 5(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $6, %ebp
	je L(eq)

	movzbl 6(%eax), %ecx
	cmpb %cl, 6(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $7, %ebp
	je L(eq)

	movzbl 7(%eax), %ecx
	cmpb %cl, 7(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $8, %ebp
	je L(eq)

	movzbl 8(%eax), %ecx
	cmpb %cl, 8(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $9, %ebp
	je L(eq)

	movzbl 9(%eax), %ecx
	cmpb %cl, 9(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $10, %ebp
	je L(eq)

	movzbl 10(%eax), %ecx
	cmpb %cl, 10(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $11, %ebp
	je L(eq)

	movzbl 11(%eax), %ecx
	cmpb %cl, 11(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $12, %ebp
	je L(eq)

	movzbl 12(%eax), %ecx
	cmpb %cl, 12(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $13, %ebp
	je L(eq)

	movzbl 13(%eax), %ecx
	cmpb %cl, 13(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $14, %ebp
	je L(eq)

	movzbl 14(%eax), %ecx
	cmpb %cl, 14(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	cmpl $15, %ebp
	je L(eq)

	movzbl 15(%eax), %ecx
	cmpb %cl, 15(%edx)
	jne L(neq)
	test %cl, %cl
	je L(eq)

	POP (%ebp)
	xor %eax, %eax
	ret
#endif

END (STRCMP)