| /* | 
 | Copyright (c) 2010, Intel Corporation | 
 | All rights reserved. | 
 |  | 
 | Redistribution and use in source and binary forms, with or without | 
 | modification, are permitted provided that the following conditions are met: | 
 |  | 
 |     * Redistributions of source code must retain the above copyright notice, | 
 |     * this list of conditions and the following disclaimer. | 
 |  | 
 |     * Redistributions in binary form must reproduce the above copyright notice, | 
 |     * this list of conditions and the following disclaimer in the documentation | 
 |     * and/or other materials provided with the distribution. | 
 |  | 
 |     * Neither the name of Intel Corporation nor the names of its contributors | 
 |     * may be used to endorse or promote products derived from this software | 
 |     * without specific prior written permission. | 
 |  | 
 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | 
 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | 
 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | 
 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | 
 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 
 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 | */ | 
 |  | 
 | #ifndef L | 
 | # define L(label)	.L##label | 
 | #endif | 
 |  | 
 | #ifndef cfi_startproc | 
 | # define cfi_startproc			.cfi_startproc | 
 | #endif | 
 |  | 
 | #ifndef cfi_endproc | 
 | # define cfi_endproc			.cfi_endproc | 
 | #endif | 
 |  | 
 | #ifndef cfi_rel_offset | 
 | # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off | 
 | #endif | 
 |  | 
 | #ifndef cfi_restore | 
 | # define cfi_restore(reg)		.cfi_restore reg | 
 | #endif | 
 |  | 
 | #ifndef cfi_adjust_cfa_offset | 
 | # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off | 
 | #endif | 
 |  | 
 | #ifndef cfi_remember_state | 
 | # define cfi_remember_state		.cfi_remember_state | 
 | #endif | 
 |  | 
 | #ifndef cfi_restore_state | 
 | # define cfi_restore_state		.cfi_restore_state | 
 | #endif | 
 |  | 
 | #ifndef ENTRY | 
 | # define ENTRY(name)			\ | 
 | 	.type name,  @function; 	\ | 
 | 	.globl name;			\ | 
 | 	.p2align 4;			\ | 
 | name:					\ | 
 | 	cfi_startproc | 
 | #endif | 
 |  | 
 | #ifndef END | 
 | # define END(name)			\ | 
 | 	cfi_endproc;			\ | 
 | 	.size name, .-name | 
 | #endif | 
 |  | 
 | #define CFI_PUSH(REG)						\ | 
 |   cfi_adjust_cfa_offset (4);					\ | 
 |   cfi_rel_offset (REG, 0) | 
 |  | 
 | #define CFI_POP(REG)						\ | 
 |   cfi_adjust_cfa_offset (-4);					\ | 
 |   cfi_restore (REG) | 
 |  | 
 | #define PUSH(REG)	pushl REG; CFI_PUSH (REG) | 
 | #define POP(REG)	popl REG; CFI_POP (REG) | 
 |  | 
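/*
 * Stack offsets of the incoming arguments.  In the strncmp build an
 * extra 4 bytes are added because %ebp (used as the byte counter) is
 * pushed on entry, moving the arguments further up the stack.
 */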
 | #ifndef USE_AS_STRNCMP | 
 | # define STR1		4 | 
 | # define STR2		STR1+4 | 
 | # define RETURN		ret | 
 |  | 
 | # define UPDATE_STRNCMP_COUNTER | 
 | #else | 
 | # define STR1		8 | 
 | # define STR2		STR1+4 | 
 | # define CNT		STR2+4 | 
 | # define RETURN		POP (%ebp); ret; CFI_PUSH (%ebp) | 
 |  | 
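/*
 * Charge the bytes covered by the first, partially aligned vector
 * (16 - %ecx of them, %ecx being the offset within 16 bytes) against
 * the strncmp byte budget in %ebp; if the budget is already covered,
 * take the equal-return path via L(more8byteseq).
 */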
# define UPDATE_STRNCMP_COUNTER				\
	/* calculate the number of bytes left to compare */	\
	mov	$16, %esi;				\
	sub	%ecx, %esi;				\
	cmpl	%esi, %ebp;				\
	jbe	L(more8byteseq);			\
	sub	%esi, %ebp
 | #endif | 
 |  | 
 | #ifndef STRCMP | 
 | # define STRCMP strcmp_ssse3 | 
 | #endif | 
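
/*
 * Overview (informal sketch): the strings are compared 16 bytes at a
 * time.  For each pair of chunks the code computes, roughly,
 *
 *	zero  = bytes of the %eax chunk that are NUL	(pcmpeqb vs. %xmm0)
 *	equal = bytes where the two chunks match	(pcmpeqb)
 *	mask  = pmovmskb (equal - zero)
 *
 * so mask is 0xffff exactly when all 16 bytes match and none of them is
 * NUL; otherwise the lowest clear bit of mask gives the position of the
 * first difference or terminator.  When the two pointers disagree in
 * their offset within 16 bytes, the %edx-side data is realigned on the
 * fly with palignr (cases ashr_1 ... ashr_15 below).  With
 * USE_AS_STRNCMP defined the same code implements the counted variant,
 * keeping the remaining byte count in %ebp.
 */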
 |  | 
 | 	.section .text.ssse3,"ax",@progbits | 
 | ENTRY (STRCMP) | 
 | #ifdef USE_AS_STRNCMP | 
 | 	PUSH	(%ebp) | 
 | 	cfi_remember_state | 
 | #endif | 
 | 	movl	STR1(%esp), %edx | 
 | 	movl	STR2(%esp), %eax | 
 | #ifdef USE_AS_STRNCMP | 
 | 	movl	CNT(%esp), %ebp | 
 | 	cmpl	$16, %ebp | 
 | 	jb	L(less16bytes_sncmp) | 
 | 	jmp	L(more16bytes) | 
 | #endif | 
 |  | 
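	/* Plain strcmp: compare the first 8 bytes one at a time, returning
	   as soon as a difference or a NUL terminator is seen.  */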
 | 	movzbl	(%eax), %ecx | 
 | 	cmpb	%cl, (%edx) | 
 | 	jne	L(neq) | 
 | 	cmpl	$0, %ecx | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	1(%eax), %ecx | 
 | 	cmpb	%cl, 1(%edx) | 
 | 	jne	L(neq) | 
 | 	cmpl	$0, %ecx | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	2(%eax), %ecx | 
 | 	cmpb	%cl, 2(%edx) | 
 | 	jne	L(neq) | 
 | 	cmpl	$0, %ecx | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	3(%eax), %ecx | 
 | 	cmpb	%cl, 3(%edx) | 
 | 	jne	L(neq) | 
 | 	cmpl	$0, %ecx | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	4(%eax), %ecx | 
 | 	cmpb	%cl, 4(%edx) | 
 | 	jne	L(neq) | 
 | 	cmpl	$0, %ecx | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	5(%eax), %ecx | 
 | 	cmpb	%cl, 5(%edx) | 
 | 	jne	L(neq) | 
 | 	cmpl	$0, %ecx | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	6(%eax), %ecx | 
 | 	cmpb	%cl, 6(%edx) | 
 | 	jne	L(neq) | 
 | 	cmpl	$0, %ecx | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	7(%eax), %ecx | 
 | 	cmpb	%cl, 7(%edx) | 
 | 	jne	L(neq) | 
 | 	cmpl	$0, %ecx | 
 | 	je	L(eq) | 
 |  | 
 | 	add	$8, %edx | 
 | 	add	$8, %eax | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$8, %ebp | 
 | 	lea	-8(%ebp), %ebp | 
 | 	je	L(eq) | 
 | L(more16bytes): | 
 | #endif | 
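	/* If either string is within 16 bytes of the end of a 4K page
	   (page offset above 0xff0), a 16-byte load here could touch the
	   next page, so take the careful cross-page path.  Otherwise load
	   16 bytes of each string as two 8-byte halves (no alignment
	   assumed) and test them with the pcmpeqb/psubb/pmovmskb idiom
	   described at the top of the file.  */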
 | 	movl	%edx, %ecx | 
 | 	and	$0xfff, %ecx | 
 | 	cmpl	$0xff0, %ecx | 
 | 	ja	L(crosspage) | 
 | 	mov	%eax, %ecx | 
 | 	and	$0xfff, %ecx | 
 | 	cmpl	$0xff0, %ecx | 
 | 	ja	L(crosspage) | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movlpd	(%eax), %xmm1 | 
 | 	movlpd	(%edx), %xmm2 | 
 | 	movhpd	8(%eax), %xmm1 | 
 | 	movhpd	8(%edx), %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %ecx | 
 | 	sub	$0xffff, %ecx | 
 | 	jnz	L(less16bytes) | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 | 	add	$16, %eax | 
 | 	add	$16, %edx | 
 |  | 
 | L(crosspage): | 
 |  | 
 | 	PUSH	(%ebx) | 
 | 	PUSH	(%edi) | 
 | 	PUSH	(%esi) | 
 |  | 
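/*
 * Take the low 4 bits of each pointer, then round both pointers down to
 * 16-byte alignment (the xor below).  If the offsets differ, the string
 * with the larger offset is kept in %eax, swapping the two and noting
 * the swap in bit 0x20 of %ebx when needed; %edi = 15 + offset(%edx) -
 * offset(%eax) then selects one of the ashr_1 ... ashr_15 realignment
 * loops.  For example, offsets 3 (%edx) and 5 (%eax) give %edi = 13 and
 * dispatch to ashr_14.  Equal offsets go to ashr_0.
 */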
 | 	movl	%edx, %edi | 
 | 	movl	%eax, %ecx | 
 | 	and	$0xf, %ecx | 
 | 	and	$0xf, %edi | 
 | 	xor	%ecx, %eax | 
 | 	xor	%edi, %edx | 
 | 	xor	%ebx, %ebx | 
 | 	cmpl	%edi, %ecx | 
 | 	je	L(ashr_0) | 
 | 	ja	L(bigger) | 
 | 	or	$0x20, %ebx | 
 | 	xchg	%edx, %eax | 
 | 	xchg	%ecx, %edi | 
 | L(bigger): | 
 | 	lea	15(%edi), %edi | 
 | 	sub	%ecx, %edi | 
 | 	cmpl	$8, %edi | 
 | 	jle	L(ashr_less_8) | 
 | 	cmpl	$14, %edi | 
 | 	je	L(ashr_15) | 
 | 	cmpl	$13, %edi | 
 | 	je	L(ashr_14) | 
 | 	cmpl	$12, %edi | 
 | 	je	L(ashr_13) | 
 | 	cmpl	$11, %edi | 
 | 	je	L(ashr_12) | 
 | 	cmpl	$10, %edi | 
 | 	je	L(ashr_11) | 
 | 	cmpl	$9, %edi | 
 | 	je	L(ashr_10) | 
 | L(ashr_less_8): | 
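	/* Reached by the "jle" above with the flags of "cmpl $8, %edi"
	   still live: equality means %edi == 8, the ashr_9 case.  (The
	   fall-through path arrives with ZF clear, so the branch is not
	   taken there.)  */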
 | 	je	L(ashr_9) | 
 | 	cmpl	$7, %edi | 
 | 	je	L(ashr_8) | 
 | 	cmpl	$6, %edi | 
 | 	je	L(ashr_7) | 
 | 	cmpl	$5, %edi | 
 | 	je	L(ashr_6) | 
 | 	cmpl	$4, %edi | 
 | 	je	L(ashr_5) | 
 | 	cmpl	$3, %edi | 
 | 	je	L(ashr_4) | 
 | 	cmpl	$2, %edi | 
 | 	je	L(ashr_3) | 
 | 	cmpl	$1, %edi | 
 | 	je	L(ashr_2) | 
 | 	cmpl	$0, %edi | 
 | 	je	L(ashr_1) | 
 |  | 
/*
 * The following cases will be handled by ashr_0
 * ecx (offset of %eax)  edi (offset of %edx)  relative offset   corresponding case
 *        n(0~15)             n(0~15)          15(15 + n - n)         ashr_0
 */
 | 	.p2align 4 | 
 | L(ashr_0): | 
 | 	mov	$0xffff, %esi | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	(%edx), %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	mov	%ecx, %edi | 
 | 	jne	L(less32bytes) | 
 | 	UPDATE_STRNCMP_COUNTER | 
 | 	mov	$0x10, %ebx | 
 | 	mov	$0x10, %ecx | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	.p2align 4 | 
 | L(loop_ashr_0): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	jmp	L(loop_ashr_0) | 
 |  | 
/*
 * The following cases will be handled by ashr_1
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(15)               n - 15            0(15 +(n-15) - n)        ashr_1
 */
 | 	.p2align 4 | 
 | L(ashr_1): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$15, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-15(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$1, %ebx | 
 | 	lea	1(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_1): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_1) | 
 |  | 
 | L(gobble_ashr_1): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$1, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 |  | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_1) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$1, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_1) | 
 |  | 
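/*
 * The %edx-side window is about to run into a new 4K page.  Before any
 * data is read from that page, check whether the terminator already
 * sits in the unconsumed tail of %xmm3 (for this shift, mask bits
 * 1..15); if so, or if the strncmp byte budget ends first, finish
 * through the exittail path instead of loading further.
 */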
 | 	.p2align 4 | 
 | L(nibble_ashr_1): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xfffe, %esi | 
 | 	jnz	L(ashr_1_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$15, %ebp | 
 | 	jbe	L(ashr_1_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_1) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_1_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$1, %xmm0 | 
 | 	psrldq	$1, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_2
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(14~15)            n - 14            1(15 +(n-14) - n)        ashr_2
 */
 | 	.p2align 4 | 
 | L(ashr_2): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$14, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-14(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$2, %ebx | 
 | 	lea	2(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_2): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_2) | 
 |  | 
 | L(gobble_ashr_2): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$2, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_2) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$2, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_2) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_2): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xfffc, %esi | 
 | 	jnz	L(ashr_2_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$14, %ebp | 
 | 	jbe	L(ashr_2_exittail) | 
 | #endif | 
 |  | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_2) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_2_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$2, %xmm0 | 
 | 	psrldq	$2, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_3
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(13~15)            n - 13            2(15 +(n-13) - n)        ashr_3
 */
 | 	.p2align 4 | 
 | L(ashr_3): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$13, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-13(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$3, %ebx | 
 | 	lea	3(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_3): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_3) | 
 |  | 
 | L(gobble_ashr_3): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$3, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_3) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$3, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_3) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_3): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xfff8, %esi | 
 | 	jnz	L(ashr_3_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$13, %ebp | 
 | 	jbe	L(ashr_3_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_3) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_3_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$3, %xmm0 | 
 | 	psrldq	$3, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_4
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(12~15)            n - 12            3(15 +(n-12) - n)        ashr_4
 */
 | 	.p2align 4 | 
 | L(ashr_4): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$12, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-12(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$4, %ebx | 
 | 	lea	4(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_4): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_4) | 
 |  | 
 | L(gobble_ashr_4): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$4, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 |  | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_4) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$4, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 |  | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_4) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_4): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xfff0, %esi | 
 | 	jnz	L(ashr_4_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$12, %ebp | 
 | 	jbe	L(ashr_4_exittail) | 
 | #endif | 
 |  | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_4) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_4_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$4, %xmm0 | 
 | 	psrldq	$4, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_5
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(11~15)            n - 11            4(15 +(n-11) - n)        ashr_5
 */
 | 	.p2align 4 | 
 | L(ashr_5): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$11, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-11(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$5, %ebx | 
 | 	lea	5(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_5): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_5) | 
 |  | 
 | L(gobble_ashr_5): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$5, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_5) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$5, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_5) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_5): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xffe0, %esi | 
 | 	jnz	L(ashr_5_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$11, %ebp | 
 | 	jbe	L(ashr_5_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_5) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_5_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$5, %xmm0 | 
 | 	psrldq	$5, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_6
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(10~15)            n - 10            5(15 +(n-10) - n)        ashr_6
 */
 |  | 
 | 	.p2align 4 | 
 | L(ashr_6): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$10, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-10(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$6, %ebx | 
 | 	lea	6(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_6): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_6) | 
 |  | 
 | L(gobble_ashr_6): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$6, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 |  | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_6) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$6, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 |  | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_6) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_6): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xffc0, %esi | 
 | 	jnz	L(ashr_6_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$10, %ebp | 
 | 	jbe	L(ashr_6_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_6) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_6_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$6, %xmm0 | 
 | 	psrldq	$6, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_7
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(9~15)             n - 9             6(15 +(n-9) - n)         ashr_7
 */
 |  | 
 | 	.p2align 4 | 
 | L(ashr_7): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$9, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-9(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$7, %ebx | 
 | 	lea	8(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_7): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_7) | 
 |  | 
 | L(gobble_ashr_7): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$7, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 |  | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_7) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$7, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 |  | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_7) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_7): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xff80, %esi | 
 | 	jnz	L(ashr_7_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$9, %ebp | 
 | 	jbe	L(ashr_7_exittail) | 
#endif
	pxor	%xmm0, %xmm0
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_7) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_7_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$7, %xmm0 | 
 | 	psrldq	$7, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_8
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(8~15)             n - 8             7(15 +(n-8) - n)         ashr_8
 */
 | 	.p2align 4 | 
 | L(ashr_8): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$8, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-8(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$8, %ebx | 
 | 	lea	8(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_8): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_8) | 
 |  | 
 | L(gobble_ashr_8): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$8, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_8) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$8, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_8) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_8): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xff00, %esi | 
 | 	jnz	L(ashr_8_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$8, %ebp | 
 | 	jbe	L(ashr_8_exittail) | 
#endif
	pxor	%xmm0, %xmm0
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_8) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_8_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$8, %xmm0 | 
 | 	psrldq	$8, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_9
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(7~15)             n - 7             8(15 +(n-7) - n)         ashr_9
 */
 | 	.p2align 4 | 
 | L(ashr_9): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$7, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-7(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$9, %ebx | 
 | 	lea	9(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_9): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_9) | 
 |  | 
 | L(gobble_ashr_9): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$9, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_9) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$9, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_9) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_9): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xfe00, %esi | 
 | 	jnz	L(ashr_9_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$7, %ebp | 
 | 	jbe	L(ashr_9_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_9) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_9_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$9, %xmm0 | 
 | 	psrldq	$9, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_10
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(6~15)             n - 6             9(15 +(n-6) - n)         ashr_10
 */
 | 	.p2align 4 | 
 | L(ashr_10): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$6, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-6(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$10, %ebx | 
 | 	lea	10(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_10): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_10) | 
 |  | 
 | L(gobble_ashr_10): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$10, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_10) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$10, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_10) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_10): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xfc00, %esi | 
 | 	jnz	L(ashr_10_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$6, %ebp | 
 | 	jbe	L(ashr_10_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_10) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_10_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$10, %xmm0 | 
 | 	psrldq	$10, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_11
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(5~15)             n - 5             10(15 +(n-5) - n)        ashr_11
 */
 | 	.p2align 4 | 
 | L(ashr_11): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$5, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-5(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$11, %ebx | 
 | 	lea	11(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_11): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_11) | 
 |  | 
 | L(gobble_ashr_11): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$11, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_11) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$11, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_11) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_11): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xf800, %esi | 
 | 	jnz	L(ashr_11_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$5, %ebp | 
 | 	jbe	L(ashr_11_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_11) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_11_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$11, %xmm0 | 
 | 	psrldq	$11, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_12
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(4~15)             n - 4             11(15 +(n-4) - n)        ashr_12
 */
 | 	.p2align 4 | 
 | L(ashr_12): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$4, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-4(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$12, %ebx | 
 | 	lea	12(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_12): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_12) | 
 |  | 
 | L(gobble_ashr_12): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$12, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 |  | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_12) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$12, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_12) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_12): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xf000, %esi | 
 | 	jnz	L(ashr_12_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$4, %ebp | 
 | 	jbe	L(ashr_12_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_12) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_12_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$12, %xmm0 | 
 | 	psrldq	$12, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_13
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(3~15)             n - 3             12(15 +(n-3) - n)        ashr_13
 */
 | 	.p2align 4 | 
 | L(ashr_13): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$3, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-3(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$13, %ebx | 
 | 	lea	13(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_13): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_13) | 
 |  | 
 | L(gobble_ashr_13): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$13, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_13) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$13, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_13) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_13): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xe000, %esi | 
 | 	jnz	L(ashr_13_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$3, %ebp | 
 | 	jbe	L(ashr_13_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_13) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_13_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$13, %xmm0 | 
 | 	psrldq	$13, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_14
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(2~15)             n - 2             13(15 +(n-2) - n)        ashr_14
 */
 | 	.p2align 4 | 
 | L(ashr_14): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$2, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-2(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$14, %ebx | 
 | 	lea	14(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_14): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_14) | 
 |  | 
 | L(gobble_ashr_14): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$14, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_14) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$14, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_14) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_14): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0xc000, %esi | 
 | 	jnz	L(ashr_14_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$2, %ebp | 
 | 	jbe	L(ashr_14_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_14) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_14_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$14, %xmm0 | 
 | 	psrldq	$14, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
/*
 * The following cases will be handled by ashr_15
 * ecx (offset of %eax)  edi (offset of %edx)   relative offset   corresponding case
 *        n(1~15)             n - 1             14(15 +(n-1) - n)        ashr_15
 */
 |  | 
 | 	.p2align 4 | 
 | L(ashr_15): | 
 | 	mov	$0xffff, %esi | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	movdqa	(%edx), %xmm2 | 
 | 	movdqa	(%eax), %xmm1 | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pslldq	$1, %xmm2 | 
 | 	pcmpeqb	%xmm1, %xmm2 | 
 | 	psubb	%xmm0, %xmm2 | 
 | 	pmovmskb %xmm2, %edi | 
 | 	shr	%cl, %esi | 
 | 	shr	%cl, %edi | 
 | 	sub	%edi, %esi | 
 | 	lea	-1(%ecx), %edi | 
 | 	jnz	L(less32bytes) | 
 |  | 
 | 	UPDATE_STRNCMP_COUNTER | 
 |  | 
 | 	movdqa	(%edx), %xmm3 | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	mov	$16, %ecx | 
 | 	or	$15, %ebx | 
 | 	lea	15(%edx), %edi | 
 | 	and	$0xfff, %edi | 
 | 	sub	$0x1000, %edi | 
 |  | 
 | 	.p2align 4 | 
 | L(loop_ashr_15): | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_15) | 
 |  | 
 | L(gobble_ashr_15): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$15, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 |  | 
 | 	add	$16, %edi | 
 | 	jg	L(nibble_ashr_15) | 
 |  | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	movdqa	(%edx, %ecx), %xmm2 | 
 | 	movdqa	%xmm2, %xmm4 | 
 |  | 
 | 	palignr	$15, %xmm3, %xmm2 | 
 |  | 
 | 	pcmpeqb	%xmm1, %xmm0 | 
 | 	pcmpeqb	%xmm2, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	sub	$0xffff, %esi | 
 | 	jnz	L(exit) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$16, %ebp | 
 | 	lea	-16(%ebp), %ebp | 
 | 	jbe	L(more8byteseq) | 
 | #endif | 
 | 	add	$16, %ecx | 
 | 	movdqa	%xmm4, %xmm3 | 
 | 	jmp	L(loop_ashr_15) | 
 |  | 
 | 	.p2align 4 | 
 | L(nibble_ashr_15): | 
 | 	pcmpeqb	%xmm3, %xmm0 | 
 | 	pmovmskb %xmm0, %esi | 
 | 	test	$0x8000, %esi | 
 | 	jnz	L(ashr_15_exittail) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$1, %ebp | 
 | 	jbe	L(ashr_15_exittail) | 
 | #endif | 
 | 	pxor	%xmm0, %xmm0 | 
 | 	sub	$0x1000, %edi | 
 | 	jmp	L(gobble_ashr_15) | 
 |  | 
 | 	.p2align 4 | 
 | L(ashr_15_exittail): | 
 | 	movdqa	(%eax, %ecx), %xmm1 | 
 | 	psrldq	$15, %xmm0 | 
 | 	psrldq	$15, %xmm3 | 
 | 	jmp	L(aftertail) | 
 |  | 
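/*
 * Common tail of the realignment loops.  L(aftertail) finishes the
 * compare of the last partial chunk; from L(exit) on, the lowest set
 * bit of %esi marks the first byte pair that differs or ends the
 * strings, %ecx is the loop offset, the low five bits of %ebx hold the
 * realignment shift (0x10 for the aligned ashr_0 case) and bit 0x20 of
 * %ebx records whether %eax and %edx were swapped on the crosspage
 * path.  The pointers are advanced to the offending 16-byte chunk and
 * the byte-wise tail below produces the return value.
 */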
 | 	.p2align 4 | 
 | L(aftertail): | 
 | 	pcmpeqb	%xmm3, %xmm1 | 
 | 	psubb	%xmm0, %xmm1 | 
 | 	pmovmskb %xmm1, %esi | 
 | 	not	%esi | 
 | L(exit): | 
 | 	mov	%ebx, %edi | 
 | 	and	$0x1f, %edi | 
 | 	lea	-16(%edi, %ecx), %edi | 
 | L(less32bytes): | 
 | 	add	%edi, %edx | 
 | 	add	%ecx, %eax | 
 | 	test	$0x20, %ebx | 
 | 	jz	L(ret2) | 
 | 	xchg	%eax, %edx | 
 |  | 
 | 	.p2align 4 | 
 | L(ret2): | 
 | 	mov	%esi, %ecx | 
 | 	POP	(%esi) | 
 | 	POP	(%edi) | 
 | 	POP	(%ebx) | 
 | L(less16bytes): | 
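	/* The lowest set bit of %ecx (bit n corresponds to byte n of the
	   current 16-byte chunk, %ch covering bytes 8..15) marks the first
	   byte pair that differs or terminates the strings; return the
	   difference of those two bytes.  */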
 | 	test	%cl, %cl | 
 | 	jz	L(2next_8_bytes) | 
 |  | 
 | 	test	$0x01, %cl | 
 | 	jnz	L(Byte0) | 
 |  | 
 | 	test	$0x02, %cl | 
 | 	jnz	L(Byte1) | 
 |  | 
 | 	test	$0x04, %cl | 
 | 	jnz	L(Byte2) | 
 |  | 
 | 	test	$0x08, %cl | 
 | 	jnz	L(Byte3) | 
 |  | 
 | 	test	$0x10, %cl | 
 | 	jnz	L(Byte4) | 
 |  | 
 | 	test	$0x20, %cl | 
 | 	jnz	L(Byte5) | 
 |  | 
 | 	test	$0x40, %cl | 
 | 	jnz	L(Byte6) | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$7, %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 |  | 
 | 	movzbl	7(%eax), %ecx | 
 | 	movzbl	7(%edx), %eax | 
 |  | 
 | 	sub	%ecx, %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(Byte0): | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$0, %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 | 	movzbl	(%eax), %ecx | 
 | 	movzbl	(%edx), %eax | 
 |  | 
 | 	sub	%ecx, %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(Byte1): | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$1, %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 | 	movzbl	1(%eax), %ecx | 
 | 	movzbl	1(%edx), %eax | 
 |  | 
 | 	sub	%ecx, %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(Byte2): | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$2, %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 | 	movzbl	2(%eax), %ecx | 
 | 	movzbl	2(%edx), %eax | 
 |  | 
 | 	sub	%ecx, %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(Byte3): | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$3, %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 | 	movzbl	3(%eax), %ecx | 
 | 	movzbl	3(%edx), %eax | 
 |  | 
 | 	sub	%ecx, %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(Byte4): | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$4, %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 | 	movzbl	4(%eax), %ecx | 
 | 	movzbl	4(%edx), %eax | 
 |  | 
 | 	sub	%ecx, %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(Byte5): | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$5, %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 | 	movzbl	5(%eax), %ecx | 
 | 	movzbl	5(%edx), %eax | 
 |  | 
 | 	sub	%ecx, %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(Byte6): | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$6, %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 | 	movzbl	6(%eax), %ecx | 
 | 	movzbl	6(%edx), %eax | 
 |  | 
 | 	sub	%ecx, %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(2next_8_bytes): | 
 | 	add	$8, %eax | 
 | 	add	$8, %edx | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$8, %ebp | 
 | 	lea	-8(%ebp), %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 |  | 
 | 	test	$0x01, %ch | 
 | 	jnz	L(Byte0) | 
 |  | 
 | 	test	$0x02, %ch | 
 | 	jnz	L(Byte1) | 
 |  | 
 | 	test	$0x04, %ch | 
 | 	jnz	L(Byte2) | 
 |  | 
 | 	test	$0x08, %ch | 
 | 	jnz	L(Byte3) | 
 |  | 
 | 	test	$0x10, %ch | 
 | 	jnz	L(Byte4) | 
 |  | 
 | 	test	$0x20, %ch | 
 | 	jnz	L(Byte5) | 
 |  | 
 | 	test	$0x40, %ch | 
 | 	jnz	L(Byte6) | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cmpl	$7, %ebp | 
 | 	jbe	L(eq) | 
 | #endif | 
 | 	movzbl	7(%eax), %ecx | 
 | 	movzbl	7(%edx), %eax | 
 |  | 
 | 	sub	%ecx, %eax | 
 | 	RETURN | 
 |  | 
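/* One of the cmpb compares above found a difference and its flags are
   still live: "above" means the byte from the %edx string (s1) was the
   larger one, so return +1, otherwise -1.  */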
 | 	.p2align 4 | 
 | L(neq): | 
 | 	mov	$1, %eax | 
 | 	ja	L(neq_bigger) | 
 | 	neg	%eax | 
 | L(neq_bigger): | 
 | 	RETURN | 
 |  | 
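/* strncmp only: the byte budget in %ebp ran out while all bytes
   compared so far were equal; restore the callee-saved registers and
   return 0 through L(eq).  */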
 | #ifdef USE_AS_STRNCMP | 
 | 	.p2align 4 | 
 | L(more8byteseq): | 
 | 	POP	(%esi) | 
 | 	POP	(%edi) | 
 | 	POP	(%ebx) | 
 | #endif | 
 |  | 
 | L(eq): | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	POP	(%ebp) | 
 | #endif | 
 | 	xorl	%eax, %eax | 
 | 	ret | 
 |  | 
 | #ifdef USE_AS_STRNCMP | 
 | 	cfi_restore_state | 
 |  | 
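	/* strncmp with a count below 16: compare at most %ebp bytes one at
	   a time, stopping early at the first mismatch or NUL.  */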
 | 	.p2align 4 | 
 | L(less16bytes_sncmp): | 
 | 	test	%ebp, %ebp | 
 | 	jz	L(eq) | 
 |  | 
 | 	movzbl	(%eax), %ecx | 
 | 	cmpb	%cl, (%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$1, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	1(%eax), %ecx | 
 | 	cmpb	%cl, 1(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$2, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	2(%eax), %ecx | 
 | 	cmpb	%cl, 2(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$3, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	3(%eax), %ecx | 
 | 	cmpb	%cl, 3(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$4, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	4(%eax), %ecx | 
 | 	cmpb	%cl, 4(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$5, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	5(%eax), %ecx | 
 | 	cmpb	%cl, 5(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$6, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	6(%eax), %ecx | 
 | 	cmpb	%cl, 6(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$7, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	7(%eax), %ecx | 
 | 	cmpb	%cl, 7(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 |  | 
 | 	cmpl	$8, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	8(%eax), %ecx | 
 | 	cmpb	%cl, 8(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$9, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	9(%eax), %ecx | 
 | 	cmpb	%cl, 9(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$10, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	10(%eax), %ecx | 
 | 	cmpb	%cl, 10(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$11, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	11(%eax), %ecx | 
 | 	cmpb	%cl, 11(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 |  | 
 | 	cmpl	$12, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	12(%eax), %ecx | 
 | 	cmpb	%cl, 12(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$13, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	13(%eax), %ecx | 
 | 	cmpb	%cl, 13(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$14, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	14(%eax), %ecx | 
 | 	cmpb	%cl, 14(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	cmpl	$15, %ebp | 
 | 	je	L(eq) | 
 |  | 
 | 	movzbl	15(%eax), %ecx | 
 | 	cmpb	%cl, 15(%edx) | 
 | 	jne	L(neq) | 
 | 	test	%cl, %cl | 
 | 	je	L(eq) | 
 |  | 
 | 	POP	(%ebp) | 
 | 	xor	%eax, %eax | 
 | 	ret | 
 | #endif | 
 |  | 
 | END (STRCMP) |