| /* | 
 | Copyright (c) 2011 Intel Corporation | 
 | All rights reserved. | 
 |  | 
 | Redistribution and use in source and binary forms, with or without | 
 | modification, are permitted provided that the following conditions are met: | 
 |  | 
 |     * Redistributions of source code must retain the above copyright notice, | 
 |     * this list of conditions and the following disclaimer. | 
 |  | 
 |     * Redistributions in binary form must reproduce the above copyright notice, | 
 |     * this list of conditions and the following disclaimer in the documentation | 
 |     * and/or other materials provided with the distribution. | 
 |  | 
 |     * Neither the name of Intel Corporation nor the names of its contributors | 
 |     * may be used to endorse or promote products derived from this software | 
 |     * without specific prior written permission. | 
 |  | 
 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | 
 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | 
 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | 
 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | 
 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | 
 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 
 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 | */ | 
 |  | 
 | /* Fallback definitions: each macro below is defined only if whatever | 
 |    precedes this point has not already supplied it.  */ | 
 |  | 
 | /* L(label): paste the ".L" prefix onto a label name.  */ | 
 | #ifndef L | 
 | # define L(label)	.L##label | 
 | #endif | 
 |  | 
 | /* Thin wrappers that expand to the assembler's .cfi_* directives.  */ | 
 | #ifndef cfi_startproc | 
 | # define cfi_startproc	.cfi_startproc | 
 | #endif | 
 |  | 
 | #ifndef cfi_endproc | 
 | # define cfi_endproc	.cfi_endproc | 
 | #endif | 
 |  | 
 | #ifndef cfi_rel_offset | 
 | # define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off | 
 | #endif | 
 |  | 
 | #ifndef cfi_restore | 
 | # define cfi_restore(reg)	.cfi_restore reg | 
 | #endif | 
 |  | 
 | #ifndef cfi_adjust_cfa_offset | 
 | # define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off | 
 | #endif | 
 |  | 
 | /* ENTRY(name): open a function -- mark NAME as a function symbol, make it | 
 |    global, align it with .p2align 4, emit its label and start a CFI | 
 |    region.  */ | 
 | #ifndef ENTRY | 
 | # define ENTRY(name)	\ | 
 | 	.type name, @function;	\ | 
 | 	.globl name;	\ | 
 | 	.p2align 4;	\ | 
 | name:	\ | 
 | 	cfi_startproc | 
 | #endif | 
 |  | 
 | /* END(name): close the CFI region opened by ENTRY and record the symbol | 
 |    size as the distance from NAME to the current location.  */ | 
 | #ifndef END | 
 | # define END(name)	\ | 
 | 	cfi_endproc;	\ | 
 | 	.size name, .-name | 
 | #endif | 
 |  | 
 | /* CFI_PUSH(REG): unwind-info only.  Records that the CFA offset grew by 4 | 
 |    and that REG is saved at offset 0 from the current stack pointer.  Emits | 
 |    no instruction.  */ | 
 | #define CFI_PUSH(REG)	\ | 
 | 	cfi_adjust_cfa_offset (4);	\ | 
 | 	cfi_rel_offset (REG, 0) | 
 |  | 
 | /* CFI_POP(REG): unwind-info only.  Undoes CFI_PUSH: the CFA offset shrinks | 
 |    by 4 and REG is marked as restored.  Emits no instruction.  */ | 
 | #define CFI_POP(REG)	\ | 
 | 	cfi_adjust_cfa_offset (-4);	\ | 
 | 	cfi_restore (REG) | 
 |  | 
 | /* PUSH/POP: the real pushl/popl paired with the matching CFI update.  */ | 
 | #define PUSH(REG)	pushl REG;	CFI_PUSH (REG) | 
 | #define POP(REG)	popl REG;	CFI_POP (REG) | 
 |  | 
 | /* PARMS: offset from %esp to the first stack argument once ENTRANCE has | 
 |    pushed %edi (4 bytes of return address + 4 bytes of saved %edi).  */ | 
 | #define PARMS  8 | 
 | /* ENTRANCE: function prologue -- save %edi.  */ | 
 | #define ENTRANCE PUSH(%edi); | 
 | /* RETURN: restore %edi and return.  The trailing CFI_PUSH emits no code; it | 
 |    puts the unwind state back to "%edi is pushed" for whatever code follows | 
 |    the ret in address order.  */ | 
 | #define RETURN  POP(%edi);	ret;	CFI_PUSH(%edi); | 
 |  | 
 | /* Stack offsets of the first and second arguments.  */ | 
 | #define STR1  PARMS | 
 | #define STR2  STR1+4 | 
 |  | 
 | 	.text | 
 | /* wcsrchr -- i386, SSE2, AT&T syntax. | 
 |    C prototype (per the standard function this symbol names): | 
 |        wchar_t *wcsrchr (const wchar_t *s, wchar_t c); | 
 |  | 
 |    In:    STR1(%esp) = s, STR2(%esp) = c (stack arguments, read after | 
 |           ENTRANCE has pushed %edi). | 
 |    Out:   %eax = address of the last 32-bit element equal to c that lies at | 
 |           or before the first zero element of s, or 0 if there is none. | 
 |    Saved: %edi (always) and %esi (only on paths that enter L(loop)) are | 
 |           pushed and popped again before returning. | 
 |    Uses:  %eax, %ecx, %edx, %xmm0-%xmm5, flags. | 
 |  | 
 |    Mask layout: pcmpeqd sets all four bytes of an equal dword and pmovmskb | 
 |    collects one bit per byte, so in every 16-bit mask below bits 0-3 belong | 
 |    to the 1st wchar of the block, bits 4-7 to the 2nd, bits 8-11 to the 3rd | 
 |    and bits 12-15 to the 4th. | 
 |  | 
 |    Register roles inside L(loop) and the code after it: | 
 |      %edi  = address just past the 16-byte block examined last | 
 |      %xmm1 = c replicated into all four dwords | 
 |      %xmm2 = all-zero on entry to every compare (used to find the zero | 
 |              element); holds the zero mask of the block after the compare | 
 |      %edx  = match mask of the most recent block that had a match and no | 
 |              zero element, 0 if there has been none | 
 |      %esi  = value %edi had when %edx was recorded (block address + 16)  */ | 
 | ENTRY (wcsrchr) | 
 |  | 
 | 	ENTRANCE | 
 | 	mov	STR1(%esp), %ecx | 
 | 	movd	STR2(%esp), %xmm1 | 
 |  | 
 | 	mov	%ecx, %edi | 
 | /* The two punpckldq replicate c into all four dwords of %xmm1; %xmm2 is | 
 |    cleared in between to serve as the zero comparand.  */ | 
 | 	punpckldq %xmm1, %xmm1 | 
 | 	pxor	%xmm2, %xmm2 | 
 | 	punpckldq %xmm1, %xmm1 | 
 |  | 
 | /* %ecx = offset of s inside its 64-byte-aligned block.  Above 48, a 16-byte | 
 |    load starting at s would run past the end of that block, so take the | 
 |    aligned-load path instead.  */ | 
 | 	and	$63, %ecx | 
 | 	cmp	$48, %ecx | 
 | 	ja	L(crosscache) | 
 |  | 
 | /* First 16 bytes, read with an unaligned load directly from s.  */ | 
 | 	movdqu	(%edi), %xmm0 | 
 | 	pcmpeqd	%xmm0, %xmm2 | 
 | 	pcmpeqd	%xmm1, %xmm0 | 
 | /* %ecx = zero mask.  */ | 
 | 	pmovmskb %xmm2, %ecx | 
 | /* %eax = match mask.  */ | 
 | 	pmovmskb %xmm0, %eax | 
 | 	add	$16, %edi | 
 |  | 
 | 	test	%eax, %eax | 
 | 	jnz	L(unaligned_match1) | 
 |  | 
 | /* No match; if the string also ends inside these 16 bytes, return 0.  */ | 
 | 	test	%ecx, %ecx | 
 | 	jnz	L(return_null) | 
 |  | 
 | /* Round down to a 16-byte boundary for the aligned loop.  Bytes between that | 
 |    boundary and s + 16 are examined a second time.  */ | 
 | 	and	$-16, %edi | 
 |  | 
 | 	PUSH	(%esi) | 
 |  | 
 | /* No match recorded yet.  */ | 
 | 	xor	%edx, %edx | 
 | 	jmp	L(loop) | 
 |  | 
 | /* Unwind info only: the code that follows runs with %esi not pushed.  */ | 
 | 	CFI_POP	(%esi) | 
 |  | 
 | 	.p2align 4 | 
 | L(unaligned_match1): | 
 | /* Match in the first (unaligned) 16 bytes.  If they also hold the zero | 
 |    element, resolve the answer right away.  */ | 
 | 	test	%ecx, %ecx | 
 | 	jnz	L(prolog_find_zero_1) | 
 |  | 
 | 	PUSH	(%esi) | 
 |  | 
 | /* Record this match: mask in %edx, s + 16 in %esi.  */ | 
 | 	mov	%eax, %edx | 
 | 	mov	%edi, %esi | 
 | 	and	$-16, %edi | 
 | 	jmp	L(loop) | 
 |  | 
 | /* Unwind info only: the code that follows runs with %esi not pushed.  */ | 
 | 	CFI_POP	(%esi) | 
 |  | 
 | 	.p2align 4 | 
 | L(crosscache): | 
 | /* Handle a start address whose unaligned load is not used: load the aligned | 
 |    16-byte block containing s and discard the mask bits of the bytes that | 
 |    precede s.  %xmm3 is the zero comparand here, so %xmm2 stays all-zero.  */ | 
 | 	and	$15, %ecx | 
 | 	and	$-16, %edi | 
 | 	pxor	%xmm3, %xmm3 | 
 | 	movdqa	(%edi), %xmm0 | 
 | 	pcmpeqd	%xmm0, %xmm3 | 
 | 	pcmpeqd	%xmm1, %xmm0 | 
 | /* %edx = zero mask.  */ | 
 | 	pmovmskb %xmm3, %edx | 
 | /* %eax = match mask.  */ | 
 | 	pmovmskb %xmm0, %eax | 
 | /* Shift out the bits for bytes before s; bit 0 now corresponds to s.  */ | 
 | 	shr	%cl, %edx | 
 | 	shr	%cl, %eax | 
 | 	add	$16, %edi | 
 |  | 
 | 	test	%eax, %eax | 
 | 	jnz	L(unaligned_match) | 
 |  | 
 | 	test	%edx, %edx | 
 | 	jnz	L(return_null) | 
 |  | 
 | 	PUSH	(%esi) | 
 |  | 
 | /* No match recorded yet.  */ | 
 | 	xor	%edx, %edx | 
 | 	jmp	L(loop) | 
 |  | 
 | /* Unwind info only: the code that follows runs with %esi not pushed.  */ | 
 | 	CFI_POP	(%esi) | 
 |  | 
 | 	.p2align 4 | 
 | L(unaligned_match): | 
 | /* Match in the first aligned block.  %edx still holds the shifted zero mask; | 
 |    if it is non-zero the string ends in this block as well.  */ | 
 | 	test	%edx, %edx | 
 | 	jnz	L(prolog_find_zero) | 
 |  | 
 | 	PUSH	(%esi) | 
 |  | 
 | /* Record the match.  The mask was shifted by %ecx, so the saved address is | 
 |    biased by %ecx too: %esi - 16 is the address that bit 0 refers to.  */ | 
 | 	mov	%eax, %edx | 
 | 	lea	(%edi, %ecx), %esi | 
 |  | 
 | /* Main loop over aligned blocks, unrolled four times.  Each step loads one | 
 |    16-byte block, advances %edi past it and leaves through L(matches) when | 
 |    the block holds a zero element or a match.  %xmm2 is all-zero at the start | 
 |    of every step because a compare that found a zero element never continues | 
 |    the loop.  */ | 
 | 	.p2align 4 | 
 | L(loop): | 
 | 	movdqa	(%edi), %xmm0 | 
 | 	pcmpeqd	%xmm0, %xmm2 | 
 | 	add	$16, %edi | 
 | 	pcmpeqd	%xmm1, %xmm0 | 
 | 	pmovmskb %xmm2, %ecx | 
 | 	pmovmskb %xmm0, %eax | 
 | /* %ecx = zero mask | match mask; %eax keeps the match mask alone.  */ | 
 | 	or	%eax, %ecx | 
 | 	jnz	L(matches) | 
 |  | 
 | 	movdqa	(%edi), %xmm3 | 
 | 	pcmpeqd	%xmm3, %xmm2 | 
 | 	add	$16, %edi | 
 | 	pcmpeqd	%xmm1, %xmm3 | 
 | 	pmovmskb %xmm2, %ecx | 
 | 	pmovmskb %xmm3, %eax | 
 | 	or	%eax, %ecx | 
 | 	jnz	L(matches) | 
 |  | 
 | 	movdqa	(%edi), %xmm4 | 
 | 	pcmpeqd	%xmm4, %xmm2 | 
 | 	add	$16, %edi | 
 | 	pcmpeqd	%xmm1, %xmm4 | 
 | 	pmovmskb %xmm2, %ecx | 
 | 	pmovmskb %xmm4, %eax | 
 | 	or	%eax, %ecx | 
 | 	jnz	L(matches) | 
 |  | 
 | 	movdqa	(%edi), %xmm5 | 
 | 	pcmpeqd	%xmm5, %xmm2 | 
 | 	add	$16, %edi | 
 | 	pcmpeqd	%xmm1, %xmm5 | 
 | 	pmovmskb %xmm2, %ecx | 
 | 	pmovmskb %xmm5, %eax | 
 | 	or	%eax, %ecx | 
 | 	jz	L(loop) | 
 |  | 
 | 	.p2align 4 | 
 | L(matches): | 
 | /* The block just examined has a zero element, a match, or both.  */ | 
 | 	test	%eax, %eax | 
 | 	jnz	L(match) | 
 | L(return_value): | 
 | /* The string ends in this block and the block contributes no usable match: | 
 |    fall back to the recorded match, or return 0 if there is none.  */ | 
 | 	test	%edx, %edx | 
 | 	jz	L(return_null_1) | 
 | 	mov	%edx, %eax | 
 | 	mov	%esi, %edi | 
 |  | 
 | 	POP	(%esi) | 
 |  | 
 | /* Pick the highest matching wchar of the recorded block.  */ | 
 | 	test	%ah, %ah | 
 | 	jnz	L(match_third_or_fourth_wchar) | 
 | 	test	$15 << 4, %al | 
 | 	jnz	L(match_second_wchar) | 
 | 	lea	-16(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | /* Unwind info only: the code that follows runs with %esi pushed.  */ | 
 | 	CFI_PUSH	(%esi) | 
 |  | 
 | 	.p2align 4 | 
 | L(return_null_1): | 
 | 	POP	(%esi) | 
 |  | 
 | 	xor	%eax, %eax | 
 | 	RETURN | 
 |  | 
 | /* Unwind info only: the code that follows runs with %esi pushed.  */ | 
 | 	CFI_PUSH	(%esi) | 
 |  | 
 | 	.p2align 4 | 
 | L(match): | 
 | /* The block has a match.  Re-read the zero mask, since %ecx was merged with | 
 |    the match mask in the loop.  */ | 
 | 	pmovmskb %xmm2, %ecx | 
 | 	test	%ecx, %ecx | 
 | 	jnz	L(find_zero) | 
 | /* No zero element: this becomes the most recent match; keep scanning.  */ | 
 | 	mov	%eax, %edx | 
 | 	mov	%edi, %esi | 
 | 	jmp	L(loop) | 
 |  | 
 | /* L(find_zero*): the block holds both a match (%eax) and the zero element | 
 |    (%ecx).  Locate the zero element, keep only matches at or before it, and | 
 |    return the highest one left; if none is left use the recorded match via | 
 |    L(return_value).  */ | 
 | 	.p2align 4 | 
 | L(find_zero): | 
 | 	test	%cl, %cl | 
 | 	jz	L(find_zero_in_third_or_fourth_wchar) | 
 | 	test	$15, %cl | 
 | 	jz	L(find_zero_in_second_wchar) | 
 | /* Zero element is the 1st wchar: only a match on that same wchar counts.  */ | 
 | 	and	$1, %eax | 
 | 	jz	L(return_value) | 
 |  | 
 | 	POP	(%esi) | 
 |  | 
 | 	lea	-16(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | /* Unwind info only: the code that follows runs with %esi pushed.  */ | 
 | 	CFI_PUSH	(%esi) | 
 |  | 
 | 	.p2align 4 | 
 | L(find_zero_in_second_wchar): | 
 | /* Keep mask bits 0-4 (1st wchar plus the first byte of the 2nd).  The | 
 |    immediate is meant as (1 << 5) - 1, assuming GNU as operator precedence | 
 |    where << binds tighter than -.  */ | 
 | 	and	$1 << 5 - 1, %eax | 
 | 	jz	L(return_value) | 
 |  | 
 | 	POP	(%esi) | 
 |  | 
 | 	test	$15 << 4, %al | 
 | 	jnz	L(match_second_wchar) | 
 | 	lea	-16(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | /* Unwind info only: the code that follows runs with %esi pushed.  */ | 
 | 	CFI_PUSH	(%esi) | 
 |  | 
 | 	.p2align 4 | 
 | L(find_zero_in_third_or_fourth_wchar): | 
 | 	test	$15, %ch | 
 | 	jz	L(find_zero_in_fourth_wchar) | 
 | /* Zero element is the 3rd wchar: keep mask bits 0-8, i.e. (1 << 9) - 1 under | 
 |    the same precedence assumption as above.  */ | 
 | 	and	$1 << 9 - 1, %eax | 
 | 	jz	L(return_value) | 
 |  | 
 | 	POP	(%esi) | 
 |  | 
 | 	test	%ah, %ah | 
 | 	jnz	L(match_third_wchar) | 
 | 	test	$15 << 4, %al | 
 | 	jnz	L(match_second_wchar) | 
 | 	lea	-16(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | /* Unwind info only: the code that follows runs with %esi pushed.  */ | 
 | 	CFI_PUSH	(%esi) | 
 |  | 
 | 	.p2align 4 | 
 | L(find_zero_in_fourth_wchar): | 
 | /* Zero element is the 4th wchar: every match in the block is usable, and | 
 |    %eax is non-zero because this path is only entered from L(match).  */ | 
 |  | 
 | 	POP	(%esi) | 
 |  | 
 | 	test	%ah, %ah | 
 | 	jnz	L(match_third_or_fourth_wchar) | 
 | 	test	$15 << 4, %al | 
 | 	jnz	L(match_second_wchar) | 
 | 	lea	-16(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | /* No CFI_PUSH (%esi) here: every label from this point to END is reached | 
 |    either after POP (%esi) or from the prolog paths that never pushed %esi, | 
 |    so the unwind state must describe only %edi as pushed.  */ | 
 |  | 
 | /* L(match_*): %edi = address just past the block the mask refers to; return | 
 |    the address of the selected wchar inside that block.  */ | 
 | 	.p2align 4 | 
 | L(match_second_wchar): | 
 | 	lea	-12(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(match_third_or_fourth_wchar): | 
 | 	test	$15 << 4, %ah | 
 | 	jnz	L(match_fourth_wchar) | 
 | 	lea	-8(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(match_third_wchar): | 
 | 	lea	-8(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(match_fourth_wchar): | 
 | 	lea	-4(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(return_null): | 
 | 	xor	%eax, %eax | 
 | 	RETURN | 
 |  | 
 | /* L(prolog_find_zero*): same resolution as L(find_zero*), but for the very | 
 |    first block, before the loop was entered: %esi has not been pushed and | 
 |    there is no recorded match, so "no usable match" means return 0.  */ | 
 | 	.p2align 4 | 
 | L(prolog_find_zero): | 
 | /* Entered from L(crosscache) code: bias %edi by the shift count so it lines | 
 |    up with the shifted masks, and move the zero mask into %ecx.  */ | 
 | 	add	%ecx, %edi | 
 | 	mov	%edx, %ecx | 
 | L(prolog_find_zero_1): | 
 | 	test	%cl, %cl | 
 | 	jz	L(prolog_find_zero_in_third_or_fourth_wchar) | 
 | 	test	$15, %cl | 
 | 	jz	L(prolog_find_zero_in_second_wchar) | 
 | 	and	$1, %eax | 
 | 	jz	L(return_null) | 
 |  | 
 | 	lea	-16(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(prolog_find_zero_in_second_wchar): | 
 | /* Mask bits 0-4; see L(find_zero_in_second_wchar).  */ | 
 | 	and	$1 << 5 - 1, %eax | 
 | 	jz	L(return_null) | 
 |  | 
 | 	test	$15 << 4, %al | 
 | 	jnz	L(match_second_wchar) | 
 | 	lea	-16(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(prolog_find_zero_in_third_or_fourth_wchar): | 
 | 	test	$15, %ch | 
 | 	jz	L(prolog_find_zero_in_fourth_wchar) | 
 | /* Mask bits 0-8; see L(find_zero_in_third_or_fourth_wchar).  */ | 
 | 	and	$1 << 9 - 1, %eax | 
 | 	jz	L(return_null) | 
 |  | 
 | 	test	%ah, %ah | 
 | 	jnz	L(match_third_wchar) | 
 | 	test	$15 << 4, %al | 
 | 	jnz	L(match_second_wchar) | 
 | 	lea	-16(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | 	.p2align 4 | 
 | L(prolog_find_zero_in_fourth_wchar): | 
 | 	test	%ah, %ah | 
 | 	jnz	L(match_third_or_fourth_wchar) | 
 | 	test	$15 << 4, %al | 
 | 	jnz	L(match_second_wchar) | 
 | 	lea	-16(%edi), %eax | 
 | 	RETURN | 
 |  | 
 | END (wcsrchr) | 