| /* | 
 |  * | 
 |    Copyright (c) 2014, ARM Limited | 
   All rights reserved.
 |    Copyright (c) 2014, Linaro Ltd. | 
 |  | 
 |    Redistribution and use in source and binary forms, with or without | 
 |    modification, are permitted provided that the following conditions are met: | 
 |        * Redistributions of source code must retain the above copyright | 
 |          notice, this list of conditions and the following disclaimer. | 
 |        * Redistributions in binary form must reproduce the above copyright | 
 |          notice, this list of conditions and the following disclaimer in the | 
 |          documentation and/or other materials provided with the distribution. | 
 |        * Neither the name of the company nor the names of its contributors | 
 |          may be used to endorse or promote products derived from this | 
 |          software without specific prior written permission. | 
 |  | 
 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
 |    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
 |    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
 |    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
 |    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
 |    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
 |    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 |    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
 |    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 | */ | 
 |  | 
 | /* Assumptions: | 
 |  * | 
 * ARMv8-A, AArch64
 * Neon available.
 |  */ | 
 |  | 
 | #include <private/bionic_asm.h> | 
 |  | 
 | /* Arguments and results.  */ | 
 | #define srcin		x0 | 
 | #define chrin		w1 | 
 |  | 
 | #define result		x0 | 

/* Locals and temporaries.  */
 |  | 
 | #define src		x2 | 
#define tmp1		x3
 | #define wtmp2		w4 | 
 | #define tmp3		x5 | 
 |  | 
 | #define vrepchr		v0 | 
 | #define vdata1		v1 | 
 | #define vdata2		v2 | 
 | #define vhas_nul1	v3 | 
 | #define vhas_nul2	v4 | 
 | #define vhas_chr1	v5 | 
 | #define vhas_chr2	v6 | 
 | #define vrepmask_0	v7 | 
 | #define vrepmask_c	v16 | 
 | #define vend1		v17 | 
 | #define vend2		v18 | 
 |  | 
/* Core algorithm.

   For each 32-byte hunk we calculate a 64-bit syndrome value, with
   two bits per byte (the pair for the lowest-addressed byte always
   occupies bits 0 and 1, on both big- and little-endian systems).
   Within each pair, bit 0 is set iff the relevant byte matched the
   requested character; bit 1 is set iff the relevant byte matched
   the NUL end of string (we trigger off bit 0 for the special case
   of looking for NUL).  Since the bits in the syndrome reflect
   exactly the order in which things occur in the original string,
   a count_trailing_zeros() operation will identify exactly which
   byte caused the termination, and why.  */
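
/* For reference, a rough C model of the syndrome computation for one
   32-byte hunk (an illustrative sketch only, not part of the build;
   the function and variable names are invented):

	#include <stdint.h>

	uint64_t syndrome(const unsigned char *hunk, unsigned char c)
	{
	    uint64_t syn = 0;
	    for (int i = 0; i < 32; i++) {
	        if (hunk[i] == c)
	            syn |= 1ULL << (2 * i);      // bit 0 of pair: match
	        if (hunk[i] == '\0')
	            syn |= 1ULL << (2 * i + 1);  // bit 1 of pair: NUL
	    }
	    return syn;  // a count-trailing-zeros finds the first event
	}
*/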
 |  | 
 | ENTRY(strchr) | 
	/* Magic constant 0x40100401 allows us to identify which lane
	   matches the requested byte.  Magic constant 0x80200802 (the
	   same value shifted left by one) is used similarly for NUL
	   termination.  */
 | 	mov	wtmp2, #0x0401 | 
 | 	movk	wtmp2, #0x4010, lsl #16 | 
 | 	dup	vrepchr.16b, chrin | 
 | 	bic	src, srcin, #31		/* Work with aligned 32-byte hunks.  */ | 
 | 	dup	vrepmask_c.4s, wtmp2 | 
 | 	ands	tmp1, srcin, #31 | 
 | 	add	vrepmask_0.4s, vrepmask_c.4s, vrepmask_c.4s /* equiv: lsl #1 */ | 
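	/* Per four-byte group the character-mask bytes are 0x01, 0x04,
	   0x10 and 0x40, and the NUL-mask bytes 0x02, 0x08, 0x20 and
	   0x80, so after two pairwise additions every input byte owns a
	   distinct bit pair of the final 64-bit syndrome.  */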
 | 	b.eq	.Lloop | 
 |  | 
 | 	/* Input string is not 32-byte aligned.  Rather than forcing | 
 | 	   the padding bytes to a safe value, we calculate the syndrome | 
 | 	   for all the bytes, but then mask off those bits of the | 
 | 	   syndrome that are related to the padding.  */ | 
 | 	ld1	{vdata1.16b, vdata2.16b}, [src], #32 | 
 | 	neg	tmp1, tmp1 | 
 | 	cmeq	vhas_nul1.16b, vdata1.16b, #0 | 
 | 	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b | 
 | 	cmeq	vhas_nul2.16b, vdata2.16b, #0 | 
 | 	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b | 
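	/* Each cmeq lane is now 0xff where the byte matched and 0x00
	   otherwise; the ANDs below keep only the bit pair assigned to
	   that lane.  */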
 | 	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b | 
 | 	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b | 
 | 	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b | 
 | 	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b | 
 | 	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b | 
 | 	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b | 
 | 	lsl	tmp1, tmp1, #1 | 
 | 	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128 | 
 | 	mov	tmp3, #~0 | 
 | 	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64 | 
 | 	lsr	tmp1, tmp3, tmp1 | 
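	/* A variable shift uses its count modulo 64, so shifting ~0
	   right by -2*misalignment leaves exactly the low
	   2*misalignment bits set: one bit pair per padding byte
	   before srcin.  */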
 |  | 
 | 	mov	tmp3, vend1.d[0] | 
 | 	bic	tmp1, tmp3, tmp1	// Mask padding bits. | 
 | 	cbnz	tmp1, .Ltail | 
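	/* Nothing interesting in the leading, partial hunk: fall
	   through into the aligned main loop.  */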
 |  | 
 | .Lloop: | 
 | 	ld1	{vdata1.16b, vdata2.16b}, [src], #32 | 
 | 	cmeq	vhas_nul1.16b, vdata1.16b, #0 | 
 | 	cmeq	vhas_chr1.16b, vdata1.16b, vrepchr.16b | 
 | 	cmeq	vhas_nul2.16b, vdata2.16b, #0 | 
 | 	cmeq	vhas_chr2.16b, vdata2.16b, vrepchr.16b | 
 | 	/* Use a fast check for the termination condition.  */ | 
 | 	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b | 
 | 	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b | 
 | 	orr	vend1.16b, vend1.16b, vend2.16b | 
 | 	addp	vend1.2d, vend1.2d, vend1.2d | 
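	/* addp on the .2d arrangement adds the two 64-bit halves;
	   since every lane is 0x00 or 0xff, the sum is nonzero iff at
	   least one byte matched the character or was NUL.  */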
 | 	mov	tmp1, vend1.d[0] | 
 | 	cbz	tmp1, .Lloop | 
 |  | 
	/* Termination condition found.  Now we need to establish
	   exactly why we terminated.  */
 | 	and	vhas_nul1.16b, vhas_nul1.16b, vrepmask_0.16b | 
 | 	and	vhas_nul2.16b, vhas_nul2.16b, vrepmask_0.16b | 
 | 	and	vhas_chr1.16b, vhas_chr1.16b, vrepmask_c.16b | 
 | 	and	vhas_chr2.16b, vhas_chr2.16b, vrepmask_c.16b | 
 | 	orr	vend1.16b, vhas_nul1.16b, vhas_chr1.16b | 
 | 	orr	vend2.16b, vhas_nul2.16b, vhas_chr2.16b | 
 | 	addp	vend1.16b, vend1.16b, vend2.16b		// 256->128 | 
 | 	addp	vend1.16b, vend1.16b, vend2.16b		// 128->64 | 
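	/* Only vend1.d[0] is read below, so the 128->64 step can reuse
	   vend2 as its second source; the high half of the result is
	   ignored.  */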
 |  | 
 | 	mov	tmp1, vend1.d[0] | 
 | .Ltail: | 
 | 	/* Count the trailing zeros, by bit reversing...  */ | 
 | 	rbit	tmp1, tmp1 | 
 | 	/* Re-bias source.  */ | 
 | 	sub	src, src, #32 | 
 | 	clz	tmp1, tmp1	/* And counting the leading zeros.  */ | 
	/* tmp1 is even if the target character was found first.
	   Otherwise we've found the end of the string and we weren't
	   looking for NUL.  */
 | 	tst	tmp1, #1 | 
 | 	add	result, src, tmp1, lsr #1 | 
 | 	csel	result, result, xzr, eq | 
 | 	ret | 
 | END(strchr) |
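
/* For reference, a rough C model of the tail decode above (an
   illustrative sketch only, not part of the build; the names are
   invented; __builtin_ctzll is the GCC/Clang builtin):

	#include <stdint.h>

	char *decode(const unsigned char *hunk, uint64_t syn)
	{
	    int pos = __builtin_ctzll(syn);   // first event, string order
	    if (pos & 1)                      // odd: NUL came first and we
	        return 0;                     //   were not looking for NUL
	    return (char *)hunk + (pos >> 1); // even: pointer to the match
	}
*/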