| /* Copyright (c) 2012, Linaro Limited | 
 |    All rights reserved. | 
 |  | 
 |    Redistribution and use in source and binary forms, with or without | 
 |    modification, are permitted provided that the following conditions are met: | 
 |        * Redistributions of source code must retain the above copyright | 
 |          notice, this list of conditions and the following disclaimer. | 
 |        * Redistributions in binary form must reproduce the above copyright | 
 |          notice, this list of conditions and the following disclaimer in the | 
 |          documentation and/or other materials provided with the distribution. | 
 |        * Neither the name of the Linaro nor the | 
 |          names of its contributors may be used to endorse or promote products | 
 |          derived from this software without specific prior written permission. | 
 |  | 
 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
 |    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
 |    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
 |    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
 |    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
 |    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
 |    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 |    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
 |    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 | */ | 
 |  | 
 | /* Assumptions: | 
 |  * | 
 |  * ARMv8-a, AArch64 | 
 |  */ | 
 |  | 
 | #include <private/bionic_asm.h> | 
 |  | 
 | /* Byte patterns replicated into each of the eight bytes of a 64-bit | 
 |    value.  REP8_01 is loaded into zeroones and REP8_7f is the orr mask | 
 |    in the NUL-detection sequence of strcmp; REP8_80 is not referenced | 
 |    in the part of the file visible here.  */ | 
 | #define REP8_01 0x0101010101010101 | 
 | #define REP8_7f 0x7f7f7f7f7f7f7f7f | 
 | #define REP8_80 0x8080808080808080 | 
 |  | 
 | /* Parameters and result.  src1 and result both name x0: the register | 
 |    holding the first argument is overwritten with the return value.  */ | 
 | #define src1		x0 | 
 | #define src2		x1 | 
 | #define result		x0 | 
 |  | 
 | /* Internal variables. | 
 |    data1/data2  - current dword (or single byte in the misaligned loop) | 
 |                   read from src1/src2; data1w/data2w are the 32-bit | 
 |                   views used by the byte loads. | 
 |    has_nul      - non-zero when data1 contains a NUL byte. | 
 |    diff         - data1 ^ data2. | 
 |    syndrome     - diff | has_nul. | 
 |    tmp1-tmp3    - scratch; tmp3 is only used in the big-endian tail. | 
 |    zeroones     - holds REP8_01. | 
 |    pos          - leading-zero count of syndrome.  */ | 
 | #define data1		x2 | 
 | #define data1w		w2 | 
 | #define data2		x3 | 
 | #define data2w		w3 | 
 | #define has_nul		x4 | 
 | #define diff		x5 | 
 | #define syndrome	x6 | 
 | #define tmp1		x7 | 
 | #define tmp2		x8 | 
 | #define tmp3		x9 | 
 | #define zeroones	x10 | 
 | #define pos		x11 | 
 |  | 
 | /* strcmp -- compare the NUL-terminated byte strings at src1 and src2. | 
 |  * | 
 |  * In:    x0 = src1, x1 = src2. | 
 |  * Out:   x0 = result: zero when the strings are equal, otherwise | 
 |  *        negative or positive according to the first differing bytes | 
 |  *        compared as unsigned values. | 
 |  * Uses:  x2-x11 as scratch (see the register aliases defined earlier | 
 |  *        in this file) and the condition flags; the stack is not | 
 |  *        touched. | 
 |  * | 
 |  * Strategy: | 
 |  *   - src1 and src2 differ in their low three address bits: compare | 
 |  *     one byte at a time (.Lmisaligned8). | 
 |  *   - both share the same offset within an 8-byte word: compare one | 
 |  *     dword per iteration (.Lloop_aligned), first masking the leading | 
 |  *     bytes when that offset is non-zero (.Lmutual_align). | 
 |  * | 
 |  * NOTE(review): .p2align 6 follows ENTRY(); ENTRY comes from | 
 |  * <private/bionic_asm.h>, which is not visible here.  If ENTRY emits | 
 |  * the function label, any alignment padding lands after the label -- | 
 |  * confirm.  */ | 
 | 	/* Start of performance-critical section  -- one 64B cache line.  */ | 
 | ENTRY(strcmp) | 
 | .p2align  6 | 
 | 	eor	tmp1, src1, src2 | 
 | 	mov	zeroones, #REP8_01 | 
 | 	tst	tmp1, #7 | 
 | 	/* Low three address bits differ: the two pointers never reach an | 
 | 	   8-byte boundary together, so use the byte-by-byte loop.  */ | 
 | 	b.ne	.Lmisaligned8 | 
 | 	ands	tmp1, src1, #7 | 
 | 	/* Same non-zero offset in both pointers (tmp1 = that offset).  */ | 
 | 	b.ne	.Lmutual_align | 
 | 	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80 | 
 | 	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and | 
 | 	   can be done in parallel across the entire word.  */ | 
 | .Lloop_aligned: | 
 | 	/* Load one dword from each string and advance both pointers.  */ | 
 | 	ldr	data1, [src1], #8 | 
 | 	ldr	data2, [src2], #8 | 
 | .Lstart_realigned: | 
 | 	/* Also entered from .Lmutual_align with data1/data2 already loaded | 
 | 	   and their leading bytes masked.  */ | 
 | 	sub	tmp1, data1, zeroones | 
 | 	orr	tmp2, data1, #REP8_7f | 
 | 	eor	diff, data1, data2	/* Non-zero if differences found.  */ | 
 | 	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */ | 
 | 	orr	syndrome, diff, has_nul | 
 | 	/* Keep looping while the dwords are equal and contain no NUL.  */ | 
 | 	cbz	syndrome, .Lloop_aligned | 
 | 	/* End of performance-critical section  -- one 64B cache line.  */ | 
 |  | 
 | 	/* syndrome is non-zero here: the dwords differ and/or data1 holds a | 
 | 	   NUL.  Work out the return value from data1, data2 and syndrome.  */ | 
 | #ifndef	__AARCH64EB__ | 
 | 	/* Little-endian: the earliest byte is the least significant one, so | 
 | 	   byte-reverse to put string order at the most significant end.  */ | 
 | 	rev	syndrome, syndrome | 
 | 	rev	data1, data1 | 
 | 	/* The MS-non-zero bit of the syndrome marks either the first bit | 
 | 	   that is different, or the top bit of the first zero byte. | 
 | 	   Shifting left now will bring the critical information into the | 
 | 	   top bits.  */ | 
 | 	clz	pos, syndrome | 
 | 	rev	data2, data2 | 
 | 	lsl	data1, data1, pos | 
 | 	lsl	data2, data2, pos | 
 | 	/* But we need to zero-extend (char is unsigned) the value and then | 
 | 	   perform a signed 32-bit subtraction.  */ | 
 | 	lsr	data1, data1, #56 | 
 | 	sub	result, data1, data2, lsr #56 | 
 | 	ret | 
 | #else | 
 | 	/* For big-endian we cannot use the trick with the syndrome value | 
 | 	   as carry-propagation can corrupt the upper bits if the trailing | 
 | 	   bytes in the string contain 0x01.  */ | 
 | 	/* However, if there is no NUL byte in the dword, we can generate | 
 | 	   the result directly.  We can't just subtract the bytes as the | 
 | 	   MSB might be significant.  */ | 
 | 	cbnz	has_nul, 1f | 
 | 	/* cset yields 1 when the dwords differ; cneg turns that into -1 | 
 | 	   when data1 is the lower value in an unsigned comparison.  */ | 
 | 	cmp	data1, data2 | 
 | 	cset	result, ne | 
 | 	cneg	result, result, lo | 
 | 	ret | 
 | 1: | 
 | 	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */ | 
 | 	rev	tmp3, data1 | 
 | 	sub	tmp1, tmp3, zeroones | 
 | 	orr	tmp2, tmp3, #REP8_7f | 
 | 	bic	has_nul, tmp1, tmp2 | 
 | 	/* Reverse the markers back so they line up with data1 and diff.  */ | 
 | 	rev	has_nul, has_nul | 
 | 	orr	syndrome, diff, has_nul | 
 | 	clz	pos, syndrome | 
 | 	/* The MS-non-zero bit of the syndrome marks either the first bit | 
 | 	   that is different, or the top bit of the first zero byte. | 
 | 	   Shifting left now will bring the critical information into the | 
 | 	   top bits.  */ | 
 | 	lsl	data1, data1, pos | 
 | 	lsl	data2, data2, pos | 
 | 	/* But we need to zero-extend (char is unsigned) the value and then | 
 | 	   perform a signed 32-bit subtraction.  */ | 
 | 	lsr	data1, data1, #56 | 
 | 	sub	result, data1, data2, lsr #56 | 
 | 	ret | 
 | #endif | 
 |  | 
 | .Lmutual_align: | 
 | 	/* Sources are mutually aligned, but are not currently at an | 
 | 	   alignment boundary.  Round down the addresses and then mask off | 
 | 	   the bytes that precede the start point.  */ | 
 | 	bic	src1, src1, #7 | 
 | 	bic	src2, src2, #7 | 
 | 	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */ | 
 | 	ldr	data1, [src1], #8 | 
 | 	neg	tmp1, tmp1		/* Bits to alignment -64.  */ | 
 | 	ldr	data2, [src2], #8 | 
 | 	mov	tmp2, #~0 | 
 | 	/* Shift the all-ones value so that only the bytes located before | 
 | 	   the original start address remain set.  */ | 
 | #ifdef __AARCH64EB__ | 
 | 	/* Big-endian.  Early bytes are at MSB.  */ | 
 | 	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */ | 
 | #else | 
 | 	/* Little-endian.  Early bytes are at LSB.  */ | 
 | 	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */ | 
 | #endif | 
 | 	/* Force those leading bytes to 0xff in both dwords: they then | 
 | 	   compare equal and cannot be taken for a NUL.  */ | 
 | 	orr	data1, data1, tmp2 | 
 | 	orr	data2, data2, tmp2 | 
 | 	b	.Lstart_realigned | 
 |  | 
 | .Lmisaligned8: | 
 | 	/* We can do better than this.  */ | 
 | 	/* Byte-at-a-time loop: load one byte from each string and advance | 
 | 	   both pointers.  */ | 
 | 	ldrb	data1w, [src1], #1 | 
 | 	ldrb	data2w, [src2], #1 | 
 | 	/* Carry is set when the src1 byte is non-NUL.  In that case ccmp | 
 | 	   compares the two bytes; otherwise it clears the flags, so the | 
 | 	   b.eq below is not taken.  */ | 
 | 	cmp	data1w, #1 | 
 | 	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */ | 
 | 	b.eq	.Lmisaligned8 | 
 | 	/* The bytes differ or the src1 byte is NUL: return their difference | 
 | 	   (ldrb zero-extended both values).  */ | 
 | 	sub	result, data1, data2 | 
 | 	ret | 
 | END(strcmp) | 