/* Copyright (c) 2014, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		x0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data2		x4
#define data2w		w4
#define has_nul		x5
#define diff		x6
#define endloop		x7
#define tmp1		x8
#define tmp2		x9
#define tmp3		x10
#define pos		x11
#define limit_wd	x12
#define mask		x13

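/* int memcmp(const void* s1, const void* s2, size_t n);

   Overall strategy, as a rough C sketch (the helper name below is ours,
   purely illustrative): when the two pointers do not share the same
   offset within an 8-byte word, fall back to a simple byte loop;
   otherwise compare a dword at a time, using masks to handle a
   misaligned start (.Lmutual_align) and a partial final dword.

       if (((uintptr_t)s1 ^ (uintptr_t)s2) & 7)
           return bytewise_memcmp(s1, s2, n);    // .Lmisaligned8 below
       // else: dword loop (.Lloop_aligned); decode the differing byte
       // only once a mismatch or the limit is reached.
 */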
ENTRY(memcmp)
	cbz	limit, .Lret0
	eor	tmp1, src1, src2
	tst	tmp1, #7
	b.ne	.Lmisaligned8
	ands	tmp1, src1, #7
	b.ne	.Lmutual_align
	add	limit_wd, limit, #7
	lsr	limit_wd, limit_wd, #3
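	/* limit_wd = (limit + 7) / 8: the number of dwords to visit,
	   rounded up so that a final partial dword is still loaded; any
	   bytes beyond the limit are masked out after the loop.  */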
	/* Start of performance-critical section  -- one 64B cache line.  */
.Lloop_aligned:
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
.Lstart_realigned:
	subs	limit_wd, limit_wd, #1
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	csinv	endloop, diff, xzr, ne	/* Last Dword or differences.  */
	cbz	endloop, .Lloop_aligned
	/* End of performance-critical section  -- one 64B cache line.  */
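	/* The loop above behaves roughly like this C sketch (illustrative
	   only, not the generated code):

	       do {
	           data1 = *src1++;              // 8 bytes at a time
	           data2 = *src2++;
	           diff  = data1 ^ data2;        // non-zero on any mismatch
	       } while (diff == 0 && --limit_wd != 0);

	   The csinv turns endloop into all-ones when limit_wd reaches zero,
	   so the single cbz tests both exit conditions at once.  */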

	/* Not at the limit yet, so we must have found a difference.  */
	cbnz	limit_wd, .Lnot_limit

	/* Limit % 8 == 0 => all bytes significant.  */
	ands	limit, limit, #7
	b.eq	.Lnot_limit

	lsl	limit, limit, #3	/* Bytes -> bits.  */
	mov	mask, #~0
#ifdef __AARCH64EB__
	lsr	mask, mask, limit
#else
	lsl	mask, mask, limit
#endif
	bic	data1, data1, mask
	bic	data2, data2, mask

	orr	diff, diff, mask
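	/* The mask marks the byte positions beyond the limit.  For example,
	   on little-endian with limit % 8 == 3: limit becomes 24 (bits) and
	   mask = ~0 << 24 = 0xffffffffff000000, so the bic above keeps only
	   the three significant low-address bytes of data1/data2, while the
	   orr flags the end of the significant data in diff for the clz
	   below.  */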
.Lnot_limit:

#ifndef	__AARCH64EB__
	rev	diff, diff
	rev	data1, data1
	rev	data2, data2
#endif
	/* The most-significant non-zero bit of DIFF marks either the first
	   bit that differs, or the end of the significant data.  Shifting
	   left now brings the critical information into the top bits.  */
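	/* Roughly, as an illustrative C sketch of the extraction below
	   (after the conditional rev above, memory order matches bit
	   significance, so clz finds the first interesting byte):

	       pos = clz(diff);
	       return (int)((data1 << pos) >> 56) - (int)((data2 << pos) >> 56);
	 */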
	clz	pos, diff
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (memcmp compares unsigned chars) the
	   values; their difference then carries the correct sign in the
	   low 32 bits of the result.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret

.Lmutual_align:
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then force the
	   bytes that precede the start point to be equal in both words, so
	   they cannot contribute a difference.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	add	limit, limit, tmp1	/* Adjust the limit for the extra.  */
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment - 64; shifts use (tmp1 & 63).  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	add	limit_wd, limit, #7
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	lsr	limit_wd, limit_wd, #3
	b	.Lstart_realigned
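	/* Worked example (little-endian, src1 % 8 == 3): tmp1 starts as 3,
	   so limit grows by 3 and tmp1 becomes 24 bits, then -24; the lsr
	   shifts by (-24 & 63) = 40, giving tmp2 = 0x0000000000ffffff.
	   Or-ing tmp2 into both data words forces the three bytes before
	   the start point to 0xff on each side, so they cannot produce a
	   difference in the main loop.  */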

.Lret0:
	mov	result, #0
	ret

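	/* The byte loop below behaves roughly like this C sketch
	   (illustrative only; the cbz at entry guarantees limit != 0 here):

	       unsigned char c1, c2;
	       do {
	           c1 = *s1++;
	           c2 = *s2++;
	       } while (--n != 0 && c1 == c2);
	       return c1 - c2;

	   The ccmp forces a "not equal" result once limit is exhausted, so
	   the single b.eq keeps looping only while bytes match and bytes
	   remain.  */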
	.p2align 6
.Lmisaligned8:
	sub	limit, limit, #1
1:
	/* Perhaps we can do better than this.  */
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	subs	limit, limit, #1
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.eq	1b
	sub	result, data1, data2
	ret
END(memcmp)