| /* Copyright (c) 2012-2013, Linaro Limited | 
 |    All rights reserved. | 
 |  | 
 |    Redistribution and use in source and binary forms, with or without | 
 |    modification, are permitted provided that the following conditions are met: | 
 |        * Redistributions of source code must retain the above copyright | 
 |          notice, this list of conditions and the following disclaimer. | 
 |        * Redistributions in binary form must reproduce the above copyright | 
 |          notice, this list of conditions and the following disclaimer in the | 
 |          documentation and/or other materials provided with the distribution. | 
 |        * Neither the name of the Linaro nor the | 
 |          names of its contributors may be used to endorse or promote products | 
 |          derived from this software without specific prior written permission. | 
 |  | 
 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | 
 |    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | 
 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | 
 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | 
 |    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
 |    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
 |    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 
 |    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 
 |    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 
 |    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | 
 |    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ | 
 |  | 
 | /* | 
 |  * Copyright (c) 2015 ARM Ltd | 
 |  * All rights reserved. | 
 |  * | 
 |  * Redistribution and use in source and binary forms, with or without | 
 |  * modification, are permitted provided that the following conditions | 
 |  * are met: | 
 |  * 1. Redistributions of source code must retain the above copyright | 
 |  *    notice, this list of conditions and the following disclaimer. | 
 |  * 2. Redistributions in binary form must reproduce the above copyright | 
 |  *    notice, this list of conditions and the following disclaimer in the | 
 |  *    documentation and/or other materials provided with the distribution. | 
 |  * 3. The name of the company may not be used to endorse or promote | 
 |  *    products derived from this software without specific prior written | 
 |  *    permission. | 
 |  * | 
 |  * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED | 
 |  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | 
 |  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | 
 |  * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | 
 |  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | 
 |  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
 |  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | 
 |  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | 
 |  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | 
 |  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
 |  */ | 
 |  | 
 | /* Assumptions: | 
 |  * | 
 |  * ARMv8-a, AArch64, unaligned accesses. | 
 |  * | 
 |  */ | 
 |  | 
 | #include <private/bionic_asm.h> | 
 |  | 
 | #define dstin	x0	/* Destination start; only read, never written, by the code below.  */ | 
 | #define src	x1	/* Source cursor; rebiased and advanced by the large-copy path.  */ | 
 | #define count	x2	/* Byte count; rebiased by the large-copy path.  */ | 
 | #define dst	x3	/* 16-byte-aligned store cursor used by the large-copy path.  */ | 
 | #define srcend	x4	/* src + count, computed on entry.  */ | 
 | #define dstend	x5	/* dstin + count, computed on entry.  */ | 
 | #define A_l	x6	/* A..D: data registers, used as low/high halves of ldp/stp pairs.  */ | 
 | #define A_lw	w6	/* 32-bit view of A_l, for word and byte accesses.  */ | 
 | #define A_h	x7 | 
 | #define A_hw	w7	/* 32-bit view of A_h.  */ | 
 | #define B_l	x8 | 
 | #define B_lw   w8	/* 32-bit view of B_l.  */ | 
 | #define B_h	x9	/* Same register as tmp1.  */ | 
 | #define C_l	x10 | 
 | #define C_h	x11 | 
 | #define D_l	x12 | 
 | #define D_h	x13 | 
 | #define E_l	src	/* E reuses src/count: only valid once those values are dead.  */ | 
 | #define E_h	count | 
 | #define F_l	srcend	/* F reuses srcend/dst: only valid once those values are dead.  */ | 
 | #define F_h	dst | 
 | #define tmp1	x9	/* Scratch; same register as B_h, so it dies when B_h is loaded.  */ | 
 |  | 
 | #define L(l) .L ## l	/* Paste a .L prefix onto l to form a local label name.  */ | 
 |  | 
 | /* Copies are split into 3 main cases: small copies of up to 16 bytes, | 
 |    medium copies of 17..96 bytes, which are fully unrolled, and large | 
 |    copies of more than 96 bytes, which align the destination and use an | 
 |    unrolled loop processing 64 bytes per iteration. | 
 |    Small and medium copies read all data before writing, allowing any | 
 |    kind of overlap, and memmove tailcalls memcpy for these cases as | 
 |    well as non-overlapping copies. | 
 | */ | 
 |  | 
 | 	prfm    PLDL1KEEP, [src]	/* Prefetch-for-load hint on the start of the source.  */ | 
 | 	add	srcend, src, count	/* srcend = one past the last source byte.  */ | 
 | 	add	dstend, dstin, count	/* dstend = one past the last destination byte.  */ | 
 |         cmp     count, 16 | 
 |         b.ls    L(copy16)	/* Unsigned count <= 16: small copy.  */ | 
 | 	cmp	count, 96 | 
 | 	b.hi	L(copy_long)	/* Unsigned count > 96: large copy.  */ | 
 |  | 
 | 	/* Medium copies: 17..96 bytes.  Bits 5 and 6 of count - 1 (which is | 
 | 	   in 16..95 here) pick the size class: 17..32, 33..64 or 65..96 bytes. | 
 | 	   Each class copies 16-byte pairs from both ends of the buffer; the | 
 | 	   two halves may overlap in the middle, which is harmless because | 
 | 	   every load is issued before any store.  */ | 
 | 	sub	tmp1, count, 1 | 
 | 	ldp	A_l, A_h, [src]	/* First 16 bytes; needed by every medium case.  */ | 
 | 	tbnz	tmp1, 6, L(copy96)	/* count - 1 >= 64, i.e. 65..96 bytes.  */ | 
 | 	ldp	D_l, D_h, [srcend, -16]	/* Last 16 bytes.  */ | 
 | 	tbz	tmp1, 5, 1f	/* count - 1 < 32, i.e. 17..32 bytes: A and D suffice.  */ | 
 | 	ldp	B_l, B_h, [src, 16]	/* 33..64 bytes.  Overwrites tmp1 (same register as B_h), now dead.  */ | 
 | 	ldp	C_l, C_h, [srcend, -32] | 
 | 	stp	B_l, B_h, [dstin, 16] | 
 | 	stp	C_l, C_h, [dstend, -32] | 
 | 1: | 
 | 	stp	A_l, A_h, [dstin] | 
 | 	stp	D_l, D_h, [dstend, -16] | 
 | 	ret | 
 |  | 
 | 	.p2align 4 | 
 |  | 
 | 	/* Small copies: 0..16 bytes.  Each non-empty case loads from both | 
 | 	   ends of the buffer (the two accesses may overlap) and only then | 
 | 	   stores, so the whole range is covered without a byte loop.  */ | 
 | L(copy16): | 
 | 	cmp	count, 8 | 
 | 	b.lo	1f	/* Fewer than 8 bytes.  */ | 
 | 	ldr	A_l, [src]	/* 8..16 bytes: first and last 8 bytes.  */ | 
 | 	ldr	A_h, [srcend, -8] | 
 | 	str	A_l, [dstin] | 
 | 	str	A_h, [dstend, -8] | 
 | 	ret | 
 | 	.p2align 4 | 
 | 1: | 
 | 	tbz	count, 2, 1f	/* count < 8 here, so bit 2 clear means 0..3 bytes.  */ | 
 | 	ldr	A_lw, [src]	/* 4..7 bytes: first and last 4 bytes.  */ | 
 | 	ldr	A_hw, [srcend, -4] | 
 | 	str	A_lw, [dstin] | 
 | 	str	A_hw, [dstend, -4] | 
 | 	ret | 
 |  | 
 | 	/* Copy 0..3 bytes.  After the zero-length check, use a branchless | 
 | 	   sequence that copies the same byte 3 times if count==1, or the 2nd | 
 | 	   byte twice if count==2.  */ | 
 | 1: | 
 | 	cbz	count, 2f	/* Nothing to copy.  */ | 
 | 	lsr	tmp1, count, 1	/* tmp1 = count / 2: 0 when count==1, otherwise 1.  */ | 
 | 	ldrb	A_lw, [src]	/* First byte.  */ | 
 | 	ldrb	A_hw, [srcend, -1]	/* Last byte.  */ | 
 | 	ldrb	B_lw, [src, tmp1]	/* Byte at index count / 2.  */ | 
 | 	strb	A_lw, [dstin] | 
 | 	strb	B_lw, [dstin, tmp1] | 
 | 	strb	A_hw, [dstend, -1] | 
 | 2:	ret | 
 |  | 
 | 	.p2align 4 | 
 | 	/* Copy 65..96 bytes (reached when bit 6 of count - 1 is set).  Copy | 
 | 	   64 bytes from the start and 32 bytes from the end.  A_l/A_h already | 
 | 	   hold the first 16 bytes, loaded before the branch to this label. | 
 | 	   All loads are issued before the first store.  */ | 
 | L(copy96): | 
 | 	ldp	B_l, B_h, [src, 16] | 
 | 	ldp	C_l, C_h, [src, 32] | 
 | 	ldp	D_l, D_h, [src, 48] | 
 | 	ldp	E_l, E_h, [srcend, -32]	/* E aliases src/count, which are not read again.  */ | 
 | 	ldp	F_l, F_h, [srcend, -16]	/* F_l aliases srcend, so this has to be the final load.  */ | 
 | 	stp	A_l, A_h, [dstin] | 
 | 	stp	B_l, B_h, [dstin, 16] | 
 | 	stp	C_l, C_h, [dstin, 32] | 
 | 	stp	D_l, D_h, [dstin, 48] | 
 | 	stp	E_l, E_h, [dstend, -32] | 
 | 	stp	F_l, F_h, [dstend, -16] | 
 | 	ret | 
 |  | 
 | 	/* Align DST to 16 byte alignment so that we don't cross cache line | 
 | 	   boundaries on both loads and stores.  There are more than 96 bytes | 
 | 	   to copy, so copy 16 bytes unaligned and then align.  The loop | 
 | 	   copies 64 bytes per iteration and issues its loads one iteration | 
 | 	   ahead of the matching stores.  */ | 
 |  | 
 | 	.p2align 4 | 
 | L(copy_long): | 
 | 	and	tmp1, dstin, 15	/* tmp1 = offset of dstin within its 16-byte block.  */ | 
 | 	bic	dst, dstin, 15	/* dst = dstin rounded down to a 16-byte boundary.  */ | 
 | 	ldp	D_l, D_h, [src]	/* First 16 source bytes, stored unaligned at dstin below.  */ | 
 | 	sub	src, src, tmp1	/* Bias src so that [src, N] pairs with [dst, N].  */ | 
 | 	add	count, count, tmp1	/* Count is now 16 too large.  */ | 
 | 	ldp	A_l, A_h, [src, 16] | 
 | 	stp	D_l, D_h, [dstin] | 
 | 	ldp	B_l, B_h, [src, 32]	/* Overwrites tmp1 (same register as B_h), which is dead by now.  */ | 
 | 	ldp	C_l, C_h, [src, 48] | 
 | 	ldp	D_l, D_h, [src, 64]!	/* Pre-index with writeback: src += 64.  */ | 
 | 	subs	count, count, 128 + 16	/* Test and readjust count.  */ | 
 | 	b.ls	2f	/* At most 64 bytes remain beyond the 64 already loaded.  */ | 
 | 1: | 
 | 	stp	A_l, A_h, [dst, 16]	/* Store the previously loaded pair, then reload it for the next round.  */ | 
 | 	ldp	A_l, A_h, [src, 16] | 
 | 	stp	B_l, B_h, [dst, 32] | 
 | 	ldp	B_l, B_h, [src, 32] | 
 | 	stp	C_l, C_h, [dst, 48] | 
 | 	ldp	C_l, C_h, [src, 48] | 
 | 	stp	D_l, D_h, [dst, 64]!	/* Writeback: dst += 64.  */ | 
 | 	ldp	D_l, D_h, [src, 64]!	/* Writeback: src += 64.  */ | 
 | 	subs	count, count, 64 | 
 | 	b.hi	1b	/* Loop while more than 64 bytes remain beyond those loaded.  */ | 
 |  | 
 | 	/* Write the last full set of 64 bytes.  The remainder is at most 64 | 
 | 	   bytes, so it is safe to always copy 64 bytes from the end even if | 
 | 	   there is just 1 byte left.  The end-relative loads are interleaved | 
 | 	   with the stores of the pairs still pending from the loop; E_l/E_h | 
 | 	   reuse src/count, which are not read again.  */ | 
 | 2: | 
 | 	ldp	E_l, E_h, [srcend, -64] | 
 | 	stp	A_l, A_h, [dst, 16] | 
 | 	ldp	A_l, A_h, [srcend, -48] | 
 | 	stp	B_l, B_h, [dst, 32] | 
 | 	ldp	B_l, B_h, [srcend, -32] | 
 | 	stp	C_l, C_h, [dst, 48] | 
 | 	ldp	C_l, C_h, [srcend, -16] | 
 | 	stp	D_l, D_h, [dst, 64] | 
 | 	stp	E_l, E_h, [dstend, -64] | 
 | 	stp	A_l, A_h, [dstend, -48] | 
 | 	stp	B_l, B_h, [dstend, -32] | 
 | 	stp	C_l, C_h, [dstend, -16] | 
 | 	ret | 