Add optimized stpcpy.
Change-Id: Ifed38f92a54fef4488fd0ed26a1569059a054574
diff --git a/libc/arch-arm64/generic/bionic/strcpy.S b/libc/arch-arm64/generic/bionic/strcpy.S
index b15e06d..260c321 100644
--- a/libc/arch-arm64/generic/bionic/strcpy.S
+++ b/libc/arch-arm64/generic/bionic/strcpy.S
@@ -25,169 +25,5 @@
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
-/* Copyright (c) 2014, Linaro Limited
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name of the Linaro nor the
- names of its contributors may be used to endorse or promote products
- derived from this software without specific prior written permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Assumptions:
- *
- * ARMv8-a, AArch64
- */
-
-#include <private/bionic_asm.h>
-
-/* Arguments and results. */
-#define dstin x0
-#define src x1
-
-/* Locals and temporaries. */
-#define dst x2
-#define data1 x3
-#define data1_w w3
-#define data2 x4
-#define data2_w w4
-#define has_nul1 x5
-#define has_nul1_w w5
-#define has_nul2 x6
-#define tmp1 x7
-#define tmp2 x8
-#define tmp3 x9
-#define tmp4 x10
-#define zeroones x11
-#define zeroones_w w11
-#define pos x12
-
-#define REP8_01 0x0101010101010101
-#define REP8_7f 0x7f7f7f7f7f7f7f7f
-#define REP8_80 0x8080808080808080
-
-ENTRY(strcpy)
- mov zeroones, #REP8_01
- mov dst, dstin
- ands tmp1, src, #15
- b.ne .Lmisaligned
- // NUL detection works on the principle that (X - 1) & (~X) & 0x80
- // (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
- // can be done in parallel across the entire word.
- // The inner loop deals with two Dwords at a time. This has a
- // slightly higher start-up cost, but we should win quite quickly,
- // especially on cores with a high number of issue slots per
- // cycle, as we get much better parallelism out of the operations.
-.Lloop:
- ldp data1, data2, [src], #16
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- bic has_nul1, tmp1, tmp2
- cbnz has_nul1, .Lnul_in_data1
- sub tmp3, data2, zeroones
- orr tmp4, data2, #REP8_7f
- bic has_nul2, tmp3, tmp4
- cbnz has_nul2, .Lnul_in_data2
- // No NUL in either register, copy it in a single instruction.
- stp data1, data2, [dst], #16
- b .Lloop
-
-.Lnul_in_data1:
- rev has_nul1, has_nul1
- clz pos, has_nul1
- add tmp1, pos, #0x8
-
- tbz tmp1, #6, 1f
- str data1, [dst]
- ret
-1:
- tbz tmp1, #5, 1f
- str data1_w, [dst], #4
- lsr data1, data1, #32
-1:
- tbz tmp1, #4, 1f
- strh data1_w, [dst], #2
- lsr data1, data1, #16
-1:
- tbz tmp1, #3, 1f
- strb data1_w, [dst]
-1:
- ret
-
-.Lnul_in_data2:
- str data1, [dst], #8
- rev has_nul2, has_nul2
- clz pos, has_nul2
- add tmp1, pos, #0x8
-
- tbz tmp1, #6, 1f
- str data2, [dst]
- ret
-1:
- tbz tmp1, #5, 1f
- str data2_w, [dst], #4
- lsr data2, data2, #32
-1:
- tbz tmp1, #4, 1f
- strh data2_w, [dst], #2
- lsr data2, data2, #16
-1:
- tbz tmp1, #3, 1f
- strb data2_w, [dst]
-1:
- ret
-
-.Lmisaligned:
- tbz src, #0, 1f
- ldrb data1_w, [src], #1
- strb data1_w, [dst], #1
- cbnz data1_w, 1f
- ret
-1:
- tbz src, #1, 1f
- ldrb data1_w, [src], #1
- strb data1_w, [dst], #1
- cbz data1_w, .Ldone
- ldrb data2_w, [src], #1
- strb data2_w, [dst], #1
- cbnz data2_w, 1f
-.Ldone:
- ret
-1:
- tbz src, #2, 1f
- ldr data1_w, [src], #4
- // Check for a zero.
- sub has_nul1_w, data1_w, zeroones_w
- bic has_nul1_w, has_nul1_w, data1_w
- ands has_nul1_w, has_nul1_w, #0x80808080
- b.ne .Lnul_in_data1
- str data1_w, [dst], #4
-1:
- tbz src, #3, .Lloop
- ldr data1, [src], #8
- // Check for a zero.
- sub tmp1, data1, zeroones
- orr tmp2, data1, #REP8_7f
- bics has_nul1, tmp1, tmp2
- b.ne .Lnul_in_data1
- str data1, [dst], #8
- b .Lloop
-END(strcpy)
+#define STRCPY
+#include "string_copy.S"