Add stack unwinding directives to memcpy.
Also include some Android-specific header files.
Change-Id: Idbcbd43458ba945ca8c61bfbc04ea15fc0ae4e00
diff --git a/libc/arch-arm/bionic/memcpy.a15.S b/libc/arch-arm/bionic/memcpy.a15.S
index d1bfb7c..516e20c 100644
--- a/libc/arch-arm/bionic/memcpy.a15.S
+++ b/libc/arch-arm/bionic/memcpy.a15.S
@@ -26,12 +26,6 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#if (defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
- (!(defined (__ARM_ARCH_7A__))))
-
- /* Do nothing here. See memcpy-stub.c in the same directory. */
-
-#else
/* Prototype: void *memcpy (void *dst, const void *src, size_t count). */
/* Use the version of memcpy implemented using LDRD and STRD.
@@ -50,16 +44,12 @@
destination register must be even and the second consecutive in
ARM state, but not in Thumb state. */
+#include <machine/cpu-features.h>
+#include <machine/asm.h>
+
.syntax unified
-#if defined (__thumb__)
- .thumb
- .thumb_func
-#endif
-
- .global memcpy
- .type memcpy, %function
-memcpy:
+ENTRY(memcpy)
/* Assumes that n >= 0, and dst, src are valid pointers.
If there is at least 8 bytes to copy, use LDRD/STRD.
@@ -69,12 +59,16 @@
When less than 8 left, copy a word and then byte by byte. */
/* Save registers (r0 holds the return value):
- optimized push {r0, r4, r5, lr}.
+ optimized push {r0, r4, r5, r6, r7, lr}.
To try and improve performance, stack layout changed,
i.e., not keeping the stack looking like users expect
(highest numbered register at highest address). */
- push {r0, lr}
- strd r4, r5, [sp, #-8]!
+ .save {r0, lr}
+ push {r0, lr}
+ .save {r4, r5}
+ strd r4, r5, [sp, #-8]!
+ .save {r6, r7}
+ strd r6, r7, [sp, #-8]!
/* TODO: Add debug frame directives.
We don't need exception unwind directives, because the code below
@@ -194,9 +188,11 @@
strbcs r5, [r0]
return:
- /* Restore registers: optimized pop {r0, r4, r5, pc} */
+ /* Restore registers: optimized pop {r0, r4, r5, r6, r7, pc} */
+ /* This is the only return point of memcpy. */
+ ldrd r6, r7, [sp], #8
ldrd r4, r5, [sp], #8
- pop {r0, pc} /* This is the only return point of memcpy. */
+ pop {r0, pc}
#ifndef __ARM_FEATURE_UNALIGNED
@@ -223,12 +219,6 @@
/* Get here if there is more than 8 bytes to copy.
The number of bytes to copy is r2+8, r2 >= 0. */
- /* Save registers: push { r6, r7 }.
- We need additional registers for LDRD and STRD, because in ARM state
- the first destination register must be even and the second
- consecutive. */
- strd r6, r7, [sp, #-8]!
-
subs r2, r2, #56
blt 4f /* Go to misaligned copy of less than 64 bytes. */
@@ -259,10 +249,6 @@
/* Restore the count if there is more than 7 bytes to copy. */
adds r2, r2, #56
- /* If less than 8 bytes to copy,
- restore registers saved for this loop: optimized poplt { r6, r7 }. */
- itt lt
- ldrdlt r6, r7, [sp], #8
blt 6f /* Go to misaligned copy of less than 8 bytes. */
5:
@@ -278,9 +264,6 @@
subs r2, r2, #8
bge 5b /* If there is more to copy. */
- /* Restore registers saved for this loop: optimized pop { r6, r7 }. */
- ldrd r6, r7, [sp], #8
-
6:
/* Get here if there less than 8 bytes to copy (-8 <= r2 < 0)
and they are misaligned. */
@@ -420,4 +403,4 @@
#endif /* not __ARM_FEATURE_UNALIGNED */
-#endif /* memcpy */
+END(memcpy)