Merge "Remove two -D flags for unused macros."
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
index 08dc78a..4aaa9f1 100644
--- a/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
+++ b/libc/arch-arm/cortex-a15/bionic/__strcat_chk.S
@@ -41,13 +41,15 @@
     .cfi_startproc
     pld     [r0, #0]
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
     push    {r4, r5}
+    .save   {r4, r5}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
-    .cfi_rel_offset r5, 0
+    .cfi_rel_offset r5, 4
 
     mov     lr, r2
 
@@ -178,7 +180,7 @@
 .L_strlen_done:
     add     r2, r3, r4
     cmp     r2, lr
-    bgt     .L_fortify_check_failed
+    bhi     __strcat_chk_failed
 
     // Set up the registers for the memcpy code.
     mov     r1, r5
@@ -186,13 +188,23 @@
     mov     r2, r4
     add     r0, r0, r3
     pop     {r4, r5}
-    .cfi_adjust_cfa_offset -8
-    .cfi_restore r4
-    .cfi_restore r5
 
-    #include "memcpy_base.S"
+    .cfi_endproc
+END(__strcat_chk)
 
-.L_fortify_check_failed:
+#define MEMCPY_BASE         __strcat_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
+
+#include "memcpy_base.S"
+
+ENTRY(__strcat_chk_failed)
+    .cfi_startproc
+    .save   {r0, lr}
+    .save   {r4, r5}
+
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -208,7 +220,7 @@
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcat_chk)
+END(__strcat_chk_failed)
 
     .data
 error_string:
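
Note on the directive pairs above: after "push {r4, r5}" the stack holds
r4 at [sp, #0] and r5 at [sp, #4], and the annotations must match that
layout. A minimal sketch of the bookkeeping for one such push:

    push    {r4, r5}
    .save   {r4, r5}            @ EHABI: unwinder pops r4, r5 here
    .cfi_adjust_cfa_offset 8    @ sp is now 8 bytes further from the CFA
    .cfi_rel_offset r4, 0       @ r4 saved at [sp, #0]
    .cfi_rel_offset r5, 4       @ r5 saved at [sp, #4], the value fixed above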
diff --git a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
index 9fde590..05152e6 100644
--- a/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/cortex-a15/bionic/__strcpy_chk.S
@@ -40,6 +40,7 @@
     .cfi_startproc
     pld     [r0, #0]
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
@@ -150,14 +151,25 @@
     pld     [r1, #64]
     ldr     r0, [sp]
     cmp     r3, lr
-    bge     .L_fortify_check_failed
+    bhs     __strcpy_chk_failed
 
     // Add 1 for copy length to get the string terminator.
     add     r2, r3, #1
 
-    #include "memcpy_base.S"
+    .cfi_endproc
+END(__strcpy_chk)
 
-.L_fortify_check_failed:
+#define MEMCPY_BASE         __strcpy_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__strcpy_chk_failed)
+    .cfi_startproc
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
+
     ldr     r0, error_message
     ldr     r1, error_code
 1:
@@ -169,7 +181,7 @@
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcpy_chk)
+END(__strcpy_chk_failed)
 
     .data
 error_string:
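
Note on the branch changes: the length arguments are unsigned, so the
checks must use unsigned condition codes. A sketch of the case the new
tests exercise, with a hypothetical "fail" label standing in for the
failure path and lr holding the length argument as in the code above:

    @ r3 = strlen(src), lr = dst_len = 0xffffffff, i.e. (size_t)-1
    cmp     r3, lr
    bge     fail    @ signed: lr reads as -1, any length >= -1, spurious abort
    bhs     fail    @ unsigned: r3 < 0xffffffff, the check passes as intended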
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy.S b/libc/arch-arm/cortex-a15/bionic/memcpy.S
index 8052d62..a843230 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy.S
@@ -65,7 +65,7 @@
 ENTRY(__memcpy_chk)
         .cfi_startproc
         cmp     r2, r3
-        bgt     __memcpy_chk_fail
+        bhi     __memcpy_chk_fail
 
         // Fall through to memcpy...
         .cfi_endproc
@@ -75,18 +75,23 @@
         .cfi_startproc
         pld     [r1, #64]
         push    {r0, lr}
+        .save   {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
 
-        #include "memcpy_base.S"
         .cfi_endproc
 END(memcpy)
 
+#define MEMCPY_BASE         __memcpy_base
+#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__memcpy_chk_fail)
         .cfi_startproc
-__memcpy_chk_fail:
         // Preserve lr for backtrace.
         push    {lr}
+        .save   {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
 
@@ -100,6 +105,7 @@
 error_message:
         .word   error_string-(1b+8)
         .cfi_endproc
+END(__memcpy_chk_fail)
 
         .data
 error_string:
diff --git a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
index 647e065..0154676 100644
--- a/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a15/bionic/memcpy_base.S
@@ -53,6 +53,13 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+ENTRY(MEMCPY_BASE)
+        .cfi_startproc
+        .save   {r0, lr}
+        .cfi_def_cfa_offset 8
+        .cfi_rel_offset r0, 0
+        .cfi_rel_offset lr, 4
+
         // Assumes that n >= 0, and dst, src are valid pointers.
         // For any sizes less than 832 use the neon code that doesn't
         // care about the src alignment. This avoids any checks
@@ -162,20 +169,34 @@
         ands    r3, r3, #0x3
         bne     .L_copy_unknown_alignment
 
+        .cfi_endproc
+END(MEMCPY_BASE)
+
+ENTRY(MEMCPY_BASE_ALIGNED)
+        .cfi_startproc
+        .save   {r0, lr}
+        .cfi_def_cfa_offset 8
+        .cfi_rel_offset r0, 0
+        .cfi_rel_offset lr, 4
+
         // To try and improve performance, stack layout changed,
         // i.e., not keeping the stack looking like users expect
         // (highest numbered register at highest address).
-        // TODO: Add debug frame directives.
-        // We don't need exception unwind directives, because the code below
-        // does not throw any exceptions and does not call any other functions.
-        // Generally, newlib functions like this lack debug information for
-        // assembler source.
-        .save   {r4, r5}
         strd    r4, r5, [sp, #-8]!
-        .save   {r6, r7}
+        .save   {r4, r5}
+        .cfi_adjust_cfa_offset 8
+        .cfi_rel_offset r4, 0
+        .cfi_rel_offset r5, 4
         strd    r6, r7, [sp, #-8]!
-        .save   {r8, r9}
+        .save   {r6, r7}
+        .cfi_adjust_cfa_offset 8
+        .cfi_rel_offset r6, 0
+        .cfi_rel_offset r7, 4
         strd    r8, r9, [sp, #-8]!
+        .save   {r8, r9}
+        .cfi_adjust_cfa_offset 8
+        .cfi_rel_offset r8, 0
+        .cfi_rel_offset r9, 4
 
         // Optimized for already aligned dst code.
         ands    ip, r0, #3
@@ -301,3 +322,6 @@
 
         // Src is guaranteed to be at least word aligned by this point.
         b       .L_word_aligned
+
+        .cfi_endproc
+END(MEMCPY_BASE_ALIGNED)
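
Note on the MEMCPY_BASE scheme: memcpy_base.S is still shared source, but
each includer now names its own copy before including it, e.g.:

    #define MEMCPY_BASE         __strcat_chk_memcpy_base
    #define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
    #include "memcpy_base.S"

Each copy is then a self-contained function with its own .cfi_startproc
and .cfi_endproc region and a distinct name in backtraces, instead of
being spliced into the middle of its caller as the old #include was.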
diff --git a/libc/arch-arm/cortex-a15/bionic/memset.S b/libc/arch-arm/cortex-a15/bionic/memset.S
index 5593be6..b5fc6ba 100644
--- a/libc/arch-arm/cortex-a15/bionic/memset.S
+++ b/libc/arch-arm/cortex-a15/bionic/memset.S
@@ -45,6 +45,7 @@
         bls         .L_done
 
         // Preserve lr for backtrace.
+        .save       {lr}
         push        {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
@@ -73,6 +74,7 @@
 
 ENTRY(memset)
         .cfi_startproc
+        .save       {r0}
         stmfd       sp!, {r0}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset r0, 0
diff --git a/libc/arch-arm/cortex-a15/bionic/strcmp.S b/libc/arch-arm/cortex-a15/bionic/strcmp.S
index 7aff7c4..13b329f 100644
--- a/libc/arch-arm/cortex-a15/bionic/strcmp.S
+++ b/libc/arch-arm/cortex-a15/bionic/strcmp.S
@@ -123,8 +123,13 @@
         .macro  init
         /* Macro to save temporary registers and prepare magic values.  */
         subs    sp, sp, #16
+        .cfi_def_cfa_offset 16
         strd    r4, r5, [sp, #8]
+        .cfi_rel_offset r4, 8
+        .cfi_rel_offset r5, 12
         strd    r6, r7, [sp]
+        .cfi_rel_offset r6, 0
+        .cfi_rel_offset r7, 4
         mvn     r6, #0  /* all F */
         mov     r7, #0  /* all 0 */
         .endm   /* init */
@@ -165,18 +170,20 @@
 #endif /* not  __ARMEB__ */
         .endm /* setup_return */
 
+        .cfi_startproc
         pld [r0, #0]
         pld [r1, #0]
 
         /* Are both strings double-word aligned?  */
         orr     ip, r0, r1
         tst     ip, #7
-        bne     do_align
+        bne     .L_do_align
 
         /* Fast path.  */
+        .save   {r4-r7}
         init
 
-doubleword_aligned:
+.L_doubleword_aligned:
 
         /* Get here when the strings to compare are double-word aligned.  */
         /* Compare two words in every iteration.  */
@@ -189,14 +196,14 @@
         ldrd    r2, r3, [r0], #8
         ldrd    r4, r5, [r1], #8
 
-        magic_compare_and_branch w1=r2, w2=r4, label=return_24
-        magic_compare_and_branch w1=r3, w2=r5, label=return_35
+        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
+        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
         b       2b
 
-do_align:
+.L_do_align:
         /* Is the first string word-aligned?  */
         ands    ip, r0, #3
-        beq     word_aligned_r0
+        beq     .L_word_aligned_r0
 
         /* Fast compare byte by byte until the first string is word-aligned.  */
         /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -204,58 +211,58 @@
         bic     r0, r0, #3
         ldr     r2, [r0], #4
         lsls    ip, ip, #31
-        beq     byte2
-        bcs     byte3
+        beq     .L_byte2
+        bcs     .L_byte3
 
-byte1:
+.L_byte1:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE1_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbz   reg=r3, label=fast_return
+        bne     .L_fast_return
+        m_cbz   reg=r3, label=.L_fast_return
 
-byte2:
+.L_byte2:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE2_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbz   reg=r3, label=fast_return
+        bne     .L_fast_return
+        m_cbz   reg=r3, label=.L_fast_return
 
-byte3:
+.L_byte3:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE3_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbnz  reg=r3, label=word_aligned_r0
+        bne     .L_fast_return
+        m_cbnz  reg=r3, label=.L_word_aligned_r0
 
-fast_return:
+.L_fast_return:
         mov     r0, ip
         bx      lr
 
-word_aligned_r0:
+.L_word_aligned_r0:
         init
         /* The first string is word-aligned.  */
         /* Is the second string word-aligned?  */
         ands    ip, r1, #3
-        bne     strcmp_unaligned
+        bne     .L_strcmp_unaligned
 
-word_aligned:
+.L_word_aligned:
         /* The strings are word-aligned. */
         /* Is the first string double-word aligned?  */
         tst     r0, #4
-        beq     doubleword_aligned_r0
+        beq     .L_doubleword_aligned_r0
 
         /* If r0 is not double-word aligned yet, align it by loading
         and comparing the next word from each string.  */
         ldr     r2, [r0], #4
         ldr     r4, [r1], #4
-        magic_compare_and_branch w1=r2 w2=r4 label=return_24
+        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
 
-doubleword_aligned_r0:
+.L_doubleword_aligned_r0:
         /* Get here when r0 is double-word aligned.  */
         /* Is r1 doubleword_aligned?  */
         tst     r1, #4
-        beq     doubleword_aligned
+        beq     .L_doubleword_aligned
 
         /* Get here when the strings to compare are word-aligned,
         r0 is double-word aligned, but r1 is not double-word aligned.  */
@@ -271,9 +278,9 @@
 
         /* Load the next double-word from each string and compare.  */
         ldrd    r2, r3, [r0], #8
-        magic_compare_and_branch w1=r2 w2=r5 label=return_25
+        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
         ldrd    r4, r5, [r1], #8
-        magic_compare_and_branch w1=r3 w2=r4 label=return_34
+        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
         b       3b
 
         .macro miscmp_word offsetlo offsethi
@@ -297,47 +304,47 @@
         and     r2, r3, r6, S2LOMEM #\offsetlo
         it      eq
         cmpeq   r2, r5
-        bne     return_25
+        bne     .L_return_25
         ldr     r5, [r1], #4
         cmp     ip, #0
         eor r3, r2, r3
         S2HIMEM r2, r5, #\offsethi
         it      eq
         cmpeq   r3, r2
-        bne     return_32
+        bne     .L_return_32
         b       7b
         .endm /* miscmp_word */
 
-strcmp_unaligned:
+.L_strcmp_unaligned:
         /* r0 is word-aligned, r1 is at offset ip from a word.  */
         /* Align r1 to the (previous) word-boundary.  */
         bic     r1, r1, #3
 
         /* Unaligned comparison word by word using LDRs. */
         cmp     ip, #2
-        beq     miscmp_word_16                    /* If ip == 2.  */
-        bge     miscmp_word_24                    /* If ip == 3.  */
+        beq     .L_miscmp_word_16                 /* If ip == 2.  */
+        bge     .L_miscmp_word_24                 /* If ip == 3.  */
         miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
-miscmp_word_16:  miscmp_word offsetlo=16 offsethi=16
-miscmp_word_24:  miscmp_word offsetlo=24 offsethi=8
+.L_miscmp_word_16:  miscmp_word offsetlo=16 offsethi=16
+.L_miscmp_word_24:  miscmp_word offsetlo=24 offsethi=8
 
 
-return_32:
+.L_return_32:
         setup_return w1=r3, w2=r2
-        b       do_return
-return_34:
+        b       .L_do_return
+.L_return_34:
         setup_return w1=r3, w2=r4
-        b       do_return
-return_25:
+        b       .L_do_return
+.L_return_25:
         setup_return w1=r2, w2=r5
-        b       do_return
-return_35:
+        b       .L_do_return
+.L_return_35:
         setup_return w1=r3, w2=r5
-        b       do_return
-return_24:
+        b       .L_do_return
+.L_return_24:
         setup_return w1=r2, w2=r4
 
-do_return:
+.L_do_return:
 
 #ifdef __ARMEB__
         mov     r0, ip
@@ -349,11 +356,16 @@
         ldrd    r6, r7, [sp]
         ldrd    r4, r5, [sp, #8]
         adds    sp, sp, #16
+        .cfi_def_cfa_offset 0
+        .cfi_restore r4
+        .cfi_restore r5
+        .cfi_restore r6
+        .cfi_restore r7
 
         /* There is a zero or a different byte between r1 and r2.  */
         /* r0 contains a mask of all-zero bytes in r1.  */
         /* Using r0 and not ip here because cbz requires low register.  */
-        m_cbz   reg=r0, label=compute_return_value
+        m_cbz   reg=r0, label=.L_compute_return_value
         clz     r0, r0
         /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
         rsb     r0, r0, #24
@@ -361,7 +373,7 @@
         lsr     r1, r1, r0
         lsr     r2, r2, r0
 
-compute_return_value:
+.L_compute_return_value:
         movs    r0, #1
         cmp     r1, r2
         /* The return value is computed as follows.
@@ -374,4 +386,5 @@
         it      ls
         sbcls   r0, r0, r0
         bx      lr
+        .cfi_endproc
 END(strcmp)
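
Note on the .L_ renames: a bare label such as "fast_return:" is emitted
into the object file's symbol table and can show up in backtraces and
profiles as if it were a function, while a ".L"-prefixed label is an
assembler-local name that is discarded at assembly time:

    fast_return:        @ becomes a local symbol in the .o
    .L_fast_return:     @ never leaves the assembler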
diff --git a/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S b/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S
index 3f86636..78cf19a 100644
--- a/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S
+++ b/libc/arch-arm/cortex-a9/bionic/__strcat_chk.S
@@ -41,10 +41,12 @@
     .cfi_startproc
     pld     [r0, #0]
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
     push    {r4, r5}
+    .save   {r4, r5}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -181,7 +183,7 @@
 .L_strlen_done:
     add     r2, r3, r4
     cmp     r2, lr
-    bgt     .L_fortify_check_failed
+    bhi     __strcat_chk_fail
 
     // Set up the registers for the memcpy code.
     mov     r1, r5
@@ -189,13 +191,23 @@
     mov     r2, r4
     add     r0, r0, r3
     pop     {r4, r5}
-    .cfi_adjust_cfa_offset -8
-    .cfi_restore r4
-    .cfi_restore r5
 
-    #include "memcpy_base.S"
+    // Fall through into the memcpy_base function.
+    .cfi_endproc
+END(__strcat_chk)
 
-.L_fortify_check_failed:
+#define MEMCPY_BASE         __strcat_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__strcat_chk_fail)
+    .cfi_startproc
+
+    .save   {r0, lr}
+    .save   {r4, r5}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -211,7 +223,7 @@
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcat_chk)
+END(__strcat_chk_fail)
 
     .data
 error_string:
diff --git a/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S b/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S
index 787b057..d0acf1e 100644
--- a/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/cortex-a9/bionic/__strcpy_chk.S
@@ -40,6 +40,7 @@
     .cfi_startproc
     pld     [r0, #0]
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
@@ -152,26 +153,41 @@
     pld     [r1, #64]
     ldr     r0, [sp]
     cmp     r3, lr
-    bge     .L_fortify_check_failed
+    bhs     __strcpy_chk_fail
 
     // Add 1 for copy length to get the string terminator.
     add     r2, r3, #1
 
-    #include "memcpy_base.S"
+    .cfi_endproc
 
-.L_fortify_check_failed:
+    // Fall through into the memcpy_base function.
+END(__strcpy_chk)
+
+#define MEMCPY_BASE         __strcpy_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__strcpy_chk_fail)
+    .cfi_startproc
+
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
+
     ldr     r0, error_message
     ldr     r1, error_code
 1:
     add     r0, pc
     bl      __fortify_chk_fail
+
 error_code:
     .word   BIONIC_EVENT_STRCPY_BUFFER_OVERFLOW
 error_message:
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcpy_chk)
+END(__strcpy_chk_fail)
 
     .data
 error_string:
diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy.S b/libc/arch-arm/cortex-a9/bionic/memcpy.S
index e7beb25..5c4c428 100644
--- a/libc/arch-arm/cortex-a9/bionic/memcpy.S
+++ b/libc/arch-arm/cortex-a9/bionic/memcpy.S
@@ -43,7 +43,7 @@
 ENTRY(__memcpy_chk)
         .cfi_startproc
         cmp         r2, r3
-        bgt         __memcpy_chk_fail
+        bhi         __memcpy_chk_fail
 
         // Fall through to memcpy...
         .cfi_endproc
@@ -51,21 +51,27 @@
 
 ENTRY(memcpy)
         .cfi_startproc
+
         pld     [r1, #0]
         stmfd   sp!, {r0, lr}
+        .save   {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
         pld     [r1, #64]
 
-        #include "memcpy_base.S"
         .cfi_endproc
 END(memcpy)
 
+#define MEMCPY_BASE         __memcpy_base
+#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__memcpy_chk_fail)
         .cfi_startproc
-__memcpy_chk_fail:
         // Preserve lr for backtrace.
         push    {lr}
+        .save   {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
 
@@ -79,6 +85,7 @@
 error_message:
         .word   error_string-(1b+4)
         .cfi_endproc
+END(__memcpy_chk_fail)
 
         .data
 error_string:
diff --git a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
index 46b5a93..e8ff4f5 100644
--- a/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
+++ b/libc/arch-arm/cortex-a9/bionic/memcpy_base.S
@@ -32,15 +32,21 @@
  * cache line.
  */
 
+ENTRY(MEMCPY_BASE)
+        .cfi_startproc
+        .save       {r0, lr}
+        .cfi_def_cfa_offset 8
+        .cfi_rel_offset r0, 0
+        .cfi_rel_offset lr, 4
+
         // Check so divider is at least 16 bytes, needed for alignment code.
         cmp         r2, #16
         blo         5f
 
-
         /* check if buffers are aligned. If so, run arm-only version */
         eor         r3, r0, r1
         ands        r3, r3, #0x3
-        beq         11f
+        beq         MEMCPY_BASE_ALIGNED
 
         /* Check the upper size limit for Neon unaligned memory access in memcpy */
         cmp         r2, #224
@@ -131,9 +137,27 @@
 
         ldmfd       sp!, {r0, lr}
         bx          lr
-11:
+
+        .cfi_endproc
+END(MEMCPY_BASE)
+
+ENTRY(MEMCPY_BASE_ALIGNED)
+        .cfi_startproc
+
+        .save       {r0, lr}
+        .cfi_def_cfa_offset 8
+        .cfi_rel_offset r0, 0
+        .cfi_rel_offset lr, 4
+
         /* Simple arm-only copy loop to handle aligned copy operations */
-        stmfd       sp!, {r4, r5, r6, r7, r8}
+        stmfd       sp!, {r4-r8}
+        .save       {r4-r8}
+        .cfi_adjust_cfa_offset 20
+        .cfi_rel_offset r4, 0
+        .cfi_rel_offset r5, 4
+        .cfi_rel_offset r6, 8
+        .cfi_rel_offset r7, 12
+        .cfi_rel_offset r8, 16
         pld         [r1, #(32 * 4)]
 
         /* Check alignment */
@@ -202,5 +226,8 @@
         ldrbne      r3, [r1]                /*  last byte  */
         strbne      r3, [r0]
 6:
-        ldmfd       sp!, {r4, r5, r6, r7, r8}
+        ldmfd       sp!, {r4-r8}
         ldmfd       sp!, {r0, pc}
+
+        .cfi_endproc
+END(MEMCPY_BASE_ALIGNED)
diff --git a/libc/arch-arm/cortex-a9/bionic/memset.S b/libc/arch-arm/cortex-a9/bionic/memset.S
index bc25a3e..87d2c08 100644
--- a/libc/arch-arm/cortex-a9/bionic/memset.S
+++ b/libc/arch-arm/cortex-a9/bionic/memset.S
@@ -44,6 +44,7 @@
 
         // Preserve lr for backtrace.
         push        {lr}
+        .save       {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
 
@@ -73,11 +74,13 @@
 /* memset() returns its first argument.  */
 ENTRY(memset)
         .cfi_startproc
+
         # The neon memset only wins for less than 132.
         cmp         r2, #132
-        bhi         11f
+        bhi         __memset_large_copy
 
         stmfd       sp!, {r0}
+        .save       {r0}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset r0, 0
 
@@ -112,12 +115,18 @@
         strcsb      r1, [r0], #1
         ldmfd       sp!, {r0}
         bx          lr
-11:
+
+        .cfi_endproc
+END(memset)
+
+ENTRY(__memset_large_copy)
+        .cfi_startproc
+
         /* compute the offset to align the destination
          * offset = (4-(src&3))&3 = -src & 3
          */
-
         stmfd       sp!, {r0, r4-r7, lr}
+        .save       {r0, r4-r7, lr}
         .cfi_def_cfa_offset 24
         .cfi_rel_offset r0, 0
         .cfi_rel_offset r4, 4
@@ -188,7 +197,7 @@
         ldmfd       sp!, {r0, r4-r7, lr}
         bx          lr
         .cfi_endproc
-END(memset)
+END(__memset_large_copy)
 
         .data
 error_string:
diff --git a/libc/arch-arm/cortex-a9/bionic/strcmp.S b/libc/arch-arm/cortex-a9/bionic/strcmp.S
index 9597d0d..232df75 100644
--- a/libc/arch-arm/cortex-a9/bionic/strcmp.S
+++ b/libc/arch-arm/cortex-a9/bionic/strcmp.S
@@ -123,8 +123,13 @@
         .macro  init
         /* Macro to save temporary registers and prepare magic values.  */
         subs    sp, sp, #16
+        .cfi_def_cfa_offset 16
         strd    r4, r5, [sp, #8]
+        .cfi_rel_offset r4, 8
+        .cfi_rel_offset r5, 12
         strd    r6, r7, [sp]
+        .cfi_rel_offset r6, 0
+        .cfi_rel_offset r7, 4
         mvn     r6, #0  /* all F */
         mov     r7, #0  /* all 0 */
         .endm   /* init */
@@ -165,18 +170,20 @@
 #endif /* not  __ARMEB__ */
         .endm /* setup_return */
 
+        .cfi_startproc
         pld [r0, #0]
         pld [r1, #0]
 
         /* Are both strings double-word aligned?  */
         orr     ip, r0, r1
         tst     ip, #7
-        bne     do_align
+        bne     .L_do_align
 
         /* Fast path.  */
+        .save   {r4-r7}
         init
 
-doubleword_aligned:
+.L_doubleword_aligned:
 
         /* Get here when the strings to compare are double-word aligned.  */
         /* Compare two words in every iteration.  */
@@ -189,14 +196,14 @@
         ldrd    r2, r3, [r0], #8
         ldrd    r4, r5, [r1], #8
 
-        magic_compare_and_branch w1=r2, w2=r4, label=return_24
-        magic_compare_and_branch w1=r3, w2=r5, label=return_35
+        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
+        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
         b       2b
 
-do_align:
+.L_do_align:
         /* Is the first string word-aligned?  */
         ands    ip, r0, #3
-        beq     word_aligned_r0
+        beq     .L_word_aligned_r0
 
         /* Fast compare byte by byte until the first string is word-aligned.  */
         /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -204,58 +211,58 @@
         bic     r0, r0, #3
         ldr     r2, [r0], #4
         lsls    ip, ip, #31
-        beq     byte2
-        bcs     byte3
+        beq     .L_byte2
+        bcs     .L_byte3
 
-byte1:
+.L_byte1:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE1_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbz   reg=r3, label=fast_return
+        bne     .L_fast_return
+        m_cbz   reg=r3, label=.L_fast_return
 
-byte2:
+.L_byte2:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE2_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbz   reg=r3, label=fast_return
+        bne     .L_fast_return
+        m_cbz   reg=r3, label=.L_fast_return
 
-byte3:
+.L_byte3:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE3_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbnz  reg=r3, label=word_aligned_r0
+        bne     .L_fast_return
+        m_cbnz  reg=r3, label=.L_word_aligned_r0
 
-fast_return:
+.L_fast_return:
         mov     r0, ip
         bx      lr
 
-word_aligned_r0:
+.L_word_aligned_r0:
         init
         /* The first string is word-aligned.  */
         /* Is the second string word-aligned?  */
         ands    ip, r1, #3
-        bne     strcmp_unaligned
+        bne     .L_strcmp_unaligned
 
-word_aligned:
+.L_word_aligned:
         /* The strings are word-aligned. */
         /* Is the first string double-word aligned?  */
         tst     r0, #4
-        beq     doubleword_aligned_r0
+        beq     .L_doubleword_aligned_r0
 
         /* If r0 is not double-word aligned yet, align it by loading
         and comparing the next word from each string.  */
         ldr     r2, [r0], #4
         ldr     r4, [r1], #4
-        magic_compare_and_branch w1=r2 w2=r4 label=return_24
+        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
 
-doubleword_aligned_r0:
+.L_doubleword_aligned_r0:
         /* Get here when r0 is double-word aligned.  */
         /* Is r1 doubleword_aligned?  */
         tst     r1, #4
-        beq     doubleword_aligned
+        beq     .L_doubleword_aligned
 
         /* Get here when the strings to compare are word-aligned,
         r0 is double-word aligned, but r1 is not double-word aligned.  */
@@ -271,9 +278,9 @@
 
         /* Load the next double-word from each string and compare.  */
         ldrd    r2, r3, [r0], #8
-        magic_compare_and_branch w1=r2 w2=r5 label=return_25
+        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
         ldrd    r4, r5, [r1], #8
-        magic_compare_and_branch w1=r3 w2=r4 label=return_34
+        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
         b       3b
 
         .macro miscmp_word offsetlo offsethi
@@ -297,33 +304,33 @@
         and     r2, r3, r6, S2LOMEM #\offsetlo
         it      eq
         cmpeq   r2, r5
-        bne     return_25
+        bne     .L_return_25
         ldr     r5, [r1], #4
         cmp     ip, #0
         eor r3, r2, r3
         S2HIMEM r2, r5, #\offsethi
         it      eq
         cmpeq   r3, r2
-        bne     return_32
+        bne     .L_return_32
         b       7b
         .endm /* miscmp_word */
 
-return_32:
+.L_return_32:
         setup_return w1=r3, w2=r2
-        b       do_return
-return_34:
+        b       .L_do_return
+.L_return_34:
         setup_return w1=r3, w2=r4
-        b       do_return
-return_25:
+        b       .L_do_return
+.L_return_25:
         setup_return w1=r2, w2=r5
-        b       do_return
-return_35:
+        b       .L_do_return
+.L_return_35:
         setup_return w1=r3, w2=r5
-        b       do_return
-return_24:
+        b       .L_do_return
+.L_return_24:
         setup_return w1=r2, w2=r4
 
-do_return:
+.L_do_return:
 
 #ifdef __ARMEB__
         mov     r0, ip
@@ -335,11 +342,16 @@
         ldrd    r6, r7, [sp]
         ldrd    r4, r5, [sp, #8]
         adds    sp, sp, #16
+        .cfi_def_cfa_offset 0
+        .cfi_restore r4
+        .cfi_restore r5
+        .cfi_restore r6
+        .cfi_restore r7
 
         /* There is a zero or a different byte between r1 and r2.  */
         /* r0 contains a mask of all-zero bytes in r1.  */
         /* Using r0 and not ip here because cbz requires low register.  */
-        m_cbz   reg=r0, label=compute_return_value
+        m_cbz   reg=r0, label=.L_compute_return_value
         clz     r0, r0
         /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
         rsb     r0, r0, #24
@@ -347,7 +359,7 @@
         lsr     r1, r1, r0
         lsr     r2, r2, r0
 
-compute_return_value:
+.L_compute_return_value:
         movs    r0, #1
         cmp     r1, r2
         /* The return value is computed as follows.
@@ -367,7 +379,7 @@
      * bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S for the unedited
      * version of the code.
      */
-strcmp_unaligned:
+.L_strcmp_unaligned:
 	wp1 .req r0
 	wp2 .req r1
 	b1  .req r2
@@ -520,6 +532,11 @@
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
     adds    sp, sp, #16
+    .cfi_def_cfa_offset 0
+    .cfi_restore r4
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
 
 	bx	lr
 
@@ -541,4 +558,10 @@
     adds    sp, sp, #16
+    .cfi_def_cfa_offset 0
+    .cfi_restore r4
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
 
 	bx	lr
+    .cfi_endproc
 END(strcmp)
diff --git a/libc/arch-arm/krait/bionic/__strcat_chk.S b/libc/arch-arm/krait/bionic/__strcat_chk.S
index 4516d30..956b461 100644
--- a/libc/arch-arm/krait/bionic/__strcat_chk.S
+++ b/libc/arch-arm/krait/bionic/__strcat_chk.S
@@ -41,10 +41,12 @@
     .cfi_startproc
     pld     [r0, #0]
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
     push    {r4, r5}
+    .save   {r4, r5}
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -178,7 +180,7 @@
 .L_strlen_done:
     add     r2, r3, r4
     cmp     r2, lr
-    bgt     .L_fortify_check_failed
+    bhi     __strcat_chk_failed
 
     // Set up the registers for the memcpy code.
     mov     r1, r5
@@ -186,13 +188,21 @@
     mov     r2, r4
     add     r0, r0, r3
     pop     {r4, r5}
-    .cfi_adjust_cfa_offset -8
-    .cfi_restore r4
-    .cfi_restore r5
 
-    #include "memcpy_base.S"
+    .cfi_endproc
+END(__strcat_chk)
 
-.L_fortify_check_failed:
+#define MEMCPY_BASE         __strcat_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcat_chk_memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__strcat_chk_failed)
+    .cfi_startproc
+    .save   {r0, lr}
+    .save   {r4, r5}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r4, 0
     .cfi_rel_offset r5, 4
@@ -208,7 +218,7 @@
     .word   error_string-(1b+4)
 
     .cfi_endproc
-END(__strcat_chk)
+END(__strcat_chk_failed)
 
     .data
 error_string:
diff --git a/libc/arch-arm/krait/bionic/__strcpy_chk.S b/libc/arch-arm/krait/bionic/__strcpy_chk.S
index c57268c..402cac6 100644
--- a/libc/arch-arm/krait/bionic/__strcpy_chk.S
+++ b/libc/arch-arm/krait/bionic/__strcpy_chk.S
@@ -40,6 +40,7 @@
     .cfi_startproc
     pld     [r0, #0]
     push    {r0, lr}
+    .save   {r0, lr}
     .cfi_def_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset lr, 4
@@ -150,14 +151,25 @@
     pld     [r1, #64]
     ldr     r0, [sp]
     cmp     r3, lr
-    bge     .L_fortify_check_failed
+    bhs     __strcpy_chk_failed
 
     // Add 1 for copy length to get the string terminator.
     add     r2, r3, #1
 
-    #include "memcpy_base.S"
+    .cfi_endproc
+END(__strcpy_chk)
 
-.L_fortify_check_failed:
+#define MEMCPY_BASE         __strcpy_chk_memcpy_base
+#define MEMCPY_BASE_ALIGNED __strcpy_chk_memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__strcpy_chk_failed)
+    .cfi_startproc
+    .save   {r0, lr}
+    .cfi_def_cfa_offset 8
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset lr, 4
+
     ldr     r0, error_message
     ldr     r1, error_code
 1:
@@ -168,7 +180,7 @@
 error_message:
     .word   error_string-(1b+4)
     .cfi_endproc
-END(__strcpy_chk)
+END(__strcpy_chk_failed)
 
     .data
 error_string:
diff --git a/libc/arch-arm/krait/bionic/memcpy.S b/libc/arch-arm/krait/bionic/memcpy.S
index 75b2395..ea040bc 100644
--- a/libc/arch-arm/krait/bionic/memcpy.S
+++ b/libc/arch-arm/krait/bionic/memcpy.S
@@ -46,7 +46,7 @@
 ENTRY(__memcpy_chk)
         .cfi_startproc
         cmp         r2, r3
-        bgt         __memcpy_chk_fail
+        bhi         __memcpy_chk_fail
 
         // Fall through to memcpy...
         .cfi_endproc
@@ -56,18 +56,22 @@
         .cfi_startproc
         pld     [r1, #64]
         stmfd   sp!, {r0, lr}
+        .save   {r0, lr}
         .cfi_def_cfa_offset 8
         .cfi_rel_offset r0, 0
         .cfi_rel_offset lr, 4
-
-        #include "memcpy_base.S"
         .cfi_endproc
 END(memcpy)
 
+#define MEMCPY_BASE         __memcpy_base
+#define MEMCPY_BASE_ALIGNED __memcpy_base_aligned
+#include "memcpy_base.S"
+
+ENTRY(__memcpy_chk_fail)
         .cfi_startproc
-__memcpy_chk_fail:
         // Preserve lr for backtrace.
         push    {lr}
+        .save   {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
 
@@ -81,6 +85,7 @@
 error_message:
         .word   error_string-(1b+4)
         .cfi_endproc
+END(__memcpy_chk_fail)
 
         .data
 error_string:
diff --git a/libc/arch-arm/krait/bionic/memcpy_base.S b/libc/arch-arm/krait/bionic/memcpy_base.S
index 48ce477..d87a542 100644
--- a/libc/arch-arm/krait/bionic/memcpy_base.S
+++ b/libc/arch-arm/krait/bionic/memcpy_base.S
@@ -35,6 +35,13 @@
 
 // Assumes neon instructions and a cache line size of 32 bytes.
 
+ENTRY(MEMCPY_BASE)
+        .cfi_startproc
+        .save {r0, lr}
+        .cfi_def_cfa_offset 8
+        .cfi_rel_offset r0, 0
+        .cfi_rel_offset lr, 4
+
         /* do we have at least 16-bytes to copy (needed for alignment below) */
         cmp         r2, #16
         blo         5f
@@ -115,3 +122,6 @@
 
         ldmfd       sp!, {r0, lr}
         bx          lr
+
+        .cfi_endproc
+END(MEMCPY_BASE)
diff --git a/libc/arch-arm/krait/bionic/memset.S b/libc/arch-arm/krait/bionic/memset.S
index 1566132..005dfd8 100644
--- a/libc/arch-arm/krait/bionic/memset.S
+++ b/libc/arch-arm/krait/bionic/memset.S
@@ -44,6 +44,7 @@
         bls         .L_done
 
         // Preserve lr for backtrace.
+        .save       {lr}
         push        {lr}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset lr, 0
@@ -74,6 +75,7 @@
 /* memset() returns its first argument.  */
 ENTRY(memset)
         .cfi_startproc
+        .save       {r0}
         stmfd       sp!, {r0}
         .cfi_def_cfa_offset 4
         .cfi_rel_offset r0, 0
diff --git a/libc/arch-arm/krait/bionic/strcmp.S b/libc/arch-arm/krait/bionic/strcmp.S
index d614b9d..d4cf3f4 100644
--- a/libc/arch-arm/krait/bionic/strcmp.S
+++ b/libc/arch-arm/krait/bionic/strcmp.S
@@ -123,8 +123,13 @@
         .macro  init
         /* Macro to save temporary registers and prepare magic values.  */
         subs    sp, sp, #16
+        .cfi_def_cfa_offset 16
         strd    r4, r5, [sp, #8]
+        .cfi_rel_offset r4, 8
+        .cfi_rel_offset r5, 12
         strd    r6, r7, [sp]
+        .cfi_rel_offset r6, 0
+        .cfi_rel_offset r7, 4
         mvn     r6, #0  /* all F */
         mov     r7, #0  /* all 0 */
         .endm   /* init */
@@ -165,18 +170,20 @@
 #endif /* not  __ARMEB__ */
         .endm /* setup_return */
 
+        .cfi_startproc
         pld [r0, #0]
         pld [r1, #0]
 
         /* Are both strings double-word aligned?  */
         orr     ip, r0, r1
         tst     ip, #7
-        bne     do_align
+        bne     .L_do_align
 
         /* Fast path.  */
+        .save   {r4-r7}
         init
 
-doubleword_aligned:
+.L_doubleword_aligned:
 
         /* Get here when the strings to compare are double-word aligned.  */
         /* Compare two words in every iteration.  */
@@ -189,14 +196,14 @@
         ldrd    r2, r3, [r0], #8
         ldrd    r4, r5, [r1], #8
 
-        magic_compare_and_branch w1=r2, w2=r4, label=return_24
-        magic_compare_and_branch w1=r3, w2=r5, label=return_35
+        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
+        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
         b       2b
 
-do_align:
+.L_do_align:
         /* Is the first string word-aligned?  */
         ands    ip, r0, #3
-        beq     word_aligned_r0
+        beq     .L_word_aligned_r0
 
         /* Fast compare byte by byte until the first string is word-aligned.  */
         /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
@@ -204,58 +211,58 @@
         bic     r0, r0, #3
         ldr     r2, [r0], #4
         lsls    ip, ip, #31
-        beq     byte2
-        bcs     byte3
+        beq     .L_byte2
+        bcs     .L_byte3
 
-byte1:
+.L_byte1:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE1_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbz   reg=r3, label=fast_return
+        bne     .L_fast_return
+        m_cbz   reg=r3, label=.L_fast_return
 
-byte2:
+.L_byte2:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE2_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbz   reg=r3, label=fast_return
+        bne     .L_fast_return
+        m_cbz   reg=r3, label=.L_fast_return
 
-byte3:
+.L_byte3:
         ldrb    ip, [r1], #1
         uxtb    r3, r2, ror #BYTE3_OFFSET
         subs    ip, r3, ip
-        bne     fast_return
-        m_cbnz  reg=r3, label=word_aligned_r0
+        bne     .L_fast_return
+        m_cbnz  reg=r3, label=.L_word_aligned_r0
 
-fast_return:
+.L_fast_return:
         mov     r0, ip
         bx      lr
 
-word_aligned_r0:
+.L_word_aligned_r0:
         init
         /* The first string is word-aligned.  */
         /* Is the second string word-aligned?  */
         ands    ip, r1, #3
-        bne     strcmp_unaligned
+        bne     .L_strcmp_unaligned
 
-word_aligned:
+.L_word_aligned:
         /* The strings are word-aligned. */
         /* Is the first string double-word aligned?  */
         tst     r0, #4
-        beq     doubleword_aligned_r0
+        beq     .L_doubleword_aligned_r0
 
         /* If r0 is not double-word aligned yet, align it by loading
         and comparing the next word from each string.  */
         ldr     r2, [r0], #4
         ldr     r4, [r1], #4
-        magic_compare_and_branch w1=r2 w2=r4 label=return_24
+        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
 
-doubleword_aligned_r0:
+.L_doubleword_aligned_r0:
         /* Get here when r0 is double-word aligned.  */
         /* Is r1 doubleword_aligned?  */
         tst     r1, #4
-        beq     doubleword_aligned
+        beq     .L_doubleword_aligned
 
         /* Get here when the strings to compare are word-aligned,
         r0 is double-word aligned, but r1 is not double-word aligned.  */
@@ -271,9 +278,9 @@
 
         /* Load the next double-word from each string and compare.  */
         ldrd    r2, r3, [r0], #8
-        magic_compare_and_branch w1=r2 w2=r5 label=return_25
+        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
         ldrd    r4, r5, [r1], #8
-        magic_compare_and_branch w1=r3 w2=r4 label=return_34
+        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
         b       3b
 
         .macro miscmp_word offsetlo offsethi
@@ -297,46 +304,46 @@
         and     r2, r3, r6, S2LOMEM #\offsetlo
         it      eq
         cmpeq   r2, r5
-        bne     return_25
+        bne     .L_return_25
         ldr     r5, [r1], #4
         cmp     ip, #0
         eor r3, r2, r3
         S2HIMEM r2, r5, #\offsethi
         it      eq
         cmpeq   r3, r2
-        bne     return_32
+        bne     .L_return_32
         b       7b
         .endm /* miscmp_word */
 
-strcmp_unaligned:
+.L_strcmp_unaligned:
         /* r0 is word-aligned, r1 is at offset ip from a word.  */
         /* Align r1 to the (previous) word-boundary.  */
         bic     r1, r1, #3
 
         /* Unaligned comparison word by word using LDRs. */
         cmp     ip, #2
-        beq     miscmp_word_16                    /* If ip == 2.  */
-        bge     miscmp_word_24                    /* If ip == 3.  */
+        beq     .L_miscmp_word_16                 /* If ip == 2.  */
+        bge     .L_miscmp_word_24                 /* If ip == 3.  */
         miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
-miscmp_word_24:  miscmp_word offsetlo=24 offsethi=8
+.L_miscmp_word_24:  miscmp_word offsetlo=24 offsethi=8
 
 
-return_32:
+.L_return_32:
         setup_return w1=r3, w2=r2
-        b       do_return
-return_34:
+        b       .L_do_return
+.L_return_34:
         setup_return w1=r3, w2=r4
-        b       do_return
-return_25:
+        b       .L_do_return
+.L_return_25:
         setup_return w1=r2, w2=r5
-        b       do_return
-return_35:
+        b       .L_do_return
+.L_return_35:
         setup_return w1=r3, w2=r5
-        b       do_return
-return_24:
+        b       .L_do_return
+.L_return_24:
         setup_return w1=r2, w2=r4
 
-do_return:
+.L_do_return:
 
 #ifdef __ARMEB__
         mov     r0, ip
@@ -348,11 +355,16 @@
         ldrd    r6, r7, [sp]
         ldrd    r4, r5, [sp, #8]
         adds    sp, sp, #16
+        .cfi_def_cfa_offset 0
+        .cfi_restore r4
+        .cfi_restore r5
+        .cfi_restore r6
+        .cfi_restore r7
 
         /* There is a zero or a different byte between r1 and r2.  */
         /* r0 contains a mask of all-zero bytes in r1.  */
         /* Using r0 and not ip here because cbz requires low register.  */
-        m_cbz   reg=r0, label=compute_return_value
+        m_cbz   reg=r0, label=.L_compute_return_value
         clz     r0, r0
         /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
         rsb     r0, r0, #24
@@ -360,7 +372,7 @@
         lsr     r1, r1, r0
         lsr     r2, r2, r0
 
-compute_return_value:
+.L_compute_return_value:
         movs    r0, #1
         cmp     r1, r2
         /* The return value is computed as follows.
@@ -380,7 +392,7 @@
      * previous version. See bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S
      * for the unedited version of this code.
      */
-miscmp_word_16:
+.L_miscmp_word_16:
 	wp1 .req r0
 	wp2 .req r1
 	b1  .req r2
@@ -453,6 +465,11 @@
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
     adds    sp, sp, #16
+    .cfi_def_cfa_offset 0
+    .cfi_restore r4
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
 
 	bx	lr
 
@@ -472,6 +489,12 @@
     ldrd    r6, r7, [sp]
     ldrd    r4, r5, [sp, #8]
     adds    sp, sp, #16
+    .cfi_def_cfa_offset 0
+    .cfi_restore r4
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
 
 	bx	lr
+    .cfi_endproc
 END(strcmp)
diff --git a/tests/fortify_test.cpp b/tests/fortify_test.cpp
index aa13736..5ec15b8 100644
--- a/tests/fortify_test.cpp
+++ b/tests/fortify_test.cpp
@@ -717,3 +717,60 @@
   ASSERT_EQ('\0', dst[13]);
   ASSERT_EQ('\0', dst[14]);
 }
+
+extern "C" char* __strcat_chk(char*, const char*, size_t);
+
+TEST(TEST_NAME, strcat_chk_max_int_size) {
+  char buf[10];
+  memset(buf, 'A', sizeof(buf));
+  buf[0] = 'a';
+  buf[1] = '\0';
+  char* res = __strcat_chk(buf, "01234567", (size_t)-1);
+  ASSERT_EQ(buf, res);
+  ASSERT_EQ('a',  buf[0]);
+  ASSERT_EQ('0',  buf[1]);
+  ASSERT_EQ('1',  buf[2]);
+  ASSERT_EQ('2',  buf[3]);
+  ASSERT_EQ('3',  buf[4]);
+  ASSERT_EQ('4',  buf[5]);
+  ASSERT_EQ('5',  buf[6]);
+  ASSERT_EQ('6',  buf[7]);
+  ASSERT_EQ('7',  buf[8]);
+  ASSERT_EQ('\0', buf[9]);
+}
+
+extern "C" char* __strcpy_chk(char*, const char*, size_t);
+
+TEST(TEST_NAME, strcpy_chk_max_int_size) {
+  char buf[10];
+  char* res = __strcpy_chk(buf, "012345678", (size_t)-1);
+  ASSERT_EQ(buf, res);
+  ASSERT_EQ('0',  buf[0]);
+  ASSERT_EQ('1',  buf[1]);
+  ASSERT_EQ('2',  buf[2]);
+  ASSERT_EQ('3',  buf[3]);
+  ASSERT_EQ('4',  buf[4]);
+  ASSERT_EQ('5',  buf[5]);
+  ASSERT_EQ('6',  buf[6]);
+  ASSERT_EQ('7',  buf[7]);
+  ASSERT_EQ('8',  buf[8]);
+  ASSERT_EQ('\0', buf[9]);
+}
+
+extern "C" void* __memcpy_chk(void*, const void*, size_t, size_t);
+
+TEST(TEST_NAME, memcpy_chk_max_int_size) {
+  char buf[10];
+  void* res = __memcpy_chk(buf, "012345678", sizeof(buf), (size_t)-1);
+  ASSERT_EQ((void*)buf, res);
+  ASSERT_EQ('0',  buf[0]);
+  ASSERT_EQ('1',  buf[1]);
+  ASSERT_EQ('2',  buf[2]);
+  ASSERT_EQ('3',  buf[3]);
+  ASSERT_EQ('4',  buf[4]);
+  ASSERT_EQ('5',  buf[5]);
+  ASSERT_EQ('6',  buf[6]);
+  ASSERT_EQ('7',  buf[7]);
+  ASSERT_EQ('8',  buf[8]);
+  ASSERT_EQ('\0', buf[9]);
+}
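
Note on the (size_t)-1 arguments: that is the value bionic's fortify
wrappers pass when the compiler cannot determine the destination size
(__builtin_object_size returns (size_t)-1 for "unknown"), so the checks
must treat it as "no limit" rather than as a negative length. These tests
pin down exactly the behavior the bge/bgt to bhs/bhi changes fix.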