Merge "pwd/grp: fix pwd _r reentrancy, new tests, clean up"
diff --git a/libc/Android.bp b/libc/Android.bp
index 5554f28..57d4039 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -790,13 +790,11 @@
                 "arch-arm/cortex-a9/bionic/memset.S",
                 "arch-arm/cortex-a9/bionic/stpcpy.S",
                 "arch-arm/cortex-a9/bionic/strcat.S",
-                "arch-arm/cortex-a9/bionic/strcmp.S",
                 "arch-arm/cortex-a9/bionic/strcpy.S",
                 "arch-arm/cortex-a9/bionic/strlen.S",
 
                 "arch-arm/krait/bionic/memcpy.S",
                 "arch-arm/krait/bionic/memset.S",
-                "arch-arm/krait/bionic/strcmp.S",
 
                 "arch-arm/cortex-a53/bionic/memcpy.S",
 
@@ -1584,12 +1582,6 @@
         "tz_version", // Version metadata for tzdata to help debugging.
     ],
 
-    // Leave the symbols in the shared library so that stack unwinders can produce
-    // meaningful name resolution.
-    strip: {
-        keep_symbols: true,
-    },
-
     // Do not pack libc.so relocations; see http://b/20645321 for details.
     pack_relocations: false,
 
@@ -1630,9 +1622,23 @@
             static: {
                 srcs: [":libc_sources_static_arm"],
             },
+
+            // 32-bit ARM does not produce complete exidx unwind information,
+            // so keep the .debug_frame section, which is relatively small
+            // and does include the needed unwind information.
+            // See b/132992102 for details.
+            strip: {
+                keep_symbols_and_debug_frame: true,
+            },
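+
+            // One way to sanity-check the stripped library (illustrative
+            // command, not part of the build):
+            //   readelf -S libc.so | grep -E '\.debug_frame|\.ARM\.exidx'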
         },
         arm64: {
             version_script: ":libc.arm64.map",
+
+            // Leave the symbols in the shared library so that stack unwinders can produce
+            // meaningful name resolution.
+            strip: {
+                keep_symbols: true,
+            },
         },
         x86: {
             // TODO: This is to work around b/24465209. Remove after root cause is fixed.
@@ -1640,9 +1646,21 @@
             ldflags: ["-Wl,--hash-style=both"],
 
             version_script: ":libc.x86.map",
+
+            // Leave the symbols in the shared library so that stack unwinders can produce
+            // meaningful name resolution.
+            strip: {
+                keep_symbols: true,
+            },
         },
         x86_64: {
             version_script: ":libc.x86_64.map",
+
+            // Leave the symbols in the shared library so that stack unwinders can produce
+            // meaningful name resolution.
+            strip: {
+                keep_symbols: true,
+            },
         },
     },
 
diff --git a/libc/NOTICE b/libc/NOTICE
index 298901f..ef31733 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -5330,6 +5330,34 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2012-2014 ARM Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. The name of the company may not be used to endorse or promote
+   products derived from this software without specific prior written
+   permission.
+
+THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c) 2013
      MIPS Technologies, Inc., California.
 
diff --git a/libc/arch-arm/cortex-a15/bionic/strcmp.S b/libc/arch-arm/cortex-a15/bionic/strcmp.S
index 58dbf17..467201b 100644
--- a/libc/arch-arm/cortex-a15/bionic/strcmp.S
+++ b/libc/arch-arm/cortex-a15/bionic/strcmp.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 ARM Ltd
+ * Copyright (c) 2012-2014 ARM Ltd
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,9 +29,9 @@
 #include <private/bionic_asm.h>
 
 #ifdef __ARMEB__
-#define S2LOMEM lsl
-#define S2LOMEMEQ lsleq
-#define S2HIMEM lsr
+#define S2LO lsl
+#define S2LOEQ lsleq
+#define S2HI lsr
 #define MSB 0x000000ff
 #define LSB 0xff000000
 #define BYTE0_OFFSET 24
@@ -39,9 +39,9 @@
 #define BYTE2_OFFSET 8
 #define BYTE3_OFFSET 0
 #else /* not  __ARMEB__ */
-#define S2LOMEM lsr
-#define S2LOMEMEQ lsreq
-#define S2HIMEM lsl
+#define S2LO lsr
+#define S2LOEQ lsreq
+#define S2HI lsl
 #define BYTE0_OFFSET 0
 #define BYTE1_OFFSET 8
 #define BYTE2_OFFSET 16
@@ -50,330 +50,446 @@
 #define LSB 0x000000ff
 #endif /* not  __ARMEB__ */
 
-.syntax         unified
+/* Parameters and result.  */
+#define src1		r0
+#define src2		r1
+#define result		r0	/* Overlaps src1.  */
 
-#if defined (__thumb__)
-        .thumb
-        .thumb_func
-#endif
+/* Internal variables.  */
+#define tmp1		r4
+#define tmp2		r5
+#define const_m1	r12
+
+/* Additional internal variables for 64-bit aligned data.  */
+#define data1a		r2
+#define data1b		r3
+#define data2a		r6
+#define data2b		r7
+#define syndrome_a	tmp1
+#define syndrome_b	tmp2
+
+/* Additional internal variables for 32-bit aligned data.  */
+#define data1		r2
+#define data2		r3
+#define syndrome	tmp2
+
+	/* Implementation of strcmp for ARMv7 when DSP instructions are
+	   available.  Use ldrd to support wider loads, provided the data
+	   is sufficiently aligned.  Use the UADD8/SEL parallel-add
+	   instructions to optimize the compares.  */
+
+	/* Build Options:
+	   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
+	   byte in the string.  If comparing completely random strings,
+	   the pre-check will save time, since there is a very high
+	   probability of a mismatch in the first character: we save
+	   significant overhead if this is the common case.  However,
+	   if strings are likely to be identical (e.g. because we're
+	   verifying a hit in a hash table), then this check is largely
+	   redundant.  */
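+
+	   In C terms the pre-check is roughly (illustrative sketch only):
+
+	       unsigned char c1 = *s1, c2 = *s2;
+	       if (c1 == 0 || c1 != c2)
+	           return c1 - c2;
+
+	   The CMP/CMPCS pair in the entry sequence fuses both tests:
+	   "cmp r2, #1" clears the carry only for a NUL byte, and
+	   "cmpcs r2, r3" compares the characters only when r2 is
+	   non-NUL.  */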
+
+
+.syntax         unified
+.thumb
 
         // To avoid warning about deprecated instructions, add an explicit
         // arch. The code generated is exactly the same.
         .arch armv7-a
 
+	/* Macro to compute and return the result value for word-aligned
+	   cases.  */
+	.macro strcmp_epilogue_aligned synd d1 d2 restore_r6
+#ifdef __ARM_BIG_ENDIAN
+	/* If data1 contains a zero byte, then syndrome will contain a 1 in
+	   bit 7 of that byte.  Otherwise, the highest set bit in the
+	   syndrome will highlight the first different bit.  It is therefore
+	   sufficient to extract the eight bits starting with the syndrome
+	   bit.  */
+	clz	tmp1, \synd
+	lsl	r1, \d2, tmp1
+	.if \restore_r6
+	ldrd	r6, r7, [sp, #8]
+	.endif
+	.cfi_restore 6
+	.cfi_restore 7
+	lsl	\d1, \d1, tmp1
+	.cfi_remember_state
+	lsr	result, \d1, #24
+	ldrd	r4, r5, [sp], #16
+	.cfi_restore 4
+	.cfi_restore 5
+	sub	result, result, r1, lsr #24
+	bx	lr
+#else
+	/* To use the big-endian trick we'd have to reverse all three words.
+	   That's slower than this approach.  */
+	rev	\synd, \synd
+	clz	tmp1, \synd
+	bic	tmp1, tmp1, #7
+	lsr	r1, \d2, tmp1
+	.cfi_remember_state
+	.if \restore_r6
+	ldrd	r6, r7, [sp, #8]
+	.endif
+	.cfi_restore 6
+	.cfi_restore 7
+	lsr	\d1, \d1, tmp1
+	and	result, \d1, #255
+	and	r1, r1, #255
+	ldrd	r4, r5, [sp], #16
+	.cfi_restore 4
+	.cfi_restore 5
+	sub	result, result, r1
+
+	bx	lr
+#endif
+	.endm
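+
+	/* For little-endian data the macro above computes, in C terms
+	   (illustrative sketch using GCC builtins):
+
+	       unsigned shift = __builtin_clz(__builtin_bswap32(synd)) & ~7u;
+	       return ((d1 >> shift) & 0xff) - ((d2 >> shift) & 0xff);
+
+	   i.e. it locates the first byte (in string order) that differs
+	   or is NUL and subtracts the corresponding bytes.  */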
+
+	.text
+	.p2align	5
+.Lstrcmp_start_addr:
+#ifndef STRCMP_NO_PRECHECK
+.Lfastpath_exit:
+	sub	r0, r2, r3
+	bx	lr
+	nop
+#endif
+
 ENTRY(strcmp_a15)
-      /* Use LDRD whenever possible.  */
+#ifndef STRCMP_NO_PRECHECK
+	ldrb	r2, [src1]
+	ldrb	r3, [src2]
+	cmp	r2, #1
+	it	cs
+	cmpcs	r2, r3
+	bne	.Lfastpath_exit
+#endif
+	.cfi_sections .debug_frame
+	strd	r4, r5, [sp, #-16]!
+	.cfi_def_cfa_offset 16
+	.cfi_offset 4, -16
+	.cfi_offset 5, -12
+	orr	tmp1, src1, src2
+	strd	r6, r7, [sp, #8]
+	.cfi_offset 6, -8
+	.cfi_offset 7, -4
+	mvn	const_m1, #0
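+	/* (tmp1 << 29) is zero iff ((src1 | src2) & 7) == 0, i.e. both
+	   pointers are already 8-byte aligned.  */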
+	lsl	r2, tmp1, #29
+	cbz	r2, .Lloop_aligned8
 
-/* The main thing to look out for when comparing large blocks is that
-   the loads do not cross a page boundary when loading past the index
-   of the byte with the first difference or the first string-terminator.
+.Lnot_aligned:
+	eor	tmp1, src1, src2
+	tst	tmp1, #7
+	bne	.Lmisaligned8
 
-   For example, if the strings are identical and the string-terminator
-   is at index k, byte by byte comparison will not load beyond address
-   s1+k and s2+k; word by word comparison may load up to 3 bytes beyond
-   k; double word - up to 7 bytes.  If the load of these bytes crosses
-   a page boundary, it might cause a memory fault (if the page is not mapped)
-   that would not have happened in byte by byte comparison.
+	/* Deal with mutual misalignment by aligning downwards and then
+	   masking off the unwanted loaded data to prevent a difference.  */
+	and	tmp1, src1, #7
+	bic	src1, src1, #7
+	and	tmp2, tmp1, #3
+	bic	src2, src2, #7
+	lsl	tmp2, tmp2, #3	/* Bytes -> bits.  */
+	ldrd	data1a, data1b, [src1], #16
+	tst	tmp1, #4
+	ldrd	data2a, data2b, [src2], #16
+	/* In thumb code we can't use MVN with a register shift, but
+	   we do have ORN.  */
+	S2HI	tmp1, const_m1, tmp2
+	orn	data1a, data1a, tmp1
+	orn	data2a, data2a, tmp1
+	beq	.Lstart_realigned8
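+	/* Misalignment of 4 or more: the strings start in the second
+	   word of each pair, so force the first words to all-ones; they
+	   then compare equal and contain no apparent NUL.  */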
+	orn	data1b, data1b, tmp1
+	mov	data1a, const_m1
+	orn	data2b, data2b, tmp1
+	mov	data2a, const_m1
+	b	.Lstart_realigned8
 
-   If an address is (double) word aligned, then a load of a (double) word
-   from that address will not cross a page boundary.
-   Therefore, the algorithm below considers word and double-word alignment
-   of strings separately.  */
+	/* Unroll the inner loop by a factor of 2, giving 16 bytes per
+	   pass.  */
+	.p2align 5,,12  /* Don't start in the tail bytes of a cache line.  */
+	.p2align 2	/* Always word aligned.  */
+.Lloop_aligned8:
+	ldrd	data1a, data1b, [src1], #16
+	ldrd	data2a, data2b, [src2], #16
+.Lstart_realigned8:
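+	/* UADD8 adds 0xff to every byte of data1a, setting each GE flag
+	   exactly when that byte is non-zero (the sum itself is
+	   discarded); SEL then keeps the XOR difference for non-zero
+	   bytes and substitutes 0xff where data1a held a NUL.  Per byte,
+	   in C terms (sketch):
+
+	       synd[i] = data1[i] ? (data1[i] ^ data2[i]) : 0xff;
+
+	   so the syndrome is non-zero iff the words differ or the first
+	   string contains a terminator.  */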
+	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits.  */
+	eor	syndrome_a, data1a, data2a
+	sel	syndrome_a, syndrome_a, const_m1
+	cbnz	syndrome_a, .Ldiff_in_a
+	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
+	eor	syndrome_b, data1b, data2b
+	sel	syndrome_b, syndrome_b, const_m1
+	cbnz	syndrome_b, .Ldiff_in_b
 
-/* High-level description of the algorithm.
+	ldrd	data1a, data1b, [src1, #-8]
+	ldrd	data2a, data2b, [src2, #-8]
+	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits.  */
+	eor	syndrome_a, data1a, data2a
+	sel	syndrome_a, syndrome_a, const_m1
+	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
+	eor	syndrome_b, data1b, data2b
+	sel	syndrome_b, syndrome_b, const_m1
+	/* Can't use CBZ for backwards branch.  */
+	orrs	syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
+	beq	.Lloop_aligned8
 
-   * The fast path: if both strings are double-word aligned,
-     use LDRD to load two words from each string in every loop iteration.
-   * If the strings have the same offset from a word boundary,
-     use LDRB to load and compare byte by byte until
-     the first string is aligned to a word boundary (at most 3 bytes).
-     This is optimized for quick return on short unaligned strings.
-   * If the strings have the same offset from a double-word boundary,
-     use LDRD to load two words from each string in every loop iteration, as in the fast path.
-   * If the strings do not have the same offset from a double-word boundary,
-     load a word from the second string before the loop to initialize the queue.
-     Use LDRD to load two words from every string in every loop iteration.
-     Inside the loop, load the second word from the second string only after comparing
-     the first word, using the queued value, to guarantee safety across page boundaries.
-   * If the strings do not have the same offset from a word boundary,
-     use LDR and a shift queue. Order of loads and comparisons matters,
-     similarly to the previous case.
+.Ldiff_found:
+	cbnz	syndrome_a, .Ldiff_in_a
 
-   * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value.
-   * The only difference between ARM and Thumb modes is the use of CBZ instruction.
-   * The only difference between big and little endian is the use of REV in little endian
-     to compute the return value, instead of MOV.
-*/
+.Ldiff_in_b:
+	strcmp_epilogue_aligned syndrome_b, data1b, data2b, 1
 
-        .macro m_cbz reg label
-#ifdef __thumb2__
-        cbz     \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        beq     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbz */
+.Ldiff_in_a:
+	.cfi_restore_state
+	strcmp_epilogue_aligned syndrome_a, data1a, data2a, 1
 
-        .macro m_cbnz reg label
-#ifdef __thumb2__
-        cbnz    \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        bne     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbnz */
+	.cfi_restore_state
+.Lmisaligned8:
+	tst	tmp1, #3
+	bne	.Lmisaligned4
+	ands	tmp1, src1, #3
+	bne	.Lmutual_align4
 
-        .macro  init
-        /* Macro to save temporary registers and prepare magic values.  */
-        subs    sp, sp, #16
-        .cfi_def_cfa_offset 16
-        strd    r4, r5, [sp, #8]
-        .cfi_rel_offset r4, 0
-        .cfi_rel_offset r5, 4
-        strd    r6, r7, [sp]
-        .cfi_rel_offset r6, 8
-        .cfi_rel_offset r7, 12
-        mvn     r6, #0  /* all F */
-        mov     r7, #0  /* all 0 */
-        .endm   /* init */
+	/* Unrolled by a factor of 2, to reduce the number of post-increment
+	   operations.  */
+.Lloop_aligned4:
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+.Lstart_realigned4:
+	uadd8	syndrome, data1, const_m1	/* Only need GE bits.  */
+	eor	syndrome, data1, data2
+	sel	syndrome, syndrome, const_m1
+	cbnz	syndrome, .Laligned4_done
+	ldr	data1, [src1, #-4]
+	ldr	data2, [src2, #-4]
+	uadd8	syndrome, data1, const_m1
+	eor	syndrome, data1, data2
+	sel	syndrome, syndrome, const_m1
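+	/* CBZ cannot encode a backwards branch, hence CMP/BEQ below.  */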
+	cmp	syndrome, #0
+	beq	.Lloop_aligned4
 
-        .macro  magic_compare_and_branch w1 w2 label
-        /* Macro to compare registers w1 and w2 and conditionally branch to label.  */
-        cmp     \w1, \w2        /* Are w1 and w2 the same?  */
-        magic_find_zero_bytes \w1
-        it      eq
-        cmpeq   ip, #0          /* Is there a zero byte in w1?  */
-        bne     \label
-        .endm /* magic_compare_and_branch */
+.Laligned4_done:
+	strcmp_epilogue_aligned syndrome, data1, data2, 0
 
-        .macro  magic_find_zero_bytes w1
-        /* Macro to find all-zero bytes in w1, result is in ip.  */
-        uadd8   ip, \w1, r6
-        sel     ip, r7, r6
-        .endm /* magic_find_zero_bytes */
+.Lmutual_align4:
+	.cfi_restore_state
+	/* Deal with mutual misalignment by aligning downwards and then
+	   masking off the unwanted loaded data to prevent a difference.  */
+	lsl	tmp1, tmp1, #3	/* Bytes -> bits.  */
+	bic	src1, src1, #3
+	ldr	data1, [src1], #8
+	bic	src2, src2, #3
+	ldr	data2, [src2], #8
 
-        .macro  setup_return w1 w2
-#ifdef __ARMEB__
-        mov     r1, \w1
-        mov     r2, \w2
-#else /* not  __ARMEB__ */
-        rev     r1, \w1
-        rev     r2, \w2
-#endif /* not  __ARMEB__ */
-        .endm /* setup_return */
+	/* In thumb code we can't use MVN with a register shift, but
+	   we do have ORN.  */
+	S2HI	tmp1, const_m1, tmp1
+	orn	data1, data1, tmp1
+	orn	data2, data2, tmp1
+	b	.Lstart_realigned4
 
-        pld [r0, #0]
-        pld [r1, #0]
+.Lmisaligned4:
+	ands	tmp1, src1, #3
+	beq	.Lsrc1_aligned
+	sub	src2, src2, tmp1
+	bic	src1, src1, #3
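+	/* LSLS #31 moves bit 1 of the offset into C and sets Z iff bit 0
+	   is clear, dispatching on src1 & 3: 2 -> .Laligned_m2,
+	   3 -> .Laligned_m1, 1 falls through.  */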
+	lsls	tmp1, tmp1, #31
+	ldr	data1, [src1], #4
+	beq	.Laligned_m2
+	bcs	.Laligned_m1
 
-        /* Are both strings double-word aligned?  */
-        orr     ip, r0, r1
-        tst     ip, #7
-        bne     .L_do_align
+#ifdef STRCMP_NO_PRECHECK
+	ldrb	data2, [src2, #1]
+	uxtb	tmp1, data1, ror #BYTE1_OFFSET
+	subs	tmp1, tmp1, data2
+	bne	.Lmisaligned_exit
+	cbz	data2, .Lmisaligned_exit
 
-        /* Fast path.  */
-        init
+.Laligned_m2:
+	ldrb	data2, [src2, #2]
+	uxtb	tmp1, data1, ror #BYTE2_OFFSET
+	subs	tmp1, tmp1, data2
+	bne	.Lmisaligned_exit
+	cbz	data2, .Lmisaligned_exit
 
-.L_doubleword_aligned:
+.Laligned_m1:
+	ldrb	data2, [src2, #3]
+	uxtb	tmp1, data1, ror #BYTE3_OFFSET
+	subs	tmp1, tmp1, data2
+	bne	.Lmisaligned_exit
+	add	src2, src2, #4
+	cbnz	data2, .Lsrc1_aligned
+#else  /* STRCMP_NO_PRECHECK */
+	/* If we've done the pre-check, then we don't need to check the
+	   first byte again here.  */
+	ldrb	data2, [src2, #2]
+	uxtb	tmp1, data1, ror #BYTE2_OFFSET
+	subs	tmp1, tmp1, data2
+	bne	.Lmisaligned_exit
+	cbz	data2, .Lmisaligned_exit
 
-        /* Get here when the strings to compare are double-word aligned.  */
-        /* Compare two words in every iteration.  */
-        .p2align        2
-2:
-        pld [r0, #16]
-        pld [r1, #16]
+.Laligned_m2:
+	ldrb	data2, [src2, #3]
+	uxtb	tmp1, data1, ror #BYTE3_OFFSET
+	subs	tmp1, tmp1, data2
+	bne	.Lmisaligned_exit
+	cbnz	data2, .Laligned_m1
+#endif
 
-        /* Load the next double-word from each string.  */
-        ldrd    r2, r3, [r0], #8
-        ldrd    r4, r5, [r1], #8
+.Lmisaligned_exit:
+	.cfi_remember_state
+	mov	result, tmp1
+	ldr	r4, [sp], #16
+	.cfi_restore 4
+	bx	lr
 
-        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
-        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
-        b       2b
+#ifndef STRCMP_NO_PRECHECK
+.Laligned_m1:
+	add	src2, src2, #4
+#endif
+.Lsrc1_aligned:
+	.cfi_restore_state
+	/* src1 is word aligned, but src2 has no common alignment
+	   with it.  */
+	ldr	data1, [src1], #4
+	lsls	tmp1, src2, #31		/* C=src2[1], Z=src2[0].  */
 
-.L_do_align:
-        /* Is the first string word-aligned?  */
-        ands    ip, r0, #3
-        beq     .L_word_aligned_r0
+	bic	src2, src2, #3
+	ldr	data2, [src2], #4
+	bhi	.Loverlap1		/* C=1, Z=0 => src2[1:0] = 0b11.  */
+	bcs	.Loverlap2		/* C=1, Z=1 => src2[1:0] = 0b10.  */
 
-        /* Fast compare byte by byte until the first string is word-aligned.  */
-        /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
-        to read until the next word boundary is 4-ip.  */
-        bic     r0, r0, #3
-        ldr     r2, [r0], #4
-        lsls    ip, ip, #31
-        beq     .L_byte2
-        bcs     .L_byte3
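+	/* Each overlap loop is a software shift queue: src2 was aligned
+	   down, so successive words of src2 are shifted and combined to
+	   line up with data1; .LoverlapN handles the case where N bytes
+	   of the current src2 word overlap the data1 word.  */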
+	/* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
+.Loverlap3:
+	bic	tmp1, data1, #MSB
+	uadd8	syndrome, data1, const_m1
+	eors	syndrome, tmp1, data2, S2LO #8
+	sel	syndrome, syndrome, const_m1
+	bne	4f
+	cbnz	syndrome, 5f
+	ldr	data2, [src2], #4
+	eor	tmp1, tmp1, data1
+	cmp	tmp1, data2, S2HI #24
+	bne	6f
+	ldr	data1, [src1], #4
+	b	.Loverlap3
+4:
+	S2LO	data2, data2, #8
+	b	.Lstrcmp_tail
 
-.L_byte1:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE1_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
+5:
+	bics	syndrome, syndrome, #MSB
+	bne	.Lstrcmp_done_equal
 
-.L_byte2:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE2_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
+	/* We can only get here if the MSB of data1 contains 0, so
+	   fast-path the exit.  */
+	ldrb	result, [src2]
+	.cfi_remember_state
+	ldrd	r4, r5, [sp], #16
+	.cfi_restore 4
+	.cfi_restore 5
+	/* R6/7 Not used in this sequence.  */
+	.cfi_restore 6
+	.cfi_restore 7
+	neg	result, result
+	bx	lr
 
-.L_byte3:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE3_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbnz  reg=r3, label=.L_word_aligned_r0
+6:
+	.cfi_restore_state
+	S2LO	data1, data1, #24
+	and	data2, data2, #LSB
+	b	.Lstrcmp_tail
 
-.L_fast_return:
-        mov     r0, ip
-        bx      lr
+	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
+.Loverlap2:
+	and	tmp1, data1, const_m1, S2LO #16
+	uadd8	syndrome, data1, const_m1
+	eors	syndrome, tmp1, data2, S2LO #16
+	sel	syndrome, syndrome, const_m1
+	bne	4f
+	cbnz	syndrome, 5f
+	ldr	data2, [src2], #4
+	eor	tmp1, tmp1, data1
+	cmp	tmp1, data2, S2HI #16
+	bne	6f
+	ldr	data1, [src1], #4
+	b	.Loverlap2
+4:
+	S2LO	data2, data2, #16
+	b	.Lstrcmp_tail
+5:
+	ands	syndrome, syndrome, const_m1, S2LO #16
+	bne	.Lstrcmp_done_equal
 
-.L_word_aligned_r0:
-        init
-        /* The first string is word-aligned.  */
-        /* Is the second string word-aligned?  */
-        ands    ip, r1, #3
-        bne     .L_strcmp_unaligned
+	ldrh	data2, [src2]
+	S2LO	data1, data1, #16
+#ifdef __ARM_BIG_ENDIAN
+	lsl	data2, data2, #16
+#endif
+	b	.Lstrcmp_tail
 
-.L_word_aligned:
-        /* The strings are word-aligned. */
-        /* Is the first string double-word aligned?  */
-        tst     r0, #4
-        beq     .L_doubleword_aligned_r0
+6:
+	S2LO	data1, data1, #16
+	and	data2, data2, const_m1, S2LO #16
+	b	.Lstrcmp_tail
 
-        /* If r0 is not double-word aligned yet, align it by loading
-        and comparing the next word from each string.  */
-        ldr     r2, [r0], #4
-        ldr     r4, [r1], #4
-        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
+	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
+.Loverlap1:
+	and	tmp1, data1, #LSB
+	uadd8	syndrome, data1, const_m1
+	eors	syndrome, tmp1, data2, S2LO #24
+	sel	syndrome, syndrome, const_m1
+	bne	4f
+	cbnz	syndrome, 5f
+	ldr	data2, [src2], #4
+	eor	tmp1, tmp1, data1
+	cmp	tmp1, data2, S2HI #8
+	bne	6f
+	ldr	data1, [src1], #4
+	b	.Loverlap1
+4:
+	S2LO	data2, data2, #24
+	b	.Lstrcmp_tail
+5:
+	tst	syndrome, #LSB
+	bne	.Lstrcmp_done_equal
+	ldr	data2, [src2]
+6:
+	S2LO	data1, data1, #8
+	bic	data2, data2, #MSB
+	b	.Lstrcmp_tail
 
-.L_doubleword_aligned_r0:
-        /* Get here when r0 is double-word aligned.  */
-        /* Is r1 doubleword_aligned?  */
-        tst     r1, #4
-        beq     .L_doubleword_aligned
+.Lstrcmp_done_equal:
+	mov	result, #0
+	.cfi_remember_state
+	ldrd	r4, r5, [sp], #16
+	.cfi_restore 4
+	.cfi_restore 5
+	/* R6/7 not used in this sequence.  */
+	.cfi_restore 6
+	.cfi_restore 7
+	bx	lr
 
-        /* Get here when the strings to compare are word-aligned,
-        r0 is double-word aligned, but r1 is not double-word aligned.  */
-
-        /* Initialize the queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare two words in every iteration.  */
-        .p2align        2
-3:
-        pld [r0, #16]
-        pld [r1, #16]
-
-        /* Load the next double-word from each string and compare.  */
-        ldrd    r2, r3, [r0], #8
-        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
-        ldrd    r4, r5, [r1], #8
-        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
-        b       3b
-
-        .macro miscmp_word offsetlo offsethi
-        /* Macro to compare misaligned strings.  */
-        /* r0, r1 are word-aligned, and at least one of the strings
-        is not double-word aligned.  */
-        /* Compare one word in every loop iteration.  */
-        /* OFFSETLO is the original bit-offset of r1 from a word-boundary,
-        OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word).  */
-
-        /* Initialize the shift queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare one word from each string in every loop iteration.  */
-        .p2align        2
-7:
-        ldr     r3, [r0], #4
-        S2LOMEM r5, r5, #\offsetlo
-        magic_find_zero_bytes w1=r3
-        cmp     r7, ip, S2HIMEM #\offsetlo
-        and     r2, r3, r6, S2LOMEM #\offsetlo
-        it      eq
-        cmpeq   r2, r5
-        bne     .L_return_25
-        ldr     r5, [r1], #4
-        cmp     ip, #0
-        eor r3, r2, r3
-        S2HIMEM r2, r5, #\offsethi
-        it      eq
-        cmpeq   r3, r2
-        bne     .L_return_32
-        b       7b
-        .endm /* miscmp_word */
-
-.L_strcmp_unaligned:
-        /* r0 is word-aligned, r1 is at offset ip from a word.  */
-        /* Align r1 to the (previous) word-boundary.  */
-        bic     r1, r1, #3
-
-        /* Unaligned comparison word by word using LDRs. */
-        cmp     ip, #2
-        beq     .L_miscmp_word_16                 /* If ip == 2.  */
-        bge     .L_miscmp_word_24                 /* If ip == 3.  */
-        miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
-.L_miscmp_word_16:  miscmp_word offsetlo=16 offsethi=16
-.L_miscmp_word_24:  miscmp_word offsetlo=24 offsethi=8
-
-
-.L_return_32:
-        setup_return w1=r3, w2=r2
-        b       .L_do_return
-.L_return_34:
-        setup_return w1=r3, w2=r4
-        b       .L_do_return
-.L_return_25:
-        setup_return w1=r2, w2=r5
-        b       .L_do_return
-.L_return_35:
-        setup_return w1=r3, w2=r5
-        b       .L_do_return
-.L_return_24:
-        setup_return w1=r2, w2=r4
-
-.L_do_return:
-
-#ifdef __ARMEB__
-        mov     r0, ip
-#else /* not  __ARMEB__ */
-        rev     r0, ip
-#endif /* not  __ARMEB__ */
-
-        /* Restore temporaries early, before computing the return value.  */
-        ldrd    r6, r7, [sp]
-        ldrd    r4, r5, [sp, #8]
-        adds    sp, sp, #16
-        .cfi_def_cfa_offset 0
-        .cfi_restore r4
-        .cfi_restore r5
-        .cfi_restore r6
-        .cfi_restore r7
-
-        /* There is a zero or a different byte between r1 and r2.  */
-        /* r0 contains a mask of all-zero bytes in r1.  */
-        /* Using r0 and not ip here because cbz requires low register.  */
-        m_cbz   reg=r0, label=.L_compute_return_value
-        clz     r0, r0
-        /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
-        rsb     r0, r0, #24
-        /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1.  */
-        lsr     r1, r1, r0
-        lsr     r2, r2, r0
-
-.L_compute_return_value:
-        movs    r0, #1
-        cmp     r1, r2
-        /* The return value is computed as follows.
-        If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return.
-        If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0,
-        which means r0:=r0-r0-1 and r0 is #-1 at return.
-        If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1,
-        which means r0:=r0-r0 and r0 is #0 at return.
-        (C==0 and Z==1) cannot happen because the carry bit is "not borrow".  */
-        it      ls
-        sbcls   r0, r0, r0
-        bx      lr
+.Lstrcmp_tail:
+	.cfi_restore_state
+#ifndef __ARM_BIG_ENDIAN
+	rev	data1, data1
+	rev	data2, data2
+	/* Now everything looks big-endian...  */
+#endif
+	uadd8	tmp1, data1, const_m1
+	eor	tmp1, data1, data2
+	sel	syndrome, tmp1, const_m1
+	clz	tmp1, syndrome
+	lsl	data1, data1, tmp1
+	lsl	data2, data2, tmp1
+	lsr	result, data1, #24
+	ldrd	r4, r5, [sp], #16
+	.cfi_restore 4
+	.cfi_restore 5
+	/* R6/7 not used in this sequence.  */
+	.cfi_restore 6
+	.cfi_restore 7
+	sub	result, result, data2, lsr #24
+	bx	lr
 END(strcmp_a15)
diff --git a/libc/arch-arm/cortex-a9/bionic/strcmp.S b/libc/arch-arm/cortex-a9/bionic/strcmp.S
deleted file mode 100644
index ba7ea13..0000000
--- a/libc/arch-arm/cortex-a9/bionic/strcmp.S
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (c) 2013 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- *    products derived from this software without specific prior written
- *    permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <private/bionic_asm.h>
-
-#ifdef __ARMEB__
-#define S2LOMEM lsl
-#define S2LOMEMEQ lsleq
-#define S2HIMEM lsr
-#define MSB 0x000000ff
-#define LSB 0xff000000
-#define BYTE0_OFFSET 24
-#define BYTE1_OFFSET 16
-#define BYTE2_OFFSET 8
-#define BYTE3_OFFSET 0
-#else /* not  __ARMEB__ */
-#define S2LOMEM lsr
-#define S2LOMEMEQ lsreq
-#define S2HIMEM lsl
-#define BYTE0_OFFSET 0
-#define BYTE1_OFFSET 8
-#define BYTE2_OFFSET 16
-#define BYTE3_OFFSET 24
-#define MSB 0xff000000
-#define LSB 0x000000ff
-#endif /* not  __ARMEB__ */
-
-.syntax         unified
-
-// To avoid warning about deprecated instructions, add an explicit
-// arch. The code generated is exactly the same.
-.arch armv7-a
-
-#if defined (__thumb__)
-        .thumb
-        .thumb_func
-#endif
-
-ENTRY(strcmp_a9)
-      /* Use LDRD whenever possible.  */
-
-/* The main thing to look out for when comparing large blocks is that
-   the loads do not cross a page boundary when loading past the index
-   of the byte with the first difference or the first string-terminator.
-
-   For example, if the strings are identical and the string-terminator
-   is at index k, byte by byte comparison will not load beyond address
-   s1+k and s2+k; word by word comparison may load up to 3 bytes beyond
-   k; double word - up to 7 bytes.  If the load of these bytes crosses
-   a page boundary, it might cause a memory fault (if the page is not mapped)
-   that would not have happened in byte by byte comparison.
-
-   If an address is (double) word aligned, then a load of a (double) word
-   from that address will not cross a page boundary.
-   Therefore, the algorithm below considers word and double-word alignment
-   of strings separately.  */
-
-/* High-level description of the algorithm.
-
-   * The fast path: if both strings are double-word aligned,
-     use LDRD to load two words from each string in every loop iteration.
-   * If the strings have the same offset from a word boundary,
-     use LDRB to load and compare byte by byte until
-     the first string is aligned to a word boundary (at most 3 bytes).
-     This is optimized for quick return on short unaligned strings.
-   * If the strings have the same offset from a double-word boundary,
-     use LDRD to load two words from each string in every loop iteration, as in the fast path.
-   * If the strings do not have the same offset from a double-word boundary,
-     load a word from the second string before the loop to initialize the queue.
-     Use LDRD to load two words from every string in every loop iteration.
-     Inside the loop, load the second word from the second string only after comparing
-     the first word, using the queued value, to guarantee safety across page boundaries.
-   * If the strings do not have the same offset from a word boundary,
-     use LDR and a shift queue. Order of loads and comparisons matters,
-     similarly to the previous case.
-
-   * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value.
-   * The only difference between ARM and Thumb modes is the use of CBZ instruction.
-   * The only difference between big and little endian is the use of REV in little endian
-     to compute the return value, instead of MOV.
-*/
-
-        .macro m_cbz reg label
-#ifdef __thumb2__
-        cbz     \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        beq     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbz */
-
-        .macro m_cbnz reg label
-#ifdef __thumb2__
-        cbnz    \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        bne     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbnz */
-
-        .macro  init
-        /* Macro to save temporary registers and prepare magic values.  */
-        subs    sp, sp, #16
-        .cfi_def_cfa_offset 16
-        strd    r4, r5, [sp, #8]
-        .cfi_rel_offset r4, 0
-        .cfi_rel_offset r5, 4
-        strd    r6, r7, [sp]
-        .cfi_rel_offset r6, 8
-        .cfi_rel_offset r7, 12
-        mvn     r6, #0  /* all F */
-        mov     r7, #0  /* all 0 */
-        .endm   /* init */
-
-        .macro  magic_compare_and_branch w1 w2 label
-        /* Macro to compare registers w1 and w2 and conditionally branch to label.  */
-        cmp     \w1, \w2        /* Are w1 and w2 the same?  */
-        magic_find_zero_bytes \w1
-        it      eq
-        cmpeq   ip, #0          /* Is there a zero byte in w1?  */
-        bne     \label
-        .endm /* magic_compare_and_branch */
-
-        .macro  magic_find_zero_bytes w1
-        /* Macro to find all-zero bytes in w1, result is in ip.  */
-        uadd8   ip, \w1, r6
-        sel     ip, r7, r6
-        .endm /* magic_find_zero_bytes */
-
-        .macro  setup_return w1 w2
-#ifdef __ARMEB__
-        mov     r1, \w1
-        mov     r2, \w2
-#else /* not  __ARMEB__ */
-        rev     r1, \w1
-        rev     r2, \w2
-#endif /* not  __ARMEB__ */
-        .endm /* setup_return */
-
-        pld [r0, #0]
-        pld [r1, #0]
-
-        /* Are both strings double-word aligned?  */
-        orr     ip, r0, r1
-        tst     ip, #7
-        bne     .L_do_align
-
-        /* Fast path.  */
-        init
-
-.L_doubleword_aligned:
-
-        /* Get here when the strings to compare are double-word aligned.  */
-        /* Compare two words in every iteration.  */
-        .p2align        2
-2:
-        pld [r0, #16]
-        pld [r1, #16]
-
-        /* Load the next double-word from each string.  */
-        ldrd    r2, r3, [r0], #8
-        ldrd    r4, r5, [r1], #8
-
-        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
-        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
-        b       2b
-
-.L_do_align:
-        /* Is the first string word-aligned?  */
-        ands    ip, r0, #3
-        beq     .L_word_aligned_r0
-
-        /* Fast compare byte by byte until the first string is word-aligned.  */
-        /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
-        to read until the next word boundary is 4-ip.  */
-        bic     r0, r0, #3
-        ldr     r2, [r0], #4
-        lsls    ip, ip, #31
-        beq     .L_byte2
-        bcs     .L_byte3
-
-.L_byte1:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE1_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
-
-.L_byte2:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE2_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
-
-.L_byte3:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE3_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbnz  reg=r3, label=.L_word_aligned_r0
-
-.L_fast_return:
-        mov     r0, ip
-        bx      lr
-
-.L_word_aligned_r0:
-        init
-        /* The first string is word-aligned.  */
-        /* Is the second string word-aligned?  */
-        ands    ip, r1, #3
-        bne     .L_strcmp_unaligned
-
-.L_word_aligned:
-        /* The strings are word-aligned. */
-        /* Is the first string double-word aligned?  */
-        tst     r0, #4
-        beq     .L_doubleword_aligned_r0
-
-        /* If r0 is not double-word aligned yet, align it by loading
-        and comparing the next word from each string.  */
-        ldr     r2, [r0], #4
-        ldr     r4, [r1], #4
-        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
-
-.L_doubleword_aligned_r0:
-        /* Get here when r0 is double-word aligned.  */
-        /* Is r1 doubleword_aligned?  */
-        tst     r1, #4
-        beq     .L_doubleword_aligned
-
-        /* Get here when the strings to compare are word-aligned,
-        r0 is double-word aligned, but r1 is not double-word aligned.  */
-
-        /* Initialize the queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare two words in every iteration.  */
-        .p2align        2
-3:
-        pld [r0, #16]
-        pld [r1, #16]
-
-        /* Load the next double-word from each string and compare.  */
-        ldrd    r2, r3, [r0], #8
-        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
-        ldrd    r4, r5, [r1], #8
-        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
-        b       3b
-
-        .macro miscmp_word offsetlo offsethi
-        /* Macro to compare misaligned strings.  */
-        /* r0, r1 are word-aligned, and at least one of the strings
-        is not double-word aligned.  */
-        /* Compare one word in every loop iteration.  */
-        /* OFFSETLO is the original bit-offset of r1 from a word-boundary,
-        OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word).  */
-
-        /* Initialize the shift queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare one word from each string in every loop iteration.  */
-        .p2align        2
-7:
-        ldr     r3, [r0], #4
-        S2LOMEM r5, r5, #\offsetlo
-        magic_find_zero_bytes w1=r3
-        cmp     r7, ip, S2HIMEM #\offsetlo
-        and     r2, r3, r6, S2LOMEM #\offsetlo
-        it      eq
-        cmpeq   r2, r5
-        bne     .L_return_25
-        ldr     r5, [r1], #4
-        cmp     ip, #0
-        eor r3, r2, r3
-        S2HIMEM r2, r5, #\offsethi
-        it      eq
-        cmpeq   r3, r2
-        bne     .L_return_32
-        b       7b
-        .endm /* miscmp_word */
-
-.L_return_32:
-        setup_return w1=r3, w2=r2
-        b       .L_do_return
-.L_return_34:
-        setup_return w1=r3, w2=r4
-        b       .L_do_return
-.L_return_25:
-        setup_return w1=r2, w2=r5
-        b       .L_do_return
-.L_return_35:
-        setup_return w1=r3, w2=r5
-        b       .L_do_return
-.L_return_24:
-        setup_return w1=r2, w2=r4
-
-.L_do_return:
-
-#ifdef __ARMEB__
-        mov     r0, ip
-#else /* not  __ARMEB__ */
-        rev     r0, ip
-#endif /* not  __ARMEB__ */
-
-        /* Restore temporaries early, before computing the return value.  */
-        ldrd    r6, r7, [sp]
-        ldrd    r4, r5, [sp, #8]
-        adds    sp, sp, #16
-        .cfi_def_cfa_offset 0
-        .cfi_restore r4
-        .cfi_restore r5
-        .cfi_restore r6
-        .cfi_restore r7
-
-        /* There is a zero or a different byte between r1 and r2.  */
-        /* r0 contains a mask of all-zero bytes in r1.  */
-        /* Using r0 and not ip here because cbz requires low register.  */
-        m_cbz   reg=r0, label=.L_compute_return_value
-        clz     r0, r0
-        /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
-        rsb     r0, r0, #24
-        /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1.  */
-        lsr     r1, r1, r0
-        lsr     r2, r2, r0
-
-.L_compute_return_value:
-        movs    r0, #1
-        cmp     r1, r2
-        /* The return value is computed as follows.
-        If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return.
-        If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0,
-        which means r0:=r0-r0-1 and r0 is #-1 at return.
-        If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1,
-        which means r0:=r0-r0 and r0 is #0 at return.
-        (C==0 and Z==1) cannot happen because the carry bit is "not borrow".  */
-        it      ls
-        sbcls   r0, r0, r0
-        bx      lr
-
-    /* The code from the previous version of strcmp.S handles all of the
-     * cases where the first string and seconds string cannot both be
-     * aligned to a word boundary faster than the new algorithm. See
-     * bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S for the unedited
-     * version of the code.
-     */
-.L_strcmp_unaligned:
-	wp1 .req r0
-	wp2 .req r1
-	b1  .req r2
-	w1  .req r4
-	w2  .req r5
-	t1  .req ip
-	@ r3 is scratch
-
-2:
-	mov	b1, #1
-	orr	b1, b1, b1, lsl #8
-	orr	b1, b1, b1, lsl #16
-
-	and	t1, wp2, #3
-	bic	wp2, wp2, #3
-	ldr	w1, [wp1], #4
-	ldr	w2, [wp2], #4
-	cmp	t1, #2
-	beq	2f
-	bhi	3f
-
-	/* Critical inner Loop: Block with 3 bytes initial overlap */
-	.p2align	2
-1:
-	bic	t1, w1, #MSB
-	cmp	t1, w2, S2LOMEM #8
-	sub	r3, w1, b1
-	bic	r3, r3, w1
-	bne	4f
-	ands	r3, r3, b1, lsl #7
-	it	eq
-	ldreq	w2, [wp2], #4
-	bne	5f
-	eor	t1, t1, w1
-	cmp	t1, w2, S2HIMEM #24
-	bne	6f
-	ldr	w1, [wp1], #4
-	b	1b
-4:
-	S2LOMEM	w2, w2, #8
-	b	8f
-
-5:
-#ifdef __ARMEB__
-	/* The syndrome value may contain false ones if the string ends
-	 * with the bytes 0x01 0x00
-	 */
-	tst	w1, #0xff000000
-	itt	ne
-	tstne	w1, #0x00ff0000
-	tstne	w1, #0x0000ff00
-	beq	7f
-#else
-	bics	r3, r3, #0xff000000
-	bne	7f
-#endif
-	ldrb	w2, [wp2]
-	S2LOMEM	t1, w1, #24
-#ifdef __ARMEB__
-	lsl	w2, w2, #24
-#endif
-	b	8f
-
-6:
-	S2LOMEM	t1, w1, #24
-	and	w2, w2, #LSB
-	b	8f
-
-	/* Critical inner Loop: Block with 2 bytes initial overlap */
-	.p2align	2
-2:
-	S2HIMEM	t1, w1, #16
-	sub	r3, w1, b1
-	S2LOMEM	t1, t1, #16
-	bic	r3, r3, w1
-	cmp	t1, w2, S2LOMEM #16
-	bne	4f
-	ands	r3, r3, b1, lsl #7
-	it	eq
-	ldreq	w2, [wp2], #4
-	bne	5f
-	eor	t1, t1, w1
-	cmp	t1, w2, S2HIMEM #16
-	bne	6f
-	ldr	w1, [wp1], #4
-	b	2b
-
-5:
-#ifdef __ARMEB__
-	/* The syndrome value may contain false ones if the string ends
-	 * with the bytes 0x01 0x00
-	 */
-	tst	w1, #0xff000000
-	it	ne
-	tstne	w1, #0x00ff0000
-	beq	7f
-#else
-	lsls	r3, r3, #16
-	bne	7f
-#endif
-	ldrh	w2, [wp2]
-	S2LOMEM	t1, w1, #16
-#ifdef __ARMEB__
-	lsl	w2, w2, #16
-#endif
-	b	8f
-
-6:
-	S2HIMEM	w2, w2, #16
-	S2LOMEM	t1, w1, #16
-4:
-	S2LOMEM	w2, w2, #16
-	b	8f
-
-	/* Critical inner Loop: Block with 1 byte initial overlap */
-	.p2align	2
-3:
-	and	t1, w1, #LSB
-	cmp	t1, w2, S2LOMEM #24
-	sub	r3, w1, b1
-	bic	r3, r3, w1
-	bne	4f
-	ands	r3, r3, b1, lsl #7
-	it	eq
-	ldreq	w2, [wp2], #4
-	bne	5f
-	eor	t1, t1, w1
-	cmp	t1, w2, S2HIMEM #8
-	bne	6f
-	ldr	w1, [wp1], #4
-	b	3b
-4:
-	S2LOMEM	w2, w2, #24
-	b	8f
-5:
-	/* The syndrome value may contain false ones if the string ends
-	 * with the bytes 0x01 0x00
-	 */
-	tst	w1, #LSB
-	beq	7f
-	ldr	w2, [wp2], #4
-6:
-	S2LOMEM	t1, w1, #8
-	bic	w2, w2, #MSB
-	b	8f
-7:
-	mov	r0, #0
-
-    /* Restore registers and stack. */
-    ldrd    r6, r7, [sp]
-    ldrd    r4, r5, [sp, #8]
-    adds    sp, sp, #16
-    .cfi_def_cfa_offset 0
-    .cfi_restore r4
-    .cfi_restore r5
-    .cfi_restore r6
-    .cfi_restore r7
-
-	bx	lr
-
-8:
-	and	r2, t1, #LSB
-	and	r0, w2, #LSB
-	cmp	r0, #1
-	it	cs
-	cmpcs	r0, r2
-	itt	eq
-	S2LOMEMEQ	t1, t1, #8
-	S2LOMEMEQ	w2, w2, #8
-	beq	8b
-	sub	r0, r2, r0
-
-    /* Restore registers and stack. */
-    ldrd    r6, r7, [sp]
-    ldrd    r4, r5, [sp, #8]
-    adds    sp, sp, #16
-
-	bx	lr
-END(strcmp_a9)
diff --git a/libc/arch-arm/dynamic_function_dispatch.cpp b/libc/arch-arm/dynamic_function_dispatch.cpp
index 640f330..5e1b8b0 100644
--- a/libc/arch-arm/dynamic_function_dispatch.cpp
+++ b/libc/arch-arm/dynamic_function_dispatch.cpp
@@ -288,16 +288,7 @@
 
 typedef int strcmp_func(const char* __lhs, const char* __rhs);
 DEFINE_IFUNC(strcmp) {
-    switch(get_cpu_variant()) {
-        case kCortexA9:
-            RETURN_FUNC(strcmp_func, strcmp_a9);
-        case kCortexA55:
-        case kKrait:
-        case kKryo:
-            RETURN_FUNC(strcmp_func, strcmp_krait);
-        default:
-            RETURN_FUNC(strcmp_func, strcmp_a15);
-    }
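+    // strcmp_a15 is the only remaining arm implementation, so the
+    // resolver no longer needs to dispatch on the CPU variant.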
+    RETURN_FUNC(strcmp_func, strcmp_a15);
 }
 
 typedef size_t strlen_func(const char* __s);
diff --git a/libc/arch-arm/krait/bionic/strcmp.S b/libc/arch-arm/krait/bionic/strcmp.S
deleted file mode 100644
index ec692e5..0000000
--- a/libc/arch-arm/krait/bionic/strcmp.S
+++ /dev/null
@@ -1,489 +0,0 @@
-/*
- * Copyright (c) 2013 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- *    products derived from this software without specific prior written
- *    permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <private/bionic_asm.h>
-
-#ifdef __ARMEB__
-#define S2LOMEM lsl
-#define S2LOMEMEQ lsleq
-#define S2HIMEM lsr
-#define MSB 0x000000ff
-#define LSB 0xff000000
-#define BYTE0_OFFSET 24
-#define BYTE1_OFFSET 16
-#define BYTE2_OFFSET 8
-#define BYTE3_OFFSET 0
-#else /* not  __ARMEB__ */
-#define S2LOMEM lsr
-#define S2LOMEMEQ lsreq
-#define S2HIMEM lsl
-#define BYTE0_OFFSET 0
-#define BYTE1_OFFSET 8
-#define BYTE2_OFFSET 16
-#define BYTE3_OFFSET 24
-#define MSB 0xff000000
-#define LSB 0x000000ff
-#endif /* not  __ARMEB__ */
-
-.syntax         unified
-
-// To avoid warning about deprecated instructions, add an explicit
-// arch. The code generated is exactly the same.
-.arch armv7-a
-
-#if defined (__thumb__)
-        .thumb
-        .thumb_func
-#endif
-
-ENTRY(strcmp_krait)
-      /* Use LDRD whenever possible.  */
-
-/* The main thing to look out for when comparing large blocks is that
-   the loads do not cross a page boundary when loading past the index
-   of the byte with the first difference or the first string-terminator.
-
-   For example, if the strings are identical and the string-terminator
-   is at index k, byte by byte comparison will not load beyond address
-   s1+k and s2+k; word by word comparison may load up to 3 bytes beyond
-   k; double word - up to 7 bytes.  If the load of these bytes crosses
-   a page boundary, it might cause a memory fault (if the page is not mapped)
-   that would not have happened in byte by byte comparison.
-
-   If an address is (double) word aligned, then a load of a (double) word
-   from that address will not cross a page boundary.
-   Therefore, the algorithm below considers word and double-word alignment
-   of strings separately.  */
-
-/* High-level description of the algorithm.
-
-   * The fast path: if both strings are double-word aligned,
-     use LDRD to load two words from each string in every loop iteration.
-   * If the strings have the same offset from a word boundary,
-     use LDRB to load and compare byte by byte until
-     the first string is aligned to a word boundary (at most 3 bytes).
-     This is optimized for quick return on short unaligned strings.
-   * If the strings have the same offset from a double-word boundary,
-     use LDRD to load two words from each string in every loop iteration, as in the fast path.
-   * If the strings do not have the same offset from a double-word boundary,
-     load a word from the second string before the loop to initialize the queue.
-     Use LDRD to load two words from every string in every loop iteration.
-     Inside the loop, load the second word from the second string only after comparing
-     the first word, using the queued value, to guarantee safety across page boundaries.
-   * If the strings do not have the same offset from a word boundary,
-     use LDR and a shift queue. Order of loads and comparisons matters,
-     similarly to the previous case.
-
-   * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value.
-   * The only difference between ARM and Thumb modes is the use of CBZ instruction.
-   * The only difference between big and little endian is the use of REV in little endian
-     to compute the return value, instead of MOV.
-*/
-
-        .macro m_cbz reg label
-#ifdef __thumb2__
-        cbz     \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        beq     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbz */
-
-        .macro m_cbnz reg label
-#ifdef __thumb2__
-        cbnz    \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        bne     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbnz */
-
-        .macro  init
-        /* Macro to save temporary registers and prepare magic values.  */
-        subs    sp, sp, #16
-        .cfi_def_cfa_offset 16
-        strd    r4, r5, [sp, #8]
-        .cfi_rel_offset r4, 0
-        .cfi_rel_offset r5, 4
-        strd    r6, r7, [sp]
-        .cfi_rel_offset r6, 8
-        .cfi_rel_offset r7, 12
-        mvn     r6, #0  /* all F */
-        mov     r7, #0  /* all 0 */
-        .endm   /* init */
-
-        .macro  magic_compare_and_branch w1 w2 label
-        /* Macro to compare registers w1 and w2 and conditionally branch to label.  */
-        cmp     \w1, \w2        /* Are w1 and w2 the same?  */
-        magic_find_zero_bytes \w1
-        it      eq
-        cmpeq   ip, #0          /* Is there a zero byte in w1?  */
-        bne     \label
-        .endm /* magic_compare_and_branch */
-
-        .macro  magic_find_zero_bytes w1
-        /* Macro to find all-zero bytes in w1, result is in ip.  */
-        uadd8   ip, \w1, r6
-        sel     ip, r7, r6
-        .endm /* magic_find_zero_bytes */
-
-        .macro  setup_return w1 w2
-#ifdef __ARMEB__
-        mov     r1, \w1
-        mov     r2, \w2
-#else /* not  __ARMEB__ */
-        rev     r1, \w1
-        rev     r2, \w2
-#endif /* not  __ARMEB__ */
-        .endm /* setup_return */
-
-        pld [r0, #0]
-        pld [r1, #0]
-
-        /* Are both strings double-word aligned?  */
-        orr     ip, r0, r1
-        tst     ip, #7
-        bne     .L_do_align
-
-        /* Fast path.  */
-        init
-
-.L_doubleword_aligned:
-
-        /* Get here when the strings to compare are double-word aligned.  */
-        /* Compare two words in every iteration.  */
-        .p2align        2
-2:
-        pld [r0, #16]
-        pld [r1, #16]
-
-        /* Load the next double-word from each string.  */
-        ldrd    r2, r3, [r0], #8
-        ldrd    r4, r5, [r1], #8
-
-        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
-        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
-        b       2b
-
-.L_do_align:
-        /* Is the first string word-aligned?  */
-        ands    ip, r0, #3
-        beq     .L_word_aligned_r0
-
-        /* Fast compare byte by byte until the first string is word-aligned.  */
-        /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
-        to read until the next word boundary is 4-ip.  */
-        bic     r0, r0, #3
-        ldr     r2, [r0], #4
-        lsls    ip, ip, #31
-        beq     .L_byte2
-        bcs     .L_byte3
-
-.L_byte1:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE1_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
-
-.L_byte2:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE2_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
-
-.L_byte3:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE3_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbnz  reg=r3, label=.L_word_aligned_r0
-
-.L_fast_return:
-        mov     r0, ip
-        bx      lr
-
-.L_word_aligned_r0:
-        init
-        /* The first string is word-aligned.  */
-        /* Is the second string word-aligned?  */
-        ands    ip, r1, #3
-        bne     .L_strcmp_unaligned
-
-.L_word_aligned:
-        /* The strings are word-aligned. */
-        /* Is the first string double-word aligned?  */
-        tst     r0, #4
-        beq     .L_doubleword_aligned_r0
-
-        /* If r0 is not double-word aligned yet, align it by loading
-        and comparing the next word from each string.  */
-        ldr     r2, [r0], #4
-        ldr     r4, [r1], #4
-        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
-
-.L_doubleword_aligned_r0:
-        /* Get here when r0 is double-word aligned.  */
-        /* Is r1 doubleword_aligned?  */
-        tst     r1, #4
-        beq     .L_doubleword_aligned
-
-        /* Get here when the strings to compare are word-aligned,
-        r0 is double-word aligned, but r1 is not double-word aligned.  */
-
-        /* Initialize the queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare two words in every iteration.  */
-        .p2align        2
-3:
-        pld [r0, #16]
-        pld [r1, #16]
-
-        /* Load the next double-word from each string and compare.  */
-        ldrd    r2, r3, [r0], #8
-        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
-        ldrd    r4, r5, [r1], #8
-        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
-        b       3b
-
-        .macro miscmp_word offsetlo offsethi
-        /* Macro to compare misaligned strings.  */
-        /* r0, r1 are word-aligned, and at least one of the strings
-        is not double-word aligned.  */
-        /* Compare one word in every loop iteration.  */
-        /* OFFSETLO is the original bit-offset of r1 from a word-boundary,
-        OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word).  */
-
-        /* Initialize the shift queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare one word from each string in every loop iteration.  */
-        .p2align        2
-7:
-        ldr     r3, [r0], #4
-        S2LOMEM r5, r5, #\offsetlo
-        magic_find_zero_bytes w1=r3
-        cmp     r7, ip, S2HIMEM #\offsetlo
-        and     r2, r3, r6, S2LOMEM #\offsetlo
-        it      eq
-        cmpeq   r2, r5
-        bne     .L_return_25
-        ldr     r5, [r1], #4
-        cmp     ip, #0
-        eor r3, r2, r3
-        S2HIMEM r2, r5, #\offsethi
-        it      eq
-        cmpeq   r3, r2
-        bne     .L_return_32
-        b       7b
-        .endm /* miscmp_word */
-
-.L_strcmp_unaligned:
-        /* r0 is word-aligned, r1 is at offset ip from a word.  */
-        /* Align r1 to the (previous) word-boundary.  */
-        bic     r1, r1, #3
-
-        /* Unaligned comparison word by word using LDRs. */
-        cmp     ip, #2
-        beq     .L_miscmp_word_16                 /* If ip == 2.  */
-        bge     .L_miscmp_word_24                 /* If ip == 3.  */
-        miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
-.L_miscmp_word_24:  miscmp_word offsetlo=24 offsethi=8
-
-
-.L_return_32:
-        setup_return w1=r3, w2=r2
-        b       .L_do_return
-.L_return_34:
-        setup_return w1=r3, w2=r4
-        b       .L_do_return
-.L_return_25:
-        setup_return w1=r2, w2=r5
-        b       .L_do_return
-.L_return_35:
-        setup_return w1=r3, w2=r5
-        b       .L_do_return
-.L_return_24:
-        setup_return w1=r2, w2=r4
-
-.L_do_return:
-
-#ifdef __ARMEB__
-        mov     r0, ip
-#else /* not  __ARMEB__ */
-        rev     r0, ip
-#endif /* not  __ARMEB__ */
-
-        /* Restore temporaries early, before computing the return value.  */
-        ldrd    r6, r7, [sp]
-        ldrd    r4, r5, [sp, #8]
-        adds    sp, sp, #16
-        .cfi_def_cfa_offset 0
-        .cfi_restore r4
-        .cfi_restore r5
-        .cfi_restore r6
-        .cfi_restore r7
-
-        /* There is a zero or a different byte between r1 and r2.  */
-        /* r0 contains a mask of all-zero bytes in r1.  */
-        /* Using r0 and not ip here because cbz requires low register.  */
-        m_cbz   reg=r0, label=.L_compute_return_value
-        clz     r0, r0
-        /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
-        rsb     r0, r0, #24
-        /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1.  */
-        lsr     r1, r1, r0
-        lsr     r2, r2, r0
-
-.L_compute_return_value:
-        movs    r0, #1
-        cmp     r1, r2
-        /* The return value is computed as follows.
-        If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return.
-        If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0,
-        which means r0:=r0-r0-1 and r0 is #-1 at return.
-        If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1,
-        which means r0:=r0-r0 and r0 is #0 at return.
-        (C==0 and Z==1) cannot happen because the carry bit is "not borrow".  */
-        it      ls
-        sbcls   r0, r0, r0
-        bx      lr
-
-    /* The code from the previous version of strcmp.S handles this
-     * particular case (the second string is 2 bytes off a word alignment)
-     * faster than any current version. In this very specific case, use the
-     * previous version. See bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S
-     * for the unedited version of this code.
-     */
-.L_miscmp_word_16:
-	wp1 .req r0
-	wp2 .req r1
-	b1  .req r2
-	w1  .req r4
-	w2  .req r5
-	t1  .req ip
-	@ r3 is scratch
-
-    /* At this point, wp1 (r0) has already been word-aligned. */
-2:
-	mov	b1, #1
-	orr	b1, b1, b1, lsl #8
-	orr	b1, b1, b1, lsl #16
-
-	and	t1, wp2, #3
-	bic	wp2, wp2, #3
-	ldr	w1, [wp1], #4
-	ldr	w2, [wp2], #4
-
-	/* Critical inner Loop: Block with 2 bytes initial overlap */
-	.p2align	2
-2:
-	S2HIMEM	t1, w1, #16
-	sub	r3, w1, b1
-	S2LOMEM	t1, t1, #16
-	bic	r3, r3, w1
-	cmp	t1, w2, S2LOMEM #16
-	bne	4f
-	ands	r3, r3, b1, lsl #7
-	it	eq
-	ldreq	w2, [wp2], #4
-	bne	5f
-	eor	t1, t1, w1
-	cmp	t1, w2, S2HIMEM #16
-	bne	6f
-	ldr	w1, [wp1], #4
-	b	2b
-
-5:
-#ifdef __ARMEB__
-	/* The syndrome value may contain false ones if the string ends
-	 * with the bytes 0x01 0x00
-	 */
-	tst	w1, #0xff000000
-	it	ne
-	tstne	w1, #0x00ff0000
-	beq	7f
-#else
-	lsls	r3, r3, #16
-	bne	7f
-#endif
-	ldrh	w2, [wp2]
-	S2LOMEM	t1, w1, #16
-#ifdef __ARMEB__
-	lsl	w2, w2, #16
-#endif
-	b	8f
-
-6:
-	S2HIMEM	w2, w2, #16
-	S2LOMEM	t1, w1, #16
-4:
-	S2LOMEM	w2, w2, #16
-	b	8f
-
-7:
-	mov	r0, #0
-
-    /* Restore registers and stack. */
-    ldrd    r6, r7, [sp]
-    ldrd    r4, r5, [sp, #8]
-    adds    sp, sp, #16
-    .cfi_def_cfa_offset 0
-    .cfi_restore r4
-    .cfi_restore r5
-    .cfi_restore r6
-    .cfi_restore r7
-
-	bx	lr
-
-8:
-	and	r2, t1, #LSB
-	and	r0, w2, #LSB
-	cmp	r0, #1
-	it	cs
-	cmpcs	r0, r2
-	itt	eq
-	S2LOMEMEQ	t1, t1, #8
-	S2LOMEMEQ	w2, w2, #8
-	beq	8b
-	sub	r0, r2, r0
-
-    /* Restore registers and stack. */
-    ldrd    r6, r7, [sp]
-    ldrd    r4, r5, [sp, #8]
-    adds    sp, sp, #16
-    .cfi_def_cfa_offset 0
-    .cfi_restore r4
-    .cfi_restore r5
-    .cfi_restore r6
-    .cfi_restore r7
-
-	bx	lr
-END(strcmp_krait)
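The MOVS/CMP/SBCLS sequence documented in the removed code above is the classic branch-free three-way compare. A hedged C++ restatement of the same logic, for reference (names are illustrative):

    #include <cstdint>
    #include <cstdio>

    // Mirrors "movs r0, #1; cmp r1, r2; it ls; sbcls r0, r0, r0":
    // +1 if r1 > r2, -1 if r1 < r2, 0 if equal, with no branches.
    static int compare_words(uint32_t r1, uint32_t r2) {
      return (r1 > r2) - (r1 < r2);
    }

    int main() {
      std::printf("%d %d %d\n", compare_words(2, 1), compare_words(1, 2), compare_words(5, 5));
      return 0;
    }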
diff --git a/libc/bionic/malloc_common_dynamic.cpp b/libc/bionic/malloc_common_dynamic.cpp
index 599ac6a..8035746 100644
--- a/libc/bionic/malloc_common_dynamic.cpp
+++ b/libc/bionic/malloc_common_dynamic.cpp
@@ -500,3 +500,40 @@
   return HeapprofdMallopt(opcode, arg, arg_size);
 }
 // =============================================================================
+
+#if !defined(__LP64__) && defined(__arm__)
+// =============================================================================
+// Old platform-only functions that some old 32-bit apps are still using.
+// See b/132175052.
+// Only compile these functions for 32-bit arm, so that new apps do not use
+// them.
+// =============================================================================
+extern "C" void get_malloc_leak_info(uint8_t** info, size_t* overall_size, size_t* info_size,
+                                     size_t* total_memory, size_t* backtrace_size) {
+  if (info == nullptr || overall_size == nullptr || info_size == nullptr ||
+      total_memory == nullptr || backtrace_size == nullptr) {
+    return;
+  }
+
+  *info = nullptr;
+  *overall_size = 0;
+  *info_size = 0;
+  *total_memory = 0;
+  *backtrace_size = 0;
+
+  android_mallopt_leak_info_t leak_info = {};
+  if (android_mallopt(M_GET_MALLOC_LEAK_INFO, &leak_info, sizeof(leak_info))) {
+    *info = leak_info.buffer;
+    *overall_size = leak_info.overall_size;
+    *info_size = leak_info.info_size;
+    *total_memory = leak_info.total_memory;
+    *backtrace_size = leak_info.backtrace_size;
+  }
+}
+
+extern "C" void free_malloc_leak_info(uint8_t* info) {
+  android_mallopt_leak_info_t leak_info = { .buffer = info };
+  android_mallopt(M_FREE_MALLOC_LEAK_INFO, &leak_info, sizeof(leak_info));
+}
+// =============================================================================
+#endif
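These compatibility symbols are exported only through the dynamic symbol table (see the libc.map.txt change below), not through any header, so an old app would typically reach them with dlsym(3). A hypothetical caller, for illustration only; the signatures come from the definitions above:

    #include <dlfcn.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    int main() {
      using get_fn = void (*)(uint8_t**, size_t*, size_t*, size_t*, size_t*);
      using free_fn = void (*)(uint8_t*);
      auto get_info = reinterpret_cast<get_fn>(dlsym(RTLD_DEFAULT, "get_malloc_leak_info"));
      auto free_info = reinterpret_cast<free_fn>(dlsym(RTLD_DEFAULT, "free_malloc_leak_info"));
      if (get_info == nullptr || free_info == nullptr) return 1;  // not 32-bit arm bionic

      uint8_t* info = nullptr;
      size_t overall_size = 0, info_size = 0, total_memory = 0, backtrace_size = 0;
      get_info(&info, &overall_size, &info_size, &total_memory, &backtrace_size);
      printf("leak info: %zu bytes overall\n", overall_size);
      if (info != nullptr) free_info(info);
      return 0;
    }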
diff --git a/libc/include/setjmp.h b/libc/include/setjmp.h
index 195f251..67d3c2f 100644
--- a/libc/include/setjmp.h
+++ b/libc/include/setjmp.h
@@ -65,18 +65,18 @@
 
 __BEGIN_DECLS
 
-int _setjmp(jmp_buf __env);
-void _longjmp(jmp_buf __env, int __value);
+int _setjmp(jmp_buf __env) __returns_twice;
+__noreturn void _longjmp(jmp_buf __env, int __value);
 
-int setjmp(jmp_buf __env);
-void longjmp(jmp_buf __env, int __value);
+int setjmp(jmp_buf __env) __returns_twice;
+__noreturn void longjmp(jmp_buf __env, int __value);
 
 #define setjmp(__env) setjmp(__env)
 
-int sigsetjmp(sigjmp_buf __env, int __save_signal_mask)
-    __INTRODUCED_IN_ARM(9) __INTRODUCED_IN_MIPS(12) __INTRODUCED_IN_X86(12);
-void siglongjmp(sigjmp_buf __env, int __value)
-    __INTRODUCED_IN_ARM(9) __INTRODUCED_IN_MIPS(12) __INTRODUCED_IN_X86(12);
+int sigsetjmp(sigjmp_buf __env, int __save_signal_mask) __returns_twice __INTRODUCED_IN_ARM(9)
+    __INTRODUCED_IN_MIPS(12) __INTRODUCED_IN_X86(12);
+__noreturn void siglongjmp(sigjmp_buf __env, int __value) __INTRODUCED_IN_ARM(9)
+    __INTRODUCED_IN_MIPS(12) __INTRODUCED_IN_X86(12);
 
 __END_DECLS
 
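__returns_twice tells the compiler that control can come back through the call a second time, so values live across setjmp() must not be cached in registers that longjmp() will restore. A minimal sketch of the pattern being protected (volatile is still required by the standard for locals modified between the two returns):

    #include <csetjmp>
    #include <cstdio>

    static std::jmp_buf env;

    static void fail() {
      std::longjmp(env, 1);  // "returns" from setjmp a second time, with value 1
    }

    int main() {
      volatile int attempts = 0;  // must survive the second return from setjmp
      if (setjmp(env) == 0) {
        attempts = attempts + 1;
        fail();
      }
      std::printf("resumed after %d attempt(s)\n", attempts);  // prints 1
      return 0;
    }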
diff --git a/libc/include/sys/cdefs.h b/libc/include/sys/cdefs.h
index a919a79..ca9374e 100644
--- a/libc/include/sys/cdefs.h
+++ b/libc/include/sys/cdefs.h
@@ -86,6 +86,7 @@
 #define __noreturn __attribute__((__noreturn__))
 #define __mallocfunc  __attribute__((__malloc__))
 #define __packed __attribute__((__packed__))
+#define __returns_twice __attribute__((__returns_twice__))
 #define __unused __attribute__((__unused__))
 #define __used __attribute__((__used__))
 
diff --git a/libc/include/unistd.h b/libc/include/unistd.h
index 90173aa..9000407 100644
--- a/libc/include/unistd.h
+++ b/libc/include/unistd.h
@@ -78,7 +78,7 @@
 __noreturn void _exit(int __status);
 
 pid_t  fork(void);
-pid_t  vfork(void);
+pid_t  vfork(void) __returns_twice;
 pid_t  getpid(void);
 pid_t  gettid(void) __attribute_const__;
 pid_t  getpgid(pid_t __pid);
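vfork() gets the same attribute because it also returns twice: once in the child, which borrows the parent's address space, and once in the parent. A hedged sketch; the /system/bin/true path is illustrative:

    #include <sys/wait.h>
    #include <unistd.h>
    #include <cstdio>

    int main() {
      pid_t pid = vfork();
      if (pid == 0) {
        // Child: after vfork() only _exit() or an exec*() call is safe.
        execl("/system/bin/true", "true", static_cast<char*>(nullptr));
        _exit(127);  // reached only if exec failed
      }
      int status = 0;
      waitpid(pid, &status, 0);
      std::printf("child exited with status %d\n", WEXITSTATUS(status));
      return 0;
    }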
diff --git a/libc/libc.map.txt b/libc/libc.map.txt
index 4a734fc..bc26d2a 100644
--- a/libc/libc.map.txt
+++ b/libc/libc.map.txt
@@ -1742,6 +1742,8 @@
 LIBC_DEPRECATED {
   global:
     __system_property_wait_any;
+    free_malloc_leak_info; # arm
+    get_malloc_leak_info; # arm
 };
 
 LIBC_PLATFORM {
diff --git a/linker/Android.bp b/linker/Android.bp
index 23ef5ac..5e7a921 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -307,6 +307,11 @@
     xom: false,
 }
 
+sh_binary {
+    name: "ldd",
+    src: "ldd",
+}
+
 cc_library {
     // NOTE: --exclude-libs=libgcc.a makes sure that any symbols ld-android.so pulls from
     // libgcc.a are made static to ld-android.so.  This in turn ensures that libraries that
diff --git a/linker/ldd b/linker/ldd
new file mode 100644
index 0000000..3a0aff9
--- /dev/null
+++ b/linker/ldd
@@ -0,0 +1,23 @@
+#!/system/bin/sh
+
+# Rather than have ldd and ldd64, this script does the right thing depending
+# on the argument.
+
+function error() {
+  echo "$1"
+  exit 1
+}
+
+[ $# -eq 1 ] || error "usage: ldd FILE"
+
+case `file -L "$1"` in
+  *32-bit*)
+    linker --list "$1"
+    ;;
+  *64-bit*)
+    linker64 --list "$1"
+    ;;
+  *)
+    error "$1: not an ELF file"
+    ;;
+esac
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 0de17f7..32dce38 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -1162,6 +1162,13 @@
     }
   }
 
+#if !defined(__ANDROID_APEX__)
+  if (fd == -1) {
+    std::vector<std::string> bootstrap_paths = { std::string(kSystemLibDir) + "/bootstrap" };
+    fd = open_library_on_paths(zip_archive_cache, name, file_offset, bootstrap_paths, realpath);
+  }
+#endif
+
   if (fd == -1) {
     fd = open_library_on_paths(zip_archive_cache, name, file_offset, ns->get_default_library_paths(), realpath);
   }
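For non-APEX builds this adds one more directory to probe before falling back to the namespace's default search paths. The pattern is plain first-hit-wins probing; an illustrative stand-in (the real open_library_on_paths() does more, e.g. zip-aware lookups):

    #include <fcntl.h>
    #include <string>
    #include <vector>

    // Illustrative stand-in: try each directory in order, first hit wins.
    static int open_on_paths(const std::string& name, const std::vector<std::string>& dirs) {
      for (const auto& dir : dirs) {
        int fd = open((dir + "/" + name).c_str(), O_RDONLY | O_CLOEXEC);
        if (fd != -1) return fd;
      }
      return -1;
    }

    int main() {
      return open_on_paths("libc.so", {"/system/lib64/bootstrap", "/system/lib64"}) != -1 ? 0 : 1;
    }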
@@ -2596,6 +2603,8 @@
 }
 
 ElfW(Addr) call_ifunc_resolver(ElfW(Addr) resolver_addr) {
+  if (g_is_ldd) return 0;
+
   typedef ElfW(Addr) (*ifunc_resolver_t)(void);
   ifunc_resolver_t ifunc_resolver = reinterpret_cast<ifunc_resolver_t>(resolver_addr);
   ElfW(Addr) ifunc_addr = ifunc_resolver();
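Returning 0 here presumably reflects that an IFUNC resolver is code inside the binary being inspected, and --list must not execute the target. For background, a self-contained sketch of the IFUNC mechanism on ELF targets (all names made up):

    #include <cstddef>
    #include <cstring>

    static bool cpu_has_fancy_feature() { return false; }  // hypothetical CPU probe

    static void* memcpy_generic(void* d, const void* s, size_t n) { return memcpy(d, s, n); }
    static void* memcpy_fancy(void* d, const void* s, size_t n) { return memcpy(d, s, n); }

    // The resolver runs at dynamic-link time; this is what call_ifunc_resolver()
    // invokes, and what ldd mode now skips.
    extern "C" void* resolve_my_memcpy() {
      return reinterpret_cast<void*>(cpu_has_fancy_feature() ? &memcpy_fancy : &memcpy_generic);
    }

    extern "C" void* my_memcpy(void*, const void*, size_t)
        __attribute__((ifunc("resolve_my_memcpy")));

    int main() {
      char src[4] = "abc", dst[4];
      my_memcpy(dst, src, sizeof(src));
      return dst[0] == 'a' ? 0 : 1;
    }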
@@ -3876,6 +3885,11 @@
     return true;
   }
 
+  if (g_is_ldd && !is_main_executable()) {
+    async_safe_format_fd(STDOUT_FILENO, "\t%s => %s (%p)\n", get_soname(),
+                         get_realpath(), reinterpret_cast<void*>(base));
+  }
+
   local_group_root_ = local_group.front();
   if (local_group_root_ == nullptr) {
     local_group_root_ = this;
diff --git a/linker/linker_config.cpp b/linker/linker_config.cpp
index 7741904..46c91a3 100644
--- a/linker/linker_config.cpp
+++ b/linker/linker_config.cpp
@@ -417,9 +417,22 @@
 
     if (resolve) {
       std::vector<std::string> resolved_paths;
-
-      // do not remove paths that do not exist
-      resolve_paths(paths, &resolved_paths);
+      for (const auto& path : paths) {
+        if (path.empty()) {
+          continue;
+        }
+        // This is single-threaded, so there is no need to lock.
+        auto cached = resolved_paths_.find(path);
+        if (cached == resolved_paths_.end()) {
+          resolved_paths_[path] = resolve_path(path);
+          cached = resolved_paths_.find(path);
+        }
+        CHECK(cached != resolved_paths_.end());
+        if (cached->second.empty()) {
+          continue;
+        }
+        resolved_paths.push_back(cached->second);
+      }
 
       return resolved_paths;
     } else {
@@ -442,6 +455,7 @@
     return it;
   }
   std::unordered_map<std::string, PropertyValue> properties_;
+  std::unordered_map<std::string, std::string> resolved_paths_;
   int target_sdk_version_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(Properties);
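The new resolved_paths_ map is a single-threaded memoization of resolve_path(); repeated lookups of the same namespace paths now cost one hash lookup instead of repeated realpath()/stat() work. The same pattern in isolation, with realpath(3) standing in for the linker's resolver:

    #include <limits.h>
    #include <stdlib.h>
    #include <string>
    #include <unordered_map>

    // Stand-in for the linker's resolve_path(): "" means "could not resolve".
    static std::string resolve_path_once(const std::string& path) {
      char buf[PATH_MAX];
      return realpath(path.c_str(), buf) != nullptr ? std::string(buf) : std::string();
    }

    static const std::string& resolve_cached(const std::string& path) {
      static std::unordered_map<std::string, std::string> cache;  // single-threaded, like the linker
      auto it = cache.find(path);
      if (it == cache.end()) {
        it = cache.emplace(path, resolve_path_once(path)).first;
      }
      return it->second;
    }

    int main() { return resolve_cached("/system").empty() ? 1 : 0; }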
diff --git a/linker/linker_globals.h b/linker/linker_globals.h
index 32aa09d..de05238 100644
--- a/linker/linker_globals.h
+++ b/linker/linker_globals.h
@@ -87,3 +87,5 @@
  private:
   std::string saved_error_msg_;
 };
+
+__LIBC_HIDDEN__ extern bool g_is_ldd;
diff --git a/linker/linker_main.cpp b/linker/linker_main.cpp
index f6e4f67..f576023 100644
--- a/linker/linker_main.cpp
+++ b/linker/linker_main.cpp
@@ -117,6 +117,7 @@
   return vdso;
 }
 
+bool g_is_ldd;
 int g_ld_debug_verbosity;
 
 static std::vector<std::string> g_ld_preload_names;
@@ -397,7 +398,7 @@
                          "\"%s\": error: Android 5.0 and later only support "
                          "position-independent executables (-fPIE).\n",
                          g_argv[0]);
-    exit(EXIT_FAILURE);
+    _exit(EXIT_FAILURE);
   }
 
   // Use LD_LIBRARY_PATH and LD_PRELOAD (but only if we aren't setuid/setgid).
@@ -660,22 +661,29 @@
   // linker's _start.
   const char* exe_to_load = nullptr;
   if (getauxval(AT_ENTRY) == reinterpret_cast<uintptr_t>(&_start)) {
-    if (args.argc <= 1 || !strcmp(args.argv[1], "--help")) {
+    if (args.argc == 3 && !strcmp(args.argv[1], "--list")) {
+      // We're being asked to behave like ldd(1).
+      g_is_ldd = true;
+      exe_to_load = args.argv[2];
+    } else if (args.argc <= 1 || !strcmp(args.argv[1], "--help")) {
       async_safe_format_fd(STDOUT_FILENO,
-         "Usage: %s program [arguments...]\n"
-         "       %s path.zip!/program [arguments...]\n"
+         "Usage: %s [--list] PROGRAM [ARGS-FOR-PROGRAM...]\n"
+         "       %s [--list] path.zip!/PROGRAM [ARGS-FOR-PROGRAM...]\n"
          "\n"
          "A helper program for linking dynamic executables. Typically, the kernel loads\n"
          "this program because it's the PT_INTERP of a dynamic executable.\n"
          "\n"
          "This program can also be run directly to load and run a dynamic executable. The\n"
          "executable can be inside a zip file if it's stored uncompressed and at a\n"
-         "page-aligned offset.\n",
+         "page-aligned offset.\n"
+         "\n"
+         "The --list option gives behavior equivalent to ldd(1) on other systems.\n",
          args.argv[0], args.argv[0]);
-      exit(0);
+      _exit(EXIT_SUCCESS);
+    } else {
+      exe_to_load = args.argv[1];
+      __libc_shared_globals()->initial_linker_arg_count = 1;
     }
-    exe_to_load = args.argv[1];
-    __libc_shared_globals()->initial_linker_arg_count = 1;
   }
 
   // store argc/argv/envp to use them for calling constructors
@@ -693,6 +701,8 @@
 
   ElfW(Addr) start_address = linker_main(args, exe_to_load);
 
+  if (g_is_ldd) _exit(EXIT_SUCCESS);
+
   INFO("[ Jumping to _start (%p)... ]", reinterpret_cast<void*>(start_address));
 
   // Return the address that the calling assembly stub should jump to.
diff --git a/linker/linker_soinfo.cpp b/linker/linker_soinfo.cpp
index 31ee74c..5f40528 100644
--- a/linker/linker_soinfo.cpp
+++ b/linker/linker_soinfo.cpp
@@ -391,13 +391,15 @@
 }
 
 void soinfo::call_pre_init_constructors() {
+  if (g_is_ldd) return;
+
   // DT_PREINIT_ARRAY functions are called before any other constructors for executables,
   // but ignored in a shared library.
   call_array("DT_PREINIT_ARRAY", preinit_array_, preinit_array_count_, false, get_realpath());
 }
 
 void soinfo::call_constructors() {
-  if (constructors_called) {
+  if (constructors_called || g_is_ldd) {
     return;
   }
 
diff --git a/linker/linker_utils.cpp b/linker/linker_utils.cpp
index e926671..29110ed 100644
--- a/linker/linker_utils.cpp
+++ b/linker/linker_utils.cpp
@@ -204,47 +204,54 @@
     if (path.empty()) {
       continue;
     }
+    std::string resolved = resolve_path(path);
+    if (!resolved.empty()) {
+      resolved_paths->push_back(std::move(resolved));
+    }
+  }
+}
 
-    char resolved_path[PATH_MAX];
-    const char* original_path = path.c_str();
-    if (realpath(original_path, resolved_path) != nullptr) {
-      struct stat s;
-      if (stat(resolved_path, &s) == -1) {
-        DL_WARN("Warning: cannot stat file \"%s\": %s (ignoring)", resolved_path, strerror(errno));
-        continue;
+std::string resolve_path(const std::string& path) {
+  char resolved_path[PATH_MAX];
+  const char* original_path = path.c_str();
+  if (realpath(original_path, resolved_path) != nullptr) {
+    struct stat s;
+    if (stat(resolved_path, &s) == -1) {
+      DL_WARN("Warning: cannot stat file \"%s\": %s (ignoring)", resolved_path, strerror(errno));
+      return "";
+    }
+    if (!S_ISDIR(s.st_mode)) {
+      DL_WARN("Warning: \"%s\" is not a directory (ignoring)", resolved_path);
+      return "";
+    }
+    return resolved_path;
+  } else {
+    std::string normalized_path;
+    if (!normalize_path(original_path, &normalized_path)) {
+      DL_WARN("Warning: unable to normalize \"%s\" (ignoring)", original_path);
+      return "";
+    }
+
+    std::string zip_path;
+    std::string entry_path;
+    if (parse_zip_path(normalized_path.c_str(), &zip_path, &entry_path)) {
+      if (realpath(zip_path.c_str(), resolved_path) == nullptr) {
+        DL_WARN("Warning: unable to resolve \"%s\": %s (ignoring)",
+                zip_path.c_str(), strerror(errno));
+        return "";
       }
-      if (!S_ISDIR(s.st_mode)) {
-        DL_WARN("Warning: \"%s\" is not a directory (ignoring)", resolved_path);
-        continue;
-      }
-      resolved_paths->push_back(resolved_path);
+
+      return std::string(resolved_path) + kZipFileSeparator + entry_path;
     } else {
-      std::string normalized_path;
-      if (!normalize_path(original_path, &normalized_path)) {
-        DL_WARN("Warning: unable to normalize \"%s\" (ignoring)", original_path);
-        continue;
-      }
-
-      std::string zip_path;
-      std::string entry_path;
-      if (parse_zip_path(normalized_path.c_str(), &zip_path, &entry_path)) {
-        if (realpath(zip_path.c_str(), resolved_path) == nullptr) {
-          DL_WARN("Warning: unable to resolve \"%s\": %s (ignoring)",
-                  zip_path.c_str(), strerror(errno));
-          continue;
-        }
-
-        resolved_paths->push_back(std::string(resolved_path) + kZipFileSeparator + entry_path);
-      } else {
-        struct stat s;
-        if (stat(normalized_path.c_str(), &s) == 0 && S_ISDIR(s.st_mode)) {
-          // Path is not a zip path, but an existing directory. Then add it
-          // although we failed to resolve it. b/119656753
-          resolved_paths->push_back(normalized_path);
-        }
+      struct stat s;
+      if (stat(normalized_path.c_str(), &s) == 0 && S_ISDIR(s.st_mode)) {
+        // The path is not a zip path but an existing directory; add it even
+        // though we failed to resolve it. b/119656753
+        return normalized_path;
       }
     }
   }
+  return "";
 }
 
 bool is_first_stage_init() {
diff --git a/linker/linker_utils.h b/linker/linker_utils.h
index 34a597b..5073b10 100644
--- a/linker/linker_utils.h
+++ b/linker/linker_utils.h
@@ -47,6 +47,9 @@
 //    normalizes entry name by calling normalize_path function.
 void resolve_paths(std::vector<std::string>& paths,
                    std::vector<std::string>* resolved_paths);
+// Resolves a single path. Returns an empty string when the path is invalid or
+// cannot be resolved.
+std::string resolve_path(const std::string& path);
 
 void split_path(const char* path, const char* delimiters, std::vector<std::string>* paths);
 
diff --git a/tests/Android.bp b/tests/Android.bp
index 85bb29a..d65780c 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -288,6 +288,7 @@
         // enabled. Since the intent is just to build this, we can get away with
         // passing this flag on its own.
         "-fsanitize=address",
+        "-Wno-memset-transposed-args",
     ],
     // Ignore that we don't have ASAN symbols linked in.
     allow_undefined_symbols: true,
@@ -304,6 +305,7 @@
         "-Werror",
         "-D_FORTIFY_SOURCE=2",
         "-D__clang_analyzer__",
+        "-Wno-memset-transposed-args",
     ],
     srcs: ["fortify_filecheck_diagnostics_test.cpp"],
 }
diff --git a/tests/Android.mk b/tests/Android.mk
index fc7b940..848d291 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -65,18 +65,15 @@
 include $(CLEAR_VARS)
 LOCAL_ADDITIONAL_DEPENDENCIES := \
     $(LOCAL_PATH)/Android.mk \
-    $(LOCAL_PATH)/file-check-cxx \
-    | $(HOST_OUT_EXECUTABLES)/FileCheck$(HOST_EXECUTABLE_SUFFIX) \
+    $(LOCAL_PATH)/touch-obj-on-success
 
-LOCAL_CXX := $(LOCAL_PATH)/file-check-cxx \
-    $(HOST_OUT_EXECUTABLES)/FileCheck \
+LOCAL_CXX := $(LOCAL_PATH)/touch-obj-on-success \
     $(LLVM_PREBUILTS_PATH)/clang++ \
-    CLANG \
 
 LOCAL_CLANG := true
 LOCAL_MODULE := bionic-compile-time-tests-clang++
-LOCAL_CPPFLAGS := -Wall -Werror
-LOCAL_CPPFLAGS += -fno-color-diagnostics -ferror-limit=10000
+LOCAL_CPPFLAGS := -Wall -Wno-error
+LOCAL_CPPFLAGS += -fno-color-diagnostics -ferror-limit=10000 -Xclang -verify
 LOCAL_SRC_FILES := fortify_filecheck_diagnostics_test.cpp
 include $(BUILD_STATIC_LIBRARY)
 
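The switch from the FileCheck wrapper to clang's -verify mode means the expectations live next to the code under test: with -Xclang -verify, compilation fails unless every diagnostic clang emits is matched by an expected-... comment, and vice versa. A minimal standalone illustration, checked with something like clang++ -fsyntax-only -Xclang -verify demo.cpp:

    // demo.cpp: "passes" under -verify because clang's one diagnostic is
    // matched by the directive on the preceding line.
    // expected-error@+1{{use of undeclared identifier 'undeclared'}}
    int x = undeclared;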
diff --git a/tests/dlfcn_test.cpp b/tests/dlfcn_test.cpp
index c6d7ddb..d815dba 100644
--- a/tests/dlfcn_test.cpp
+++ b/tests/dlfcn_test.cpp
@@ -999,6 +999,7 @@
 #error "Unknown architecture"
 #endif
 #define PATH_TO_LIBC PATH_TO_SYSTEM_LIB "libc.so"
+#define PATH_TO_BOOTSTRAP_LIBC PATH_TO_SYSTEM_LIB "bootstrap/libc.so"
 #define ALTERNATE_PATH_TO_LIBC ALTERNATE_PATH_TO_SYSTEM_LIB "libc.so"
 
 TEST(dlfcn, dladdr_libc) {
@@ -1018,6 +1019,9 @@
               sizeof(ALTERNATE_PATH_TO_SYSTEM_LIB) - 1) == 0) {
     // Platform with emulated architecture.  Symlink on ARC++.
     ASSERT_TRUE(realpath(ALTERNATE_PATH_TO_LIBC, libc_realpath) == libc_realpath);
+  } else if (strncmp(PATH_TO_BOOTSTRAP_LIBC, info.dli_fname,
+                     sizeof(PATH_TO_BOOTSTRAP_LIBC) - 1) == 0) {
+    ASSERT_TRUE(realpath(PATH_TO_BOOTSTRAP_LIBC, libc_realpath) == libc_realpath);
   } else {
     // /system/lib is symlink when this test is executed on host.
     ASSERT_TRUE(realpath(PATH_TO_LIBC, libc_realpath) == libc_realpath);
diff --git a/tests/file-check-cxx b/tests/file-check-cxx
deleted file mode 100755
index d3bc5f7..0000000
--- a/tests/file-check-cxx
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-FILECHECK=$1
-CXX=$2
-PREFIX=$3
-shift 3
-SOURCE=$(echo "$@" | grep -oP '\S+\.cpp\b')
-OBJ=$(echo "$@" | grep -oP '\S+\.o\b')
-$CXX "$@" -Wno-error 2>&1 | $FILECHECK -check-prefix=$PREFIX $SOURCE
-if [ "$?" -eq 0 ]; then
-  touch $OBJ
-else
-  exit 1
-fi
diff --git a/tests/fortify_filecheck_diagnostics_test.cpp b/tests/fortify_filecheck_diagnostics_test.cpp
index e79a9a6..ac9853c 100644
--- a/tests/fortify_filecheck_diagnostics_test.cpp
+++ b/tests/fortify_filecheck_diagnostics_test.cpp
@@ -15,19 +15,14 @@
  */
 
 /*
- * If this test fails, you can see the compiler's output by erasing a few args from the failing
- * command. Specifically, delete everything before the path/to/the/compiler, then delete the first
- * arg after the path/to/the/compiler. For example, given the following command:
- *
- * bionic/tests/file-check-cxx out/host/linux-x86/bin/FileCheck \
- * prebuilts/clang/host/linux-x86/clang-4053586/bin/clang++ CLANG    -I bionic/tests -I ...
- *
- * If you delete everything before clang++ and delete "CLANG", then you'll end up with:
- *
- * prebuilts/clang/host/linux-x86/clang-4053586/bin/clang++ -I bionic/tests -I ...
- *
- * Which is the command that FileCheck executes.
+ * Silence all notes about enable_if-related 'candidates'; they're nice to know
+ * about for users, but this test doesn't care.
  */
+// expected-note@* 0+{{candidate function}}
+
+/* Similarly, ignore all "from 'diagnose_if'"s. */
+// expected-note@* 0+{{from 'diagnose_if'}}
+
 
 #undef _FORTIFY_SOURCE
 #define _FORTIFY_SOURCE 2
@@ -43,11 +38,15 @@
 #include <time.h>
 #include <unistd.h>
 
+#if !defined(__BIONIC__)
+#  error "This only works with Bionic."
+#endif
+
 void test_sprintf() {
   char buf[4];
 
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: call to unavailable function 'sprintf': format string will always overflow destination buffer
+  // expected-error@+1{{call to unavailable function 'sprintf': format string will always overflow destination buffer}}
   sprintf(buf, "foobar");  // NOLINT(runtime/printf)
 
   // TODO: clang should emit a warning, but doesn't
@@ -58,7 +57,7 @@
   char buf[4];
 
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: call to unavailable function 'snprintf': format string will always overflow destination buffer
+  // expected-error@+1{{call to unavailable function 'snprintf': format string will always overflow destination buffer}}
   snprintf(buf, 5, "foobar");  // NOLINT(runtime/printf)
 
   // TODO: clang should emit a warning, but doesn't
@@ -74,41 +73,41 @@
 void test_memcpy() {
   char buf[4];
 
-  // CLANG: error: 'memcpy' called with size bigger than buffer
+  // expected-error@+1{{'memcpy' called with size bigger than buffer}}
   memcpy(buf, "foobar", sizeof("foobar") + 100);
 }
 
 void test_memmove() {
   char buf[4];
 
-  // CLANG: error: 'memmove' called with size bigger than buffer
+  // expected-error@+1{{'memmove' called with size bigger than buffer}}
   memmove(buf, "foobar", sizeof("foobar"));
 }
 
 void test_memset() {
   char buf[4];
 
-  // CLANG: error: 'memset' called with size bigger than buffer
+  // expected-error@+1{{'memset' called with size bigger than buffer}}
   memset(buf, 0, 6);
 }
 
 void test_strcpy() {
   char buf[4];
 
-  // CLANG: error: 'strcpy' called with string bigger than buffer
+  // expected-error@+1{{'strcpy' called with string bigger than buffer}}
   strcpy(buf, "foobar");  // NOLINT(runtime/printf)
 
-  // CLANG: error: 'strcpy' called with string bigger than buffer
+  // expected-error@+1{{'strcpy' called with string bigger than buffer}}
   strcpy(buf, "quux");
 }
 
 void test_stpcpy() {
   char buf[4];
 
-  // CLANG: error: 'stpcpy' called with string bigger than buffer
+  // expected-error@+1{{'stpcpy' called with string bigger than buffer}}
   stpcpy(buf, "foobar");
 
-  // CLANG: error: 'stpcpy' called with string bigger than buffer
+  // expected-error@+1{{'stpcpy' called with string bigger than buffer}}
   stpcpy(buf, "quux");
 }
 
@@ -157,10 +156,10 @@
 void test_fgets() {
   char buf[4];
 
-  // CLANG: error: in call to 'fgets', size should not be negative
+  // expected-error@+1{{in call to 'fgets', size should not be negative}}
   fgets(buf, -1, stdin);
 
-  // CLANG: error: in call to 'fgets', size is larger than the destination buffer
+  // expected-error@+1{{in call to 'fgets', size is larger than the destination buffer}}
   fgets(buf, 6, stdin);
 }
 
@@ -168,58 +167,58 @@
   char buf[4];
   sockaddr_in addr;
 
-  // CLANG: error: 'recvfrom' called with size bigger than buffer
+  // expected-error@+1{{'recvfrom' called with size bigger than buffer}}
   recvfrom(0, buf, 6, 0, reinterpret_cast<sockaddr*>(&addr), nullptr);
 }
 
 void test_recv() {
   char buf[4] = {0};
 
-  // CLANG: error: 'recv' called with size bigger than buffer
+  // expected-error@+1{{'recv' called with size bigger than buffer}}
   recv(0, buf, 6, 0);
 }
 
 void test_umask() {
-  // CLANG: error: 'umask' called with invalid mode
+  // expected-error@+1{{'umask' called with invalid mode}}
   umask(01777);
 }
 
 void test_read() {
   char buf[4];
-  // CLANG: error: in call to 'read', 'count' bytes overflows the given object
+  // expected-error@+1{{in call to 'read', 'count' bytes overflows the given object}}
   read(0, buf, 6);
 }
 
 void test_open() {
-  // CLANG: error: 'open' called with O_CREAT or O_TMPFILE, but missing mode
+  // expected-error@+1{{'open' called with O_CREAT or O_TMPFILE, but missing mode}}
   open("/dev/null", O_CREAT);
 
-  // CLANG: error: 'open' called with O_CREAT or O_TMPFILE, but missing mode
+  // expected-error@+1{{'open' called with O_CREAT or O_TMPFILE, but missing mode}}
   open("/dev/null", O_TMPFILE);
 
-  // CLANG: error: call to unavailable function 'open': too many arguments
+  // expected-error@+1{{call to unavailable function 'open': too many arguments}}
   open("/dev/null", O_CREAT, 0, 0);
 
-  // CLANG: error: call to unavailable function 'open': too many arguments
+  // expected-error@+1{{call to unavailable function 'open': too many arguments}}
   open("/dev/null", O_TMPFILE, 0, 0);
 
-  // CLANG: warning: 'open' has superfluous mode bits; missing O_CREAT?
+  // expected-warning@+1{{'open' has superfluous mode bits; missing O_CREAT?}}
   open("/dev/null", O_RDONLY, 0644);
 
-  // CLANG: warning: 'open' has superfluous mode bits; missing O_CREAT?
+  // expected-warning@+1{{'open' has superfluous mode bits; missing O_CREAT?}}
   open("/dev/null", O_DIRECTORY, 0644);
 }
 
 void test_poll() {
   pollfd fds[1];
-  // CLANG: error: in call to 'poll', fd_count is larger than the given buffer
+  // expected-error@+1{{in call to 'poll', fd_count is larger than the given buffer}}
   poll(fds, 2, 0);
 }
 
 void test_ppoll() {
   pollfd fds[1];
   timespec timeout;
-  // CLANG: error: in call to 'ppoll', fd_count is larger than the given buffer
+  // expected-error@+1{{in call to 'ppoll', fd_count is larger than the given buffer}}
   ppoll(fds, 2, &timeout, nullptr);
 }
 
@@ -227,101 +226,98 @@
   pollfd fds[1];
   timespec timeout;
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: in call to 'ppoll64', fd_count is larger than the given buffer
+  // expected-error@+1{{in call to 'ppoll64', fd_count is larger than the given buffer}}
   ppoll64(fds, 2, &timeout, nullptr);
 }
 
 void test_fread_overflow() {
   char buf[4];
-  // CLANG: error: in call to 'fread', size * count overflows
+  // expected-error@+1{{in call to 'fread', size * count overflows}}
   fread(buf, 2, (size_t)-1, stdin);
 }
 
 void test_fread_too_big() {
   char buf[4];
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: in call to 'fread', size * count is too large for the given buffer
+  // expected-error@+1{{in call to 'fread', size * count is too large for the given buffer}}
   fread(buf, 1, 5, stdin);
 }
 
 void test_fwrite_overflow() {
   char buf[4] = {0};
-  // CLANG: error: in call to 'fwrite', size * count overflows
+  // expected-error@+1{{in call to 'fwrite', size * count overflows}}
   fwrite(buf, 2, (size_t)-1, stdout);
 }
 
 void test_fwrite_too_big() {
   char buf[4] = {0};
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: in call to 'fwrite', size * count is too large for the given buffer
+  // expected-error@+1{{in call to 'fwrite', size * count is too large for the given buffer}}
   fwrite(buf, 1, 5, stdout);
 }
 
 void test_getcwd() {
   char buf[4];
-  // CLANG: error: in call to 'getcwd', 'size' bytes overflows the given object
+  // expected-error@+1{{in call to 'getcwd', 'size' bytes overflows the given object}}
   getcwd(buf, 5);
 }
 
 void test_pwrite64_size() {
   char buf[4] = {0};
-  // CLANG: error: in call to 'pwrite64', 'count' bytes overflows the given object
+  // expected-error@+1{{in call to 'pwrite64', 'count' bytes overflows the given object}}
   pwrite64(STDOUT_FILENO, buf, 5, 0);
 }
 
 void test_pwrite64_too_big_malloc() {
   void *buf = calloc(atoi("5"), 1);
-  // clang should emit a warning, but probably never will.
+  // expected-error@+1{{in call to 'pwrite64', 'count' must be <= SSIZE_MAX}}
   pwrite64(STDOUT_FILENO, buf, SIZE_MAX, 0);
 }
 
 void test_pwrite64_too_big() {
   char buf[4] = {0};
-  // CLANG: error: in call to 'pwrite64', 'count' must be <= SSIZE_MAX
+  // expected-error@+1{{in call to 'pwrite64', 'count' must be <= SSIZE_MAX}}
   pwrite64(STDOUT_FILENO, buf, SIZE_MAX, 0);
 }
 
 void test_write_size() {
   char buf[4] = {0};
-  // CLANG: error: in call to 'write', 'count' bytes overflows the given object
+  // expected-error@+1{{in call to 'write', 'count' bytes overflows the given object}}
   write(STDOUT_FILENO, buf, 5);
 }
 
 void test_memset_args_flipped() {
   char from[4] = {0};
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: 'memset' will set 0 bytes; maybe the arguments got flipped?
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wmemset-transposed-args"
+  // expected-warning@+1{{'memset' will set 0 bytes; maybe the arguments got flipped?}}
   memset(from, sizeof(from), 0);
-#pragma clang diagnostic pop
 }
 
 void test_sendto() {
   char buf[4] = {0};
   sockaddr_in addr;
 
-  // CLANG: error: 'sendto' called with size bigger than buffer
+  // expected-error@+1{{'sendto' called with size bigger than buffer}}
   sendto(0, buf, 6, 0, reinterpret_cast<sockaddr*>(&addr), sizeof(sockaddr_in));
 }
 
 void test_send() {
   char buf[4] = {0};
 
-  // CLANG: error: 'send' called with size bigger than buffer
+  // expected-error@+1{{'send' called with size bigger than buffer}}
   send(0, buf, 6, 0);
 }
 
 void test_realpath() {
   char buf[4] = {0};
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: 'realpath' output parameter must be NULL or a pointer to a buffer with >= PATH_MAX bytes
+  // expected-error@+1{{'realpath' output parameter must be NULL or a pointer to a buffer with >= PATH_MAX bytes}}
   realpath(".", buf);
 
   // This is fine.
   realpath(".", nullptr);
 
   char bigbuf[PATH_MAX];
-  // CLANG: error: 'realpath': NULL path is never correct; flipped arguments?
+  // expected-error@+1{{'realpath': NULL path is never correct; flipped arguments?}}
   realpath(nullptr, bigbuf);
 }
diff --git a/tests/leak_test.cpp b/tests/leak_test.cpp
index 1fa9e56..6005209 100644
--- a/tests/leak_test.cpp
+++ b/tests/leak_test.cpp
@@ -124,17 +124,22 @@
 // http://b/36045112
 TEST(pthread_leak, detach) {
   LeakChecker lc;
+  constexpr int kThreadCount = 100;
 
-  for (size_t pass = 0; pass < 2; ++pass) {
-    constexpr int kThreadCount = 100;
+  // Devices with low-power cores or only a few cores cannot finish the test in
+  // time, so decrease the thread count to 90.
+  // http://b/129924384.
+  int threads_count = (sysconf(_SC_NPROCESSORS_CONF) > 2) ? kThreadCount : (kThreadCount - 10);
+
+  for (size_t pass = 0; pass < 1; ++pass) {
     struct thread_data { pthread_barrier_t* barrier; pid_t* tid; } threads[kThreadCount] = {};
 
     pthread_barrier_t barrier;
-    ASSERT_EQ(pthread_barrier_init(&barrier, nullptr, kThreadCount + 1), 0);
+    ASSERT_EQ(pthread_barrier_init(&barrier, nullptr, threads_count + 1), 0);
 
     // Start child threads.
     pid_t tids[kThreadCount];
-    for (int i = 0; i < kThreadCount; ++i) {
+    for (int i = 0; i < threads_count; ++i) {
       threads[i] = {&barrier, &tids[i]};
       const auto thread_function = +[](void* ptr) -> void* {
         thread_data* data = static_cast<thread_data*>(ptr);
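The probe uses _SC_NPROCESSORS_CONF, which counts configured cores even when some are offline (unlike _SC_NPROCESSORS_ONLN). The same sizing rule in isolation:

    #include <unistd.h>
    #include <cstdio>

    int main() {
      long cores = sysconf(_SC_NPROCESSORS_CONF);  // configured cores, including offline ones
      int thread_count = (cores > 2) ? 100 : 90;   // the test's sizing rule
      std::printf("%ld configured cores -> %d threads\n", cores, thread_count);
      return 0;
    }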
diff --git a/tests/libs/bionic_tests_zipalign.cpp b/tests/libs/bionic_tests_zipalign.cpp
index 56183ba..ec500d4 100644
--- a/tests/libs/bionic_tests_zipalign.cpp
+++ b/tests/libs/bionic_tests_zipalign.cpp
@@ -41,7 +41,7 @@
 
 static bool GetEntries(ZipArchiveHandle handle, std::vector<ZipData>* entries) {
   void* cookie;
-  int32_t return_value = StartIteration(handle, &cookie, nullptr, nullptr);
+  int32_t return_value = StartIteration(handle, &cookie);
   if (return_value != 0) {
     fprintf(stderr, "Unable to iterate over entries: %s\n", ErrorCodeString(return_value));
     return false;
diff --git a/tests/stack_unwinding_test.cpp b/tests/stack_unwinding_test.cpp
index 0ff6f30..e620ecd 100644
--- a/tests/stack_unwinding_test.cpp
+++ b/tests/stack_unwinding_test.cpp
@@ -112,6 +112,10 @@
 }
 
 TEST(stack_unwinding, unwind_through_signal_frame) {
+#if defined(__i386__)
+  GTEST_SKIP() << "Temporarily skip this test since it fails on x86; see b/132763120.";
+#endif
+
   ScopedSignalHandler ssh(SIGUSR1, UnwindSignalHandler);
 
   UnwindTest();
@@ -119,6 +123,10 @@
 
 // On LP32, the SA_SIGINFO flag gets you __restore_rt instead of __restore.
 TEST(stack_unwinding, unwind_through_signal_frame_SA_SIGINFO) {
+#if defined(__i386__)
+  GTEST_SKIP() << "Temporarily skip this test since it fails on x86; see b/132763120.";
+#endif
+
   ScopedSignalHandler ssh(SIGUSR1, UnwindSignalHandler, SA_SIGINFO);
 
   UnwindTest();
diff --git a/tests/touch-obj-on-success b/tests/touch-obj-on-success
new file mode 100755
index 0000000..df08a49
--- /dev/null
+++ b/tests/touch-obj-on-success
@@ -0,0 +1,8 @@
+#!/bin/bash -eu
+#
+# Runs the given C/C++ compile-ish command. On success, scrapes an object file
+# from that command line and touches it.
+
+"$@"
+obj="$(echo "$@" | grep -oP '\S+\.o\b')"
+touch "${obj}"