Merge "Add bootstrap directory to bootstrap linker's search path."
diff --git a/libc/Android.bp b/libc/Android.bp
index fa1eab6..1da7262 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -76,6 +76,7 @@
     },
     native_coverage: false,
     recovery_available: true,
+    native_bridge_supported: true,
 
     // lld complains about duplicate symbols in libcrt and libgcc. Suppress the
     // warning since this is intended right now.
@@ -789,13 +790,11 @@
                 "arch-arm/cortex-a9/bionic/memset.S",
                 "arch-arm/cortex-a9/bionic/stpcpy.S",
                 "arch-arm/cortex-a9/bionic/strcat.S",
-                "arch-arm/cortex-a9/bionic/strcmp.S",
                 "arch-arm/cortex-a9/bionic/strcpy.S",
                 "arch-arm/cortex-a9/bionic/strlen.S",
 
                 "arch-arm/krait/bionic/memcpy.S",
                 "arch-arm/krait/bionic/memset.S",
-                "arch-arm/krait/bionic/strcmp.S",
 
                 "arch-arm/cortex-a53/bionic/memcpy.S",
 
@@ -1692,6 +1691,7 @@
     host_supported: true,
     vendor_available: true,
     recovery_available: true,
+    native_bridge_supported: true,
 
     no_libcrt: true,
     no_libgcc: true,
@@ -1824,6 +1824,7 @@
     defaults: ["linux_bionic_supported"],
     vendor_available: true,
     recovery_available: true,
+    native_bridge_supported: true,
 
     cflags: [
         "-Wno-gcc-compat",
@@ -2082,6 +2083,7 @@
 
 ndk_library {
     name: "libc",
+    native_bridge_supported: true,
     symbol_file: "libc.map.txt",
     first_version: "9",
 }
@@ -2091,6 +2093,7 @@
     symbol_file: "libc.map.txt",
     export_headers_as_system: true,
     export_preprocessed_headers: ["include"],
+    native_bridge_supported: true,
     export_include_dirs: [
         "kernel/android/uapi",
         "kernel/uapi",
diff --git a/libc/NOTICE b/libc/NOTICE
index 298901f..ef31733 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -5330,6 +5330,34 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2012-2014 ARM Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. The name of the company may not be used to endorse or promote
+   products derived from this software without specific prior written
+   permission.
+
+THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c) 2013
      MIPS Technologies, Inc., California.
 
diff --git a/libc/arch-arm/cortex-a15/bionic/strcmp.S b/libc/arch-arm/cortex-a15/bionic/strcmp.S
index 58dbf17..467201b 100644
--- a/libc/arch-arm/cortex-a15/bionic/strcmp.S
+++ b/libc/arch-arm/cortex-a15/bionic/strcmp.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013 ARM Ltd
+ * Copyright (c) 2012-2014 ARM Ltd
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,9 +29,9 @@
 #include <private/bionic_asm.h>
 
 #ifdef __ARMEB__
-#define S2LOMEM lsl
-#define S2LOMEMEQ lsleq
-#define S2HIMEM lsr
+#define S2LO lsl
+#define S2LOEQ lsleq
+#define S2HI lsr
 #define MSB 0x000000ff
 #define LSB 0xff000000
 #define BYTE0_OFFSET 24
@@ -39,9 +39,9 @@
 #define BYTE2_OFFSET 8
 #define BYTE3_OFFSET 0
 #else /* not  __ARMEB__ */
-#define S2LOMEM lsr
-#define S2LOMEMEQ lsreq
-#define S2HIMEM lsl
+#define S2LO lsr
+#define S2LOEQ lsreq
+#define S2HI lsl
 #define BYTE0_OFFSET 0
 #define BYTE1_OFFSET 8
 #define BYTE2_OFFSET 16
@@ -50,330 +50,446 @@
 #define LSB 0x000000ff
 #endif /* not  __ARMEB__ */
 
-.syntax         unified
+/* Parameters and result.  */
+#define src1		r0
+#define src2		r1
+#define result		r0	/* Overlaps src1.  */
 
-#if defined (__thumb__)
-        .thumb
-        .thumb_func
-#endif
+/* Internal variables.  */
+#define tmp1		r4
+#define tmp2		r5
+#define const_m1	r12
+
+/* Additional internal variables for 64-bit aligned data.  */
+#define data1a		r2
+#define data1b		r3
+#define data2a		r6
+#define data2b		r7
+#define syndrome_a	tmp1
+#define syndrome_b	tmp2
+
+/* Additional internal variables for 32-bit aligned data.  */
+#define data1		r2
+#define data2		r3
+#define syndrome	tmp2
+
+	/* Implementation of strcmp for ARMv7 when DSP instructions are
+	   available.  Use ldrd to support wider loads, provided the data
+	   is sufficiently aligned.  Use saturating arithmetic to optimize
+	   the compares.  */
+
+	/* Build Options:
+	   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
+	   byte in the string.  If comparing completely random strings
+	   the pre-check will save time, since there is a very high
+	   probability of a mismatch in the first character: we save
+	   significant overhead if this is the common case.  However,
+	   if strings are likely to be identical (e.g. because we're
+	   verifying a hit in a hash table), then this check is largely
+	   redundant.  */
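
A minimal C sketch of the pre-check trade-off described above (illustrative only; strcmp_sketch is a hypothetical name, not part of this patch):

    /* Mirrors the .Lfastpath_exit path: bail out before saving any
       registers when the first bytes already decide the result. */
    static int strcmp_sketch(const char* s1, const char* s2) {
        unsigned char c1 = (unsigned char)*s1;
        unsigned char c2 = (unsigned char)*s2;
        if (c1 != c2 || c1 == 0) return c1 - c2;  /* pre-check exit */
        do {  /* slow path: plain byte-by-byte compare */
            c1 = (unsigned char)*++s1;
            c2 = (unsigned char)*++s2;
        } while (c1 == c2 && c1 != 0);
        return c1 - c2;
    }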
+
+
+.syntax         unified
+.thumb
 
         // To avoid warning about deprecated instructions, add an explicit
         // arch. The code generated is exactly the same.
         .arch armv7-a
 
+	/* Macro to compute and return the result value for word-aligned
+	   cases.  */
+	.macro strcmp_epilogue_aligned synd d1 d2 restore_r6
+#ifdef __ARM_BIG_ENDIAN
+	/* If data1 contains a zero byte, then syndrome will contain a 1 in
+	   bit 7 of that byte.  Otherwise, the highest set bit in the
+	   syndrome will highlight the first different bit.  It is therefore
+	   sufficient to extract the eight bits starting with the syndrome
+	   bit.  */
+	clz	tmp1, \synd
+	lsl	r1, \d2, tmp1
+	.if \restore_r6
+	ldrd	r6, r7, [sp, #8]
+	.endif
+	.cfi_restore 6
+	.cfi_restore 7
+	lsl	\d1, \d1, tmp1
+	.cfi_remember_state
+	lsr	result, \d1, #24
+	ldrd	r4, r5, [sp], #16
+	.cfi_restore 4
+	.cfi_restore 5
+	sub	result, result, r1, lsr #24
+	bx	lr
+#else
+	/* To use the big-endian trick we'd have to reverse all three words.
+	   That's slower than this approach.  */
+	rev	\synd, \synd
+	clz	tmp1, \synd
+	bic	tmp1, tmp1, #7
+	lsr	r1, \d2, tmp1
+	.cfi_remember_state
+	.if \restore_r6
+	ldrd	r6, r7, [sp, #8]
+	.endif
+	.cfi_restore 6
+	.cfi_restore 7
+	lsr	\d1, \d1, tmp1
+	and	result, \d1, #255
+	and	r1, r1, #255
+	ldrd	r4, r5, [sp], #16
+	.cfi_restore 4
+	.cfi_restore 5
+	sub	result, result, r1
+
+	bx	lr
+#endif
+	.endm
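
A little-endian C model of what this epilogue computes, assuming a nonzero syndrome whose first nonzero byte (in memory order) marks the first difference or NUL (epilogue_sketch is hypothetical, not part of the patch):

    #include <stdint.h>

    /* rev + clz + bic #7 locate the first marked byte; shifting both
       data words down by that many bits leaves the bytes of interest
       in bits 0-7. */
    static int epilogue_sketch(uint32_t data1, uint32_t data2, uint32_t synd) {
        /* synd must be nonzero: __builtin_clz(0) is undefined. */
        int shift = __builtin_clz(__builtin_bswap32(synd)) & ~7;
        return (int)((data1 >> shift) & 0xff) - (int)((data2 >> shift) & 0xff);
    }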
+
+	.text
+	.p2align	5
+.Lstrcmp_start_addr:
+#ifndef STRCMP_NO_PRECHECK
+.Lfastpath_exit:
+	sub	r0, r2, r3
+	bx	lr
+	nop
+#endif
+
 ENTRY(strcmp_a15)
-      /* Use LDRD whenever possible.  */
+#ifndef STRCMP_NO_PRECHECK
+	ldrb	r2, [src1]
+	ldrb	r3, [src2]
+	cmp	r2, #1
+	it	cs
+	cmpcs	r2, r3
+	bne	.Lfastpath_exit
+#endif
+	.cfi_sections .debug_frame
+	strd	r4, r5, [sp, #-16]!
+	.cfi_def_cfa_offset 16
+	.cfi_offset 4, -16
+	.cfi_offset 5, -12
+	orr	tmp1, src1, src2
+	strd	r6, r7, [sp, #8]
+	.cfi_offset 6, -8
+	.cfi_offset 7, -4
+	mvn	const_m1, #0
+	lsl	r2, tmp1, #29
+	cbz	r2, .Lloop_aligned8
 
-/* The main thing to look out for when comparing large blocks is that
-   the loads do not cross a page boundary when loading past the index
-   of the byte with the first difference or the first string-terminator.
+.Lnot_aligned:
+	eor	tmp1, src1, src2
+	tst	tmp1, #7
+	bne	.Lmisaligned8
 
-   For example, if the strings are identical and the string-terminator
-   is at index k, byte by byte comparison will not load beyond address
-   s1+k and s2+k; word by word comparison may load up to 3 bytes beyond
-   k; double word - up to 7 bytes.  If the load of these bytes crosses
-   a page boundary, it might cause a memory fault (if the page is not mapped)
-   that would not have happened in byte by byte comparison.
+	/* Deal with mutual misalignment by aligning downwards and then
+	   masking off the unwanted loaded data to prevent a difference.  */
+	and	tmp1, src1, #7
+	bic	src1, src1, #7
+	and	tmp2, tmp1, #3
+	bic	src2, src2, #7
+	lsl	tmp2, tmp2, #3	/* Bytes -> bits.  */
+	ldrd	data1a, data1b, [src1], #16
+	tst	tmp1, #4
+	ldrd	data2a, data2b, [src2], #16
+	/* In thumb code we can't use MVN with a register shift, but
+	   we do have ORN.  */
+	S2HI	tmp1, const_m1, tmp2
+	orn	data1a, data1a, tmp1
+	orn	data2a, data2a, tmp1
+	beq	.Lstart_realigned8
+	orn	data1b, data1b, tmp1
+	mov	data1a, const_m1
+	orn	data2b, data2b, tmp1
+	mov	data2a, const_m1
+	b	.Lstart_realigned8
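
The same masking idea as a little-endian C sketch (mask_leading_bytes is hypothetical): poisoned bytes are 0xff in both words, so they can neither differ nor be mistaken for a NUL terminator.

    #include <stdint.h>

    /* misalign_bytes in [1,3]: bytes loaded from before the true
       string start are forced to 0xff (the S2HI/orn sequence above). */
    static uint32_t mask_leading_bytes(uint32_t word, unsigned misalign_bytes) {
        uint32_t keep = 0xffffffffu << (misalign_bytes * 8); /* S2HI const_m1 */
        return word | ~keep;                                 /* orn data, data */
    }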
 
-   If an address is (double) word aligned, then a load of a (double) word
-   from that address will not cross a page boundary.
-   Therefore, the algorithm below considers word and double-word alignment
-   of strings separately.  */
+	/* Unroll the inner loop by a factor of 2, giving 16 bytes per
+	   pass.  */
+	.p2align 5,,12  /* Don't start in the tail bytes of a cache line.  */
+	.p2align 2	/* Always word aligned.  */
+.Lloop_aligned8:
+	ldrd	data1a, data1b, [src1], #16
+	ldrd	data2a, data2b, [src2], #16
+.Lstart_realigned8:
+	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits.  */
+	eor	syndrome_a, data1a, data2a
+	sel	syndrome_a, syndrome_a, const_m1
+	cbnz	syndrome_a, .Ldiff_in_a
+	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
+	eor	syndrome_b, data1b, data2b
+	sel	syndrome_b, syndrome_b, const_m1
+	cbnz	syndrome_b, .Ldiff_in_b
 
-/* High-level description of the algorithm.
+	ldrd	data1a, data1b, [src1, #-8]
+	ldrd	data2a, data2b, [src2, #-8]
+	uadd8	syndrome_b, data1a, const_m1	/* Only want GE bits.  */
+	eor	syndrome_a, data1a, data2a
+	sel	syndrome_a, syndrome_a, const_m1
+	uadd8	syndrome_b, data1b, const_m1	/* Only want GE bits.  */
+	eor	syndrome_b, data1b, data2b
+	sel	syndrome_b, syndrome_b, const_m1
+	/* Can't use CBZ for backwards branch.  */
+	orrs	syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */
+	beq	.Lloop_aligned8
 
-   * The fast path: if both strings are double-word aligned,
-     use LDRD to load two words from each string in every loop iteration.
-   * If the strings have the same offset from a word boundary,
-     use LDRB to load and compare byte by byte until
-     the first string is aligned to a word boundary (at most 3 bytes).
-     This is optimized for quick return on short unaligned strings.
-   * If the strings have the same offset from a double-word boundary,
-     use LDRD to load two words from each string in every loop iteration, as in the fast path.
-   * If the strings do not have the same offset from a double-word boundary,
-     load a word from the second string before the loop to initialize the queue.
-     Use LDRD to load two words from every string in every loop iteration.
-     Inside the loop, load the second word from the second string only after comparing
-     the first word, using the queued value, to guarantee safety across page boundaries.
-   * If the strings do not have the same offset from a word boundary,
-     use LDR and a shift queue. Order of loads and comparisons matters,
-     similarly to the previous case.
+.Ldiff_found:
+	cbnz	syndrome_a, .Ldiff_in_a
 
-   * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value.
-   * The only difference between ARM and Thumb modes is the use of CBZ instruction.
-   * The only difference between big and little endian is the use of REV in little endian
-     to compute the return value, instead of MOV.
-*/
+.Ldiff_in_b:
+	strcmp_epilogue_aligned syndrome_b, data1b, data2b 1
 
-        .macro m_cbz reg label
-#ifdef __thumb2__
-        cbz     \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        beq     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbz */
+.Ldiff_in_a:
+	.cfi_restore_state
+	strcmp_epilogue_aligned syndrome_a, data1a, data2a 1
 
-        .macro m_cbnz reg label
-#ifdef __thumb2__
-        cbnz    \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        bne     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbnz */
+	.cfi_restore_state
+.Lmisaligned8:
+	tst	tmp1, #3
+	bne	.Lmisaligned4
+	ands	tmp1, src1, #3
+	bne	.Lmutual_align4
 
-        .macro  init
-        /* Macro to save temporary registers and prepare magic values.  */
-        subs    sp, sp, #16
-        .cfi_def_cfa_offset 16
-        strd    r4, r5, [sp, #8]
-        .cfi_rel_offset r4, 0
-        .cfi_rel_offset r5, 4
-        strd    r6, r7, [sp]
-        .cfi_rel_offset r6, 8
-        .cfi_rel_offset r7, 12
-        mvn     r6, #0  /* all F */
-        mov     r7, #0  /* all 0 */
-        .endm   /* init */
+	/* Unrolled by a factor of 2, to reduce the number of post-increment
+	   operations.  */
+.Lloop_aligned4:
+	ldr	data1, [src1], #8
+	ldr	data2, [src2], #8
+.Lstart_realigned4:
+	uadd8	syndrome, data1, const_m1	/* Only need GE bits.  */
+	eor	syndrome, data1, data2
+	sel	syndrome, syndrome, const_m1
+	cbnz	syndrome, .Laligned4_done
+	ldr	data1, [src1, #-4]
+	ldr	data2, [src2, #-4]
+	uadd8	syndrome, data1, const_m1
+	eor	syndrome, data1, data2
+	sel	syndrome, syndrome, const_m1
+	cmp	syndrome, #0
+	beq	.Lloop_aligned4
 
-        .macro  magic_compare_and_branch w1 w2 label
-        /* Macro to compare registers w1 and w2 and conditionally branch to label.  */
-        cmp     \w1, \w2        /* Are w1 and w2 the same?  */
-        magic_find_zero_bytes \w1
-        it      eq
-        cmpeq   ip, #0          /* Is there a zero byte in w1?  */
-        bne     \label
-        .endm /* magic_compare_and_branch */
+.Laligned4_done:
+	strcmp_epilogue_aligned syndrome, data1, data2, 0
 
-        .macro  magic_find_zero_bytes w1
-        /* Macro to find all-zero bytes in w1, result is in ip.  */
-        uadd8   ip, \w1, r6
-        sel     ip, r7, r6
-        .endm /* magic_find_zero_bytes */
+.Lmutual_align4:
+	.cfi_restore_state
+	/* Deal with mutual misalignment by aligning downwards and then
+	   masking off the unwanted loaded data to prevent a difference.  */
+	lsl	tmp1, tmp1, #3	/* Bytes -> bits.  */
+	bic	src1, src1, #3
+	ldr	data1, [src1], #8
+	bic	src2, src2, #3
+	ldr	data2, [src2], #8
 
-        .macro  setup_return w1 w2
-#ifdef __ARMEB__
-        mov     r1, \w1
-        mov     r2, \w2
-#else /* not  __ARMEB__ */
-        rev     r1, \w1
-        rev     r2, \w2
-#endif /* not  __ARMEB__ */
-        .endm /* setup_return */
+	/* In thumb code we can't use MVN with a register shift, but
+	   we do have ORN.  */
+	S2HI	tmp1, const_m1, tmp1
+	orn	data1, data1, tmp1
+	orn	data2, data2, tmp1
+	b	.Lstart_realigned4
 
-        pld [r0, #0]
-        pld [r1, #0]
+.Lmisaligned4:
+	ands	tmp1, src1, #3
+	beq	.Lsrc1_aligned
+	sub	src2, src2, tmp1
+	bic	src1, src1, #3
+	lsls	tmp1, tmp1, #31
+	ldr	data1, [src1], #4
+	beq	.Laligned_m2
+	bcs	.Laligned_m1
 
-        /* Are both strings double-word aligned?  */
-        orr     ip, r0, r1
-        tst     ip, #7
-        bne     .L_do_align
+#ifdef STRCMP_NO_PRECHECK
+	ldrb	data2, [src2, #1]
+	uxtb	tmp1, data1, ror #BYTE1_OFFSET
+	subs	tmp1, tmp1, data2
+	bne	.Lmisaligned_exit
+	cbz	data2, .Lmisaligned_exit
 
-        /* Fast path.  */
-        init
+.Laligned_m2:
+	ldrb	data2, [src2, #2]
+	uxtb	tmp1, data1, ror #BYTE2_OFFSET
+	subs	tmp1, tmp1, data2
+	bne	.Lmisaligned_exit
+	cbz	data2, .Lmisaligned_exit
 
-.L_doubleword_aligned:
+.Laligned_m1:
+	ldrb	data2, [src2, #3]
+	uxtb	tmp1, data1, ror #BYTE3_OFFSET
+	subs	tmp1, tmp1, data2
+	bne	.Lmisaligned_exit
+	add	src2, src2, #4
+	cbnz	data2, .Lsrc1_aligned
+#else  /* STRCMP_NO_PRECHECK */
+	/* If we've done the pre-check, then we don't need to check the
+	   first byte again here.  */
+	ldrb	data2, [src2, #2]
+	uxtb	tmp1, data1, ror #BYTE2_OFFSET
+	subs	tmp1, tmp1, data2
+	bne	.Lmisaligned_exit
+	cbz	data2, .Lmisaligned_exit
 
-        /* Get here when the strings to compare are double-word aligned.  */
-        /* Compare two words in every iteration.  */
-        .p2align        2
-2:
-        pld [r0, #16]
-        pld [r1, #16]
+.Laligned_m2:
+	ldrb	data2, [src2, #3]
+	uxtb	tmp1, data1, ror #BYTE3_OFFSET
+	subs	tmp1, tmp1, data2
+	bne	.Lmisaligned_exit
+	cbnz	data2, .Laligned_m1
+#endif
 
-        /* Load the next double-word from each string.  */
-        ldrd    r2, r3, [r0], #8
-        ldrd    r4, r5, [r1], #8
+.Lmisaligned_exit:
+	.cfi_remember_state
+	mov	result, tmp1
+	ldr	r4, [sp], #16
+	.cfi_restore 4
+	bx	lr
 
-        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
-        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
-        b       2b
+#ifndef STRCMP_NO_PRECHECK
+.Laligned_m1:
+	add	src2, src2, #4
+#endif
+.Lsrc1_aligned:
+	.cfi_restore_state
+	/* src1 is word aligned, but src2 has no common alignment
+	   with it.  */
+	ldr	data1, [src1], #4
+	lsls	tmp1, src2, #31		/* C=src2[1], Z=src2[0].  */
 
-.L_do_align:
-        /* Is the first string word-aligned?  */
-        ands    ip, r0, #3
-        beq     .L_word_aligned_r0
+	bic	src2, src2, #3
+	ldr	data2, [src2], #4
+	bhi	.Loverlap1		/* C=1, Z=0 => src2[1:0] = 0b11.  */
+	bcs	.Loverlap2		/* C=1, Z=1 => src2[1:0] = 0b10.  */
 
-        /* Fast compare byte by byte until the first string is word-aligned.  */
-        /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
-        to read until the next word boundary is 4-ip.  */
-        bic     r0, r0, #3
-        ldr     r2, [r0], #4
-        lsls    ip, ip, #31
-        beq     .L_byte2
-        bcs     .L_byte3
+	/* (overlap3) C=0, Z=0 => src2[1:0] = 0b01.  */
+.Loverlap3:
+	bic	tmp1, data1, #MSB
+	uadd8	syndrome, data1, const_m1
+	eors	syndrome, tmp1, data2, S2LO #8
+	sel	syndrome, syndrome, const_m1
+	bne	4f
+	cbnz	syndrome, 5f
+	ldr	data2, [src2], #4
+	eor	tmp1, tmp1, data1
+	cmp	tmp1, data2, S2HI #24
+	bne	6f
+	ldr	data1, [src1], #4
+	b	.Loverlap3
+4:
+	S2LO	data2, data2, #8
+	b	.Lstrcmp_tail
 
-.L_byte1:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE1_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
+5:
+	bics	syndrome, syndrome, #MSB
+	bne	.Lstrcmp_done_equal
 
-.L_byte2:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE2_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
+	/* We can only get here if the MSB of data1 contains 0, so
+	   fast-path the exit.  */
+	ldrb	result, [src2]
+	.cfi_remember_state
+	ldrd	r4, r5, [sp], #16
+	.cfi_restore 4
+	.cfi_restore 5
+	/* R6/7 Not used in this sequence.  */
+	.cfi_restore 6
+	.cfi_restore 7
+	neg	result, result
+	bx	lr
 
-.L_byte3:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE3_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbnz  reg=r3, label=.L_word_aligned_r0
+6:
+	.cfi_restore_state
+	S2LO	data1, data1, #24
+	and	data2, data2, #LSB
+	b	.Lstrcmp_tail
 
-.L_fast_return:
-        mov     r0, ip
-        bx      lr
+	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
+.Loverlap2:
+	and	tmp1, data1, const_m1, S2LO #16
+	uadd8	syndrome, data1, const_m1
+	eors	syndrome, tmp1, data2, S2LO #16
+	sel	syndrome, syndrome, const_m1
+	bne	4f
+	cbnz	syndrome, 5f
+	ldr	data2, [src2], #4
+	eor	tmp1, tmp1, data1
+	cmp	tmp1, data2, S2HI #16
+	bne	6f
+	ldr	data1, [src1], #4
+	b	.Loverlap2
+4:
+	S2LO	data2, data2, #16
+	b	.Lstrcmp_tail
+5:
+	ands	syndrome, syndrome, const_m1, S2LO #16
+	bne	.Lstrcmp_done_equal
 
-.L_word_aligned_r0:
-        init
-        /* The first string is word-aligned.  */
-        /* Is the second string word-aligned?  */
-        ands    ip, r1, #3
-        bne     .L_strcmp_unaligned
+	ldrh	data2, [src2]
+	S2LO	data1, data1, #16
+#ifdef __ARM_BIG_ENDIAN
+	lsl	data2, data2, #16
+#endif
+	b	.Lstrcmp_tail
 
-.L_word_aligned:
-        /* The strings are word-aligned. */
-        /* Is the first string double-word aligned?  */
-        tst     r0, #4
-        beq     .L_doubleword_aligned_r0
+6:
+	S2LO	data1, data1, #16
+	and	data2, data2, const_m1, S2LO #16
+	b	.Lstrcmp_tail
 
-        /* If r0 is not double-word aligned yet, align it by loading
-        and comparing the next word from each string.  */
-        ldr     r2, [r0], #4
-        ldr     r4, [r1], #4
-        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
+	.p2align 5,,12	/* Ensure at least 3 instructions in cache line.  */
+.Loverlap1:
+	and	tmp1, data1, #LSB
+	uadd8	syndrome, data1, const_m1
+	eors	syndrome, tmp1, data2, S2LO #24
+	sel	syndrome, syndrome, const_m1
+	bne	4f
+	cbnz	syndrome, 5f
+	ldr	data2, [src2], #4
+	eor	tmp1, tmp1, data1
+	cmp	tmp1, data2, S2HI #8
+	bne	6f
+	ldr	data1, [src1], #4
+	b	.Loverlap1
+4:
+	S2LO	data2, data2, #24
+	b	.Lstrcmp_tail
+5:
+	tst	syndrome, #LSB
+	bne	.Lstrcmp_done_equal
+	ldr	data2, [src2]
+6:
+	S2LO	data1, data1, #8
+	bic	data2, data2, #MSB
+	b	.Lstrcmp_tail
 
-.L_doubleword_aligned_r0:
-        /* Get here when r0 is double-word aligned.  */
-        /* Is r1 doubleword_aligned?  */
-        tst     r1, #4
-        beq     .L_doubleword_aligned
+.Lstrcmp_done_equal:
+	mov	result, #0
+	.cfi_remember_state
+	ldrd	r4, r5, [sp], #16
+	.cfi_restore 4
+	.cfi_restore 5
+	/* R6/7 not used in this sequence.  */
+	.cfi_restore 6
+	.cfi_restore 7
+	bx	lr
 
-        /* Get here when the strings to compare are word-aligned,
-        r0 is double-word aligned, but r1 is not double-word aligned.  */
-
-        /* Initialize the queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare two words in every iteration.  */
-        .p2align        2
-3:
-        pld [r0, #16]
-        pld [r1, #16]
-
-        /* Load the next double-word from each string and compare.  */
-        ldrd    r2, r3, [r0], #8
-        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
-        ldrd    r4, r5, [r1], #8
-        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
-        b       3b
-
-        .macro miscmp_word offsetlo offsethi
-        /* Macro to compare misaligned strings.  */
-        /* r0, r1 are word-aligned, and at least one of the strings
-        is not double-word aligned.  */
-        /* Compare one word in every loop iteration.  */
-        /* OFFSETLO is the original bit-offset of r1 from a word-boundary,
-        OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word).  */
-
-        /* Initialize the shift queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare one word from each string in every loop iteration.  */
-        .p2align        2
-7:
-        ldr     r3, [r0], #4
-        S2LOMEM r5, r5, #\offsetlo
-        magic_find_zero_bytes w1=r3
-        cmp     r7, ip, S2HIMEM #\offsetlo
-        and     r2, r3, r6, S2LOMEM #\offsetlo
-        it      eq
-        cmpeq   r2, r5
-        bne     .L_return_25
-        ldr     r5, [r1], #4
-        cmp     ip, #0
-        eor r3, r2, r3
-        S2HIMEM r2, r5, #\offsethi
-        it      eq
-        cmpeq   r3, r2
-        bne     .L_return_32
-        b       7b
-        .endm /* miscmp_word */
-
-.L_strcmp_unaligned:
-        /* r0 is word-aligned, r1 is at offset ip from a word.  */
-        /* Align r1 to the (previous) word-boundary.  */
-        bic     r1, r1, #3
-
-        /* Unaligned comparison word by word using LDRs. */
-        cmp     ip, #2
-        beq     .L_miscmp_word_16                 /* If ip == 2.  */
-        bge     .L_miscmp_word_24                 /* If ip == 3.  */
-        miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
-.L_miscmp_word_16:  miscmp_word offsetlo=16 offsethi=16
-.L_miscmp_word_24:  miscmp_word offsetlo=24 offsethi=8
-
-
-.L_return_32:
-        setup_return w1=r3, w2=r2
-        b       .L_do_return
-.L_return_34:
-        setup_return w1=r3, w2=r4
-        b       .L_do_return
-.L_return_25:
-        setup_return w1=r2, w2=r5
-        b       .L_do_return
-.L_return_35:
-        setup_return w1=r3, w2=r5
-        b       .L_do_return
-.L_return_24:
-        setup_return w1=r2, w2=r4
-
-.L_do_return:
-
-#ifdef __ARMEB__
-        mov     r0, ip
-#else /* not  __ARMEB__ */
-        rev     r0, ip
-#endif /* not  __ARMEB__ */
-
-        /* Restore temporaries early, before computing the return value.  */
-        ldrd    r6, r7, [sp]
-        ldrd    r4, r5, [sp, #8]
-        adds    sp, sp, #16
-        .cfi_def_cfa_offset 0
-        .cfi_restore r4
-        .cfi_restore r5
-        .cfi_restore r6
-        .cfi_restore r7
-
-        /* There is a zero or a different byte between r1 and r2.  */
-        /* r0 contains a mask of all-zero bytes in r1.  */
-        /* Using r0 and not ip here because cbz requires low register.  */
-        m_cbz   reg=r0, label=.L_compute_return_value
-        clz     r0, r0
-        /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
-        rsb     r0, r0, #24
-        /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1.  */
-        lsr     r1, r1, r0
-        lsr     r2, r2, r0
-
-.L_compute_return_value:
-        movs    r0, #1
-        cmp     r1, r2
-        /* The return value is computed as follows.
-        If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return.
-        If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0,
-        which means r0:=r0-r0-1 and r0 is #-1 at return.
-        If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1,
-        which means r0:=r0-r0 and r0 is #0 at return.
-        (C==0 and Z==1) cannot happen because the carry bit is "not borrow".  */
-        it      ls
-        sbcls   r0, r0, r0
-        bx      lr
+.Lstrcmp_tail:
+	.cfi_restore_state
+#ifndef __ARM_BIG_ENDIAN
+	rev	data1, data1
+	rev	data2, data2
+	/* Now everything looks big-endian...  */
+#endif
+	uadd8	tmp1, data1, const_m1
+	eor	tmp1, data1, data2
+	sel	syndrome, tmp1, const_m1
+	clz	tmp1, syndrome
+	lsl	data1, data1, tmp1
+	lsl	data2, data2, tmp1
+	lsr	result, data1, #24
+	ldrd	r4, r5, [sp], #16
+	.cfi_restore 4
+	.cfi_restore 5
+	/* R6/7 not used in this sequence.  */
+	.cfi_restore 6
+	.cfi_restore 7
+	sub	result, result, data2, lsr #24
+	bx	lr
 END(strcmp_a15)
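
For reference, the UADD8/SEL "syndrome" used throughout the new implementation can be modeled portably in C; the byte values differ from what SEL produces, but zero vs. nonzero agrees (syndrome_model is hypothetical):

    #include <stdint.h>

    /* Nonzero iff the two words differ or data1 contains a NUL byte.
       uadd8 with 0xffffffff sets a per-byte GE flag exactly where the
       byte of data1 is nonzero; sel then keeps the XOR byte there and
       substitutes 0xff where data1 had a zero byte. */
    static uint32_t syndrome_model(uint32_t data1, uint32_t data2) {
        uint32_t diff = data1 ^ data2;
        /* classic zero-byte detector: 0x80 set in each zero byte */
        uint32_t nul = (data1 - 0x01010101u) & ~data1 & 0x80808080u;
        return diff | nul;
    }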
diff --git a/libc/arch-arm/cortex-a9/bionic/strcmp.S b/libc/arch-arm/cortex-a9/bionic/strcmp.S
deleted file mode 100644
index ba7ea13..0000000
--- a/libc/arch-arm/cortex-a9/bionic/strcmp.S
+++ /dev/null
@@ -1,551 +0,0 @@
-/*
- * Copyright (c) 2013 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- *    products derived from this software without specific prior written
- *    permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <private/bionic_asm.h>
-
-#ifdef __ARMEB__
-#define S2LOMEM lsl
-#define S2LOMEMEQ lsleq
-#define S2HIMEM lsr
-#define MSB 0x000000ff
-#define LSB 0xff000000
-#define BYTE0_OFFSET 24
-#define BYTE1_OFFSET 16
-#define BYTE2_OFFSET 8
-#define BYTE3_OFFSET 0
-#else /* not  __ARMEB__ */
-#define S2LOMEM lsr
-#define S2LOMEMEQ lsreq
-#define S2HIMEM lsl
-#define BYTE0_OFFSET 0
-#define BYTE1_OFFSET 8
-#define BYTE2_OFFSET 16
-#define BYTE3_OFFSET 24
-#define MSB 0xff000000
-#define LSB 0x000000ff
-#endif /* not  __ARMEB__ */
-
-.syntax         unified
-
-// To avoid warning about deprecated instructions, add an explicit
-// arch. The code generated is exactly the same.
-.arch armv7-a
-
-#if defined (__thumb__)
-        .thumb
-        .thumb_func
-#endif
-
-ENTRY(strcmp_a9)
-      /* Use LDRD whenever possible.  */
-
-/* The main thing to look out for when comparing large blocks is that
-   the loads do not cross a page boundary when loading past the index
-   of the byte with the first difference or the first string-terminator.
-
-   For example, if the strings are identical and the string-terminator
-   is at index k, byte by byte comparison will not load beyond address
-   s1+k and s2+k; word by word comparison may load up to 3 bytes beyond
-   k; double word - up to 7 bytes.  If the load of these bytes crosses
-   a page boundary, it might cause a memory fault (if the page is not mapped)
-   that would not have happened in byte by byte comparison.
-
-   If an address is (double) word aligned, then a load of a (double) word
-   from that address will not cross a page boundary.
-   Therefore, the algorithm below considers word and double-word alignment
-   of strings separately.  */
-
-/* High-level description of the algorithm.
-
-   * The fast path: if both strings are double-word aligned,
-     use LDRD to load two words from each string in every loop iteration.
-   * If the strings have the same offset from a word boundary,
-     use LDRB to load and compare byte by byte until
-     the first string is aligned to a word boundary (at most 3 bytes).
-     This is optimized for quick return on short unaligned strings.
-   * If the strings have the same offset from a double-word boundary,
-     use LDRD to load two words from each string in every loop iteration, as in the fast path.
-   * If the strings do not have the same offset from a double-word boundary,
-     load a word from the second string before the loop to initialize the queue.
-     Use LDRD to load two words from every string in every loop iteration.
-     Inside the loop, load the second word from the second string only after comparing
-     the first word, using the queued value, to guarantee safety across page boundaries.
-   * If the strings do not have the same offset from a word boundary,
-     use LDR and a shift queue. Order of loads and comparisons matters,
-     similarly to the previous case.
-
-   * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value.
-   * The only difference between ARM and Thumb modes is the use of CBZ instruction.
-   * The only difference between big and little endian is the use of REV in little endian
-     to compute the return value, instead of MOV.
-*/
-
-        .macro m_cbz reg label
-#ifdef __thumb2__
-        cbz     \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        beq     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbz */
-
-        .macro m_cbnz reg label
-#ifdef __thumb2__
-        cbnz    \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        bne     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbnz */
-
-        .macro  init
-        /* Macro to save temporary registers and prepare magic values.  */
-        subs    sp, sp, #16
-        .cfi_def_cfa_offset 16
-        strd    r4, r5, [sp, #8]
-        .cfi_rel_offset r4, 0
-        .cfi_rel_offset r5, 4
-        strd    r6, r7, [sp]
-        .cfi_rel_offset r6, 8
-        .cfi_rel_offset r7, 12
-        mvn     r6, #0  /* all F */
-        mov     r7, #0  /* all 0 */
-        .endm   /* init */
-
-        .macro  magic_compare_and_branch w1 w2 label
-        /* Macro to compare registers w1 and w2 and conditionally branch to label.  */
-        cmp     \w1, \w2        /* Are w1 and w2 the same?  */
-        magic_find_zero_bytes \w1
-        it      eq
-        cmpeq   ip, #0          /* Is there a zero byte in w1?  */
-        bne     \label
-        .endm /* magic_compare_and_branch */
-
-        .macro  magic_find_zero_bytes w1
-        /* Macro to find all-zero bytes in w1, result is in ip.  */
-        uadd8   ip, \w1, r6
-        sel     ip, r7, r6
-        .endm /* magic_find_zero_bytes */
-
-        .macro  setup_return w1 w2
-#ifdef __ARMEB__
-        mov     r1, \w1
-        mov     r2, \w2
-#else /* not  __ARMEB__ */
-        rev     r1, \w1
-        rev     r2, \w2
-#endif /* not  __ARMEB__ */
-        .endm /* setup_return */
-
-        pld [r0, #0]
-        pld [r1, #0]
-
-        /* Are both strings double-word aligned?  */
-        orr     ip, r0, r1
-        tst     ip, #7
-        bne     .L_do_align
-
-        /* Fast path.  */
-        init
-
-.L_doubleword_aligned:
-
-        /* Get here when the strings to compare are double-word aligned.  */
-        /* Compare two words in every iteration.  */
-        .p2align        2
-2:
-        pld [r0, #16]
-        pld [r1, #16]
-
-        /* Load the next double-word from each string.  */
-        ldrd    r2, r3, [r0], #8
-        ldrd    r4, r5, [r1], #8
-
-        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
-        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
-        b       2b
-
-.L_do_align:
-        /* Is the first string word-aligned?  */
-        ands    ip, r0, #3
-        beq     .L_word_aligned_r0
-
-        /* Fast compare byte by byte until the first string is word-aligned.  */
-        /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
-        to read until the next word boundary is 4-ip.  */
-        bic     r0, r0, #3
-        ldr     r2, [r0], #4
-        lsls    ip, ip, #31
-        beq     .L_byte2
-        bcs     .L_byte3
-
-.L_byte1:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE1_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
-
-.L_byte2:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE2_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
-
-.L_byte3:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE3_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbnz  reg=r3, label=.L_word_aligned_r0
-
-.L_fast_return:
-        mov     r0, ip
-        bx      lr
-
-.L_word_aligned_r0:
-        init
-        /* The first string is word-aligned.  */
-        /* Is the second string word-aligned?  */
-        ands    ip, r1, #3
-        bne     .L_strcmp_unaligned
-
-.L_word_aligned:
-        /* The strings are word-aligned. */
-        /* Is the first string double-word aligned?  */
-        tst     r0, #4
-        beq     .L_doubleword_aligned_r0
-
-        /* If r0 is not double-word aligned yet, align it by loading
-        and comparing the next word from each string.  */
-        ldr     r2, [r0], #4
-        ldr     r4, [r1], #4
-        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
-
-.L_doubleword_aligned_r0:
-        /* Get here when r0 is double-word aligned.  */
-        /* Is r1 doubleword_aligned?  */
-        tst     r1, #4
-        beq     .L_doubleword_aligned
-
-        /* Get here when the strings to compare are word-aligned,
-        r0 is double-word aligned, but r1 is not double-word aligned.  */
-
-        /* Initialize the queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare two words in every iteration.  */
-        .p2align        2
-3:
-        pld [r0, #16]
-        pld [r1, #16]
-
-        /* Load the next double-word from each string and compare.  */
-        ldrd    r2, r3, [r0], #8
-        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
-        ldrd    r4, r5, [r1], #8
-        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
-        b       3b
-
-        .macro miscmp_word offsetlo offsethi
-        /* Macro to compare misaligned strings.  */
-        /* r0, r1 are word-aligned, and at least one of the strings
-        is not double-word aligned.  */
-        /* Compare one word in every loop iteration.  */
-        /* OFFSETLO is the original bit-offset of r1 from a word-boundary,
-        OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word).  */
-
-        /* Initialize the shift queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare one word from each string in every loop iteration.  */
-        .p2align        2
-7:
-        ldr     r3, [r0], #4
-        S2LOMEM r5, r5, #\offsetlo
-        magic_find_zero_bytes w1=r3
-        cmp     r7, ip, S2HIMEM #\offsetlo
-        and     r2, r3, r6, S2LOMEM #\offsetlo
-        it      eq
-        cmpeq   r2, r5
-        bne     .L_return_25
-        ldr     r5, [r1], #4
-        cmp     ip, #0
-        eor r3, r2, r3
-        S2HIMEM r2, r5, #\offsethi
-        it      eq
-        cmpeq   r3, r2
-        bne     .L_return_32
-        b       7b
-        .endm /* miscmp_word */
-
-.L_return_32:
-        setup_return w1=r3, w2=r2
-        b       .L_do_return
-.L_return_34:
-        setup_return w1=r3, w2=r4
-        b       .L_do_return
-.L_return_25:
-        setup_return w1=r2, w2=r5
-        b       .L_do_return
-.L_return_35:
-        setup_return w1=r3, w2=r5
-        b       .L_do_return
-.L_return_24:
-        setup_return w1=r2, w2=r4
-
-.L_do_return:
-
-#ifdef __ARMEB__
-        mov     r0, ip
-#else /* not  __ARMEB__ */
-        rev     r0, ip
-#endif /* not  __ARMEB__ */
-
-        /* Restore temporaries early, before computing the return value.  */
-        ldrd    r6, r7, [sp]
-        ldrd    r4, r5, [sp, #8]
-        adds    sp, sp, #16
-        .cfi_def_cfa_offset 0
-        .cfi_restore r4
-        .cfi_restore r5
-        .cfi_restore r6
-        .cfi_restore r7
-
-        /* There is a zero or a different byte between r1 and r2.  */
-        /* r0 contains a mask of all-zero bytes in r1.  */
-        /* Using r0 and not ip here because cbz requires low register.  */
-        m_cbz   reg=r0, label=.L_compute_return_value
-        clz     r0, r0
-        /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
-        rsb     r0, r0, #24
-        /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1.  */
-        lsr     r1, r1, r0
-        lsr     r2, r2, r0
-
-.L_compute_return_value:
-        movs    r0, #1
-        cmp     r1, r2
-        /* The return value is computed as follows.
-        If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return.
-        If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0,
-        which means r0:=r0-r0-1 and r0 is #-1 at return.
-        If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1,
-        which means r0:=r0-r0 and r0 is #0 at return.
-        (C==0 and Z==1) cannot happen because the carry bit is "not borrow".  */
-        it      ls
-        sbcls   r0, r0, r0
-        bx      lr
-
-    /* The code from the previous version of strcmp.S handles all of the
-     * cases where the first string and seconds string cannot both be
-     * aligned to a word boundary faster than the new algorithm. See
-     * bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S for the unedited
-     * version of the code.
-     */
-.L_strcmp_unaligned:
-	wp1 .req r0
-	wp2 .req r1
-	b1  .req r2
-	w1  .req r4
-	w2  .req r5
-	t1  .req ip
-	@ r3 is scratch
-
-2:
-	mov	b1, #1
-	orr	b1, b1, b1, lsl #8
-	orr	b1, b1, b1, lsl #16
-
-	and	t1, wp2, #3
-	bic	wp2, wp2, #3
-	ldr	w1, [wp1], #4
-	ldr	w2, [wp2], #4
-	cmp	t1, #2
-	beq	2f
-	bhi	3f
-
-	/* Critical inner Loop: Block with 3 bytes initial overlap */
-	.p2align	2
-1:
-	bic	t1, w1, #MSB
-	cmp	t1, w2, S2LOMEM #8
-	sub	r3, w1, b1
-	bic	r3, r3, w1
-	bne	4f
-	ands	r3, r3, b1, lsl #7
-	it	eq
-	ldreq	w2, [wp2], #4
-	bne	5f
-	eor	t1, t1, w1
-	cmp	t1, w2, S2HIMEM #24
-	bne	6f
-	ldr	w1, [wp1], #4
-	b	1b
-4:
-	S2LOMEM	w2, w2, #8
-	b	8f
-
-5:
-#ifdef __ARMEB__
-	/* The syndrome value may contain false ones if the string ends
-	 * with the bytes 0x01 0x00
-	 */
-	tst	w1, #0xff000000
-	itt	ne
-	tstne	w1, #0x00ff0000
-	tstne	w1, #0x0000ff00
-	beq	7f
-#else
-	bics	r3, r3, #0xff000000
-	bne	7f
-#endif
-	ldrb	w2, [wp2]
-	S2LOMEM	t1, w1, #24
-#ifdef __ARMEB__
-	lsl	w2, w2, #24
-#endif
-	b	8f
-
-6:
-	S2LOMEM	t1, w1, #24
-	and	w2, w2, #LSB
-	b	8f
-
-	/* Critical inner Loop: Block with 2 bytes initial overlap */
-	.p2align	2
-2:
-	S2HIMEM	t1, w1, #16
-	sub	r3, w1, b1
-	S2LOMEM	t1, t1, #16
-	bic	r3, r3, w1
-	cmp	t1, w2, S2LOMEM #16
-	bne	4f
-	ands	r3, r3, b1, lsl #7
-	it	eq
-	ldreq	w2, [wp2], #4
-	bne	5f
-	eor	t1, t1, w1
-	cmp	t1, w2, S2HIMEM #16
-	bne	6f
-	ldr	w1, [wp1], #4
-	b	2b
-
-5:
-#ifdef __ARMEB__
-	/* The syndrome value may contain false ones if the string ends
-	 * with the bytes 0x01 0x00
-	 */
-	tst	w1, #0xff000000
-	it	ne
-	tstne	w1, #0x00ff0000
-	beq	7f
-#else
-	lsls	r3, r3, #16
-	bne	7f
-#endif
-	ldrh	w2, [wp2]
-	S2LOMEM	t1, w1, #16
-#ifdef __ARMEB__
-	lsl	w2, w2, #16
-#endif
-	b	8f
-
-6:
-	S2HIMEM	w2, w2, #16
-	S2LOMEM	t1, w1, #16
-4:
-	S2LOMEM	w2, w2, #16
-	b	8f
-
-	/* Critical inner Loop: Block with 1 byte initial overlap */
-	.p2align	2
-3:
-	and	t1, w1, #LSB
-	cmp	t1, w2, S2LOMEM #24
-	sub	r3, w1, b1
-	bic	r3, r3, w1
-	bne	4f
-	ands	r3, r3, b1, lsl #7
-	it	eq
-	ldreq	w2, [wp2], #4
-	bne	5f
-	eor	t1, t1, w1
-	cmp	t1, w2, S2HIMEM #8
-	bne	6f
-	ldr	w1, [wp1], #4
-	b	3b
-4:
-	S2LOMEM	w2, w2, #24
-	b	8f
-5:
-	/* The syndrome value may contain false ones if the string ends
-	 * with the bytes 0x01 0x00
-	 */
-	tst	w1, #LSB
-	beq	7f
-	ldr	w2, [wp2], #4
-6:
-	S2LOMEM	t1, w1, #8
-	bic	w2, w2, #MSB
-	b	8f
-7:
-	mov	r0, #0
-
-    /* Restore registers and stack. */
-    ldrd    r6, r7, [sp]
-    ldrd    r4, r5, [sp, #8]
-    adds    sp, sp, #16
-    .cfi_def_cfa_offset 0
-    .cfi_restore r4
-    .cfi_restore r5
-    .cfi_restore r6
-    .cfi_restore r7
-
-	bx	lr
-
-8:
-	and	r2, t1, #LSB
-	and	r0, w2, #LSB
-	cmp	r0, #1
-	it	cs
-	cmpcs	r0, r2
-	itt	eq
-	S2LOMEMEQ	t1, t1, #8
-	S2LOMEMEQ	w2, w2, #8
-	beq	8b
-	sub	r0, r2, r0
-
-    /* Restore registers and stack. */
-    ldrd    r6, r7, [sp]
-    ldrd    r4, r5, [sp, #8]
-    adds    sp, sp, #16
-
-	bx	lr
-END(strcmp_a9)
diff --git a/libc/arch-arm/dynamic_function_dispatch.cpp b/libc/arch-arm/dynamic_function_dispatch.cpp
index 640f330..5e1b8b0 100644
--- a/libc/arch-arm/dynamic_function_dispatch.cpp
+++ b/libc/arch-arm/dynamic_function_dispatch.cpp
@@ -288,16 +288,7 @@
 
 typedef int strcmp_func(const char* __lhs, const char* __rhs);
 DEFINE_IFUNC(strcmp) {
-    switch(get_cpu_variant()) {
-        case kCortexA9:
-            RETURN_FUNC(strcmp_func, strcmp_a9);
-        case kCortexA55:
-        case kKrait:
-        case kKryo:
-            RETURN_FUNC(strcmp_func, strcmp_krait);
-        default:
-            RETURN_FUNC(strcmp_func, strcmp_a15);
-    }
+    RETURN_FUNC(strcmp_func, strcmp_a15);
 }
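
For context, the DEFINE_IFUNC/RETURN_FUNC machinery boils down to a GNU ifunc resolver; a hedged C sketch of the pattern after this change (not bionic's actual macro expansion):

    typedef int strcmp_func(const char*, const char*);
    int strcmp_a15(const char*, const char*);

    /* The resolver runs once, at relocation time; after this change it
       unconditionally returns the generic Cortex-A15 implementation. */
    static strcmp_func* strcmp_resolver(void) { return strcmp_a15; }
    int strcmp(const char*, const char*)
            __attribute__((ifunc("strcmp_resolver")));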
 
 typedef size_t strlen_func(const char* __s);
diff --git a/libc/arch-arm/krait/bionic/strcmp.S b/libc/arch-arm/krait/bionic/strcmp.S
deleted file mode 100644
index ec692e5..0000000
--- a/libc/arch-arm/krait/bionic/strcmp.S
+++ /dev/null
@@ -1,489 +0,0 @@
-/*
- * Copyright (c) 2013 ARM Ltd
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. The name of the company may not be used to endorse or promote
- *    products derived from this software without specific prior written
- *    permission.
- *
- * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
- * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <private/bionic_asm.h>
-
-#ifdef __ARMEB__
-#define S2LOMEM lsl
-#define S2LOMEMEQ lsleq
-#define S2HIMEM lsr
-#define MSB 0x000000ff
-#define LSB 0xff000000
-#define BYTE0_OFFSET 24
-#define BYTE1_OFFSET 16
-#define BYTE2_OFFSET 8
-#define BYTE3_OFFSET 0
-#else /* not  __ARMEB__ */
-#define S2LOMEM lsr
-#define S2LOMEMEQ lsreq
-#define S2HIMEM lsl
-#define BYTE0_OFFSET 0
-#define BYTE1_OFFSET 8
-#define BYTE2_OFFSET 16
-#define BYTE3_OFFSET 24
-#define MSB 0xff000000
-#define LSB 0x000000ff
-#endif /* not  __ARMEB__ */
-
-.syntax         unified
-
-// To avoid warning about deprecated instructions, add an explicit
-// arch. The code generated is exactly the same.
-.arch armv7-a
-
-#if defined (__thumb__)
-        .thumb
-        .thumb_func
-#endif
-
-ENTRY(strcmp_krait)
-      /* Use LDRD whenever possible.  */
-
-/* The main thing to look out for when comparing large blocks is that
-   the loads do not cross a page boundary when loading past the index
-   of the byte with the first difference or the first string-terminator.
-
-   For example, if the strings are identical and the string-terminator
-   is at index k, byte by byte comparison will not load beyond address
-   s1+k and s2+k; word by word comparison may load up to 3 bytes beyond
-   k; double word - up to 7 bytes.  If the load of these bytes crosses
-   a page boundary, it might cause a memory fault (if the page is not mapped)
-   that would not have happened in byte by byte comparison.
-
-   If an address is (double) word aligned, then a load of a (double) word
-   from that address will not cross a page boundary.
-   Therefore, the algorithm below considers word and double-word alignment
-   of strings separately.  */
-
-/* High-level description of the algorithm.
-
-   * The fast path: if both strings are double-word aligned,
-     use LDRD to load two words from each string in every loop iteration.
-   * If the strings have the same offset from a word boundary,
-     use LDRB to load and compare byte by byte until
-     the first string is aligned to a word boundary (at most 3 bytes).
-     This is optimized for quick return on short unaligned strings.
-   * If the strings have the same offset from a double-word boundary,
-     use LDRD to load two words from each string in every loop iteration, as in the fast path.
-   * If the strings do not have the same offset from a double-word boundary,
-     load a word from the second string before the loop to initialize the queue.
-     Use LDRD to load two words from every string in every loop iteration.
-     Inside the loop, load the second word from the second string only after comparing
-     the first word, using the queued value, to guarantee safety across page boundaries.
-   * If the strings do not have the same offset from a word boundary,
-     use LDR and a shift queue. Order of loads and comparisons matters,
-     similarly to the previous case.
-
-   * Use UADD8 and SEL to compare words, and use REV and CLZ to compute the return value.
-   * The only difference between ARM and Thumb modes is the use of CBZ instruction.
-   * The only difference between big and little endian is the use of REV in little endian
-     to compute the return value, instead of MOV.
-*/
-
-        .macro m_cbz reg label
-#ifdef __thumb2__
-        cbz     \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        beq     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbz */
-
-        .macro m_cbnz reg label
-#ifdef __thumb2__
-        cbnz    \reg, \label
-#else   /* not defined __thumb2__ */
-        cmp     \reg, #0
-        bne     \label
-#endif /* not defined __thumb2__ */
-        .endm /* m_cbnz */
-
-        .macro  init
-        /* Macro to save temporary registers and prepare magic values.  */
-        subs    sp, sp, #16
-        .cfi_def_cfa_offset 16
-        strd    r4, r5, [sp, #8]
-        .cfi_rel_offset r4, 0
-        .cfi_rel_offset r5, 4
-        strd    r6, r7, [sp]
-        .cfi_rel_offset r6, 8
-        .cfi_rel_offset r7, 12
-        mvn     r6, #0  /* all F */
-        mov     r7, #0  /* all 0 */
-        .endm   /* init */
-
-        .macro  magic_compare_and_branch w1 w2 label
-        /* Macro to compare registers w1 and w2 and conditionally branch to label.  */
-        cmp     \w1, \w2        /* Are w1 and w2 the same?  */
-        magic_find_zero_bytes \w1
-        it      eq
-        cmpeq   ip, #0          /* Is there a zero byte in w1?  */
-        bne     \label
-        .endm /* magic_compare_and_branch */
-
-        .macro  magic_find_zero_bytes w1
-        /* Macro to find all-zero bytes in w1, result is in ip.  */
-        uadd8   ip, \w1, r6
-        sel     ip, r7, r6
-        .endm /* magic_find_zero_bytes */
-
-        .macro  setup_return w1 w2
-#ifdef __ARMEB__
-        mov     r1, \w1
-        mov     r2, \w2
-#else /* not  __ARMEB__ */
-        rev     r1, \w1
-        rev     r2, \w2
-#endif /* not  __ARMEB__ */
-        .endm /* setup_return */
-
-        pld [r0, #0]
-        pld [r1, #0]
-
-        /* Are both strings double-word aligned?  */
-        orr     ip, r0, r1
-        tst     ip, #7
-        bne     .L_do_align
-
-        /* Fast path.  */
-        init
-
-.L_doubleword_aligned:
-
-        /* Get here when the strings to compare are double-word aligned.  */
-        /* Compare two words in every iteration.  */
-        .p2align        2
-2:
-        pld [r0, #16]
-        pld [r1, #16]
-
-        /* Load the next double-word from each string.  */
-        ldrd    r2, r3, [r0], #8
-        ldrd    r4, r5, [r1], #8
-
-        magic_compare_and_branch w1=r2, w2=r4, label=.L_return_24
-        magic_compare_and_branch w1=r3, w2=r5, label=.L_return_35
-        b       2b
-
-.L_do_align:
-        /* Is the first string word-aligned?  */
-        ands    ip, r0, #3
-        beq     .L_word_aligned_r0
-
-        /* Fast compare byte by byte until the first string is word-aligned.  */
-        /* The offset of r0 from a word boundary is in ip. Thus, the number of bytes
-        to read until the next word boundary is 4-ip.  */
-        bic     r0, r0, #3
-        ldr     r2, [r0], #4
-        lsls    ip, ip, #31
-        beq     .L_byte2
-        bcs     .L_byte3
-
-.L_byte1:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE1_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
-
-.L_byte2:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE2_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbz   reg=r3, label=.L_fast_return
-
-.L_byte3:
-        ldrb    ip, [r1], #1
-        uxtb    r3, r2, ror #BYTE3_OFFSET
-        subs    ip, r3, ip
-        bne     .L_fast_return
-        m_cbnz  reg=r3, label=.L_word_aligned_r0
-
-.L_fast_return:
-        mov     r0, ip
-        bx      lr
-
-.L_word_aligned_r0:
-        init
-        /* The first string is word-aligned.  */
-        /* Is the second string word-aligned?  */
-        ands    ip, r1, #3
-        bne     .L_strcmp_unaligned
-
-.L_word_aligned:
-        /* The strings are word-aligned. */
-        /* Is the first string double-word aligned?  */
-        tst     r0, #4
-        beq     .L_doubleword_aligned_r0
-
-        /* If r0 is not double-word aligned yet, align it by loading
-        and comparing the next word from each string.  */
-        ldr     r2, [r0], #4
-        ldr     r4, [r1], #4
-        magic_compare_and_branch w1=r2 w2=r4 label=.L_return_24
-
-.L_doubleword_aligned_r0:
-        /* Get here when r0 is double-word aligned.  */
-        /* Is r1 doubleword_aligned?  */
-        tst     r1, #4
-        beq     .L_doubleword_aligned
-
-        /* Get here when the strings to compare are word-aligned,
-        r0 is double-word aligned, but r1 is not double-word aligned.  */
-
-        /* Initialize the queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare two words in every iteration.  */
-        .p2align        2
-3:
-        pld [r0, #16]
-        pld [r1, #16]
-
-        /* Load the next double-word from each string and compare.  */
-        ldrd    r2, r3, [r0], #8
-        magic_compare_and_branch w1=r2 w2=r5 label=.L_return_25
-        ldrd    r4, r5, [r1], #8
-        magic_compare_and_branch w1=r3 w2=r4 label=.L_return_34
-        b       3b
-
-        .macro miscmp_word offsetlo offsethi
-        /* Macro to compare misaligned strings.  */
-        /* r0, r1 are word-aligned, and at least one of the strings
-        is not double-word aligned.  */
-        /* Compare one word in every loop iteration.  */
-        /* OFFSETLO is the original bit-offset of r1 from a word-boundary,
-        OFFSETHI is 32 - OFFSETLO (i.e., offset from the next word).  */
-
-        /* Initialize the shift queue.  */
-        ldr     r5, [r1], #4
-
-        /* Compare one word from each string in every loop iteration.  */
-        .p2align        2
-7:
-        ldr     r3, [r0], #4
-        S2LOMEM r5, r5, #\offsetlo
-        magic_find_zero_bytes w1=r3
-        cmp     r7, ip, S2HIMEM #\offsetlo
-        and     r2, r3, r6, S2LOMEM #\offsetlo
-        it      eq
-        cmpeq   r2, r5
-        bne     .L_return_25
-        ldr     r5, [r1], #4
-        cmp     ip, #0
-        eor r3, r2, r3
-        S2HIMEM r2, r5, #\offsethi
-        it      eq
-        cmpeq   r3, r2
-        bne     .L_return_32
-        b       7b
-        .endm /* miscmp_word */
-
-.L_strcmp_unaligned:
-        /* r0 is word-aligned, r1 is at offset ip from a word.  */
-        /* Align r1 to the (previous) word-boundary.  */
-        bic     r1, r1, #3
-
-        /* Unaligned comparison word by word using LDRs. */
-        cmp     ip, #2
-        beq     .L_miscmp_word_16                 /* If ip == 2.  */
-        bge     .L_miscmp_word_24                 /* If ip == 3.  */
-        miscmp_word offsetlo=8 offsethi=24        /* If ip == 1.  */
-.L_miscmp_word_24:  miscmp_word offsetlo=24 offsethi=8
-
-
-.L_return_32:
-        setup_return w1=r3, w2=r2
-        b       .L_do_return
-.L_return_34:
-        setup_return w1=r3, w2=r4
-        b       .L_do_return
-.L_return_25:
-        setup_return w1=r2, w2=r5
-        b       .L_do_return
-.L_return_35:
-        setup_return w1=r3, w2=r5
-        b       .L_do_return
-.L_return_24:
-        setup_return w1=r2, w2=r4
-
-.L_do_return:
-
-#ifdef __ARMEB__
-        mov     r0, ip
-#else /* not  __ARMEB__ */
-        rev     r0, ip
-#endif /* not  __ARMEB__ */
-
-        /* Restore temporaries early, before computing the return value.  */
-        ldrd    r6, r7, [sp]
-        ldrd    r4, r5, [sp, #8]
-        adds    sp, sp, #16
-        .cfi_def_cfa_offset 0
-        .cfi_restore r4
-        .cfi_restore r5
-        .cfi_restore r6
-        .cfi_restore r7
-
-        /* There is a zero or a different byte between r1 and r2.  */
-        /* r0 contains a mask of all-zero bytes in r1.  */
-        /* Using r0 and not ip here because cbz requires low register.  */
-        m_cbz   reg=r0, label=.L_compute_return_value
-        clz     r0, r0
-        /* r0 contains the number of bits on the left of the first all-zero byte in r1.  */
-        rsb     r0, r0, #24
-        /* Here, r0 contains the number of bits on the right of the first all-zero byte in r1.  */
-        lsr     r1, r1, r0
-        lsr     r2, r2, r0
-
-.L_compute_return_value:
-        movs    r0, #1
-        cmp     r1, r2
-        /* The return value is computed as follows.
-        If r1>r2 then (C==1 and Z==0) and LS doesn't hold and r0 is #1 at return.
-        If r1<r2 then (C==0 and Z==0) and we execute SBC with carry_in=0,
-        which means r0:=r0-r0-1 and r0 is #-1 at return.
-        If r1=r2 then (C==1 and Z==1) and we execute SBC with carry_in=1,
-        which means r0:=r0-r0 and r0 is #0 at return.
-        (C==0 and Z==1) cannot happen because the carry bit is "not borrow".  */
-        it      ls
-        sbcls   r0, r0, r0
-        bx      lr
-
-    /* The code from the previous version of strcmp.S handles this
-     * particular case (the second string is 2 bytes off a word alignment)
-     * faster than any current version. In this very specific case, use the
-     * previous version. See bionic/libc/arch-arm/cortex-a15/bionic/strcmp.S
-     * for the unedited version of this code.
-     */
-.L_miscmp_word_16:
-	wp1 .req r0
-	wp2 .req r1
-	b1  .req r2
-	w1  .req r4
-	w2  .req r5
-	t1  .req ip
-	@ r3 is scratch
-
-    /* At this point, wp1 (r0) has already been word-aligned. */
-2:
-	mov	b1, #1
-	orr	b1, b1, b1, lsl #8
-	orr	b1, b1, b1, lsl #16
-
-	and	t1, wp2, #3
-	bic	wp2, wp2, #3
-	ldr	w1, [wp1], #4
-	ldr	w2, [wp2], #4
-
-	/* Critical inner Loop: Block with 2 bytes initial overlap */
-	.p2align	2
-2:
-	S2HIMEM	t1, w1, #16
-	sub	r3, w1, b1
-	S2LOMEM	t1, t1, #16
-	bic	r3, r3, w1
-	cmp	t1, w2, S2LOMEM #16
-	bne	4f
-	ands	r3, r3, b1, lsl #7
-	it	eq
-	ldreq	w2, [wp2], #4
-	bne	5f
-	eor	t1, t1, w1
-	cmp	t1, w2, S2HIMEM #16
-	bne	6f
-	ldr	w1, [wp1], #4
-	b	2b
-
-5:
-#ifdef __ARMEB__
-	/* The syndrome value may contain false ones if the string ends
-	 * with the bytes 0x01 0x00
-	 */
-	tst	w1, #0xff000000
-	it	ne
-	tstne	w1, #0x00ff0000
-	beq	7f
-#else
-	lsls	r3, r3, #16
-	bne	7f
-#endif
-	ldrh	w2, [wp2]
-	S2LOMEM	t1, w1, #16
-#ifdef __ARMEB__
-	lsl	w2, w2, #16
-#endif
-	b	8f
-
-6:
-	S2HIMEM	w2, w2, #16
-	S2LOMEM	t1, w1, #16
-4:
-	S2LOMEM	w2, w2, #16
-	b	8f
-
-7:
-	mov	r0, #0
-
-    /* Restore registers and stack. */
-    ldrd    r6, r7, [sp]
-    ldrd    r4, r5, [sp, #8]
-    adds    sp, sp, #16
-    .cfi_def_cfa_offset 0
-    .cfi_restore r4
-    .cfi_restore r5
-    .cfi_restore r6
-    .cfi_restore r7
-
-	bx	lr
-
-8:
-	and	r2, t1, #LSB
-	and	r0, w2, #LSB
-	cmp	r0, #1
-	it	cs
-	cmpcs	r0, r2
-	itt	eq
-	S2LOMEMEQ	t1, t1, #8
-	S2LOMEMEQ	w2, w2, #8
-	beq	8b
-	sub	r0, r2, r0
-
-    /* Restore registers and stack. */
-    ldrd    r6, r7, [sp]
-    ldrd    r4, r5, [sp, #8]
-    adds    sp, sp, #16
-    .cfi_def_cfa_offset 0
-    .cfi_restore r4
-    .cfi_restore r5
-    .cfi_restore r6
-    .cfi_restore r7
-
-	bx	lr
-END(strcmp_krait)
diff --git a/libc/async_safe/Android.bp b/libc/async_safe/Android.bp
index fbaaad1..c28d53a 100644
--- a/libc/async_safe/Android.bp
+++ b/libc/async_safe/Android.bp
@@ -10,6 +10,7 @@
     name: "libasync_safe",
     vendor_available: true,
     recovery_available: true,
+    native_bridge_supported: true,
 
     include_dirs: ["bionic/libc"],
     header_libs: ["libc_headers", "liblog_headers"],
@@ -22,6 +23,7 @@
 cc_library_headers {
     name: "libasync_safe_headers",
     recovery_available: true,
+    native_bridge_supported: true,
     defaults: ["linux_bionic_supported"],
 
     export_include_dirs: ["include"],
diff --git a/libc/bionic/jemalloc_wrapper.cpp b/libc/bionic/jemalloc_wrapper.cpp
index bc3a9dc..bf0494d 100644
--- a/libc/bionic/jemalloc_wrapper.cpp
+++ b/libc/bionic/jemalloc_wrapper.cpp
@@ -103,6 +103,14 @@
     }
     return 1;
   } else if (param == M_PURGE) {
+    // Only clear the current thread's cache, since there is no easy way to
+    // clear the caches of other threads.
+    // This must be done first so that the freed allocations are purged by
+    // the arena purge calls below.
+    if (je_mallctl("thread.tcache.flush", nullptr, nullptr, nullptr, 0) != 0) {
+      return 0;
+    }
+
     unsigned narenas;
     size_t sz = sizeof(unsigned);
     if (je_mallctl("arenas.narenas", &narenas, &sz, nullptr, 0) != 0) {
@@ -120,10 +128,10 @@
 
 __BEGIN_DECLS
 
-size_t __mallinfo_narenas();
-size_t __mallinfo_nbins();
-struct mallinfo __mallinfo_arena_info(size_t);
-struct mallinfo __mallinfo_bin_info(size_t, size_t);
+size_t je_mallinfo_narenas();
+size_t je_mallinfo_nbins();
+struct mallinfo je_mallinfo_arena_info(size_t);
+struct mallinfo je_mallinfo_bin_info(size_t, size_t);
 
 __END_DECLS
 
@@ -136,8 +144,8 @@
   MallocXmlElem root(fp, "malloc", "version=\"jemalloc-1\"");
 
   // Dump all of the large allocations in the arenas.
-  for (size_t i = 0; i < __mallinfo_narenas(); i++) {
-    struct mallinfo mi = __mallinfo_arena_info(i);
+  for (size_t i = 0; i < je_mallinfo_narenas(); i++) {
+    struct mallinfo mi = je_mallinfo_arena_info(i);
     if (mi.hblkhd != 0) {
       MallocXmlElem arena_elem(fp, "heap", "nr=\"%d\"", i);
       {
@@ -146,8 +154,8 @@
         MallocXmlElem(fp, "allocated-bins").Contents("%zu", mi.fsmblks);
 
         size_t total = 0;
-        for (size_t j = 0; j < __mallinfo_nbins(); j++) {
-          struct mallinfo mi = __mallinfo_bin_info(i, j);
+        for (size_t j = 0; j < je_mallinfo_nbins(); j++) {
+          struct mallinfo mi = je_mallinfo_bin_info(i, j);
           if (mi.ordblks != 0) {
             MallocXmlElem bin_elem(fp, "bin", "nr=\"%d\"", j);
             MallocXmlElem(fp, "allocated").Contents("%zu", mi.ordblks);
diff --git a/libc/bionic/malloc_common_dynamic.cpp b/libc/bionic/malloc_common_dynamic.cpp
index 599ac6a..8035746 100644
--- a/libc/bionic/malloc_common_dynamic.cpp
+++ b/libc/bionic/malloc_common_dynamic.cpp
@@ -500,3 +500,40 @@
   return HeapprofdMallopt(opcode, arg, arg_size);
 }
 // =============================================================================
+
+#if !defined(__LP64__) && defined(__arm__)
+// =============================================================================
+// Old platform-only functions that some old 32-bit apps are still using.
+// See b/132175052.
+// Only compile these functions for 32-bit arm, so that new apps do not
+// use them.
+// =============================================================================
+extern "C" void get_malloc_leak_info(uint8_t** info, size_t* overall_size, size_t* info_size,
+                                     size_t* total_memory, size_t* backtrace_size) {
+  if (info == nullptr || overall_size == nullptr || info_size == nullptr ||
+      total_memory == nullptr || backtrace_size == nullptr) {
+    return;
+  }
+
+  *info = nullptr;
+  *overall_size = 0;
+  *info_size = 0;
+  *total_memory = 0;
+  *backtrace_size = 0;
+
+  android_mallopt_leak_info_t leak_info = {};
+  if (android_mallopt(M_GET_MALLOC_LEAK_INFO, &leak_info, sizeof(leak_info))) {
+    *info = leak_info.buffer;
+    *overall_size = leak_info.overall_size;
+    *info_size = leak_info.info_size;
+    *total_memory = leak_info.total_memory;
+    *backtrace_size = leak_info.backtrace_size;
+  }
+}
+
+extern "C" void free_malloc_leak_info(uint8_t* info) {
+  android_mallopt_leak_info_t leak_info = { .buffer = info };
+  android_mallopt(M_FREE_MALLOC_LEAK_INFO, &leak_info, sizeof(leak_info));
+}
+// =============================================================================
+#endif
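
A hypothetical sketch of how a legacy 32-bit app consumed these symbols (the dlsym lookup and the names below are illustrative assumptions, not part of this patch):

    #include <dlfcn.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef void (*get_leak_info_fn)(uint8_t**, size_t*, size_t*, size_t*, size_t*);
    typedef void (*free_leak_info_fn)(uint8_t*);

    void dump_leak_info() {
      void* libc = dlopen("libc.so", RTLD_LAZY);
      if (libc == nullptr) return;
      auto get_info = reinterpret_cast<get_leak_info_fn>(dlsym(libc, "get_malloc_leak_info"));
      auto free_info = reinterpret_cast<free_leak_info_fn>(dlsym(libc, "free_malloc_leak_info"));
      if (get_info != nullptr && free_info != nullptr) {
        uint8_t* info;
        size_t overall_size, info_size, total_memory, backtrace_size;
        get_info(&info, &overall_size, &info_size, &total_memory, &backtrace_size);
        if (info != nullptr) free_info(info);
      }
      dlclose(libc);
    }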
diff --git a/libc/bionic/malloc_heapprofd.cpp b/libc/bionic/malloc_heapprofd.cpp
index 2aeb9bf..5d3735d 100644
--- a/libc/bionic/malloc_heapprofd.cpp
+++ b/libc/bionic/malloc_heapprofd.cpp
@@ -143,22 +143,23 @@
   }
 }
 
+constexpr char kHeapprofdProgramPropertyPrefix[] = "heapprofd.enable.";
+constexpr size_t kHeapprofdProgramPropertyPrefixSize = sizeof(kHeapprofdProgramPropertyPrefix) - 1;
+constexpr size_t kMaxCmdlineSize = 512;
+
 static bool GetHeapprofdProgramProperty(char* data, size_t size) {
-  constexpr char prefix[] = "heapprofd.enable.";
-  // - 1 to skip nullbyte, which we will write later.
-  constexpr size_t prefix_size = sizeof(prefix) - 1;
-  if (size < prefix_size) {
+  if (size < kHeapprofdProgramPropertyPrefixSize) {
     error_log("%s: Overflow constructing heapprofd property", getprogname());
     return false;
   }
-  memcpy(data, prefix, prefix_size);
+  memcpy(data, kHeapprofdProgramPropertyPrefix, kHeapprofdProgramPropertyPrefixSize);
 
   int fd = open("/proc/self/cmdline", O_RDONLY | O_CLOEXEC);
   if (fd == -1) {
     error_log("%s: Failed to open /proc/self/cmdline", getprogname());
     return false;
   }
-  char cmdline[128];
+  char cmdline[kMaxCmdlineSize];
   ssize_t rd = read(fd, cmdline, sizeof(cmdline) - 1);
   close(fd);
   if (rd == -1) {
@@ -167,7 +168,7 @@
   }
   cmdline[rd] = '\0';
   char* first_arg = static_cast<char*>(memchr(cmdline, '\0', rd));
-  if (first_arg == nullptr || first_arg == cmdline + size - 1) {
+  if (first_arg == nullptr) {
     error_log("%s: Overflow reading cmdline", getprogname());
     return false;
   }
@@ -192,12 +193,12 @@
   }
 
   size_t name_size = static_cast<size_t>(first_arg - start);
-  if (name_size >= size - prefix_size) {
+  if (name_size >= size - kHeapprofdProgramPropertyPrefixSize) {
     error_log("%s: overflow constructing heapprofd property.", getprogname());
     return false;
   }
   // + 1 to also copy the trailing null byte.
-  memcpy(data + prefix_size, start, name_size + 1);
+  memcpy(data + kHeapprofdProgramPropertyPrefixSize, start, name_size + 1);
   return true;
 }
 
@@ -213,7 +214,7 @@
     return true;
   }
 
-  char program_property[128];
+  char program_property[kHeapprofdProgramPropertyPrefixSize + kMaxCmdlineSize];
   if (!GetHeapprofdProgramProperty(program_property,
                                    sizeof(program_property))) {
     return false;
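
The net effect of these hunks: the property name is the fixed prefix plus the process name taken from /proc/self/cmdline, and the caller's buffer is now sized so the largest supported cmdline always fits. A rough fragment-level equivalent of the construction (illustrative: getprogname() stands in for the cmdline parsing above, and the constants are those defined in this patch):

    char program_property[kHeapprofdProgramPropertyPrefixSize + kMaxCmdlineSize];
    // e.g. a process named "com.example.app" checks "heapprofd.enable.com.example.app".
    snprintf(program_property, sizeof(program_property), "%s%s",
             kHeapprofdProgramPropertyPrefix, getprogname());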
diff --git a/libc/include/bits/seek_constants.h b/libc/include/bits/seek_constants.h
index 6b88606..6f3f22d 100644
--- a/libc/include/bits/seek_constants.h
+++ b/libc/include/bits/seek_constants.h
@@ -39,3 +39,23 @@
 #define SEEK_CUR 1
 /** Seek relative to the end of the file. */
 #define SEEK_END 2
+
+#if defined(__USE_GNU)
+
+/**
+ * Seek to the first data (non-hole) location in the file
+ * greater than or equal to the given offset.
+ *
+ * See [lseek(2)](http://man7.org/linux/man-pages/man2/lseek.2.html).
+ */
+#define SEEK_DATA 3
+
+/**
+ * Seek to the first hole (non-data) location in the file
+ * greater than or equal to the given offset.
+ *
+ * See [lseek(2)](http://man7.org/linux/man-pages/man2/lseek.2.html).
+ */
+#define SEEK_HOLE 4
+
+#endif
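
With these constants exposed under _GNU_SOURCE, callers can walk the data/hole map of a sparse file. A minimal sketch:

    #define _GNU_SOURCE  // for SEEK_DATA/SEEK_HOLE
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    // Print every data extent of a (possibly sparse) file.
    void print_data_extents(const char* path) {
      int fd = open(path, O_RDONLY);
      if (fd == -1) return;
      off_t data = 0;
      while ((data = lseek(fd, data, SEEK_DATA)) != -1) {
        off_t hole = lseek(fd, data, SEEK_HOLE);  // each data extent ends at a hole (or EOF)
        if (hole == -1) break;
        printf("data: [%jd, %jd)\n", (intmax_t)data, (intmax_t)hole);
        data = hole;
      }
      close(fd);
    }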
diff --git a/libc/include/malloc.h b/libc/include/malloc.h
index eba18a8..aa046b9 100644
--- a/libc/include/malloc.h
+++ b/libc/include/malloc.h
@@ -125,7 +125,8 @@
 
 /**
  * [mallinfo(3)](http://man7.org/linux/man-pages/man3/mallinfo.3.html) returns
- * information about the current state of the heap.
+ * information about the current state of the heap. Note that mallinfo() is
+ * inherently unreliable; consider using malloc_info() instead.
  */
 struct mallinfo mallinfo(void);
 
diff --git a/libc/include/setjmp.h b/libc/include/setjmp.h
index 195f251..67d3c2f 100644
--- a/libc/include/setjmp.h
+++ b/libc/include/setjmp.h
@@ -65,18 +65,18 @@
 
 __BEGIN_DECLS
 
-int _setjmp(jmp_buf __env);
-void _longjmp(jmp_buf __env, int __value);
+int _setjmp(jmp_buf __env) __returns_twice;
+__noreturn void _longjmp(jmp_buf __env, int __value);
 
-int setjmp(jmp_buf __env);
-void longjmp(jmp_buf __env, int __value);
+int setjmp(jmp_buf __env) __returns_twice;
+__noreturn void longjmp(jmp_buf __env, int __value);
 
 #define setjmp(__env) setjmp(__env)
 
-int sigsetjmp(sigjmp_buf __env, int __save_signal_mask)
-    __INTRODUCED_IN_ARM(9) __INTRODUCED_IN_MIPS(12) __INTRODUCED_IN_X86(12);
-void siglongjmp(sigjmp_buf __env, int __value)
-    __INTRODUCED_IN_ARM(9) __INTRODUCED_IN_MIPS(12) __INTRODUCED_IN_X86(12);
+int sigsetjmp(sigjmp_buf __env, int __save_signal_mask) __returns_twice __INTRODUCED_IN_ARM(9)
+    __INTRODUCED_IN_MIPS(12) __INTRODUCED_IN_X86(12);
+__noreturn void siglongjmp(sigjmp_buf __env, int __value) __INTRODUCED_IN_ARM(9)
+    __INTRODUCED_IN_MIPS(12) __INTRODUCED_IN_X86(12);
 
 __END_DECLS
 
diff --git a/libc/include/sys/cdefs.h b/libc/include/sys/cdefs.h
index a919a79..ca9374e 100644
--- a/libc/include/sys/cdefs.h
+++ b/libc/include/sys/cdefs.h
@@ -86,6 +86,7 @@
 #define __noreturn __attribute__((__noreturn__))
 #define __mallocfunc  __attribute__((__malloc__))
 #define __packed __attribute__((__packed__))
+#define __returns_twice __attribute__((__returns_twice__))
 #define __unused __attribute__((__unused__))
 #define __used __attribute__((__used__))
 
diff --git a/libc/include/unistd.h b/libc/include/unistd.h
index 90173aa..9000407 100644
--- a/libc/include/unistd.h
+++ b/libc/include/unistd.h
@@ -78,7 +78,7 @@
 __noreturn void _exit(int __status);
 
 pid_t  fork(void);
-pid_t  vfork(void);
+pid_t  vfork(void) __returns_twice;
 pid_t  getpid(void);
 pid_t  gettid(void) __attribute_const__;
 pid_t  getpgid(pid_t __pid);
diff --git a/libc/libc.map.txt b/libc/libc.map.txt
index 4a734fc..bc26d2a 100644
--- a/libc/libc.map.txt
+++ b/libc/libc.map.txt
@@ -1742,6 +1742,8 @@
 LIBC_DEPRECATED {
   global:
     __system_property_wait_any;
+    free_malloc_leak_info; # arm
+    get_malloc_leak_info; # arm
 };
 
 LIBC_PLATFORM {
diff --git a/libc/malloc_debug/malloc_debug.cpp b/libc/malloc_debug/malloc_debug.cpp
index 91e1d26..53fcead 100644
--- a/libc/malloc_debug/malloc_debug.cpp
+++ b/libc/malloc_debug/malloc_debug.cpp
@@ -29,6 +29,7 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <malloc.h>
+#include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -103,6 +104,32 @@
 __END_DECLS
 // ------------------------------------------------------------------------
 
+class ScopedConcurrentLock {
+ public:
+  ScopedConcurrentLock() {
+    pthread_rwlock_rdlock(&lock_);
+  }
+  ~ScopedConcurrentLock() {
+    pthread_rwlock_unlock(&lock_);
+  }
+
+  static void Init() {
+    pthread_rwlockattr_t attr;
+    // Set the attribute so that when a write lock is pending, read locks are no
+    // longer granted.
+    pthread_rwlockattr_setkind_np(&attr, PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP);
+    pthread_rwlock_init(&lock_, &attr);
+  }
+
+  static void BlockAllOperations() {
+    pthread_rwlock_wrlock(&lock_);
+  }
+
+ private:
+  static pthread_rwlock_t lock_;
+};
+pthread_rwlock_t ScopedConcurrentLock::lock_;
+
 static void InitAtfork() {
   static pthread_once_t atfork_init = PTHREAD_ONCE_INIT;
   pthread_once(&atfork_init, []() {
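
The gate works as a process-wide reader/writer lock: every debug entry point takes the read lock, so normal operation stays fully concurrent, while teardown takes the write lock once, waiting out in-flight operations and then blocking new ones. The writer-preference kind keeps busily allocating threads from starving the exiting thread. Illustrative use (not part of this patch):

    void debug_operation_sketch() {
      ScopedConcurrentLock lock;  // read lock: many threads proceed concurrently
      // ... allocation bookkeeping ...
    }                             // released by the destructor

    void debug_finalize_sketch() {
      // Write lock: waits for in-flight operations, then blocks all new ones
      // for good, so teardown can proceed safely.
      ScopedConcurrentLock::BlockAllOperations();
    }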
@@ -257,6 +284,8 @@
     info_log("%s: malloc debug enabled", getprogname());
   }
 
+  ScopedConcurrentLock::Init();
+
   return true;
 }
 
@@ -265,6 +294,10 @@
     return;
   }
 
+  // Make sure that there are no other threads doing debug allocations
+  // before we kill everything.
+  ScopedConcurrentLock::BlockAllOperations();
+
   // Turn off capturing allocations calls.
   DebugDisableSet(true);
 
@@ -292,6 +325,8 @@
 
 void debug_get_malloc_leak_info(uint8_t** info, size_t* overall_size, size_t* info_size,
                                 size_t* total_memory, size_t* backtrace_size) {
+  ScopedConcurrentLock lock;
+
   ScopedDisableDebugCalls disable;
 
   // Verify the arguments.
@@ -325,6 +360,7 @@
   if (DebugCallsDisabled() || pointer == nullptr) {
     return g_dispatch->malloc_usable_size(pointer);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   if (!VerifyPointer(pointer, "malloc_usable_size")) {
@@ -388,6 +424,7 @@
   if (DebugCallsDisabled()) {
     return g_dispatch->malloc(size);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   void* pointer = InternalMalloc(size);
@@ -463,6 +500,7 @@
   if (DebugCallsDisabled() || pointer == nullptr) {
     return g_dispatch->free(pointer);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   if (g_debug->config().options() & RECORD_ALLOCS) {
@@ -480,6 +518,7 @@
   if (DebugCallsDisabled()) {
     return g_dispatch->memalign(alignment, bytes);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   if (bytes == 0) {
@@ -558,6 +597,7 @@
   if (DebugCallsDisabled()) {
     return g_dispatch->realloc(pointer, bytes);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   if (pointer == nullptr) {
@@ -676,6 +716,7 @@
   if (DebugCallsDisabled()) {
     return g_dispatch->calloc(nmemb, bytes);
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   size_t size;
@@ -737,6 +778,8 @@
   if (DebugCallsDisabled() || !g_debug->TrackPointers()) {
     return g_dispatch->malloc_info(options, fp);
   }
+  ScopedConcurrentLock lock;
+  ScopedDisableDebugCalls disable;
 
   MallocXmlElem root(fp, "malloc", "version=\"debug-malloc-1\"");
   std::vector<ListInfoType> list;
@@ -786,6 +829,7 @@
 
 int debug_iterate(uintptr_t base, size_t size, void (*callback)(uintptr_t, size_t, void*),
                   void* arg) {
+  ScopedConcurrentLock lock;
   if (g_debug->TrackPointers()) {
     // Since malloc is disabled, don't bother acquiring any locks.
     for (auto it = PointerData::begin(); it != PointerData::end(); ++it) {
@@ -800,6 +844,7 @@
 }
 
 void debug_malloc_disable() {
+  ScopedConcurrentLock lock;
   g_dispatch->malloc_disable();
   if (g_debug->pointer) {
     g_debug->pointer->PrepareFork();
@@ -807,6 +852,7 @@
 }
 
 void debug_malloc_enable() {
+  ScopedConcurrentLock lock;
   if (g_debug->pointer) {
     g_debug->pointer->PostForkParent();
   }
@@ -817,6 +863,7 @@
   if (DebugCallsDisabled() || pointer == nullptr) {
     return 0;
   }
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   if (!(g_debug->config().options() & BACKTRACE)) {
@@ -870,6 +917,7 @@
 }
 
 bool debug_write_malloc_leak_info(FILE* fp) {
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   std::lock_guard<std::mutex> guard(g_dump_lock);
@@ -883,6 +931,7 @@
 }
 
 void debug_dump_heap(const char* file_name) {
+  ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
 
   std::lock_guard<std::mutex> guard(g_dump_lock);
diff --git a/libc/malloc_debug/tests/malloc_debug_system_tests.cpp b/libc/malloc_debug/tests/malloc_debug_system_tests.cpp
index 71e8ebf..f85c45b 100644
--- a/libc/malloc_debug/tests/malloc_debug_system_tests.cpp
+++ b/libc/malloc_debug/tests/malloc_debug_system_tests.cpp
@@ -42,13 +42,15 @@
 #include <log/log.h>
 
 #include <string>
+#include <thread>
 #include <vector>
 
 #include "private/bionic_malloc.h"
 
-static constexpr time_t kTimeoutSeconds = 5;
+static constexpr time_t kTimeoutSeconds = 10;
 
-static void Exec(const char* test_name, const char* debug_options, pid_t* pid) {
+static void Exec(const char* test_name, const char* debug_options, pid_t* pid, int exit_code = 0,
+                 time_t timeout_seconds = kTimeoutSeconds) {
   int fds[2];
   ASSERT_NE(-1, pipe(fds));
   ASSERT_NE(-1, fcntl(fds[0], F_SETFL, O_NONBLOCK));
@@ -94,7 +96,8 @@
       output.append(buffer.data(), bytes);
     }
 
-    if ((time(nullptr) - start_time) > kTimeoutSeconds) {
+    if ((time(nullptr) - start_time) > timeout_seconds) {
+      kill(*pid, SIGINT);
       break;
     }
   }
@@ -109,7 +112,7 @@
       done = true;
       break;
     }
-    if ((time(nullptr) - start_time) > kTimeoutSeconds) {
+    if ((time(nullptr) - start_time) > timeout_seconds) {
       break;
     }
   }
@@ -119,21 +122,23 @@
     while (true) {
       int kill_status;
       int wait_pid = waitpid(*pid, &kill_status, WNOHANG);
-      if (wait_pid == *pid || (time(nullptr) - start_time) > kTimeoutSeconds) {
+      if (wait_pid == *pid || (time(nullptr) - start_time) > timeout_seconds) {
         break;
       }
     }
   }
 
   ASSERT_TRUE(done) << "Timed out waiting for waitpid, output:\n" << output;
-  ASSERT_EQ(0, WEXITSTATUS(status)) << "Output:\n" << output;
+  ASSERT_FALSE(WIFSIGNALED(status))
+      << "Failed with signal " << WTERMSIG(status) << "\nOutput:\n" << output;
+  ASSERT_EQ(exit_code, WEXITSTATUS(status)) << "Output:\n" << output;
 }
 
-static void GetLogStr(pid_t pid, std::string* log_str) {
+static void GetLogStr(pid_t pid, std::string* log_str, log_id log = LOG_ID_MAIN) {
   log_str->clear();
 
   logger_list* list;
-  list = android_logger_list_open(LOG_ID_MAIN, ANDROID_LOG_RDONLY | ANDROID_LOG_NONBLOCK, 1000, pid);
+  list = android_logger_list_open(log, ANDROID_LOG_RDONLY | ANDROID_LOG_NONBLOCK, 1000, pid);
   ASSERT_TRUE(list != nullptr);
 
   while (true) {
@@ -168,7 +173,8 @@
   android_logger_list_close(list);
 }
 
-static void FindStrings(pid_t pid, std::vector<const char*> match_strings) {
+static void FindStrings(pid_t pid, std::vector<const char*> match_strings,
+                        time_t timeout_seconds = kTimeoutSeconds) {
   std::string log_str;
   time_t start = time(nullptr);
   bool found_all;
@@ -184,7 +190,7 @@
     if (found_all) {
       return;
     }
-    if ((time(nullptr) - start) > kTimeoutSeconds) {
+    if ((time(nullptr) - start) > timeout_seconds) {
       break;
     }
   }
@@ -414,3 +420,47 @@
 TEST(MallocDebugSystemTest, verify_leak_allocation_limit) {
   VerifyLeak("leak_memory_limit_");
 }
+
+static constexpr int kExpectedExitCode = 30;
+
+TEST(MallocTests, DISABLED_exit_while_threads_allocating) {
+  std::atomic_uint32_t thread_mask;
+  thread_mask = 0;
+
+  for (size_t i = 0; i < 32; i++) {
+    std::thread malloc_thread([&thread_mask, i] {
+      while (true) {
+        void* ptr = malloc(100);
+        if (ptr == nullptr) {
+          exit(1000);
+        }
+        free(ptr);
+        thread_mask.fetch_or(1 << i);
+      }
+    });
+    malloc_thread.detach();
+  }
+
+  // Wait until each thread has done at least one allocation.
+  while (thread_mask.load() != 0xffffffff)
+    ;
+  exit(kExpectedExitCode);
+}
+
+// Verify that exiting while other threads are doing malloc operations
+// does not crash.
+TEST(MallocDebugSystemTest, exit_while_threads_allocating) {
+  for (size_t i = 0; i < 100; i++) {
+    SCOPED_TRACE(::testing::Message() << "Run " << i);
+    pid_t pid;
+    ASSERT_NO_FATAL_FAILURE(Exec("MallocTests.DISABLED_exit_while_threads_allocating",
+                                 "verbose backtrace", &pid, kExpectedExitCode));
+
+    ASSERT_NO_FATAL_FAILURE(FindStrings(pid, std::vector<const char*>{"malloc debug enabled"}));
+
+    std::string log_str;
+    GetLogStr(pid, &log_str, LOG_ID_CRASH);
+    ASSERT_TRUE(log_str.find("Fatal signal") == std::string::npos)
+        << "Found crash in log.\nLog message: " << log_str;
+  }
+}
diff --git a/libc/stdio/local.h b/libc/stdio/local.h
index d306a21..1ecf122 100644
--- a/libc/stdio/local.h
+++ b/libc/stdio/local.h
@@ -204,6 +204,7 @@
 __LIBC32_LEGACY_PUBLIC__ int _fwalk(int (*)(FILE*));
 
 off64_t __sseek64(void*, off64_t, int);
+int __sflush_locked(FILE*);
 int __swhatbuf(FILE*, size_t*, int*);
 wint_t __fgetwc_unlock(FILE*);
 wint_t __ungetwc(wint_t, FILE*);
diff --git a/libc/stdio/refill.c b/libc/stdio/refill.c
index cfa2bfd..1df4191 100644
--- a/libc/stdio/refill.c
+++ b/libc/stdio/refill.c
@@ -40,7 +40,7 @@
 lflush(FILE *fp)
 {
 	if ((fp->_flags & (__SLBF|__SWR)) == (__SLBF|__SWR))
-		return (__sflush(fp));	/* ignored... */
+		return (__sflush_locked(fp));	/* ignored... */
 	return (0);
 }
 
diff --git a/libc/stdio/stdio.cpp b/libc/stdio/stdio.cpp
index 4cec757..91c7689 100644
--- a/libc/stdio/stdio.cpp
+++ b/libc/stdio/stdio.cpp
@@ -106,7 +106,7 @@
 FILE* stdout = &__sF[1];
 FILE* stderr = &__sF[2];
 
-static pthread_mutex_t __stdio_mutex = PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP;
+static pthread_mutex_t __stdio_mutex = PTHREAD_MUTEX_INITIALIZER;
 
 static uint64_t __get_file_tag(FILE* fp) {
   // Don't use a tag for the standard streams.
@@ -211,21 +211,23 @@
 }
 
 int _fwalk(int (*callback)(FILE*)) {
-  pthread_mutex_lock(&__stdio_mutex);
   int result = 0;
   for (glue* g = &__sglue; g != nullptr; g = g->next) {
     FILE* fp = g->iobs;
     for (int n = g->niobs; --n >= 0; ++fp) {
-      ScopedFileLock sfl(fp);
       if (fp->_flags != 0 && (fp->_flags & __SIGN) == 0) {
         result |= (*callback)(fp);
       }
     }
   }
-  pthread_mutex_unlock(&__stdio_mutex);
   return result;
 }
 
+extern "C" __LIBC_HIDDEN__ void __libc_stdio_cleanup(void) {
+  // Equivalent to fflush(nullptr), but without all the locking since we're shutting down anyway.
+  _fwalk(__sflush);
+}
+
 static FILE* __fopen(int fd, int flags) {
 #if !defined(__LP64__)
   if (fd > SHRT_MAX) {
@@ -520,6 +522,11 @@
   return 0;
 }
 
+int __sflush_locked(FILE* fp) {
+  ScopedFileLock sfl(fp);
+  return __sflush(fp);
+}
+
 int __sread(void* cookie, char* buf, int n) {
   FILE* fp = reinterpret_cast<FILE*>(cookie);
   return TEMP_FAILURE_RETRY(read(fp->_file, buf, n));
@@ -1061,7 +1068,7 @@
 }
 
 static int fflush_all() {
-  return _fwalk(__sflush);
+  return _fwalk(__sflush_locked);
 }
 
 int fflush(FILE* fp) {
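
Together with the refill.c change above, this untangles a lock-ordering hazard: reading an unbuffered or line-buffered stream flushes all line-buffered output streams via lflush(), and _fwalk() previously took __stdio_mutex plus every file's lock, so a file held with flockfile() on another thread could block an unrelated read. Now _fwalk() takes no locks at all, callers that need per-file locking go through __sflush_locked(), and the exit path uses the lock-free __libc_stdio_cleanup() since we're shutting down anyway. The new fread_with_locked_file test below exercises exactly this scenario.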
diff --git a/libc/stdlib/atexit.c b/libc/stdlib/atexit.c
index 692e0c0..0efb118 100644
--- a/libc/stdlib/atexit.c
+++ b/libc/stdlib/atexit.c
@@ -188,7 +188,8 @@
 
 	/* If called via exit(), flush output of all open files. */
 	if (dso == NULL) {
-		fflush(NULL);
+		extern void __libc_stdio_cleanup(void);
+		__libc_stdio_cleanup();
 	}
 
   /* BEGIN android-changed: call __unregister_atfork if dso is not null */
diff --git a/libc/system_properties/Android.bp b/libc/system_properties/Android.bp
index 911afb1..8780dda 100644
--- a/libc/system_properties/Android.bp
+++ b/libc/system_properties/Android.bp
@@ -1,6 +1,7 @@
 cc_library_static {
     name: "libsystemproperties",
     defaults: ["libc_defaults"],
+    native_bridge_supported: true,
     srcs: [
         "context_node.cpp",
         "contexts_split.cpp",
diff --git a/libdl/Android.bp b/libdl/Android.bp
index b1ee5ab..e36ddc5 100644
--- a/libdl/Android.bp
+++ b/libdl/Android.bp
@@ -5,6 +5,7 @@
     name: "libdl_static",
     defaults: ["linux_bionic_supported"],
     recovery_available: true,
+    native_bridge_supported: true,
 
     srcs: [
         "libdl.cpp",
@@ -32,6 +33,7 @@
 cc_library {
     name: "libdl",
     recovery_available: true,
+    native_bridge_supported: true,
     static_ndk_lib: true,
 
     defaults: ["linux_bionic_supported"],
@@ -47,6 +49,7 @@
 
     ldflags: [
         "-Wl,--exclude-libs=libgcc.a",
+        "-Wl,--exclude-libs=libgcc_stripped.a",
         "-Wl,--exclude-libs=libclang_rt.builtins-arm-android.a",
         "-Wl,--exclude-libs=libclang_rt.builtins-aarch64-android.a",
         "-Wl,--exclude-libs=libclang_rt.builtins-x86-android.a",
@@ -115,6 +118,7 @@
 
     defaults: ["linux_bionic_supported"],
     recovery_available: true,
+    native_bridge_supported: true,
 
     // NOTE: --exclude-libs=libgcc.a makes sure that any symbols libdl.so pulls from
     // libgcc.a are made static to libdl.so.  This in turn ensures that libraries that
@@ -127,6 +131,7 @@
 
     ldflags: [
         "-Wl,--exclude-libs=libgcc.a",
+        "-Wl,--exclude-libs=libgcc_stripped.a",
         "-Wl,--exclude-libs=libclang_rt.builtins-arm-android.a",
         "-Wl,--exclude-libs=libclang_rt.builtins-aarch64-android.a",
         "-Wl,--exclude-libs=libclang_rt.builtins-x86-android.a",
@@ -179,12 +184,14 @@
 
 ndk_library {
     name: "libdl",
+    native_bridge_supported: true,
     symbol_file: "libdl.map.txt",
     first_version: "9",
 }
 
 llndk_library {
     name: "libdl",
+    native_bridge_supported: true,
     symbol_file: "libdl.map.txt",
 }
 
diff --git a/libm/Android.bp b/libm/Android.bp
index 8c32810..48b9a5f 100644
--- a/libm/Android.bp
+++ b/libm/Android.bp
@@ -507,6 +507,7 @@
         integer_overflow: false,
     },
     stl: "none",
+    native_bridge_supported: true,
 
     stubs: {
         symbol_file: "libm.map.txt",
@@ -516,12 +517,14 @@
 
 ndk_library {
     name: "libm",
+    native_bridge_supported: true,
     symbol_file: "libm.map.txt",
     first_version: "9",
 }
 
 llndk_library {
     name: "libm",
+    native_bridge_supported: true,
     symbol_file: "libm.map.txt",
 }
 
diff --git a/linker/Android.bp b/linker/Android.bp
index 73328da..5e7a921 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -2,6 +2,7 @@
     name: "liblinker_malloc",
     defaults: ["linux_bionic_supported"],
     recovery_available: true,
+    native_bridge_supported: true,
 
     srcs: [
         "linker_memory.cpp",
@@ -267,7 +268,6 @@
         "libc_nomalloc",
         "libm",
         "libziparchive",
-        "libutils",
         "libbase",
         "libz",
 
@@ -307,6 +307,11 @@
     xom: false,
 }
 
+sh_binary {
+    name: "ldd",
+    src: "ldd",
+}
+
 cc_library {
     // NOTE: --exclude-libs=libgcc.a makes sure that any symbols ld-android.so pulls from
     // libgcc.a are made static to ld-android.so.  This in turn ensures that libraries that
@@ -319,6 +324,7 @@
 
     ldflags: [
         "-Wl,--exclude-libs=libgcc.a",
+        "-Wl,--exclude-libs=libgcc_stripped.a",
         "-Wl,--exclude-libs=libclang_rt.builtins-arm-android.a",
         "-Wl,--exclude-libs=libclang_rt.builtins-aarch64-android.a",
         "-Wl,--exclude-libs=libclang_rt.builtins-x86-android.a",
@@ -361,6 +367,7 @@
     name: "ld-android",
     defaults: ["linux_bionic_supported"],
     recovery_available: true,
+    native_bridge_supported: true,
 
     nocrt: true,
     system_shared_libs: [],
diff --git a/linker/ldd b/linker/ldd
new file mode 100644
index 0000000..3a0aff9
--- /dev/null
+++ b/linker/ldd
@@ -0,0 +1,23 @@
+#!/system/bin/sh
+
+# Rather than have ldd and ldd64, this script does the right thing depending
+# on the argument.
+
+function error() {
+  echo "$1"
+  exit 1
+}
+
+[ $# -eq 1 ] || error "usage: ldd FILE"
+
+case `file -L "$1"` in
+  *32-bit*)
+    linker --list "$1"
+    ;;
+  *64-bit*)
+    linker64 --list "$1"
+    ;;
+  *)
+    error "$1: not an ELF file"
+    ;;
+esac
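
In use, "ldd /path/to/binary" dispatches to the 32- or 64-bit linker, which (per the linker changes below) prints one "soname => realpath (address)" line per loaded dependency and then exits before transferring control: with g_is_ldd set, ifunc resolvers, DT_PREINIT_ARRAY functions, and constructors are all skipped, so listing a binary never executes its code.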
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 324f3ef..32dce38 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -1039,7 +1039,7 @@
 
   ZipEntry entry;
 
-  if (FindEntry(handle, ZipString(file_path), &entry) != 0) {
+  if (FindEntry(handle, file_path, &entry) != 0) {
     // Entry was not found.
     close(fd);
     return -1;
@@ -2603,6 +2603,8 @@
 }
 
 ElfW(Addr) call_ifunc_resolver(ElfW(Addr) resolver_addr) {
+  if (g_is_ldd) return 0;
+
   typedef ElfW(Addr) (*ifunc_resolver_t)(void);
   ifunc_resolver_t ifunc_resolver = reinterpret_cast<ifunc_resolver_t>(resolver_addr);
   ElfW(Addr) ifunc_addr = ifunc_resolver();
@@ -3883,6 +3885,11 @@
     return true;
   }
 
+  if (g_is_ldd && !is_main_executable()) {
+    async_safe_format_fd(STDOUT_FILENO, "\t%s => %s (%p)\n", get_soname(),
+                         get_realpath(), reinterpret_cast<void*>(base));
+  }
+
   local_group_root_ = local_group.front();
   if (local_group_root_ == nullptr) {
     local_group_root_ = this;
diff --git a/linker/linker_globals.h b/linker/linker_globals.h
index 32aa09d..de05238 100644
--- a/linker/linker_globals.h
+++ b/linker/linker_globals.h
@@ -87,3 +87,5 @@
  private:
   std::string saved_error_msg_;
 };
+
+__LIBC_HIDDEN__ extern bool g_is_ldd;
diff --git a/linker/linker_main.cpp b/linker/linker_main.cpp
index f6e4f67..f576023 100644
--- a/linker/linker_main.cpp
+++ b/linker/linker_main.cpp
@@ -117,6 +117,7 @@
   return vdso;
 }
 
+bool g_is_ldd;
 int g_ld_debug_verbosity;
 
 static std::vector<std::string> g_ld_preload_names;
@@ -397,7 +398,7 @@
                          "\"%s\": error: Android 5.0 and later only support "
                          "position-independent executables (-fPIE).\n",
                          g_argv[0]);
-    exit(EXIT_FAILURE);
+    _exit(EXIT_FAILURE);
   }
 
   // Use LD_LIBRARY_PATH and LD_PRELOAD (but only if we aren't setuid/setgid).
@@ -660,22 +661,29 @@
   // linker's _start.
   const char* exe_to_load = nullptr;
   if (getauxval(AT_ENTRY) == reinterpret_cast<uintptr_t>(&_start)) {
-    if (args.argc <= 1 || !strcmp(args.argv[1], "--help")) {
+    if (args.argc == 3 && !strcmp(args.argv[1], "--list")) {
+      // We're being asked to behave like ldd(1).
+      g_is_ldd = true;
+      exe_to_load = args.argv[2];
+    } else if (args.argc <= 1 || !strcmp(args.argv[1], "--help")) {
       async_safe_format_fd(STDOUT_FILENO,
-         "Usage: %s program [arguments...]\n"
-         "       %s path.zip!/program [arguments...]\n"
+         "Usage: %s [--list] PROGRAM [ARGS-FOR-PROGRAM...]\n"
+         "       %s [--list] path.zip!/PROGRAM [ARGS-FOR-PROGRAM...]\n"
          "\n"
          "A helper program for linking dynamic executables. Typically, the kernel loads\n"
          "this program because it's the PT_INTERP of a dynamic executable.\n"
          "\n"
          "This program can also be run directly to load and run a dynamic executable. The\n"
          "executable can be inside a zip file if it's stored uncompressed and at a\n"
-         "page-aligned offset.\n",
+         "page-aligned offset.\n"
+         "\n"
+         "The --list option gives behavior equivalent to ldd(1) on other systems.\n",
          args.argv[0], args.argv[0]);
-      exit(0);
+      _exit(EXIT_SUCCESS);
+    } else {
+      exe_to_load = args.argv[1];
+      __libc_shared_globals()->initial_linker_arg_count = 1;
     }
-    exe_to_load = args.argv[1];
-    __libc_shared_globals()->initial_linker_arg_count = 1;
   }
 
   // store argc/argv/envp to use them for calling constructors
@@ -693,6 +701,8 @@
 
   ElfW(Addr) start_address = linker_main(args, exe_to_load);
 
+  if (g_is_ldd) _exit(EXIT_SUCCESS);
+
   INFO("[ Jumping to _start (%p)... ]", reinterpret_cast<void*>(start_address));
 
   // Return the address that the calling assembly stub should jump to.
diff --git a/linker/linker_soinfo.cpp b/linker/linker_soinfo.cpp
index 31ee74c..5f40528 100644
--- a/linker/linker_soinfo.cpp
+++ b/linker/linker_soinfo.cpp
@@ -391,13 +391,15 @@
 }
 
 void soinfo::call_pre_init_constructors() {
+  if (g_is_ldd) return;
+
   // DT_PREINIT_ARRAY functions are called before any other constructors for executables,
   // but ignored in a shared library.
   call_array("DT_PREINIT_ARRAY", preinit_array_, preinit_array_count_, false, get_realpath());
 }
 
 void soinfo::call_constructors() {
-  if (constructors_called) {
+  if (constructors_called || g_is_ldd) {
     return;
   }
 
diff --git a/tests/Android.bp b/tests/Android.bp
index 85bb29a..d65780c 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -288,6 +288,7 @@
         // enabled. Since the intent is just to build this, we can get away with
         // passing this flag on its own.
         "-fsanitize=address",
+        "-Wno-memset-transposed-args",
     ],
     // Ignore that we don't have ASAN symbols linked in.
     allow_undefined_symbols: true,
@@ -304,6 +305,7 @@
         "-Werror",
         "-D_FORTIFY_SOURCE=2",
         "-D__clang_analyzer__",
+        "-Wno-memset-transposed-args",
     ],
     srcs: ["fortify_filecheck_diagnostics_test.cpp"],
 }
diff --git a/tests/Android.mk b/tests/Android.mk
index fc7b940..848d291 100644
--- a/tests/Android.mk
+++ b/tests/Android.mk
@@ -65,18 +65,15 @@
 include $(CLEAR_VARS)
 LOCAL_ADDITIONAL_DEPENDENCIES := \
     $(LOCAL_PATH)/Android.mk \
-    $(LOCAL_PATH)/file-check-cxx \
-    | $(HOST_OUT_EXECUTABLES)/FileCheck$(HOST_EXECUTABLE_SUFFIX) \
+    $(LOCAL_PATH)/touch-obj-on-success
 
-LOCAL_CXX := $(LOCAL_PATH)/file-check-cxx \
-    $(HOST_OUT_EXECUTABLES)/FileCheck \
+LOCAL_CXX := $(LOCAL_PATH)/touch-obj-on-success \
     $(LLVM_PREBUILTS_PATH)/clang++ \
-    CLANG \
 
 LOCAL_CLANG := true
 LOCAL_MODULE := bionic-compile-time-tests-clang++
-LOCAL_CPPFLAGS := -Wall -Werror
-LOCAL_CPPFLAGS += -fno-color-diagnostics -ferror-limit=10000
+LOCAL_CPPFLAGS := -Wall -Wno-error
+LOCAL_CPPFLAGS += -fno-color-diagnostics -ferror-limit=10000 -Xclang -verify
 LOCAL_SRC_FILES := fortify_filecheck_diagnostics_test.cpp
 include $(BUILD_STATIC_LIBRARY)
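
The switch to "-Xclang -verify" makes clang itself the checker: the compile now fails unless every expected-error/expected-warning comment matches a diagnostic actually emitted at the referenced line, with no unexpected extras. A minimal self-contained illustration of the comment syntax (hypothetical file, not part of this patch):

    // Compile with: clang++ -c -Xclang -verify verify_demo.cpp
    // Succeeds only if the diagnostics match these comments exactly.

    // expected-error@+1{{use of undeclared identifier 'undeclared'}}
    int f() { return undeclared; }

    // expected-warning@+1{{equality comparison result unused}}
    void g(int x) { x == 0; }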
 
diff --git a/tests/dlext_test.cpp b/tests/dlext_test.cpp
index eed84a4..67ebf37 100644
--- a/tests/dlext_test.cpp
+++ b/tests/dlext_test.cpp
@@ -134,10 +134,7 @@
   ZipArchiveHandle handle;
   ASSERT_EQ(0, OpenArchive(lib_path.c_str(), &handle));
   ZipEntry zip_entry;
-  ZipString zip_name;
-  zip_name.name = reinterpret_cast<const uint8_t*>(kLibZipSimpleZip);
-  zip_name.name_length = strlen(kLibZipSimpleZip);
-  ASSERT_EQ(0, FindEntry(handle, zip_name, &zip_entry));
+  ASSERT_EQ(0, FindEntry(handle, kLibZipSimpleZip, &zip_entry));
   extinfo.library_fd_offset = zip_entry.offset;
   CloseArchive(handle);
 
diff --git a/tests/file-check-cxx b/tests/file-check-cxx
deleted file mode 100755
index d3bc5f7..0000000
--- a/tests/file-check-cxx
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-FILECHECK=$1
-CXX=$2
-PREFIX=$3
-shift 3
-SOURCE=$(echo "$@" | grep -oP '\S+\.cpp\b')
-OBJ=$(echo "$@" | grep -oP '\S+\.o\b')
-$CXX "$@" -Wno-error 2>&1 | $FILECHECK -check-prefix=$PREFIX $SOURCE
-if [ "$?" -eq 0 ]; then
-  touch $OBJ
-else
-  exit 1
-fi
diff --git a/tests/fortify_filecheck_diagnostics_test.cpp b/tests/fortify_filecheck_diagnostics_test.cpp
index e79a9a6..ac9853c 100644
--- a/tests/fortify_filecheck_diagnostics_test.cpp
+++ b/tests/fortify_filecheck_diagnostics_test.cpp
@@ -15,19 +15,14 @@
  */
 
 /*
- * If this test fails, you can see the compiler's output by erasing a few args from the failing
- * command. Specifically, delete everything before the path/to/the/compiler, then delete the first
- * arg after the path/to/the/compiler. For example, given the following command:
- *
- * bionic/tests/file-check-cxx out/host/linux-x86/bin/FileCheck \
- * prebuilts/clang/host/linux-x86/clang-4053586/bin/clang++ CLANG    -I bionic/tests -I ...
- *
- * If you delete everything before clang++ and delete "CLANG", then you'll end up with:
- *
- * prebuilts/clang/host/linux-x86/clang-4053586/bin/clang++ -I bionic/tests -I ...
- *
- * Which is the command that FileCheck executes.
+ * Silence all notes about enable_if-related 'candidates'; they're nice to know
+ * about for users, but this test doesn't care.
  */
+// expected-note@* 0+{{candidate function}}
+
+/* Similarly, ignore all "from 'diagnose_if'"s. */
+// expected-note@* 0+{{from 'diagnose_if'}}
+
 
 #undef _FORTIFY_SOURCE
 #define _FORTIFY_SOURCE 2
@@ -43,11 +38,15 @@
 #include <time.h>
 #include <unistd.h>
 
+#if !defined(__BIONIC__)
+#  error "This only works with Bionic."
+#endif
+
 void test_sprintf() {
   char buf[4];
 
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: call to unavailable function 'sprintf': format string will always overflow destination buffer
+  // expected-error@+1{{call to unavailable function 'sprintf': format string will always overflow destination buffer}}
   sprintf(buf, "foobar");  // NOLINT(runtime/printf)
 
   // TODO: clang should emit a warning, but doesn't
@@ -58,7 +57,7 @@
   char buf[4];
 
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: call to unavailable function 'snprintf': format string will always overflow destination buffer
+  // expected-error@+1{{call to unavailable function 'snprintf': format string will always overflow destination buffer}}
   snprintf(buf, 5, "foobar");  // NOLINT(runtime/printf)
 
   // TODO: clang should emit a warning, but doesn't
@@ -74,41 +73,41 @@
 void test_memcpy() {
   char buf[4];
 
-  // CLANG: error: 'memcpy' called with size bigger than buffer
+  // expected-error@+1{{'memcpy' called with size bigger than buffer}}
   memcpy(buf, "foobar", sizeof("foobar") + 100);
 }
 
 void test_memmove() {
   char buf[4];
 
-  // CLANG: error: 'memmove' called with size bigger than buffer
+  // expected-error@+1{{'memmove' called with size bigger than buffer}}
   memmove(buf, "foobar", sizeof("foobar"));
 }
 
 void test_memset() {
   char buf[4];
 
-  // CLANG: error: 'memset' called with size bigger than buffer
+  // expected-error@+1{{'memset' called with size bigger than buffer}}
   memset(buf, 0, 6);
 }
 
 void test_strcpy() {
   char buf[4];
 
-  // CLANG: error: 'strcpy' called with string bigger than buffer
+  // expected-error@+1{{'strcpy' called with string bigger than buffer}}
   strcpy(buf, "foobar");  // NOLINT(runtime/printf)
 
-  // CLANG: error: 'strcpy' called with string bigger than buffer
+  // expected-error@+1{{'strcpy' called with string bigger than buffer}}
   strcpy(buf, "quux");
 }
 
 void test_stpcpy() {
   char buf[4];
 
-  // CLANG: error: 'stpcpy' called with string bigger than buffer
+  // expected-error@+1{{'stpcpy' called with string bigger than buffer}}
   stpcpy(buf, "foobar");
 
-  // CLANG: error: 'stpcpy' called with string bigger than buffer
+  // expected-error@+1{{'stpcpy' called with string bigger than buffer}}
   stpcpy(buf, "quux");
 }
 
@@ -157,10 +156,10 @@
 void test_fgets() {
   char buf[4];
 
-  // CLANG: error: in call to 'fgets', size should not be negative
+  // expected-error@+1{{in call to 'fgets', size should not be negative}}
   fgets(buf, -1, stdin);
 
-  // CLANG: error: in call to 'fgets', size is larger than the destination buffer
+  // expected-error@+1{{in call to 'fgets', size is larger than the destination buffer}}
   fgets(buf, 6, stdin);
 }
 
@@ -168,58 +167,58 @@
   char buf[4];
   sockaddr_in addr;
 
-  // CLANG: error: 'recvfrom' called with size bigger than buffer
+  // expected-error@+1{{'recvfrom' called with size bigger than buffer}}
   recvfrom(0, buf, 6, 0, reinterpret_cast<sockaddr*>(&addr), nullptr);
 }
 
 void test_recv() {
   char buf[4] = {0};
 
-  // CLANG: error: 'recv' called with size bigger than buffer
+  // expected-error@+1{{'recv' called with size bigger than buffer}}
   recv(0, buf, 6, 0);
 }
 
 void test_umask() {
-  // CLANG: error: 'umask' called with invalid mode
+  // expected-error@+1{{'umask' called with invalid mode}}
   umask(01777);
 }
 
 void test_read() {
   char buf[4];
-  // CLANG: error: in call to 'read', 'count' bytes overflows the given object
+  // expected-error@+1{{in call to 'read', 'count' bytes overflows the given object}}
   read(0, buf, 6);
 }
 
 void test_open() {
-  // CLANG: error: 'open' called with O_CREAT or O_TMPFILE, but missing mode
+  // expected-error@+1{{'open' called with O_CREAT or O_TMPFILE, but missing mode}}
   open("/dev/null", O_CREAT);
 
-  // CLANG: error: 'open' called with O_CREAT or O_TMPFILE, but missing mode
+  // expected-error@+1{{'open' called with O_CREAT or O_TMPFILE, but missing mode}}
   open("/dev/null", O_TMPFILE);
 
-  // CLANG: error: call to unavailable function 'open': too many arguments
+  // expected-error@+1{{call to unavailable function 'open': too many arguments}}
   open("/dev/null", O_CREAT, 0, 0);
 
-  // CLANG: error: call to unavailable function 'open': too many arguments
+  // expected-error@+1{{call to unavailable function 'open': too many arguments}}
   open("/dev/null", O_TMPFILE, 0, 0);
 
-  // CLANG: warning: 'open' has superfluous mode bits; missing O_CREAT?
+  // expected-warning@+1{{'open' has superfluous mode bits; missing O_CREAT?}}
   open("/dev/null", O_RDONLY, 0644);
 
-  // CLANG: warning: 'open' has superfluous mode bits; missing O_CREAT?
+  // expected-warning@+1{{'open' has superfluous mode bits; missing O_CREAT?}}
   open("/dev/null", O_DIRECTORY, 0644);
 }
 
 void test_poll() {
   pollfd fds[1];
-  // CLANG: error: in call to 'poll', fd_count is larger than the given buffer
+  // expected-error@+1{{in call to 'poll', fd_count is larger than the given buffer}}
   poll(fds, 2, 0);
 }
 
 void test_ppoll() {
   pollfd fds[1];
   timespec timeout;
-  // CLANG: error: in call to 'ppoll', fd_count is larger than the given buffer
+  // expected-error@+1{{in call to 'ppoll', fd_count is larger than the given buffer}}
   ppoll(fds, 2, &timeout, nullptr);
 }
 
@@ -227,101 +226,98 @@
   pollfd fds[1];
   timespec timeout;
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: in call to 'ppoll64', fd_count is larger than the given buffer
+  // expected-error@+1{{in call to 'ppoll64', fd_count is larger than the given buffer}}
   ppoll64(fds, 2, &timeout, nullptr);
 }
 
 void test_fread_overflow() {
   char buf[4];
-  // CLANG: error: in call to 'fread', size * count overflows
+  // expected-error@+1{{in call to 'fread', size * count overflows}}
   fread(buf, 2, (size_t)-1, stdin);
 }
 
 void test_fread_too_big() {
   char buf[4];
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: in call to 'fread', size * count is too large for the given buffer
+  // expected-error@+1{{in call to 'fread', size * count is too large for the given buffer}}
   fread(buf, 1, 5, stdin);
 }
 
 void test_fwrite_overflow() {
   char buf[4] = {0};
-  // CLANG: error: in call to 'fwrite', size * count overflows
+  // expected-error@+1{{in call to 'fwrite', size * count overflows}}
   fwrite(buf, 2, (size_t)-1, stdout);
 }
 
 void test_fwrite_too_big() {
   char buf[4] = {0};
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: in call to 'fwrite', size * count is too large for the given buffer
+  // expected-error@+1{{in call to 'fwrite', size * count is too large for the given buffer}}
   fwrite(buf, 1, 5, stdout);
 }
 
 void test_getcwd() {
   char buf[4];
-  // CLANG: error: in call to 'getcwd', 'size' bytes overflows the given object
+  // expected-error@+1{{in call to 'getcwd', 'size' bytes overflows the given object}}
   getcwd(buf, 5);
 }
 
 void test_pwrite64_size() {
   char buf[4] = {0};
-  // CLANG: error: in call to 'pwrite64', 'count' bytes overflows the given object
+  // expected-error@+1{{in call to 'pwrite64', 'count' bytes overflows the given object}}
   pwrite64(STDOUT_FILENO, buf, 5, 0);
 }
 
 void test_pwrite64_too_big_malloc() {
   void *buf = calloc(atoi("5"), 1);
-  // clang should emit a warning, but probably never will.
+  // expected-error@+1{{in call to 'pwrite64', 'count' must be <= SSIZE_MAX}}
   pwrite64(STDOUT_FILENO, buf, SIZE_MAX, 0);
 }
 
 void test_pwrite64_too_big() {
   char buf[4] = {0};
-  // CLANG: error: in call to 'pwrite64', 'count' must be <= SSIZE_MAX
+  // expected-error@+1{{in call to 'pwrite64', 'count' must be <= SSIZE_MAX}}
   pwrite64(STDOUT_FILENO, buf, SIZE_MAX, 0);
 }
 
 void test_write_size() {
   char buf[4] = {0};
-  // CLANG: error: in call to 'write', 'count' bytes overflows the given object
+  // expected-error@+1{{in call to 'write', 'count' bytes overflows the given object}}
   write(STDOUT_FILENO, buf, 5);
 }
 
 void test_memset_args_flipped() {
   char from[4] = {0};
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: 'memset' will set 0 bytes; maybe the arguments got flipped?
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wmemset-transposed-args"
+  // expected-warning@+1{{'memset' will set 0 bytes; maybe the arguments got flipped?}}
   memset(from, sizeof(from), 0);
-#pragma clang diagnostic pop
 }
 
 void test_sendto() {
   char buf[4] = {0};
   sockaddr_in addr;
 
-  // CLANG: error: 'sendto' called with size bigger than buffer
+  // expected-error@+1{{'sendto' called with size bigger than buffer}}
   sendto(0, buf, 6, 0, reinterpret_cast<sockaddr*>(&addr), sizeof(sockaddr_in));
 }
 
 void test_send() {
   char buf[4] = {0};
 
-  // CLANG: error: 'send' called with size bigger than buffer
+  // expected-error@+1{{'send' called with size bigger than buffer}}
   send(0, buf, 6, 0);
 }
 
 void test_realpath() {
   char buf[4] = {0};
   // NOLINTNEXTLINE(whitespace/line_length)
-  // CLANG: error: 'realpath' output parameter must be NULL or a pointer to a buffer with >= PATH_MAX bytes
+  // expected-error@+1{{'realpath' output parameter must be NULL or a pointer to a buffer with >= PATH_MAX bytes}}
   realpath(".", buf);
 
   // This is fine.
   realpath(".", nullptr);
 
   char bigbuf[PATH_MAX];
-  // CLANG: error: 'realpath': NULL path is never correct; flipped arguments?
+  // expected-error@+1{{'realpath': NULL path is never correct; flipped arguments?}}
   realpath(nullptr, bigbuf);
 }
diff --git a/tests/libs/bionic_tests_zipalign.cpp b/tests/libs/bionic_tests_zipalign.cpp
index 56183ba..ec500d4 100644
--- a/tests/libs/bionic_tests_zipalign.cpp
+++ b/tests/libs/bionic_tests_zipalign.cpp
@@ -41,7 +41,7 @@
 
 static bool GetEntries(ZipArchiveHandle handle, std::vector<ZipData>* entries) {
   void* cookie;
-  int32_t return_value = StartIteration(handle, &cookie, nullptr, nullptr);
+  int32_t return_value = StartIteration(handle, &cookie);
   if (return_value != 0) {
     fprintf(stderr, "Unable to iterate over entries: %s\n", ErrorCodeString(return_value));
     return false;
diff --git a/tests/malloc_test.cpp b/tests/malloc_test.cpp
index 706de15..983592f 100644
--- a/tests/malloc_test.cpp
+++ b/tests/malloc_test.cpp
@@ -395,6 +395,60 @@
 #endif
 }
 
+TEST(malloc, malloc_info_matches_mallinfo) {
+#ifdef __BIONIC__
+  SKIP_WITH_HWASAN; // hwasan does not implement malloc_info
+
+  char* buf;
+  size_t bufsize;
+  FILE* memstream = open_memstream(&buf, &bufsize);
+  ASSERT_NE(nullptr, memstream);
+  size_t mallinfo_before_allocated_bytes = mallinfo().uordblks;
+  ASSERT_EQ(0, malloc_info(0, memstream));
+  size_t mallinfo_after_allocated_bytes = mallinfo().uordblks;
+  ASSERT_EQ(0, fclose(memstream));
+
+  tinyxml2::XMLDocument doc;
+  ASSERT_EQ(tinyxml2::XML_SUCCESS, doc.Parse(buf));
+
+  size_t total_allocated_bytes = 0;
+  auto root = doc.FirstChildElement();
+  ASSERT_NE(nullptr, root);
+  ASSERT_STREQ("malloc", root->Name());
+  if (std::string(root->Attribute("version")) == "jemalloc-1") {
+    // Verify jemalloc version of this data.
+    ASSERT_STREQ("jemalloc-1", root->Attribute("version"));
+
+    auto arena = root->FirstChildElement();
+    for (; arena != nullptr; arena = arena->NextSiblingElement()) {
+      int val;
+
+      ASSERT_STREQ("heap", arena->Name());
+      ASSERT_EQ(tinyxml2::XML_SUCCESS, arena->QueryIntAttribute("nr", &val));
+      ASSERT_EQ(tinyxml2::XML_SUCCESS,
+                arena->FirstChildElement("allocated-large")->QueryIntText(&val));
+      total_allocated_bytes += val;
+      ASSERT_EQ(tinyxml2::XML_SUCCESS,
+                arena->FirstChildElement("allocated-huge")->QueryIntText(&val));
+      total_allocated_bytes += val;
+      ASSERT_EQ(tinyxml2::XML_SUCCESS,
+                arena->FirstChildElement("allocated-bins")->QueryIntText(&val));
+      total_allocated_bytes += val;
+      ASSERT_EQ(tinyxml2::XML_SUCCESS,
+                arena->FirstChildElement("bins-total")->QueryIntText(&val));
+    }
+    // The total must fall between the mallinfo values captured before and
+    // after the malloc_info call, since malloc_info itself allocates memory.
+    EXPECT_LE(mallinfo_before_allocated_bytes, total_allocated_bytes);
+    EXPECT_GE(mallinfo_after_allocated_bytes, total_allocated_bytes);
+  } else {
+    // Only verify that this is debug-malloc-1, the malloc debug unit tests
+    // verify the output.
+    ASSERT_STREQ("debug-malloc-1", root->Attribute("version"));
+  }
+#endif
+}
+
 TEST(malloc, calloc_usable_size) {
   for (size_t size = 1; size <= 2048; size++) {
     void* pointer = malloc(size);
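
For reference, the jemalloc-1 document the new test above walks has roughly the following shape (values illustrative; element names come from the je_mallinfo dump code earlier in this patch and the queries in the test):

    <malloc version="jemalloc-1">
      <heap nr="0">
        <allocated-large>16384</allocated-large>
        <allocated-huge>0</allocated-huge>
        <allocated-bins>7680</allocated-bins>
        <bins-total>24</bins-total>
        <!-- per-size-class <bin> elements with their <allocated> totals -->
      </heap>
      <!-- one <heap> element per arena -->
    </malloc>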
diff --git a/tests/stdio_test.cpp b/tests/stdio_test.cpp
index 65a942c..01b4dba 100644
--- a/tests/stdio_test.cpp
+++ b/tests/stdio_test.cpp
@@ -19,6 +19,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <limits.h>
+#include <linux/fs.h>
 #include <math.h>
 #include <stdio.h>
 #include <sys/types.h>
@@ -29,6 +30,7 @@
 #include <locale.h>
 
 #include <string>
+#include <thread>
 #include <vector>
 
 #include <android-base/file.h>
@@ -2577,3 +2579,34 @@
   ASSERT_LT(0, length);
   ASSERT_EQ("/proc/self/fd/2", std::string(path, length));
 }
+
+TEST(STDIO_TEST, fread_with_locked_file) {
+  // Reading an unbuffered/line-buffered file from one thread shouldn't block
+  // on files locked by other threads, even if it flushes some line-buffered
+  // files.
+  FILE* fp1 = fopen("/dev/zero", "r");
+  ASSERT_TRUE(fp1 != nullptr);
+  flockfile(fp1);
+
+  std::thread([] {
+    for (int mode : { _IONBF, _IOLBF }) {
+      FILE* fp2 = fopen("/dev/zero", "r");
+      ASSERT_TRUE(fp2 != nullptr);
+      setvbuf(fp2, nullptr, mode, 0);
+      ASSERT_EQ('\0', fgetc(fp2));
+      fclose(fp2);
+    }
+  }).join();
+
+  funlockfile(fp1);
+  fclose(fp1);
+}
+
+TEST(STDIO_TEST, SEEK_macros) {
+  ASSERT_EQ(0, SEEK_SET);
+  ASSERT_EQ(1, SEEK_CUR);
+  ASSERT_EQ(2, SEEK_END);
+  ASSERT_EQ(3, SEEK_DATA);
+  ASSERT_EQ(4, SEEK_HOLE);
+  // So we'll notice if Linux grows another constant in <linux/fs.h>...
+  ASSERT_EQ(SEEK_MAX, SEEK_HOLE);
+}
diff --git a/tests/touch-obj-on-success b/tests/touch-obj-on-success
new file mode 100755
index 0000000..df08a49
--- /dev/null
+++ b/tests/touch-obj-on-success
@@ -0,0 +1,8 @@
+#!/bin/bash -eu
+#
+# Runs the given C/C++ compile-ish command. On success, scrapes an object file
+# from that command line and touches it.
+
+"$@"
+obj="$(echo "$@" | grep -oP '\S+\.o\b')"
+touch "${obj}"