Merge "Update to kernel headers v4.8.14."
diff --git a/libc/Android.bp b/libc/Android.bp
index b1d077d..3f7e94c 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -1029,6 +1029,14 @@
                     "arch-arm64/generic/bionic/memset.S",
                 ],
             },
+            cortex_a53: {
+                srcs: [
+                    "arch-arm64/cortex-a53/bionic/memmove.S",
+                ],
+                exclude_srcs: [
+                    "arch-arm64/generic/bionic/memmove.S",
+                ],
+            },
         },
 
         mips: {
diff --git a/libc/NOTICE b/libc/NOTICE
index f3ed69c..fd6cee6 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -5245,6 +5245,33 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2012-2013, Linaro Limited
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in the
+         documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+         names of its contributors may be used to endorse or promote products
+         derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+
+-------------------------------------------------------------------
+
 Copyright (c) 2012-2015
      MIPS Technologies, Inc., California.
 
@@ -5402,6 +5429,33 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2013, Linaro Limited
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in the
+         documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+         names of its contributors may be used to endorse or promote products
+         derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+
+-------------------------------------------------------------------
+
 Copyright (c) 2013-2014, NVIDIA Corporation.  All rights reserved.
 Johnny Qiu <joqiu@nvidia.com>
 Shu Zhang <chazhang@nvidia.com>
@@ -5433,6 +5487,33 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2013-2015, Linaro Limited
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+     names of its contributors may be used to endorse or promote products
+     derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+
+-------------------------------------------------------------------
+
 Copyright (c) 2014
      Imagination Technologies Limited.
 
@@ -5540,6 +5621,34 @@
 
 -------------------------------------------------------------------
 
+Copyright (c) 2015 ARM Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+3. The name of the company may not be used to endorse or promote
+   products derived from this software without specific prior written
+   permission.
+
+THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c) 2015 Joerg Sonnenberger <joerg@NetBSD.org>.
 All rights reserved.
 
diff --git a/libc/arch-arm64/cortex-a53/bionic/memmove.S b/libc/arch-arm64/cortex-a53/bionic/memmove.S
new file mode 100644
index 0000000..c50112d
--- /dev/null
+++ b/libc/arch-arm64/cortex-a53/bionic/memmove.S
@@ -0,0 +1,153 @@
+/* Copyright (c) 2013, Linaro Limited
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+       * Redistributions of source code must retain the above copyright
+         notice, this list of conditions and the following disclaimer.
+       * Redistributions in binary form must reproduce the above copyright
+         notice, this list of conditions and the following disclaimer in the
+         documentation and/or other materials provided with the distribution.
+       * Neither the name of the Linaro nor the
+         names of its contributors may be used to endorse or promote products
+         derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+/*
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses, wchar_t is 4 bytes
+ */
+
+#include <private/bionic_asm.h>
+
+/* Parameters and result.  */
+#define dstin	x0
+#define src	x1
+#define count	x2
+#define srcend	x3
+#define dstend	x4
+#define tmp1	x5
+#define A_l	x6
+#define A_h	x7
+#define B_l	x8
+#define B_h	x9
+#define C_l	x10
+#define C_h	x11
+#define D_l	x12
+#define D_h	x13
+#define E_l	count
+#define E_h	tmp1
+
+/* All memmoves up to 96 bytes are done by memcpy as it supports overlaps.
+   Larger backwards copies are also handled by memcpy. The only remaining
+   case is forward large copies.  The destination is aligned, and an
+   unrolled loop processes 64 bytes per iteration.
+*/
+
+#if defined(WMEMMOVE)
+ENTRY(wmemmove)
+	lsl	count, count, #2
+#else
+ENTRY(memmove)
+#endif
+	sub	tmp1, dstin, src
+	cmp	count, 96
+	ccmp	tmp1, count, 2, hi
+	b.hs	memcpy
+
+	cbz	tmp1, 3f
+	add	dstend, dstin, count
+	add	srcend, src, count
+
+	/* Align dstend to 16 byte alignment so that we don't cross cache line
+	   boundaries on both loads and stores.	 There are at least 96 bytes
+	   to copy, so copy 16 bytes unaligned and then align.	The loop
+	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
+
+	and	tmp1, dstend, 15
+	ldp	D_l, D_h, [srcend, -16]
+	sub	srcend, srcend, tmp1
+	sub	count, count, tmp1
+	ldp	A_l, A_h, [srcend, -16]
+	stp	D_l, D_h, [dstend, -16]
+	ldp	B_l, B_h, [srcend, -32]
+	ldp	C_l, C_h, [srcend, -48]
+	ldp	D_l, D_h, [srcend, -64]!
+	sub	dstend, dstend, tmp1
+	subs	count, count, 128
+	b.ls	2f
+	nop
+1:
+	stp	A_l, A_h, [dstend, -16]
+	ldp	A_l, A_h, [srcend, -16]
+	stp	B_l, B_h, [dstend, -32]
+	ldp	B_l, B_h, [srcend, -32]
+	stp	C_l, C_h, [dstend, -48]
+	ldp	C_l, C_h, [srcend, -48]
+	stp	D_l, D_h, [dstend, -64]!
+	ldp	D_l, D_h, [srcend, -64]!
+	subs	count, count, 64
+	b.hi	1b
+
+	/* Write the last full set of 64 bytes.	 The remainder is at most 64
+	   bytes, so it is safe to always copy 64 bytes from the start even if
+	   there is just 1 byte left.  */
+2:
+	ldp	E_l, E_h, [src, 48]
+	stp	A_l, A_h, [dstend, -16]
+	ldp	A_l, A_h, [src, 32]
+	stp	B_l, B_h, [dstend, -32]
+	ldp	B_l, B_h, [src, 16]
+	stp	C_l, C_h, [dstend, -48]
+	ldp	C_l, C_h, [src]
+	stp	D_l, D_h, [dstend, -64]
+	stp	E_l, E_h, [dstin, 48]
+	stp	A_l, A_h, [dstin, 32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstin]
+3:	ret
+
+#if defined(WMEMMOVE)
+END(wmemmove)
+#else
+END(memmove)
+#endif
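
The entry sequence of this memmove (cmp count, 96 / ccmp / b.hs memcpy) implements the case split described in its block comment. Below is a minimal C sketch of that predicate, assuming — as the memcpy comments later in this change state — that the companion memcpy reads all data before writing for copies up to 96 bytes. The helper name is illustrative, not part of the change.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Sketch of the memmove dispatch: forward the copy to memcpy when it is
   small (<= 96 bytes) or when dst does not start inside [src, src+count).
   The unsigned subtraction wraps when dst < src, so backward and
   non-overlapping copies also satisfy diff >= count and take memcpy;
   only large forward-overlapping copies fall through to the backward
   64-byte-per-iteration loop. */
static bool forward_to_memcpy(const void* dst, const void* src, size_t count) {
  uintptr_t diff = (uintptr_t)dst - (uintptr_t)src;
  return count <= 96 || diff >= count;
}
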
diff --git a/libc/arch-arm64/generic/bionic/memcpy_base.S b/libc/arch-arm64/generic/bionic/memcpy_base.S
index c5d42ce..f850624 100644
--- a/libc/arch-arm64/generic/bionic/memcpy_base.S
+++ b/libc/arch-arm64/generic/bionic/memcpy_base.S
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Linaro Limited
+/* Copyright (c) 2012-2013, Linaro Limited
    All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
@@ -22,158 +22,196 @@
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+/*
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /* Assumptions:
  *
- * ARMv8-a, AArch64
- * Unaligned accesses
+ * ARMv8-a, AArch64, unaligned accesses.
  *
  */
 
+#include <private/bionic_asm.h>
+
 #define dstin	x0
 #define src	x1
 #define count	x2
-#define tmp1	x3
-#define tmp1w	w3
-#define tmp2	x4
-#define tmp2w	w4
-#define tmp3	x5
-#define tmp3w	w5
-#define dst	x6
+#define dst	x3
+#define srcend	x4
+#define dstend	x5
+#define A_l	x6
+#define A_lw	w6
+#define A_h	x7
+#define A_hw	w7
+#define B_l	x8
+#define B_lw   w8
+#define B_h	x9
+#define C_l	x10
+#define C_h	x11
+#define D_l	x12
+#define D_h	x13
+#define E_l	src
+#define E_h	count
+#define F_l	srcend
+#define F_h	dst
+#define tmp1	x9
 
-#define A_l	x7
-#define A_h	x8
-#define B_l	x9
-#define B_h	x10
-#define C_l	x11
-#define C_h	x12
-#define D_l	x13
-#define D_h	x14
+#define L(l) .L ## l
 
-	mov	dst, dstin
-	cmp	count, #64
-	b.ge	.Lcpy_not_short
-	cmp	count, #15
-	b.le	.Ltail15tiny
+/* Copies are split into 3 main cases: small copies of up to 16 bytes,
+   medium copies of 17..96 bytes which are fully unrolled. Large copies
+   of more than 96 bytes align the destination and use an unrolled loop
+   processing 64 bytes per iteration.
+   Small and medium copies read all data before writing, allowing any
+   kind of overlap, and memmove tailcalls memcpy for these cases as
+   well as non-overlapping copies.
+*/
 
-	/* Deal with small copies quickly by dropping straight into the
-	 * exit block.  */
-.Ltail63:
-	/* Copy up to 48 bytes of data.  At this point we only need the
-	 * bottom 6 bits of count to be accurate.  */
-	ands	tmp1, count, #0x30
-	b.eq	.Ltail15
-	add	dst, dst, tmp1
-	add	src, src, tmp1
-	cmp	tmp1w, #0x20
-	b.eq	1f
-	b.lt	2f
-	ldp	A_l, A_h, [src, #-48]
-	stp	A_l, A_h, [dst, #-48]
-1:
-	ldp	A_l, A_h, [src, #-32]
-	stp	A_l, A_h, [dst, #-32]
-2:
-	ldp	A_l, A_h, [src, #-16]
-	stp	A_l, A_h, [dst, #-16]
+	prfm    PLDL1KEEP, [src]
+	add	srcend, src, count
+	add	dstend, dstin, count
+        cmp     count, 16
+        b.ls    L(copy16)
+	cmp	count, 96
+	b.hi	L(copy_long)
 
-.Ltail15:
-	ands	count, count, #15
-	beq	1f
-	add	src, src, count
-	ldp	A_l, A_h, [src, #-16]
-	add	dst, dst, count
-	stp	A_l, A_h, [dst, #-16]
-1:
-	ret
-
-.Ltail15tiny:
-	/* Copy up to 15 bytes of data.  Does not assume additional data
-	   being copied.  */
-	tbz	count, #3, 1f
-	ldr	tmp1, [src], #8
-	str	tmp1, [dst], #8
-1:
-	tbz	count, #2, 1f
-	ldr	tmp1w, [src], #4
-	str	tmp1w, [dst], #4
-1:
-	tbz	count, #1, 1f
-	ldrh	tmp1w, [src], #2
-	strh	tmp1w, [dst], #2
-1:
-	tbz	count, #0, 1f
-	ldrb	tmp1w, [src]
-	strb	tmp1w, [dst]
-1:
-	ret
-
-.Lcpy_not_short:
-	/* We don't much care about the alignment of DST, but we want SRC
-	 * to be 128-bit (16 byte) aligned so that we don't cross cache line
-	 * boundaries on both loads and stores.  */
-	neg	tmp2, src
-	ands	tmp2, tmp2, #15		/* Bytes to reach alignment.  */
-	b.eq	2f
-	sub	count, count, tmp2
-	/* Copy more data than needed; it's faster than jumping
-	 * around copying sub-Quadword quantities.  We know that
-	 * it can't overrun.  */
+	/* Medium copies: 17..96 bytes.  */
+	sub	tmp1, count, 1
 	ldp	A_l, A_h, [src]
-	add	src, src, tmp2
-	stp	A_l, A_h, [dst]
-	add	dst, dst, tmp2
-	/* There may be less than 63 bytes to go now.  */
-	cmp	count, #63
-	b.le	.Ltail63
-2:
-	subs	count, count, #128
-	b.ge	.Lcpy_body_large
-	/* Less than 128 bytes to copy, so handle 64 here and then jump
-	 * to the tail.  */
-	ldp	A_l, A_h, [src]
-	ldp	B_l, B_h, [src, #16]
-	ldp	C_l, C_h, [src, #32]
-	ldp	D_l, D_h, [src, #48]
-	stp	A_l, A_h, [dst]
-	stp	B_l, B_h, [dst, #16]
-	stp	C_l, C_h, [dst, #32]
-	stp	D_l, D_h, [dst, #48]
-	tst	count, #0x3f
-	add	src, src, #64
-	add	dst, dst, #64
-	b.ne	.Ltail63
+	tbnz	tmp1, 6, L(copy96)
+	ldp	D_l, D_h, [srcend, -16]
+	tbz	tmp1, 5, 1f
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [srcend, -32]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstend, -32]
+1:
+	stp	A_l, A_h, [dstin]
+	stp	D_l, D_h, [dstend, -16]
 	ret
 
-	/* Critical loop.  Start at a new cache line boundary.  Assuming
-	 * 64 bytes per line this ensures the entire loop is in one line.  */
-	.p2align 6
-.Lcpy_body_large:
-	/* There are at least 128 bytes to copy.  */
-	ldp	A_l, A_h, [src, #0]
-	sub	dst, dst, #16		/* Pre-bias.  */
-	ldp	B_l, B_h, [src, #16]
-	ldp	C_l, C_h, [src, #32]
-	ldp	D_l, D_h, [src, #48]!	/* src += 64 - Pre-bias.  */
+	.p2align 4
+
+	/* Small copies: 0..16 bytes.  */
+L(copy16):
+	cmp	count, 8
+	b.lo	1f
+	ldr	A_l, [src]
+	ldr	A_h, [srcend, -8]
+	str	A_l, [dstin]
+	str	A_h, [dstend, -8]
+	ret
+	.p2align 4
 1:
-	stp	A_l, A_h, [dst, #16]
-	ldp	A_l, A_h, [src, #16]
-	stp	B_l, B_h, [dst, #32]
-	ldp	B_l, B_h, [src, #32]
-	stp	C_l, C_h, [dst, #48]
-	ldp	C_l, C_h, [src, #48]
-	stp	D_l, D_h, [dst, #64]!
-	ldp	D_l, D_h, [src, #64]!
-	subs	count, count, #64
-	b.ge	1b
-	stp	A_l, A_h, [dst, #16]
-	stp	B_l, B_h, [dst, #32]
-	stp	C_l, C_h, [dst, #48]
-	stp	D_l, D_h, [dst, #64]
-	add	src, src, #16
-	add	dst, dst, #64 + 16
-	tst	count, #0x3f
-	b.ne	.Ltail63
+	tbz	count, 2, 1f
+	ldr	A_lw, [src]
+	ldr	A_hw, [srcend, -4]
+	str	A_lw, [dstin]
+	str	A_hw, [dstend, -4]
+	ret
+
+	/* Copy 0..3 bytes.  Use a branchless sequence that copies the same
+	   byte 3 times if count==1, or the 2nd byte twice if count==2.  */
+1:
+	cbz	count, 2f
+	lsr	tmp1, count, 1
+	ldrb	A_lw, [src]
+	ldrb	A_hw, [srcend, -1]
+	ldrb	B_lw, [src, tmp1]
+	strb	A_lw, [dstin]
+	strb	B_lw, [dstin, tmp1]
+	strb	A_hw, [dstend, -1]
+2:	ret
+
+	.p2align 4
+	/* Copy 64..96 bytes.  Copy 64 bytes from the start and
+	   32 bytes from the end.  */
+L(copy96):
+	ldp	B_l, B_h, [src, 16]
+	ldp	C_l, C_h, [src, 32]
+	ldp	D_l, D_h, [src, 48]
+	ldp	E_l, E_h, [srcend, -32]
+	ldp	F_l, F_h, [srcend, -16]
+	stp	A_l, A_h, [dstin]
+	stp	B_l, B_h, [dstin, 16]
+	stp	C_l, C_h, [dstin, 32]
+	stp	D_l, D_h, [dstin, 48]
+	stp	E_l, E_h, [dstend, -32]
+	stp	F_l, F_h, [dstend, -16]
+	ret
+
+	/* Align DST to 16 byte alignment so that we don't cross cache line
+	   boundaries on both loads and stores.	 There are at least 96 bytes
+	   to copy, so copy 16 bytes unaligned and then align.	The loop
+	   copies 64 bytes per iteration and prefetches one iteration ahead.  */
+
+	.p2align 4
+L(copy_long):
+	and	tmp1, dstin, 15
+	bic	dst, dstin, 15
+	ldp	D_l, D_h, [src]
+	sub	src, src, tmp1
+	add	count, count, tmp1	/* Count is now 16 too large.  */
+	ldp	A_l, A_h, [src, 16]
+	stp	D_l, D_h, [dstin]
+	ldp	B_l, B_h, [src, 32]
+	ldp	C_l, C_h, [src, 48]
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 128 + 16	/* Test and readjust count.  */
+	b.ls	2f
+1:
+	stp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [src, 16]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [src, 32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [src, 48]
+	stp	D_l, D_h, [dst, 64]!
+	ldp	D_l, D_h, [src, 64]!
+	subs	count, count, 64
+	b.hi	1b
+
+	/* Write the last full set of 64 bytes.	 The remainder is at most 64
+	   bytes, so it is safe to always copy 64 bytes from the end even if
+	   there is just 1 byte left.  */
+2:
+	ldp	E_l, E_h, [srcend, -64]
+	stp	A_l, A_h, [dst, 16]
+	ldp	A_l, A_h, [srcend, -48]
+	stp	B_l, B_h, [dst, 32]
+	ldp	B_l, B_h, [srcend, -32]
+	stp	C_l, C_h, [dst, 48]
+	ldp	C_l, C_h, [srcend, -16]
+	stp	D_l, D_h, [dst, 64]
+	stp	E_l, E_h, [dstend, -64]
+	stp	A_l, A_h, [dstend, -48]
+	stp	B_l, B_h, [dstend, -32]
+	stp	C_l, C_h, [dstend, -16]
 	ret
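
The small and medium paths in this memcpy never store before all loads have completed, which is what allows memmove to tail-call it for overlapping buffers. A hedged C sketch of that "load everything, then store everything" idea for the 8..16 byte case handled by L(copy16) follows; the helper name is hypothetical.

#include <stdint.h>
#include <string.h>

/* Both words are read from the two ends of src before anything is written,
   so any overlap between src and dst still gives the right result.
   Requires 8 <= count <= 16. */
static void copy_8_to_16(void* dst, const void* src, size_t count) {
  uint64_t a, b;
  memcpy(&a, src, 8);                            /* first 8 bytes */
  memcpy(&b, (const char*)src + count - 8, 8);   /* last 8 bytes  */
  memcpy(dst, &a, 8);
  memcpy((char*)dst + count - 8, &b, 8);         /* may overwrite part of a */
}
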
diff --git a/libc/arch-arm64/generic/bionic/memset.S b/libc/arch-arm64/generic/bionic/memset.S
index 9626c0b..3416cfb 100644
--- a/libc/arch-arm64/generic/bionic/memset.S
+++ b/libc/arch-arm64/generic/bionic/memset.S
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Linaro Limited
+/* Copyright (c) 2012-2013, Linaro Limited
    All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
@@ -22,13 +22,39 @@
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
+
+/*
+ * Copyright (c) 2015 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
 /* Assumptions:
  *
- * ARMv8-a, AArch64
- * Unaligned accesses
+ * ARMv8-a, AArch64, unaligned accesses
  *
  */
 
@@ -45,24 +71,22 @@
    values rather than re-reading them each call.  */
 
 #define dstin		x0
-#define val		w1
+#define val		x1
+#define valw		w1
 #define count		x2
-#define dst_count x3 /* for __memset_chk */
-#define tmp1		x3
-#define tmp1w		w3
-#define tmp2		x4
-#define tmp2w		w4
-#define zva_len_x	x5
-#define zva_len		w5
-#define zva_bits_x	x6
+#define dst 		x3
+#define dstend		x4
+#define tmp1		x5
+#define tmp1w		w5
+#define tmp2		x6
+#define tmp2w		w6
+#define zva_len		x5
+#define zva_lenw	w7
 
-#define A_l		x7
-#define A_lw		w7
-#define dst		x8
-#define tmp3w		w9
+#define L(l) .L ## l
 
 ENTRY(__memset_chk)
-  cmp count, dst_count
+  cmp count, dst
   bls memset
 
   // Preserve for accurate backtrace.
@@ -76,171 +100,152 @@
 
 ENTRY(memset)
 
-	mov	dst, dstin		/* Preserve return value.  */
-	ands	A_lw, val, #255
-	b.eq	.Lzero_mem
-	orr	A_lw, A_lw, A_lw, lsl #8
-	orr	A_lw, A_lw, A_lw, lsl #16
-	orr	A_l, A_l, A_l, lsl #32
-.Ltail_maybe_long:
-	cmp	count, #64
-	b.ge	.Lnot_short
-.Ltail_maybe_tiny:
-	cmp	count, #15
-	b.le	.Ltail15tiny
-.Ltail63:
-	ands	tmp1, count, #0x30
-	b.eq	.Ltail15
-	add	dst, dst, tmp1
-	cmp	tmp1w, #0x20
-	b.eq	1f
-	b.lt	2f
-	stp	A_l, A_l, [dst, #-48]
-1:
-	stp	A_l, A_l, [dst, #-32]
-2:
-	stp	A_l, A_l, [dst, #-16]
+	dup	v0.16B, valw
+	add	dstend, dstin, count
 
-.Ltail15:
-	and	count, count, #15
-	add	dst, dst, count
-	stp	A_l, A_l, [dst, #-16]	/* Repeat some/all of last store. */
+	cmp	count, 96
+	b.hi	L(set_long)
+	cmp	count, 16
+	b.hs	L(set_medium)
+	mov	val, v0.D[0]
+
+	/* Set 0..15 bytes.  */
+	tbz	count, 3, 1f
+	str	val, [dstin]
+	str	val, [dstend, -8]
+	ret
+	nop
+1:	tbz	count, 2, 2f
+	str	valw, [dstin]
+	str	valw, [dstend, -4]
+	ret
+2:	cbz	count, 3f
+	strb	valw, [dstin]
+	tbz	count, 1, 3f
+	strh	valw, [dstend, -2]
+3:	ret
+
+	/* Set 17..96 bytes.  */
+L(set_medium):
+	str	q0, [dstin]
+	tbnz	count, 6, L(set96)
+	str	q0, [dstend, -16]
+	tbz	count, 5, 1f
+	str	q0, [dstin, 16]
+	str	q0, [dstend, -32]
+1:	ret
+
+	.p2align 4
+	/* Set 64..96 bytes.  Write 64 bytes from the start and
+	   32 bytes from the end.  */
+L(set96):
+	str	q0, [dstin, 16]
+	stp	q0, q0, [dstin, 32]
+	stp	q0, q0, [dstend, -32]
 	ret
 
-.Ltail15tiny:
-	/* Set up to 15 bytes.  Does not assume earlier memory
-	   being set.  */
-	tbz	count, #3, 1f
-	str	A_l, [dst], #8
-1:
-	tbz	count, #2, 1f
-	str	A_lw, [dst], #4
-1:
-	tbz	count, #1, 1f
-	strh	A_lw, [dst], #2
-1:
-	tbz	count, #0, 1f
-	strb	A_lw, [dst]
-1:
+	.p2align 3
+	nop
+L(set_long):
+	and	valw, valw, 255
+	bic	dst, dstin, 15
+	str	q0, [dstin]
+	cmp	count, 256
+	ccmp	valw, 0, 0, cs
+	b.eq	L(try_zva)
+L(no_zva):
+	sub	count, dstend, dst	/* Count is 16 too large.  */
+	add	dst, dst, 16
+	sub	count, count, 64 + 16	/* Adjust count and bias for loop.  */
+1:	stp	q0, q0, [dst], 64
+	stp	q0, q0, [dst, -32]
+L(tail64):
+	subs	count, count, 64
+	b.hi	1b
+2:	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
 	ret
 
-	/* Critical loop.  Start at a new cache line boundary.  Assuming
-	 * 64 bytes per line, this ensures the entire loop is in one line.  */
-	.p2align 6
-.Lnot_short:
-	neg	tmp2, dst
-	ands	tmp2, tmp2, #15
-	b.eq	2f
-	/* Bring DST to 128-bit (16-byte) alignment.  We know that there's
-	 * more than that to set, so we simply store 16 bytes and advance by
-	 * the amount required to reach alignment.  */
-	sub	count, count, tmp2
-	stp	A_l, A_l, [dst]
-	add	dst, dst, tmp2
-	/* There may be less than 63 bytes to go now.  */
-	cmp	count, #63
-	b.le	.Ltail63
-2:
-	sub	dst, dst, #16		/* Pre-bias.  */
-	sub	count, count, #64
-1:
-	stp	A_l, A_l, [dst, #16]
-	stp	A_l, A_l, [dst, #32]
-	stp	A_l, A_l, [dst, #48]
-	stp	A_l, A_l, [dst, #64]!
-	subs	count, count, #64
-	b.ge	1b
-	tst	count, #0x3f
-	add	dst, dst, #16
-	b.ne	.Ltail63
-	ret
-
-	/* For zeroing memory, check to see if we can use the ZVA feature to
-	 * zero entire 'cache' lines.  */
-.Lzero_mem:
-	mov	A_l, #0
-	cmp	count, #63
-	b.le	.Ltail_maybe_tiny
-	neg	tmp2, dst
-	ands	tmp2, tmp2, #15
-	b.eq	1f
-	sub	count, count, tmp2
-	stp	A_l, A_l, [dst]
-	add	dst, dst, tmp2
-	cmp	count, #63
-	b.le	.Ltail63
-1:
-	/* For zeroing small amounts of memory, it's not worth setting up
-	 * the line-clear code.  */
-	cmp	count, #128
-	b.lt	.Lnot_short
-#ifdef MAYBE_VIRT
-	/* For efficiency when virtualized, we cache the ZVA capability.  */
-	adrp	tmp2, .Lcache_clear
-	ldr	zva_len, [tmp2, #:lo12:.Lcache_clear]
-	tbnz	zva_len, #31, .Lnot_short
-	cbnz	zva_len, .Lzero_by_line
+	.p2align 3
+L(try_zva):
 	mrs	tmp1, dczid_el0
-	tbz	tmp1, #4, 1f
-	/* ZVA not available.  Remember this for next time.  */
-	mov	zva_len, #~0
-	str	zva_len, [tmp2, #:lo12:.Lcache_clear]
-	b	.Lnot_short
-1:
-	mov	tmp3w, #4
-	and	zva_len, tmp1w, #15	/* Safety: other bits reserved.  */
-	lsl	zva_len, tmp3w, zva_len
-	str	zva_len, [tmp2, #:lo12:.Lcache_clear]
-#else
-	mrs	tmp1, dczid_el0
-	tbnz	tmp1, #4, .Lnot_short
-	mov	tmp3w, #4
-	and	zva_len, tmp1w, #15	/* Safety: other bits reserved.  */
-	lsl	zva_len, tmp3w, zva_len
-#endif
+	tbnz	tmp1w, 4, L(no_zva)
+	and	tmp1w, tmp1w, 15
+	cmp	tmp1w, 4	/* ZVA size is 64 bytes.  */
+	b.ne	 L(zva_128)
 
-.Lzero_by_line:
-	/* Compute how far we need to go to become suitably aligned.  We're
-	 * already at quad-word alignment.  */
-	cmp	count, zva_len_x
-	b.lt	.Lnot_short		/* Not enough to reach alignment.  */
-	sub	zva_bits_x, zva_len_x, #1
-	neg	tmp2, dst
-	ands	tmp2, tmp2, zva_bits_x
-	b.eq	1f			/* Already aligned.  */
-	/* Not aligned, check that there's enough to copy after alignment.  */
-	sub	tmp1, count, tmp2
-	cmp	tmp1, #64
-	ccmp	tmp1, zva_len_x, #8, ge	/* NZCV=0b1000 */
-	b.lt	.Lnot_short
-	/* We know that there's at least 64 bytes to zero and that it's safe
-	 * to overrun by 64 bytes.  */
-	mov	count, tmp1
-2:
-	stp	A_l, A_l, [dst]
-	stp	A_l, A_l, [dst, #16]
-	stp	A_l, A_l, [dst, #32]
-	subs	tmp2, tmp2, #64
-	stp	A_l, A_l, [dst, #48]
-	add	dst, dst, #64
-	b.ge	2b
-	/* We've overrun a bit, so adjust dst downwards.  */
-	add	dst, dst, tmp2
-1:
-	sub	count, count, zva_len_x
-3:
-	dc	zva, dst
-	add	dst, dst, zva_len_x
-	subs	count, count, zva_len_x
-	b.ge	3b
-	ands	count, count, zva_bits_x
-	b.ne	.Ltail_maybe_long
+	/* Write the first and last 64 byte aligned block using stp rather
+	   than using DC ZVA.  This is faster on some cores.
+	 */
+L(zva_64):
+	str	q0, [dst, 16]
+	stp	q0, q0, [dst, 32]
+	bic	dst, dst, 63
+	stp	q0, q0, [dst, 64]
+	stp	q0, q0, [dst, 96]
+	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+	sub	count, count, 128+64+64	/* Adjust count and bias for loop.  */
+	add	dst, dst, 128
+	nop
+1:	dc	zva, dst
+	add	dst, dst, 64
+	subs	count, count, 64
+	b.hi	1b
+	stp	q0, q0, [dst, 0]
+	stp	q0, q0, [dst, 32]
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
 	ret
+
+	.p2align 3
+L(zva_128):
+	cmp	tmp1w, 5	/* ZVA size is 128 bytes.  */
+	b.ne	L(zva_other)
+
+	str	q0, [dst, 16]
+	stp	q0, q0, [dst, 32]
+	stp	q0, q0, [dst, 64]
+	stp	q0, q0, [dst, 96]
+	bic	dst, dst, 127
+	sub	count, dstend, dst	/* Count is now 128 too large.	*/
+	sub	count, count, 128+128	/* Adjust count and bias for loop.  */
+	add	dst, dst, 128
+1:	dc	zva, dst
+	add	dst, dst, 128
+	subs	count, count, 128
+	b.hi	1b
+	stp	q0, q0, [dstend, -128]
+	stp	q0, q0, [dstend, -96]
+	stp	q0, q0, [dstend, -64]
+	stp	q0, q0, [dstend, -32]
+	ret
+
+L(zva_other):
+	mov	tmp2w, 4
+	lsl	zva_lenw, tmp2w, tmp1w
+	add	tmp1, zva_len, 64	/* Max alignment bytes written.	 */
+	cmp	count, tmp1
+	blo	L(no_zva)
+
+	sub	tmp2, zva_len, 1
+	add	tmp1, dst, zva_len
+	add	dst, dst, 16
+	subs	count, tmp1, dst	/* Actual alignment bytes to write.  */
+	bic	tmp1, tmp1, tmp2	/* Aligned dc zva start address.  */
+	beq	2f
+1:	stp	q0, q0, [dst], 64
+	stp	q0, q0, [dst, -32]
+	subs	count, count, 64
+	b.hi	1b
+2:	mov	dst, tmp1
+	sub	count, dstend, tmp1	/* Remaining bytes to write.  */
+	subs	count, count, zva_len
+	b.lo	4f
+3:	dc	zva, dst
+	add	dst, dst, zva_len
+	subs	count, count, zva_len
+	b.hs	3b
+4:	add	count, count, zva_len
+	b	L(tail64)
+
 END(memset)
-
-#ifdef MAYBE_VIRT
-	.bss
-	.p2align 2
-.Lcache_clear:
-	.space 4
-#endif
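
L(try_zva) and L(zva_other) in the new memset size the DC ZVA loop from DCZID_EL0. The AArch64-only C sketch below mirrors the decoding described in the code's comments (bit 4 set means ZVA is prohibited; bits [3:0] are log2 of the zero block size in 4-byte words); it is an illustration, not part of the change.

#include <stdint.h>

/* Returns the DC ZVA block size in bytes, or 0 if DC ZVA is not usable.
   4 << BS gives 64 bytes for BS = 4 and 128 bytes for BS = 5, matching the
   L(zva_64) and L(zva_128) paths above. */
static long dc_zva_block_size(void) {
  uint64_t dczid;
  __asm__("mrs %0, dczid_el0" : "=r"(dczid));
  if (dczid & (1u << 4)) return 0;     /* DZP: DC ZVA prohibited */
  return 4L << (dczid & 15);           /* block size in bytes */
}
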
diff --git a/libc/arch-arm64/generic/bionic/strlen.S b/libc/arch-arm64/generic/bionic/strlen.S
index 3bd9809..6e540fc 100644
--- a/libc/arch-arm64/generic/bionic/strlen.S
+++ b/libc/arch-arm64/generic/bionic/strlen.S
@@ -1,16 +1,16 @@
-/* Copyright (c) 2014, Linaro Limited
+/* Copyright (c) 2013-2015, Linaro Limited
    All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:
        * Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
+	 notice, this list of conditions and the following disclaimer.
        * Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer in the
-         documentation and/or other materials provided with the distribution.
+	 notice, this list of conditions and the following disclaimer in the
+	 documentation and/or other materials provided with the distribution.
        * Neither the name of the Linaro nor the
-         names of its contributors may be used to endorse or promote products
-         derived from this software without specific prior written permission.
+	 names of its contributors may be used to endorse or promote products
+	 derived from this software without specific prior written permission.
 
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@@ -22,16 +22,19 @@
    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
 
 /* Assumptions:
  *
- * ARMv8-a, AArch64
+ * ARMv8-a, AArch64, unaligned accesses, min page size 4k.
  */
 
 #include <private/bionic_asm.h>
 
+/* To test the page crossing code path more thoroughly, compile with
+   -DTEST_PAGE_CROSS - this will force all calls through the slower
+   entry path.  This option is not intended for production use.	 */
+
 /* Arguments and results.  */
 #define srcin		x0
 #define len		x0
@@ -40,87 +43,185 @@
 #define src		x1
 #define data1		x2
 #define data2		x3
-#define data2a		x4
-#define has_nul1	x5
-#define has_nul2	x6
-#define tmp1		x7
-#define tmp2		x8
-#define tmp3		x9
-#define tmp4		x10
-#define zeroones	x11
-#define pos		x12
+#define has_nul1	x4
+#define has_nul2	x5
+#define tmp1		x4
+#define tmp2		x5
+#define tmp3		x6
+#define tmp4		x7
+#define zeroones	x8
+
+#define L(l) .L ## l
+
+	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
+	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
+	   can be done in parallel across the entire word. A faster check
+	   (X - 1) & 0x80 is zero for non-NUL ASCII characters, but gives
+	   false hits for characters 129..255.	*/
 
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
 #define REP8_80 0x8080808080808080
 
-	/* Start of critial section -- keep to one 64Byte cache line.  */
-ENTRY(strlen)
-	mov	zeroones, #REP8_01
-	bic	src, srcin, #15
-	ands	tmp1, srcin, #15
-	b.ne	.Lmisaligned
-	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
-	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
-	   can be done in parallel across the entire word.  */
-	/* The inner loop deals with two Dwords at a time.  This has a
-	   slightly higher start-up cost, but we should win quite quickly,
-	   especially on cores with a high number of issue slots per
-	   cycle, as we get much better parallelism out of the operations.  */
-.Lloop:
-	ldp	data1, data2, [src], #16
-.Lrealigned:
-	sub	tmp1, data1, zeroones
-	orr	tmp2, data1, #REP8_7f
-	sub	tmp3, data2, zeroones
-	orr	tmp4, data2, #REP8_7f
-	bic	has_nul1, tmp1, tmp2
-	bics	has_nul2, tmp3, tmp4
-	ccmp	has_nul1, #0, #0, eq	/* NZCV = 0000  */
-	b.eq	.Lloop
-	/* End of critical section -- keep to one 64Byte cache line.  */
-
-	sub	len, src, srcin
-	cbz	has_nul1, .Lnul_in_data2
-#ifdef __AARCH64EB__
-	mov	data2, data1
+#ifdef TEST_PAGE_CROSS
+# define MIN_PAGE_SIZE 15
+#else
+# define MIN_PAGE_SIZE 4096
 #endif
-	sub	len, len, #8
-	mov	has_nul2, has_nul1
-.Lnul_in_data2:
+
+	/* Since strings are short on average, we check the first 16 bytes
+	   of the string for a NUL character.  In order to do an unaligned ldp
+	   safely we have to do a page cross check first.  If there is a NUL
+	   byte we calculate the length from the 2 8-byte words using
+	   conditional select to reduce branch mispredictions (it is unlikely
+	   strlen will be repeatedly called on strings with the same length).
+
+	   If the string is longer than 16 bytes, we align src so don't need
+	   further page cross checks, and process 32 bytes per iteration
+	   using the fast NUL check.  If we encounter non-ASCII characters,
+	   fallback to a second loop using the full NUL check.
+
+	   If the page cross check fails, we read 16 bytes from an aligned
+	   address, remove any characters before the string, and continue
+	   in the main loop using aligned loads.  Since strings crossing a
+	   page in the first 16 bytes are rare (probability of
+	   16/MIN_PAGE_SIZE ~= 0.4%), this case does not need to be optimized.
+
+	   AArch64 systems have a minimum page size of 4k.  We don't bother
+	   checking for larger page sizes - the cost of setting up the correct
+	   page size is just not worth the extra gain from a small reduction in
+	   the cases taking the slow path.  Note that we only care about
+	   whether the first fetch, which may be misaligned, crosses a page
+	   boundary.  */
+
+ENTRY(strlen)
+	and	tmp1, srcin, MIN_PAGE_SIZE - 1
+	mov	zeroones, REP8_01
+	cmp	tmp1, MIN_PAGE_SIZE - 16
+	b.gt	L(page_cross)
+	ldp	data1, data2, [srcin]
 #ifdef __AARCH64EB__
 	/* For big-endian, carry propagation (if the final byte in the
-	   string is 0x01) means we cannot use has_nul directly.  The
-	   easiest way to get the correct byte is to byte-swap the data
-	   and calculate the syndrome a second time.  */
+	   string is 0x01) means we cannot use has_nul1/2 directly.
+	   Since we expect strings to be small and early-exit,
+	   byte-swap the data now so has_nul1/2 will be correct.  */
+	rev	data1, data1
 	rev	data2, data2
-	sub	tmp1, data2, zeroones
-	orr	tmp2, data2, #REP8_7f
-	bic	has_nul2, tmp1, tmp2
 #endif
-	sub	len, len, #8
-	rev	has_nul2, has_nul2
-	clz	pos, has_nul2
-	add	len, len, pos, lsr #3		/* Bits to bytes.  */
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(main_loop_entry)
+
+	/* Enter with C = has_nul1 == 0.  */
+	csel	has_nul1, has_nul1, has_nul2, cc
+	mov	len, 8
+	rev	has_nul1, has_nul1
+	clz	tmp1, has_nul1
+	csel	len, xzr, len, cc
+	add	len, len, tmp1, lsr 3
 	ret
 
-.Lmisaligned:
-	cmp	tmp1, #8
-	neg	tmp1, tmp1
-	ldp	data1, data2, [src], #16
-	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
-	mov	tmp2, #~0
+	/* The inner loop processes 32 bytes per iteration and uses the fast
+	   NUL check.  If we encounter non-ASCII characters, use a second
+	   loop with the accurate NUL check.  */
+	.p2align 4
+L(main_loop_entry):
+	bic	src, srcin, 15
+	sub	src, src, 16
+L(main_loop):
+	ldp	data1, data2, [src, 32]!
+L(page_cross_entry):
+	sub	tmp1, data1, zeroones
+	sub	tmp3, data2, zeroones
+	orr	tmp2, tmp1, tmp3
+	tst	tmp2, zeroones, lsl 7
+	bne	1f
+	ldp	data1, data2, [src, 16]
+	sub	tmp1, data1, zeroones
+	sub	tmp3, data2, zeroones
+	orr	tmp2, tmp1, tmp3
+	tst	tmp2, zeroones, lsl 7
+	beq	L(main_loop)
+	add	src, src, 16
+1:
+	/* The fast check failed, so do the slower, accurate NUL check.	 */
+	orr	tmp2, data1, REP8_7f
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(nonascii_loop)
+
+	/* Enter with C = has_nul1 == 0.  */
+L(tail):
 #ifdef __AARCH64EB__
-	/* Big-endian.  Early bytes are at MSB.  */
-	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+	/* For big-endian, carry propagation (if the final byte in the
+	   string is 0x01) means we cannot use has_nul1/2 directly.  The
+	   easiest way to get the correct byte is to byte-swap the data
+	   and calculate the syndrome a second time.  */
+	csel	data1, data1, data2, cc
+	rev	data1, data1
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	bic	has_nul1, tmp1, tmp2
+#else
+	csel	has_nul1, has_nul1, has_nul2, cc
+#endif
+	sub	len, src, srcin
+	rev	has_nul1, has_nul1
+	add	tmp2, len, 8
+	clz	tmp1, has_nul1
+	csel	len, len, tmp2, cc
+	add	len, len, tmp1, lsr 3
+	ret
+
+L(nonascii_loop):
+	ldp	data1, data2, [src, 16]!
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	bne	L(tail)
+	ldp	data1, data2, [src, 16]!
+	sub	tmp1, data1, zeroones
+	orr	tmp2, data1, REP8_7f
+	sub	tmp3, data2, zeroones
+	orr	tmp4, data2, REP8_7f
+	bics	has_nul1, tmp1, tmp2
+	bic	has_nul2, tmp3, tmp4
+	ccmp	has_nul2, 0, 0, eq
+	beq	L(nonascii_loop)
+	b	L(tail)
+
+	/* Load 16 bytes from [srcin & ~15] and force the bytes that precede
+	   srcin to 0x7f, so we ignore any NUL bytes before the string.
+	   Then continue in the aligned loop.  */
+L(page_cross):
+	bic	src, srcin, 15
+	ldp	data1, data2, [src]
+	lsl	tmp1, srcin, 3
+	mov	tmp4, -1
+#ifdef __AARCH64EB__
+	/* Big-endian.	Early bytes are at MSB.	 */
+	lsr	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
 #else
 	/* Little-endian.  Early bytes are at LSB.  */
-	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
+	lsl	tmp1, tmp4, tmp1	/* Shift (tmp1 & 63).  */
 #endif
-	orr	data1, data1, tmp2
-	orr	data2a, data2, tmp2
-	csinv	data1, data1, xzr, le
-	csel	data2, data2, data2a, le
-	b	.Lrealigned
+	orr	tmp1, tmp1, REP8_80
+	orn	data1, data1, tmp1
+	orn	tmp2, data2, tmp1
+	tst	srcin, 8
+	csel	data1, data1, tmp4, eq
+	csel	data2, data2, tmp2, eq
+	b	L(page_cross_entry)
 
 END(strlen)
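
The zero-byte tests used throughout the new strlen are the two word-at-a-time checks described in its header comment. A C sketch of both, for one little-endian 64-bit word, is given below; it is illustrative only.

#include <stdbool.h>
#include <stdint.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_7f 0x7f7f7f7f7f7f7f7fULL
#define REP8_80 0x8080808080808080ULL

/* Exact check: (X - 1) & ~(X | 0x7f) is non-zero iff some byte of x is 0. */
static bool has_nul_exact(uint64_t x) {
  return ((x - REP8_01) & ~(x | REP8_7f)) != 0;
}

/* Fast check: zero for non-NUL ASCII bytes, but bytes in 129..255 can give
   false hits, so a hit must be confirmed with the exact check.  This is the
   form of the "tst tmp2, zeroones, lsl 7" test in the main loop. */
static bool has_nul_fast(uint64_t x) {
  return ((x - REP8_01) & REP8_80) != 0;
}
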
diff --git a/libc/include/pwd.h b/libc/include/pwd.h
index a686d05..223fc5a 100644
--- a/libc/include/pwd.h
+++ b/libc/include/pwd.h
@@ -65,39 +65,6 @@
 
 __BEGIN_DECLS
 
-#define _PATH_PASSWD        "/etc/passwd"
-#define _PATH_MASTERPASSWD  "/etc/master.passwd"
-#define _PATH_MASTERPASSWD_LOCK "/etc/ptmp"
-
-#define _PATH_PASSWD_CONF   "/etc/passwd.conf"
-#define _PATH_PASSWDCONF    _PATH_PASSWD_CONF   /* XXX: compat */
-#define _PATH_USERMGMT_CONF "/etc/usermgmt.conf"
-
-#define _PATH_MP_DB     "/etc/pwd.db"
-#define _PATH_SMP_DB        "/etc/spwd.db"
-
-#define _PATH_PWD_MKDB      "/usr/sbin/pwd_mkdb"
-
-#define _PW_KEYBYNAME       '1' /* stored by name */
-#define _PW_KEYBYNUM        '2' /* stored by entry in the "file" */
-#define _PW_KEYBYUID        '3' /* stored by uid */
-
-#define _PASSWORD_EFMT1     '_' /* extended DES encryption format */
-#define _PASSWORD_NONDES    '$' /* non-DES encryption formats */
-
-#define _PASSWORD_LEN       128 /* max length, not counting NUL */
-
-#define _PASSWORD_NOUID     0x01    /* flag for no specified uid. */
-#define _PASSWORD_NOGID     0x02    /* flag for no specified gid. */
-#define _PASSWORD_NOCHG     0x04    /* flag for no specified change. */
-#define _PASSWORD_NOEXP     0x08    /* flag for no specified expire. */
-
-#define _PASSWORD_OLDFMT    0x10    /* flag to expect an old style entry */
-#define _PASSWORD_NOWARN    0x20    /* no warnings for bad entries */
-
-#define _PASSWORD_WARNDAYS  14  /* days to warn about expiry */
-#define _PASSWORD_CHGNOW    -1  /* special day to force password change at next login */
-
 struct passwd {
   char* pw_name;
   char* pw_passwd;
diff --git a/libdl/libdl.c b/libdl/libdl.c
index 8329468..3fbd7e5 100644
--- a/libdl/libdl.c
+++ b/libdl/libdl.c
@@ -85,6 +85,9 @@
                                 struct android_namespace_t* parent,
                                 const void* caller_addr);
 
+__attribute__((__weak__, visibility("default")))
+void __loader_android_dlwarning(void* obj, void (*f)(void*, const char*));
+
 // Proxy calls to bionic loader
 void* dlopen(const char* filename, int flag) {
   const void* caller_addr = __builtin_return_address(0);
@@ -164,4 +167,6 @@
                                            caller_addr);
 }
 
-void android_dlwarning(void* obj, void (*f)(void*, const char*)) { f(obj, 0); }
+void android_dlwarning(void* obj, void (*f)(void*, const char*)) {
+  __loader_android_dlwarning(obj, f);
+}
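
The libdl.c hunk relies on a weak declaration of the loader's entry point so that libdl builds without a local definition and the dynamic linker supplies the real symbol at run time. A hedged C sketch of that proxy pattern, using hypothetical names rather than the actual bionic symbols:

/* The loader's hook is only declared here and marked weak, so this library
   links without a definition; at run time the dynamic linker provides the
   real symbol and the thin wrapper forwards to it.  As in the hunk above,
   no null check is made before the call. */
__attribute__((__weak__, visibility("default")))
void __loader_example_hook(void* obj, void (*f)(void*, const char*));

void example_hook(void* obj, void (*f)(void*, const char*)) {
  __loader_example_hook(obj, f);
}
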
diff --git a/linker/linker.cpp b/linker/linker.cpp
index d3ab8d8..fc8d1ef 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -1869,15 +1869,33 @@
   }
 #endif
 
-  if (sym_name == nullptr) {
-    DL_ERR("dlsym failed: symbol name is null");
-    return false;
-  }
-
   soinfo* found = nullptr;
   const ElfW(Sym)* sym = nullptr;
   soinfo* caller = find_containing_library(caller_addr);
   android_namespace_t* ns = get_caller_namespace(caller);
+  soinfo* si = nullptr;
+  if (handle != RTLD_DEFAULT && handle != RTLD_NEXT) {
+    si = soinfo_from_handle(handle);
+  }
+
+  LD_LOG(kLogDlsym,
+         "dlsym(handle=%p(\"%s\"), sym_name=\"%s\", sym_ver=\"%s\", caller=\"%s\", caller_ns=%s@%p) ...",
+         handle,
+         si != nullptr ? si->get_realpath() : "n/a",
+         sym_name,
+         sym_ver,
+         caller == nullptr ? "(null)" : caller->get_realpath(),
+         ns == nullptr ? "(null)" : ns->get_name(),
+         ns);
+
+  auto failure_guard = make_scope_guard([&]() {
+    LD_LOG(kLogDlsym, "... dlsym failed: %s", linker_get_error_buffer());
+  });
+
+  if (sym_name == nullptr) {
+    DL_ERR("dlsym failed: symbol name is null");
+    return false;
+  }
 
   version_info vi_instance;
   version_info* vi = nullptr;
@@ -1891,7 +1909,6 @@
   if (handle == RTLD_DEFAULT || handle == RTLD_NEXT) {
     sym = dlsym_linear_lookup(ns, sym_name, vi, &found, caller, handle);
   } else {
-    soinfo* si = soinfo_from_handle(handle);
     if (si == nullptr) {
       DL_ERR("dlsym failed: invalid handle: %p", handle);
       return false;
@@ -1904,6 +1921,10 @@
 
     if ((bind == STB_GLOBAL || bind == STB_WEAK) && sym->st_shndx != 0) {
       *symbol = reinterpret_cast<void*>(found->resolve_symbol_address(sym));
+      failure_guard.disable();
+      LD_LOG(kLogDlsym,
+             "... dlsym successful: sym_name=\"%s\", sym_ver=\"%s\", found in=\"%s\", address=%p",
+             sym_name, sym_ver, found->get_soname(), *symbol);
       return true;
     }
 
diff --git a/linker/linker_logger.h b/linker/linker_logger.h
index 0932471..502f872 100644
--- a/linker/linker_logger.h
+++ b/linker/linker_logger.h
@@ -40,6 +40,7 @@
 
 constexpr const uint32_t kLogErrors = 1 << 0;
 constexpr const uint32_t kLogDlopen = 1 << 1;
+constexpr const uint32_t kLogDlsym  = 1 << 2;
 
 class LinkerLogger {
  public:
diff --git a/tests/Android.build.mk b/tests/Android.build.mk
index cce4344..9236558 100644
--- a/tests/Android.build.mk
+++ b/tests/Android.build.mk
@@ -20,24 +20,30 @@
 LOCAL_MODULE := $(module)
 LOCAL_MODULE_TAGS := $(module_tag)
 ifeq ($(build_type),host)
-# Always make host multilib
-LOCAL_MULTILIB := both
+  # Always make host multilib
+  LOCAL_MULTILIB := both
 endif
 
 ifneq ($(findstring LIBRARY, $(build_target)),LIBRARY)
-    LOCAL_MODULE_STEM_32 := $(module)32
-    LOCAL_MODULE_STEM_64 := $(module)64
+  LOCAL_MODULE_STEM_32 := $(module)32
+  LOCAL_MODULE_STEM_64 := $(module)64
 else
-ifneq ($($(module)_install_to_out_data_dir),)
-    LOCAL_MODULE_PATH_32 := $($(TARGET_2ND_ARCH_VAR_PREFIX)TARGET_OUT_DATA_NATIVE_TESTS)/$($(module)_install_to_out_data_dir)
-    LOCAL_MODULE_PATH_64 := $(TARGET_OUT_DATA_NATIVE_TESTS)/$($(module)_install_to_out_data_dir)
+ifneq ($($(module)_install_to_native_tests_dir),)
+  ifeq ($(build_type),host)
+    native_tests_var := HOST_OUT_NATIVE_TESTS
+  else
+    native_tests_var := TARGET_OUT_DATA_NATIVE_TESTS
+  endif
+
+  LOCAL_MODULE_PATH_32 := $($(TARGET_2ND_ARCH_VAR_PREFIX)$(native_tests_var))/$($(module)_install_to_native_tests_dir)
+  LOCAL_MODULE_PATH_64 := $($(native_tests_var))/$($(module)_install_to_native_tests_dir)
 endif
 endif
 
 LOCAL_CLANG := $($(module)_clang_$(build_type))
 
 ifneq ($($(module)_allow_asan),true)
-LOCAL_SANITIZE := never
+  LOCAL_SANITIZE := never
 endif
 
 LOCAL_FORCE_STATIC_EXECUTABLE := $($(module)_force_static_executable)
@@ -45,11 +51,11 @@
 LOCAL_ALLOW_UNDEFINED_SYMBOLS := $($(module)_allow_undefined_symbols)
 
 ifneq ($($(module)_multilib),)
-    LOCAL_MULTILIB := $($(module)_multilib)
+  LOCAL_MULTILIB := $($(module)_multilib)
 endif
 
 ifneq ($($(module)_relative_path),)
-    LOCAL_MODULE_RELATIVE_PATH := $($(module)_relative_path)
+  LOCAL_MODULE_RELATIVE_PATH := $($(module)_relative_path)
 endif
 
 LOCAL_CFLAGS := \
@@ -97,9 +103,9 @@
     $($(module)_ldlibs_$(build_type)) \
 
 ifeq ($(LOCAL_FORCE_STATIC_EXECUTABLE),true)
-LOCAL_CXX_STL := libc++_static
+  LOCAL_CXX_STL := libc++_static
 else
-LOCAL_CXX_STL := libc++
+  LOCAL_CXX_STL := libc++
 endif
 
 ifeq ($(build_type),target)
diff --git a/tests/grp_pwd_test.cpp b/tests/grp_pwd_test.cpp
index a684780..54729b1 100644
--- a/tests/grp_pwd_test.cpp
+++ b/tests/grp_pwd_test.cpp
@@ -114,76 +114,76 @@
 
 #endif
 
-TEST(getpwnam, system_id_root) {
+TEST(pwd, getpwnam_system_id_root) {
   check_get_passwd("root", 0, TYPE_SYSTEM);
 }
 
-TEST(getpwnam, system_id_system) {
+TEST(pwd, getpwnam_system_id_system) {
   check_get_passwd("system", 1000, TYPE_SYSTEM);
 }
 
-TEST(getpwnam, app_id_radio) {
+TEST(pwd, getpwnam_app_id_radio) {
   check_get_passwd("radio", 1001, TYPE_SYSTEM);
 }
 
-TEST(getpwnam, oem_id_5000) {
+TEST(pwd, getpwnam_oem_id_5000) {
   check_get_passwd("oem_5000", 5000, TYPE_SYSTEM);
 }
 
-TEST(getpwnam, oem_id_5999) {
+TEST(pwd, getpwnam_oem_id_5999) {
   check_get_passwd("oem_5999", 5999, TYPE_SYSTEM);
 }
 
-TEST(getpwnam, oem_id_2900) {
+TEST(pwd, getpwnam_oem_id_2900) {
   check_get_passwd("oem_2900", 2900, TYPE_SYSTEM);
 }
 
-TEST(getpwnam, oem_id_2999) {
+TEST(pwd, getpwnam_oem_id_2999) {
   check_get_passwd("oem_2999", 2999, TYPE_SYSTEM);
 }
 
-TEST(getpwnam, app_id_nobody) {
+TEST(pwd, getpwnam_app_id_nobody) {
   check_get_passwd("nobody", 9999, TYPE_SYSTEM);
 }
 
-TEST(getpwnam, app_id_u0_a0) {
+TEST(pwd, getpwnam_app_id_u0_a0) {
   check_get_passwd("u0_a0", 10000, TYPE_APP);
 }
 
-TEST(getpwnam, app_id_u0_a1234) {
+TEST(pwd, getpwnam_app_id_u0_a1234) {
   check_get_passwd("u0_a1234", 11234, TYPE_APP);
 }
 
 // Test the difference between uid and shared gid.
-TEST(getpwnam, app_id_u0_a49999) {
+TEST(pwd, getpwnam_app_id_u0_a49999) {
   check_get_passwd("u0_a49999", 59999, TYPE_APP);
 }
 
-TEST(getpwnam, app_id_u0_i1) {
+TEST(pwd, getpwnam_app_id_u0_i1) {
   check_get_passwd("u0_i1", 99001, TYPE_APP);
 }
 
-TEST(getpwnam, app_id_u1_root) {
+TEST(pwd, getpwnam_app_id_u1_root) {
   check_get_passwd("u1_root", 100000, TYPE_SYSTEM);
 }
 
-TEST(getpwnam, app_id_u1_radio) {
+TEST(pwd, getpwnam_app_id_u1_radio) {
   check_get_passwd("u1_radio", 101001, TYPE_SYSTEM);
 }
 
-TEST(getpwnam, app_id_u1_a0) {
+TEST(pwd, getpwnam_app_id_u1_a0) {
   check_get_passwd("u1_a0", 110000, TYPE_APP);
 }
 
-TEST(getpwnam, app_id_u1_a40000) {
+TEST(pwd, getpwnam_app_id_u1_a40000) {
   check_get_passwd("u1_a40000", 150000, TYPE_APP);
 }
 
-TEST(getpwnam, app_id_u1_i0) {
+TEST(pwd, getpwnam_app_id_u1_i0) {
   check_get_passwd("u1_i0", 199000, TYPE_APP);
 }
 
-TEST(getpwent, iterate) {
+TEST(pwd, getpwent_iterate) {
   passwd* pwd;
   std::bitset<10000> exist;
   bool application = false;
@@ -295,80 +295,80 @@
 
 #endif
 
-TEST(getgrnam, system_id_root) {
+TEST(grp, getgrnam_system_id_root) {
   check_get_group("root", 0);
 }
 
-TEST(getgrnam, system_id_system) {
+TEST(grp, getgrnam_system_id_system) {
   check_get_group("system", 1000);
 }
 
-TEST(getgrnam, app_id_radio) {
+TEST(grp, getgrnam_app_id_radio) {
   check_get_group("radio", 1001);
 }
 
-TEST(getgrnam, oem_id_5000) {
+TEST(grp, getgrnam_oem_id_5000) {
   check_get_group("oem_5000", 5000);
 }
 
-TEST(getgrnam, oem_id_5999) {
+TEST(grp, getgrnam_oem_id_5999) {
   check_get_group("oem_5999", 5999);
 }
 
-TEST(getgrnam, oem_id_2900) {
+TEST(grp, getgrnam_oem_id_2900) {
   check_get_group("oem_2900", 2900);
 }
 
-TEST(getgrnam, oem_id_2999) {
+TEST(grp, getgrnam_oem_id_2999) {
   check_get_group("oem_2999", 2999);
 }
 
-TEST(getgrnam, app_id_nobody) {
+TEST(grp, getgrnam_app_id_nobody) {
   check_get_group("nobody", 9999);
 }
 
-TEST(getgrnam, app_id_u0_a0) {
+TEST(grp, getgrnam_app_id_u0_a0) {
   check_get_group("u0_a0", 10000);
 }
 
-TEST(getgrnam, app_id_u0_a1234) {
+TEST(grp, getgrnam_app_id_u0_a1234) {
   check_get_group("u0_a1234", 11234);
 }
 
-TEST(getgrnam, app_id_u0_a9999) {
+TEST(grp, getgrnam_app_id_u0_a9999) {
   check_get_group("u0_a9999", 19999);
 }
 
 // Test the difference between uid and shared gid.
-TEST(getgrnam, app_id_all_a9999) {
+TEST(grp, getgrnam_app_id_all_a9999) {
   check_get_group("all_a9999", 59999);
 }
 
-TEST(getgrnam, app_id_u0_i1) {
+TEST(grp, getgrnam_app_id_u0_i1) {
   check_get_group("u0_i1", 99001);
 }
 
-TEST(getgrnam, app_id_u1_root) {
+TEST(grp, getgrnam_app_id_u1_root) {
   check_get_group("u1_root", 100000);
 }
 
-TEST(getgrnam, app_id_u1_radio) {
+TEST(grp, getgrnam_app_id_u1_radio) {
   check_get_group("u1_radio", 101001);
 }
 
-TEST(getgrnam, app_id_u1_a0) {
+TEST(grp, getgrnam_app_id_u1_a0) {
   check_get_group("u1_a0", 110000);
 }
 
-TEST(getgrnam, app_id_u1_a40000) {
+TEST(grp, getgrnam_app_id_u1_a40000) {
   check_get_group("u1_a40000", 150000);
 }
 
-TEST(getgrnam, app_id_u1_i0) {
+TEST(grp, getgrnam_app_id_u1_i0) {
   check_get_group("u1_i0", 199000);
 }
 
-TEST(getgrnam_r, reentrancy) {
+TEST(grp, getgrnam_r_reentrancy) {
 #if defined(__BIONIC__)
   group grp_storage[2];
   char buf[2][512];
@@ -388,7 +388,7 @@
 #endif
 }
 
-TEST(getgrgid_r, reentrancy) {
+TEST(grp, getgrgid_r_reentrancy) {
 #if defined(__BIONIC__)
   group grp_storage[2];
   char buf[2][512];
@@ -408,7 +408,7 @@
 #endif
 }
 
-TEST(getgrnam_r, large_enough_suggested_buffer_size) {
+TEST(grp, getgrnam_r_large_enough_suggested_buffer_size) {
   long size = sysconf(_SC_GETGR_R_SIZE_MAX);
   ASSERT_GT(size, 0);
   char buf[size];
@@ -418,7 +418,7 @@
   check_group(grp, "root", 0);
 }
 
-TEST(getgrent, iterate) {
+TEST(grp, getgrent_iterate) {
   group* grp;
   std::bitset<10000> exist;
   bool application = false;
diff --git a/tests/libs/Android.build.dt_runpath.mk b/tests/libs/Android.build.dt_runpath.mk
index 60844e5..a3fcac5 100644
--- a/tests/libs/Android.build.dt_runpath.mk
+++ b/tests/libs/Android.build.dt_runpath.mk
@@ -80,7 +80,7 @@
 libtest_dt_runpath_d_zip_shared_libraries := libtest_dt_runpath_b libtest_dt_runpath_c
 libtest_dt_runpath_d_zip_ldflags := -Wl,--rpath,\$${ORIGIN}/dt_runpath_b_c_x -Wl,--enable-new-dtags
 libtest_dt_runpath_d_zip_ldlibs := -ldl
-libtest_dt_runpath_d_zip_install_to_out_data_dir := $(module)
+libtest_dt_runpath_d_zip_install_to_native_tests_dir := $(module)
 
 module_tag := optional
 build_type := target
diff --git a/tests/libs/Android.build.target.testlib.mk b/tests/libs/Android.build.testlib.host.mk
similarity index 81%
rename from tests/libs/Android.build.target.testlib.mk
rename to tests/libs/Android.build.testlib.host.mk
index 1e767c2..38970dc 100644
--- a/tests/libs/Android.build.target.testlib.mk
+++ b/tests/libs/Android.build.testlib.host.mk
@@ -1,5 +1,5 @@
 #
-# Copyright (C) 2015 The Android Open Source Project
+# Copyright (C) 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,6 +15,5 @@
 #
 
 build_target := SHARED_LIBRARY
-build_type := target
-include $(TEST_PATH)/Android.build.mk
-
+build_type := host
+include $(LOCAL_PATH)/Android.build.testlib.internal.mk
diff --git a/tests/libs/Android.build.testlib.internal.mk b/tests/libs/Android.build.testlib.internal.mk
new file mode 100644
index 0000000..e1fec2b
--- /dev/null
+++ b/tests/libs/Android.build.testlib.internal.mk
@@ -0,0 +1,27 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# 1. Install test libraries to $ANDROID_DATA/nativetests../bionic-loader-test-libs/
+#    by default.
+ifeq ($($(module)_relative_install_path),)
+  $(module)_install_to_native_tests_dir := bionic-loader-test-libs
+else
+  $(module)_install_to_native_tests_dir := bionic-loader-test-libs/$($(module)_relative_install_path)
+endif
+# 2. Set dt_runpath to origin to resolve dependencies
+$(module)_ldflags += -Wl,--rpath,\$${ORIGIN} -Wl,--enable-new-dtags
+
+include $(TEST_PATH)/Android.build.mk
diff --git a/tests/libs/Android.build.testlib.mk b/tests/libs/Android.build.testlib.mk
index 833caf6..a46c457 100644
--- a/tests/libs/Android.build.testlib.mk
+++ b/tests/libs/Android.build.testlib.mk
@@ -14,9 +14,6 @@
 # limitations under the License.
 #
 
-build_target := SHARED_LIBRARY
-build_type := host
-include $(TEST_PATH)/Android.build.mk
+include $(LOCAL_PATH)/Android.build.testlib.host.mk
 
 include $(LOCAL_PATH)/Android.build.testlib.target.mk
-
diff --git a/tests/libs/Android.build.testlib.target.mk b/tests/libs/Android.build.testlib.target.mk
index b79dbfc..5ec0d71 100644
--- a/tests/libs/Android.build.testlib.target.mk
+++ b/tests/libs/Android.build.testlib.target.mk
@@ -14,14 +14,6 @@
 # limitations under the License.
 #
 
+build_target := SHARED_LIBRARY
 build_type := target
-# 1. Install test libraries to /data/nativetests../bionic-loader-test-libs/
-#    by default.
-ifeq ($($(module)_relative_install_path),)
-  $(module)_install_to_out_data_dir := bionic-loader-test-libs
-else
-  $(module)_install_to_out_data_dir := bionic-loader-test-libs/$($(module)_relative_install_path)
-endif
-# 2. Set dt_runpath to origin to resolve dependencies
-$(module)_ldflags += -Wl,--rpath,\$${ORIGIN} -Wl,--enable-new-dtags
-include $(TEST_PATH)/Android.build.mk
+include $(LOCAL_PATH)/Android.build.testlib.internal.mk