Merge "Add pthread_getaffinity_np()/pthread_setaffinity_np()." into main
diff --git a/OWNERS b/OWNERS
index 3818b1d..1859b9e 100644
--- a/OWNERS
+++ b/OWNERS
@@ -5,3 +5,5 @@
 danalbert@google.com
 rprichard@google.com
 yabinc@google.com
+
+per-file docs/mte.md=eugenis@google.com,fmayer@google.com,pcc@google.com
diff --git a/libc/Android.bp b/libc/Android.bp
index e0d8994..a8beb05 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -471,7 +471,6 @@
         "upstream-netbsd/lib/libc/regex/regerror.c",
         "upstream-netbsd/lib/libc/regex/regexec.c",
         "upstream-netbsd/lib/libc/regex/regfree.c",
-        "upstream-netbsd/lib/libc/stdlib/bsearch.c",
         "upstream-netbsd/lib/libc/stdlib/drand48.c",
         "upstream-netbsd/lib/libc/stdlib/erand48.c",
         "upstream-netbsd/lib/libc/stdlib/jrand48.c",
@@ -626,8 +625,6 @@
                 "upstream-openbsd/lib/libc/string/memchr.c",
                 "upstream-openbsd/lib/libc/string/memrchr.c",
                 "upstream-openbsd/lib/libc/string/stpncpy.c",
-                "upstream-openbsd/lib/libc/string/strlcat.c",
-                "upstream-openbsd/lib/libc/string/strlcpy.c",
                 "upstream-openbsd/lib/libc/string/strncat.c",
                 "upstream-openbsd/lib/libc/string/strncmp.c",
                 "upstream-openbsd/lib/libc/string/strncpy.c",
@@ -637,8 +634,6 @@
             srcs: [
                 "upstream-openbsd/lib/libc/string/strcat.c",
                 "upstream-openbsd/lib/libc/string/stpncpy.c",
-                "upstream-openbsd/lib/libc/string/strlcat.c",
-                "upstream-openbsd/lib/libc/string/strlcpy.c",
                 "upstream-openbsd/lib/libc/string/strncat.c",
                 "upstream-openbsd/lib/libc/string/strncpy.c",
             ],
@@ -647,8 +642,6 @@
             srcs: [
                 "upstream-openbsd/lib/libc/string/memrchr.c",
                 "upstream-openbsd/lib/libc/string/stpncpy.c",
-                "upstream-openbsd/lib/libc/string/strlcat.c",
-                "upstream-openbsd/lib/libc/string/strlcpy.c",
             ],
         },
         x86: {
@@ -658,10 +651,7 @@
         },
         x86_64: {
             srcs: [
-                "upstream-openbsd/lib/libc/string/memchr.c",
-                "upstream-openbsd/lib/libc/string/memrchr.c",
-                "upstream-openbsd/lib/libc/string/strlcat.c",
-                "upstream-openbsd/lib/libc/string/strlcpy.c",
+                // x86_64 has custom/llvm-libc implementations of all of these.
             ],
         },
     },
@@ -1185,8 +1175,6 @@
                 "arch-x86/string/ssse3-memcmp-atom.S",
                 "arch-x86/string/ssse3-strcat-atom.S",
                 "arch-x86/string/ssse3-strcmp-atom.S",
-                "arch-x86/string/ssse3-strlcat-atom.S",
-                "arch-x86/string/ssse3-strlcpy-atom.S",
                 "arch-x86/string/ssse3-strncat-atom.S",
                 "arch-x86/string/ssse3-strncmp-atom.S",
 
@@ -1217,11 +1205,6 @@
                 "arch-x86_64/string/sse4-memcmp-slm.S",
                 "arch-x86_64/string/ssse3-strcmp-slm.S",
                 "arch-x86_64/string/ssse3-strncmp-slm.S",
-
-                "bionic/strchr.cpp",
-                "bionic/strchrnul.cpp",
-                "bionic/strnlen.cpp",
-                "bionic/strrchr.cpp",
             ],
         },
     },
@@ -1240,6 +1223,7 @@
     generated_headers: ["generated_android_ids"],
 
     whole_static_libs: [
+        "//external/llvm-libc:llvmlibc",
         "libsystemproperties",
     ],
 
@@ -2167,6 +2151,7 @@
         },
     },
     whole_static_libs: [
+        "//external/llvm-libc:llvmlibc",
         "libarm-optimized-routines-mem",
         "libc_netbsd",
     ],
diff --git a/libc/arch-x86/string/ssse3-strlcat-atom.S b/libc/arch-x86/string/ssse3-strlcat-atom.S
deleted file mode 100644
index daaf254..0000000
--- a/libc/arch-x86/string/ssse3-strlcat-atom.S
+++ /dev/null
@@ -1,1225 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/* Optimized strlcat with SSSE3 */
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl	REG;	CFI_PUSH (REG)
-#define POP(REG)	popl	REG;	CFI_POP (REG)
-#define L(label)	.L##Prolog_##label
-
-#define DST	4
-#define SRC	DST+8
-#define LEN	SRC+4
-
-	.text
-ENTRY (strlcat)
-	mov	DST(%esp), %edx
-	PUSH	(%ebx)
-	mov	LEN(%esp), %ebx
-	sub	$4, %ebx
-	jbe	L(len_less4_prolog)
-
-#define RETURN	jmp	L(StrcpyStep)
-#define edi	ebx
-
-#define USE_AS_STRNLEN
-#define USE_AS_STRCAT
-#define USE_AS_STRLCAT
-
-#include "sse2-strlen-atom.S"
-
-	.p2align 4
-L(StrcpyStep):
-
-#undef edi
-#undef L
-#define L(label) .L##label
-#undef RETURN
-#define RETURN	POP (%ebx); ret; CFI_PUSH (%ebx);
-#define RETURN1	POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
-
-        movl	SRC(%esp), %ecx
-	movl	LEN(%esp), %ebx
-
-	cmp	%eax, %ebx
-	je	L(CalculateLengthOfSrcProlog)
-	sub	%eax, %ebx
-
-	test	%ebx, %ebx
-	jz	L(CalculateLengthOfSrcProlog)
-
-	mov	DST + 4(%esp), %edx
-
-	PUSH	(%edi)
-	add	%eax, %edx
-	mov	%ecx, %edi
-	sub	%eax, %edi
-
-	cmp	$8, %ebx
-	jbe	L(StrncpyExit8Bytes)
-
-	cmpb	$0, (%ecx)
-	jz	L(Exit1)
-	cmpb	$0, 1(%ecx)
-	jz	L(Exit2)
-	cmpb	$0, 2(%ecx)
-	jz	L(Exit3)
-	cmpb	$0, 3(%ecx)
-	jz	L(Exit4)
-	cmpb	$0, 4(%ecx)
-	jz	L(Exit5)
-	cmpb	$0, 5(%ecx)
-	jz	L(Exit6)
-	cmpb	$0, 6(%ecx)
-	jz	L(Exit7)
-	cmpb	$0, 7(%ecx)
-	jz	L(Exit8)
-	cmp	$16, %ebx
-	jb	L(StrncpyExit15Bytes)
-	cmpb	$0, 8(%ecx)
-	jz	L(Exit9)
-	cmpb	$0, 9(%ecx)
-	jz	L(Exit10)
-	cmpb	$0, 10(%ecx)
-	jz	L(Exit11)
-	cmpb	$0, 11(%ecx)
-	jz	L(Exit12)
-	cmpb	$0, 12(%ecx)
-	jz	L(Exit13)
-	cmpb	$0, 13(%ecx)
-	jz	L(Exit14)
-	cmpb	$0, 14(%ecx)
-	jz	L(Exit15)
-	cmpb	$0, 15(%ecx)
-	jz	L(Exit16)
-	cmp	$16, %ebx
-	je	L(StrlcpyExit16)
-
-#define USE_AS_STRNCPY
-#include "ssse3-strcpy-atom.S"
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al
-	jz	L(ExitHigh8)
-
-L(CopyFrom1To16BytesLess8):
-	mov	%al, %ah
-	and	$15, %ah
-	jz	L(ExitHigh4)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-
-	lea	3(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh4):
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	lea	7(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh8):
-	mov	%ah, %al
-	and	$15, %al
-	jz	L(ExitHigh12)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	lea	11(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh12):
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-L(Exit16):
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-
-	lea	15(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%esi, %ecx
-	add	%esi, %edx
-
-	POP	(%esi)
-
-	test	%al, %al
-	jz	L(ExitHighCase2)
-
-	cmp	$8, %ebx
-	ja	L(CopyFrom1To16BytesLess8)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(StrlcpyExit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(StrlcpyExit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(StrlcpyExit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	cmp	$4, %ebx
-	je	L(StrlcpyExit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(StrlcpyExit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(StrlcpyExit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(StrlcpyExit7)
-	test	$0x80, %al
-	jnz	L(Exit8)
-	jmp	L(StrlcpyExit8)
-
-	.p2align 4
-L(ExitHighCase2):
-	cmp	$8, %ebx
-	jbe	L(CopyFrom1To16BytesLess8Case3)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	cmp	$9, %ebx
-	je	L(StrlcpyExit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(StrlcpyExit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(StrlcpyExit11)
-	test	$0x8, %ah
-	jnz	L(Exit12)
-	cmp	$12, %ebx
-	je	L(StrlcpyExit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(StrlcpyExit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(StrlcpyExit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	cmp	$15, %ebx
-	je	L(StrlcpyExit15)
-	test	$0x80, %ah
-	jnz	L(Exit16)
-	jmp	L(StrlcpyExit16)
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-
-	cmp	$8, %ebx
-	ja	L(ExitHigh8Case3)
-
-L(CopyFrom1To16BytesLess8Case3):
-	cmp	$4, %ebx
-	ja	L(ExitHigh4Case3)
-
-	cmp	$1, %ebx
-	je	L(StrlcpyExit1)
-	cmp	$2, %ebx
-	je	L(StrlcpyExit2)
-	cmp	$3, %ebx
-	je	L(StrlcpyExit3)
-L(StrlcpyExit4):
-	movb	%bh, 3(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	lea	4(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh4Case3):
-	cmp	$5, %ebx
-	je	L(StrlcpyExit5)
-	cmp	$6, %ebx
-	je	L(StrlcpyExit6)
-	cmp	$7, %ebx
-	je	L(StrlcpyExit7)
-L(StrlcpyExit8):
-	movb	%bh, 7(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	lea	8(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh8Case3):
-	cmp	$12, %ebx
-	ja	L(ExitHigh12Case3)
-
-	cmp	$9, %ebx
-	je	L(StrlcpyExit9)
-	cmp	$10, %ebx
-	je	L(StrlcpyExit10)
-	cmp	$11, %ebx
-	je	L(StrlcpyExit11)
-L(StrlcpyExit12):
-	movb	%bh, 11(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	lea	12(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh12Case3):
-	cmp	$13, %ebx
-	je	L(StrlcpyExit13)
-	cmp	$14, %ebx
-	je	L(StrlcpyExit14)
-	cmp	$15, %ebx
-	je	L(StrlcpyExit15)
-L(StrlcpyExit16):
-	movb	%bh, 15(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	lea	16(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(StrlcpyExit1):
-	movb	%bh, (%edx)
-
-	lea	1(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	mov	%ecx, %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit2):
-	movb	%bh, 1(%edx)
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	lea	2(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movl	%edi, %eax
-
-	lea	1(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit3):
-	movb	%bh, 2(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-
-	lea	3(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	lea	2(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit5):
-	movb	%bh, 4(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edi, %eax
-
-	lea	5(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	lea	4(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit6):
-	movb	%bh, 5(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	lea	6(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	lea	5(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit7):
-	movb	%bh, 6(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	lea	7(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	lea	6(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit9):
-	movb	%bh, 8(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	lea	9(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit9):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	lea	8(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit10):
-	movb	%bh, 9(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	lea	10(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit10):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	lea	9(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit11):
-	movb	%bh, 10(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	lea	11(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit11):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	lea	10(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit13):
-	movb	%bh, 12(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	lea	13(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	lea	12(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit14):
-	movb	%bh, 13(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	lea	14(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	lea	13(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit15):
-	movb	%bh, 14(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	lea	15(%ecx), %edx
-	mov	%edi, %ecx
-	POP	(%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	lea	14(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrncpyExit15Bytes):
-	cmp	$12, %ebx
-	ja	L(StrncpyExit15Bytes1)
-
-	cmpb	$0, 8(%ecx)
-	jz	L(Exit9)
-	cmp	$9, %ebx
-	je	L(StrlcpyExit9)
-
-	cmpb	$0, 9(%ecx)
-	jz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(StrlcpyExit10)
-
-	cmpb	$0, 10(%ecx)
-	jz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(StrlcpyExit11)
-
-	cmpb	$0, 11(%ecx)
-	jz	L(Exit12)
-	jmp	L(StrlcpyExit12)
-
-	.p2align 4
-L(StrncpyExit15Bytes1):
-	cmpb	$0, 8(%ecx)
-	jz	L(Exit9)
-	cmpb	$0, 9(%ecx)
-	jz	L(Exit10)
-	cmpb	$0, 10(%ecx)
-	jz	L(Exit11)
-	cmpb	$0, 11(%ecx)
-	jz	L(Exit12)
-
-	cmpb	$0, 12(%ecx)
-	jz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(StrlcpyExit13)
-
-	cmpb	$0, 13(%ecx)
-	jz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(StrlcpyExit14)
-
-	cmpb	$0, 14(%ecx)
-	jz	L(Exit15)
-	jmp	L(StrlcpyExit15)
-
-	.p2align 4
-L(StrncpyExit8Bytes):
-	cmp	$4, %ebx
-	ja	L(StrncpyExit8Bytes1)
-
-	cmpb	$0, (%ecx)
-	jz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(StrlcpyExit1)
-
-	cmpb	$0, 1(%ecx)
-	jz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(StrlcpyExit2)
-
-	cmpb	$0, 2(%ecx)
-	jz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(StrlcpyExit3)
-
-	cmpb	$0, 3(%ecx)
-	jz	L(Exit4)
-	jmp	L(StrlcpyExit4)
-
-	.p2align 4
-L(StrncpyExit8Bytes1):
-	cmpb	$0, (%ecx)
-	jz	L(Exit1)
-	cmpb	$0, 1(%ecx)
-	jz	L(Exit2)
-	cmpb	$0, 2(%ecx)
-	jz	L(Exit3)
-	cmpb	$0, 3(%ecx)
-	jz	L(Exit4)
-
-	cmpb	$0, 4(%ecx)
-	jz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(StrlcpyExit5)
-
-	cmpb	$0, 5(%ecx)
-	jz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(StrlcpyExit6)
-
-	cmpb	$0, 6(%ecx)
-	jz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(StrlcpyExit7)
-
-	cmpb	$0, 7(%ecx)
-	jz	L(Exit8)
-	jmp	L(StrlcpyExit8)
-
-	CFI_POP	(%edi)
-
-
-	.p2align 4
-L(Prolog_return_start_len):
-	movl	LEN(%esp), %ebx
-        movl	SRC(%esp), %ecx
-L(CalculateLengthOfSrcProlog):
-	mov	%ecx, %edx
-	sub	%ebx, %ecx
-
-	.p2align 4
-L(CalculateLengthOfSrc):
-	cmpb	$0, (%edx)
-	jz	L(exit_tail0)
-	cmpb	$0, 1(%edx)
-	jz	L(exit_tail1)
-	cmpb	$0, 2(%edx)
-	jz	L(exit_tail2)
-	cmpb	$0, 3(%edx)
-	jz	L(exit_tail3)
-
-	cmpb	$0, 4(%edx)
-	jz	L(exit_tail4)
-	cmpb	$0, 5(%edx)
-	jz	L(exit_tail5)
-	cmpb	$0, 6(%edx)
-	jz	L(exit_tail6)
-	cmpb	$0, 7(%edx)
-	jz	L(exit_tail7)
-
-	cmpb	$0, 8(%edx)
-	jz	L(exit_tail8)
-	cmpb	$0, 9(%edx)
-	jz	L(exit_tail9)
-	cmpb	$0, 10(%edx)
-	jz	L(exit_tail10)
-	cmpb	$0, 11(%edx)
-	jz	L(exit_tail11)
-
-	cmpb	$0, 12(%edx)
-	jz	L(exit_tail12)
-	cmpb	$0, 13(%edx)
-	jz	L(exit_tail13)
-	cmpb	$0, 14(%edx)
-	jz	L(exit_tail14)
-	cmpb	$0, 15(%edx)
-	jz	L(exit_tail15)
-
-	pxor	%xmm0, %xmm0
-	lea	16(%edx), %eax
-	add	$16, %ecx
-	and	$-16, %eax
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	pxor	%xmm2, %xmm2
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	pxor	%xmm3, %xmm3
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	and	$-0x40, %eax
-
-	.p2align 4
-L(aligned_64_loop):
-	movaps	(%eax), %xmm0
-	movaps	16(%eax), %xmm1
-	movaps	32(%eax), %xmm2
-	movaps	48(%eax), %xmm6
-	pminub	%xmm1, %xmm0
-	pminub	%xmm6, %xmm2
-	pminub	%xmm0, %xmm2
-	pcmpeqb	%xmm3, %xmm2
-	pmovmskb %xmm2, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(aligned_64_loop)
-
-	pcmpeqb	-64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	48(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	-32(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	%xmm6, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-
-	.p2align 4
-L(exit):
-	sub	%ecx, %eax
-	test	%dl, %dl
-	jz	L(exit_more_8)
-
-	mov	%dl, %cl
-	and	$15, %cl
-	jz	L(exit_more_4)
-	test	$0x01, %dl
-	jnz	L(exit_0)
-	test	$0x02, %dl
-	jnz	L(exit_1)
-	test	$0x04, %dl
-	jnz	L(exit_2)
-	add	$3, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_4):
-	test	$0x10, %dl
-	jnz	L(exit_4)
-	test	$0x20, %dl
-	jnz	L(exit_5)
-	test	$0x40, %dl
-	jnz	L(exit_6)
-	add	$7, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_8):
-	mov	%dh, %ch
-	and	$15, %ch
-	jz	L(exit_more_12)
-	test	$0x01, %dh
-	jnz	L(exit_8)
-	test	$0x02, %dh
-	jnz	L(exit_9)
-	test	$0x04, %dh
-	jnz	L(exit_10)
-	add	$11, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_12):
-	test	$0x10, %dh
-	jnz	L(exit_12)
-	test	$0x20, %dh
-	jnz	L(exit_13)
-	test	$0x40, %dh
-	jnz	L(exit_14)
-	add	$15, %eax
-L(exit_0):
-	RETURN
-
-	.p2align 4
-L(exit_1):
-	add	$1, %eax
-	RETURN
-
-L(exit_2):
-	add	$2, %eax
-	RETURN
-
-L(exit_3):
-	add	$3, %eax
-	RETURN
-
-L(exit_4):
-	add	$4, %eax
-	RETURN
-
-L(exit_5):
-	add	$5, %eax
-	RETURN
-
-L(exit_6):
-	add	$6, %eax
-	RETURN
-
-L(exit_7):
-	add	$7, %eax
-	RETURN
-
-L(exit_8):
-	add	$8, %eax
-	RETURN
-
-L(exit_9):
-	add	$9, %eax
-	RETURN
-
-L(exit_10):
-	add	$10, %eax
-	RETURN
-
-L(exit_11):
-	add	$11, %eax
-	RETURN
-
-L(exit_12):
-	add	$12, %eax
-	RETURN
-
-L(exit_13):
-	add	$13, %eax
-	RETURN
-
-L(exit_14):
-	add	$14, %eax
-	RETURN
-
-L(exit_15):
-	add	$15, %eax
-	RETURN
-
-L(exit_tail0):
-	mov	%edx, %eax
-	sub	%ecx, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail1):
-	lea	1(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail2):
-	lea	2(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail3):
-	lea	3(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail4):
-	lea	4(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail5):
-	lea	5(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail6):
-	lea	6(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail7):
-	lea	7(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail8):
-	lea	8(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail9):
-	lea	9(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail10):
-	lea	10(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail11):
-	lea	11(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail12):
-	lea	12(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail13):
-	lea	13(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail14):
-	lea	14(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail15):
-	lea	15(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-END (strlcat)
diff --git a/libc/arch-x86/string/ssse3-strlcpy-atom.S b/libc/arch-x86/string/ssse3-strlcpy-atom.S
deleted file mode 100644
index cdb17cc..0000000
--- a/libc/arch-x86/string/ssse3-strlcpy-atom.S
+++ /dev/null
@@ -1,1403 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define USE_AS_STRNCPY
-#define STRCPY strlcpy
-#define STRLEN strlcpy
-#define USE_AS_STRLCPY
-#include "ssse3-strcpy-atom.S"
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al
-	jz	L(ExitHigh8)
-
-L(CopyFrom1To16BytesLess8):
-	mov	%al, %ah
-	and	$15, %ah
-	jz	L(ExitHigh4)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-
-	lea	3(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh4):
-	test	$0x10, %al
-	jnz	L(Exit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	lea	7(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh8):
-	mov	%ah, %al
-	and	$15, %al
-	jz	L(ExitHigh12)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	lea	11(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(ExitHigh12):
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-L(Exit16):
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-
-	lea	15(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2):
-	add	$16, %ebx
-	add	%esi, %ecx
-        add     %esi, %edx
-
-	POP	(%esi)
-
-        test    %al, %al
-        jz      L(ExitHighCase2)
-
-        cmp     $8, %ebx
-        ja      L(CopyFrom1To16BytesLess8)
-
-	test	$0x01, %al
-	jnz	L(Exit1)
-	cmp	$1, %ebx
-	je	L(StrlcpyExit1)
-	test	$0x02, %al
-	jnz	L(Exit2)
-	cmp	$2, %ebx
-	je	L(StrlcpyExit2)
-	test	$0x04, %al
-	jnz	L(Exit3)
-	cmp	$3, %ebx
-	je	L(StrlcpyExit3)
-	test	$0x08, %al
-	jnz	L(Exit4)
-	cmp	$4, %ebx
-	je	L(StrlcpyExit4)
-	test	$0x10, %al
-	jnz	L(Exit5)
-	cmp	$5, %ebx
-	je	L(StrlcpyExit5)
-	test	$0x20, %al
-	jnz	L(Exit6)
-	cmp	$6, %ebx
-	je	L(StrlcpyExit6)
-	test	$0x40, %al
-	jnz	L(Exit7)
-	cmp	$7, %ebx
-	je	L(StrlcpyExit7)
-	test	$0x80, %al
-	jnz	L(Exit8)
-	jmp	L(StrlcpyExit8)
-
-	.p2align 4
-L(ExitHighCase2):
-        cmp     $8, %ebx
-        jbe      L(CopyFrom1To16BytesLess8Case3)
-
-	test	$0x01, %ah
-	jnz	L(Exit9)
-	cmp	$9, %ebx
-	je	L(StrlcpyExit9)
-	test	$0x02, %ah
-	jnz	L(Exit10)
-	cmp	$10, %ebx
-	je	L(StrlcpyExit10)
-	test	$0x04, %ah
-	jnz	L(Exit11)
-	cmp	$11, %ebx
-	je	L(StrlcpyExit11)
-	test	$0x8, %ah
-	jnz	L(Exit12)
-	cmp	$12, %ebx
-	je	L(StrlcpyExit12)
-	test	$0x10, %ah
-	jnz	L(Exit13)
-	cmp	$13, %ebx
-	je	L(StrlcpyExit13)
-	test	$0x20, %ah
-	jnz	L(Exit14)
-	cmp	$14, %ebx
-	je	L(StrlcpyExit14)
-	test	$0x40, %ah
-	jnz	L(Exit15)
-	cmp	$15, %ebx
-	je	L(StrlcpyExit15)
-	test	$0x80, %ah
-	jnz	L(Exit16)
-	jmp	L(StrlcpyExit16)
-
-	CFI_PUSH(%esi)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16BytesCase2)
-
-	.p2align 4
-L(CopyFrom1To16BytesCase3):
-	add	$16, %ebx
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-
-	cmp	$8, %ebx
-	ja	L(ExitHigh8Case3)
-
-L(CopyFrom1To16BytesLess8Case3):
-	cmp	$4, %ebx
-	ja	L(ExitHigh4Case3)
-
-	cmp	$1, %ebx
-	je	L(StrlcpyExit1)
-	cmp	$2, %ebx
-	je	L(StrlcpyExit2)
-	cmp	$3, %ebx
-	je	L(StrlcpyExit3)
-L(StrlcpyExit4):
-	movb	%bh, 3(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	lea	4(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh4Case3):
-	cmp	$5, %ebx
-	je	L(StrlcpyExit5)
-	cmp	$6, %ebx
-	je	L(StrlcpyExit6)
-	cmp	$7, %ebx
-	je	L(StrlcpyExit7)
-L(StrlcpyExit8):
-	movb	%bh, 7(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	lea	8(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh8Case3):
-	cmp	$12, %ebx
-	ja	L(ExitHigh12Case3)
-
-	cmp	$9, %ebx
-	je	L(StrlcpyExit9)
-	cmp	$10, %ebx
-	je	L(StrlcpyExit10)
-	cmp	$11, %ebx
-	je	L(StrlcpyExit11)
-L(StrlcpyExit12):
-	movb	%bh, 11(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	lea	12(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(ExitHigh12Case3):
-	cmp	$13, %ebx
-	je	L(StrlcpyExit13)
-	cmp	$14, %ebx
-	je	L(StrlcpyExit14)
-	cmp	$15, %ebx
-	je	L(StrlcpyExit15)
-L(StrlcpyExit16):
-	movb	%bh, 15(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	lea	16(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(StrlcpyExit1):
-	movb	%bh, (%edx)
-
-	lea	1(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	mov	%ecx, %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit2):
-	movb	%bh, 1(%edx)
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	lea	2(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movl	%edi, %eax
-
-	lea	1(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit3):
-	movb	%bh, 2(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-
-	lea	3(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	lea	2(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit5):
-	movb	%bh, 4(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edi, %eax
-
-	lea	5(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	lea	4(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit6):
-	movb	%bh, 5(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	lea	6(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	lea	5(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit7):
-	movb	%bh, 6(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	lea	7(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	lea	6(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit9):
-	movb	%bh, 8(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	lea	9(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit9):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	lea	8(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit10):
-	movb	%bh, 9(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	lea	10(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit10):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	lea	9(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit11):
-	movb	%bh, 10(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	lea	11(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit11):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	lea	10(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit13):
-	movb	%bh, 12(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	lea	13(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	lea	12(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit14):
-	movb	%bh, 13(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	lea	14(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	lea	13(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-	.p2align 4
-L(StrlcpyExit15):
-	movb	%bh, 14(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	lea	15(%ecx), %edx
-	mov	%edi, %ecx
-        POP     (%edi)
-	jmp	L(CalculateLengthOfSrc)
-        CFI_PUSH     (%edi)
-
-	.p2align 4
-L(Exit15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	lea	14(%ecx), %eax
-	sub	%edi, %eax
-	RETURN1
-
-        CFI_POP (%edi)
-
-	.p2align 4
-L(StrlcpyExit0):
-	movl	$0, %eax
-	RETURN
-
-	.p2align 4
-L(StrncpyExit15Bytes):
-	cmp	$12, %ebx
-	ja	L(StrncpyExit15Bytes1)
-
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmp	$9, %ebx
-	je	L(StrlcpyExitTail9)
-
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmp	$10, %ebx
-	je	L(StrlcpyExitTail10)
-
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmp	$11, %ebx
-	je	L(StrlcpyExitTail11)
-
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-
-	movb	%bh, 11(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	lea	12(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(StrncpyExit15Bytes1):
-	cmpb	$0, 8(%ecx)
-	jz	L(ExitTail9)
-	cmpb	$0, 9(%ecx)
-	jz	L(ExitTail10)
-	cmpb	$0, 10(%ecx)
-	jz	L(ExitTail11)
-	cmpb	$0, 11(%ecx)
-	jz	L(ExitTail12)
-
-	cmpb	$0, 12(%ecx)
-	jz	L(ExitTail13)
-	cmp	$13, %ebx
-	je	L(StrlcpyExitTail13)
-
-	cmpb	$0, 13(%ecx)
-	jz	L(ExitTail14)
-	cmp	$14, %ebx
-	je	L(StrlcpyExitTail14)
-
-	cmpb	$0, 14(%ecx)
-	jz	L(ExitTail15)
-
-	movb	%bh, 14(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	lea	15(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(StrncpyExit8Bytes):
-	cmp	$4, %ebx
-	ja	L(StrncpyExit8Bytes1)
-
-	test	%ebx, %ebx
-	jz	L(StrlcpyExitTail0)
-
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmp	$1, %ebx
-	je	L(StrlcpyExitTail1)
-
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmp	$2, %ebx
-	je	L(StrlcpyExitTail2)
-
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmp	$3, %ebx
-	je	L(StrlcpyExitTail3)
-
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-
-	movb	%bh, 3(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	lea	4(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(StrncpyExit8Bytes1):
-	cmpb	$0, (%ecx)
-	jz	L(ExitTail1)
-	cmpb	$0, 1(%ecx)
-	jz	L(ExitTail2)
-	cmpb	$0, 2(%ecx)
-	jz	L(ExitTail3)
-	cmpb	$0, 3(%ecx)
-	jz	L(ExitTail4)
-
-	cmpb	$0, 4(%ecx)
-	jz	L(ExitTail5)
-	cmp	$5, %ebx
-	je	L(StrlcpyExitTail5)
-
-	cmpb	$0, 5(%ecx)
-	jz	L(ExitTail6)
-	cmp	$6, %ebx
-	je	L(StrlcpyExitTail6)
-
-	cmpb	$0, 6(%ecx)
-	jz	L(ExitTail7)
-	cmp	$7, %ebx
-	je	L(StrlcpyExitTail7)
-
-	cmpb	$0, 7(%ecx)
-	jz	L(ExitTail8)
-
-	movb	%bh, 7(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	lea	8(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(StrlcpyExitTail0):
-	mov	%ecx, %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(StrlcpyExitTail1):
-	movb	%bh, (%edx)
-
-	lea	1(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail1):
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	mov	$0, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail2):
-	movb	%bh, 1(%edx)
-	movb	(%ecx), %al
-	movb	%al, (%edx)
-
-	lea	2(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail2):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movl	%edx, %eax
-
-	mov	$1, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail3):
-	movb	%bh, 2(%edx)
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-
-	lea	3(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail3):
-	movw	(%ecx), %ax
-	movw	%ax, (%edx)
-	movb	2(%ecx), %al
-	movb	%al, 2(%edx)
-
-	mov	$2, %eax
-	RETURN
-
-	.p2align 4
-L(ExitTail4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-
-	mov	$3, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail5):
-	movb	%bh, 4(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edx, %eax
-
-	lea	5(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail5):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	mov	$4, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail6):
-	movb	%bh, 5(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movb	4(%ecx), %al
-	movb	%al, 4(%edx)
-
-	lea	6(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail6):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	mov	$5, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail7):
-	movb	%bh, 6(%edx)
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movw	4(%ecx), %ax
-	movw	%ax, 4(%edx)
-
-	lea	7(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail7):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	3(%ecx), %eax
-	movl	%eax, 3(%edx)
-
-	mov	$6, %eax
-	RETURN
-
-	.p2align 4
-L(ExitTail8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	mov	$7, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail9):
-	movb	%bh, 8(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-
-	lea	9(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail9):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	mov	$8, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail10):
-	movb	%bh, 9(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movb	8(%ecx), %al
-	movb	%al, 8(%edx)
-
-	lea	10(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail10):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	mov	$9, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail11):
-	movb	%bh, 10(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movw	8(%ecx), %ax
-	movw	%ax, 8(%edx)
-
-	lea	11(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail11):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	7(%ecx), %eax
-	movl	%eax, 7(%edx)
-
-	mov	$10, %eax
-	RETURN
-
-	.p2align 4
-L(ExitTail12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	mov	$11, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail13):
-	movb	%bh, 12(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-
-	lea	13(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail13):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	mov	$12, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail14):
-	movb	%bh, 13(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	5(%ecx), %xmm0
-	movlpd	%xmm0, 5(%edx)
-
-	lea	14(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail14):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	6(%ecx), %xmm0
-	movlpd	%xmm0, 6(%edx)
-
-	mov	$13, %eax
-	RETURN
-
-	.p2align 4
-L(ExitTail15):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	mov	$14, %eax
-	RETURN
-
-	.p2align 4
-L(StrlcpyExitTail16):
-	movb	%bh, 15(%edx)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movlpd	7(%ecx), %xmm0
-	movlpd	%xmm0, 7(%edx)
-
-	lea	16(%ecx), %edx
-	jmp	L(CalculateLengthOfSrc)
-
-	.p2align 4
-L(ExitTail16):
-	movlpd	(%ecx), %xmm0
-	movlpd	8(%ecx), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-
-	mov	$15, %eax
-	RETURN
-
-	.p2align 4
-L(CalculateLengthOfSrc):
-	xor	%eax, %eax
-	cmpb	$0, (%edx)
-	jz	L(exit_tail0)
-	cmpb	$0, 1(%edx)
-	jz	L(exit_tail1)
-	cmpb	$0, 2(%edx)
-	jz	L(exit_tail2)
-	cmpb	$0, 3(%edx)
-	jz	L(exit_tail3)
-
-	cmpb	$0, 4(%edx)
-	jz	L(exit_tail4)
-	cmpb	$0, 5(%edx)
-	jz	L(exit_tail5)
-	cmpb	$0, 6(%edx)
-	jz	L(exit_tail6)
-	cmpb	$0, 7(%edx)
-	jz	L(exit_tail7)
-
-	cmpb	$0, 8(%edx)
-	jz	L(exit_tail8)
-	cmpb	$0, 9(%edx)
-	jz	L(exit_tail9)
-	cmpb	$0, 10(%edx)
-	jz	L(exit_tail10)
-	cmpb	$0, 11(%edx)
-	jz	L(exit_tail11)
-
-	cmpb	$0, 12(%edx)
-	jz	L(exit_tail12)
-	cmpb	$0, 13(%edx)
-	jz	L(exit_tail13)
-	cmpb	$0, 14(%edx)
-	jz	L(exit_tail14)
-	cmpb	$0, 15(%edx)
-	jz	L(exit_tail15)
-
-	pxor	%xmm0, %xmm0
-	lea	16(%edx), %eax
-	add	$16, %ecx
-	and	$-16, %eax
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	pxor	%xmm2, %xmm2
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	pxor	%xmm3, %xmm3
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	and	$-0x40, %eax
-
-	.p2align 4
-L(aligned_64_loop):
-	movaps	(%eax), %xmm0
-	movaps	16(%eax), %xmm1
-	movaps	32(%eax), %xmm2
-	movaps	48(%eax), %xmm6
-	pminub	%xmm1, %xmm0
-	pminub	%xmm6, %xmm2
-	pminub	%xmm0, %xmm2
-	pcmpeqb	%xmm3, %xmm2
-	pmovmskb %xmm2, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(aligned_64_loop)
-
-	pcmpeqb	-64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	48(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	-32(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqb	%xmm6, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-
-	.p2align 4
-L(exit):
-	sub	%ecx, %eax
-	test	%dl, %dl
-	jz	L(exit_more_8)
-
-	mov	%dl, %cl
-	and	$15, %cl
-	jz	L(exit_more_4)
-	test	$0x01, %dl
-	jnz	L(exit_0)
-	test	$0x02, %dl
-	jnz	L(exit_1)
-	test	$0x04, %dl
-	jnz	L(exit_2)
-	add	$3, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_4):
-	test	$0x10, %dl
-	jnz	L(exit_4)
-	test	$0x20, %dl
-	jnz	L(exit_5)
-	test	$0x40, %dl
-	jnz	L(exit_6)
-	add	$7, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_8):
-	mov	%dh, %ch
-	and	$15, %ch
-	jz	L(exit_more_12)
-	test	$0x01, %dh
-	jnz	L(exit_8)
-	test	$0x02, %dh
-	jnz	L(exit_9)
-	test	$0x04, %dh
-	jnz	L(exit_10)
-	add	$11, %eax
-	RETURN
-
-	.p2align 4
-L(exit_more_12):
-	test	$0x10, %dh
-	jnz	L(exit_12)
-	test	$0x20, %dh
-	jnz	L(exit_13)
-	test	$0x40, %dh
-	jnz	L(exit_14)
-	add	$15, %eax
-L(exit_0):
-	RETURN
-
-	.p2align 4
-L(exit_1):
-	add	$1, %eax
-	RETURN
-
-L(exit_2):
-	add	$2, %eax
-	RETURN
-
-L(exit_3):
-	add	$3, %eax
-	RETURN
-
-L(exit_4):
-	add	$4, %eax
-	RETURN
-
-L(exit_5):
-	add	$5, %eax
-	RETURN
-
-L(exit_6):
-	add	$6, %eax
-	RETURN
-
-L(exit_7):
-	add	$7, %eax
-	RETURN
-
-L(exit_8):
-	add	$8, %eax
-	RETURN
-
-L(exit_9):
-	add	$9, %eax
-	RETURN
-
-L(exit_10):
-	add	$10, %eax
-	RETURN
-
-L(exit_11):
-	add	$11, %eax
-	RETURN
-
-L(exit_12):
-	add	$12, %eax
-	RETURN
-
-L(exit_13):
-	add	$13, %eax
-	RETURN
-
-L(exit_14):
-	add	$14, %eax
-	RETURN
-
-L(exit_15):
-	add	$15, %eax
-	RETURN
-
-L(exit_tail0):
-	mov	%edx, %eax
-	sub	%ecx, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail1):
-	lea	1(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail2):
-	lea	2(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail3):
-	lea	3(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail4):
-	lea	4(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail5):
-	lea	5(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail6):
-	lea	6(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail7):
-	lea	7(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail8):
-	lea	8(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail9):
-	lea	9(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail10):
-	lea	10(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail11):
-	lea	11(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail12):
-	lea	12(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail13):
-	lea	13(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail14):
-	lea	14(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-L(exit_tail15):
-	lea	15(%edx), %eax
-	sub	%ecx, %eax
-	RETURN
-
-END (STRCPY)
-
diff --git a/libdl/libdl_android.cpp b/libdl/libdl_android.cpp
index 47a164a..f0959eb 100644
--- a/libdl/libdl_android.cpp
+++ b/libdl/libdl_android.cpp
@@ -59,6 +59,9 @@
 __attribute__((__weak__, visibility("default")))
 struct android_namespace_t* __loader_android_get_exported_namespace(const char* name);
 
+__attribute__((__weak__, visibility("default"))) void __loader_android_set_16kb_appcompat_mode(
+    bool enable_app_compat);  // Weak; implemented by the linker (linker/dlfcn.cpp).
+
 // Proxy calls to bionic loader
 __attribute__((__weak__))
 void android_get_LD_LIBRARY_PATH(char* buffer, size_t buffer_size) {
@@ -115,4 +118,8 @@
   return __loader_android_get_exported_namespace(name);
 }
 
+__attribute__((__weak__)) void android_set_16kb_appcompat_mode(bool enable_app_compat) {
+  __loader_android_set_16kb_appcompat_mode(enable_app_compat);  // Proxy to the loader.
+}
+
 } // extern "C"
diff --git a/libdl/libdl_android.map.txt b/libdl/libdl_android.map.txt
index 7afcd9c..efbc841 100644
--- a/libdl/libdl_android.map.txt
+++ b/libdl/libdl_android.map.txt
@@ -24,6 +24,7 @@
     android_init_anonymous_namespace; # apex
     android_link_namespaces; # apex
     android_set_application_target_sdk_version; # apex
+    android_set_16kb_appcompat_mode; # apex
   local:
     *;
 };
diff --git a/linker/Android.bp b/linker/Android.bp
index 847a9b2..4863b92 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -500,6 +500,7 @@
         "linker_mapped_file_fragment.cpp",
         "linker_sdk_versions.cpp",
         "linker_dlwarning.cpp",
+        "linker_phdr_16kib_compat.cpp"
     ],
 
     static_libs: [
diff --git a/linker/dlfcn.cpp b/linker/dlfcn.cpp
index 82f2728..f811d6d 100644
--- a/linker/dlfcn.cpp
+++ b/linker/dlfcn.cpp
@@ -89,6 +89,7 @@
                       const void* caller_addr) __LINKER_PUBLIC__;
 void __loader_add_thread_local_dtor(void* dso_handle) __LINKER_PUBLIC__;
 void __loader_remove_thread_local_dtor(void* dso_handle) __LINKER_PUBLIC__;
+void __loader_android_set_16kb_appcompat_mode(bool enable_app_compat) __LINKER_PUBLIC__;
 libc_shared_globals* __loader_shared_globals() __LINKER_PUBLIC__;
 #if defined(__arm__)
 _Unwind_Ptr __loader_dl_unwind_find_exidx(_Unwind_Ptr pc, int* pcount) __LINKER_PUBLIC__;
@@ -301,6 +302,11 @@
   decrement_dso_handle_reference_counter(dso_handle);
 }
 
+void __loader_android_set_16kb_appcompat_mode(bool enable_app_compat) {
+  ScopedPthreadMutexLocker locker(&g_dl_mutex);  // Guard the update with g_dl_mutex.
+  set_16kb_appcompat_mode(enable_app_compat);  // Record the requested mode.
+}
+
 libc_shared_globals* __loader_shared_globals() {
   return __libc_shared_globals();
 }
diff --git a/linker/ld_android.cpp b/linker/ld_android.cpp
index 1c03106..c938a16 100644
--- a/linker/ld_android.cpp
+++ b/linker/ld_android.cpp
@@ -55,6 +55,7 @@
 __strong_alias(__loader_add_thread_local_dtor, __internal_linker_error);
 __strong_alias(__loader_remove_thread_local_dtor, __internal_linker_error);
 __strong_alias(__loader_shared_globals, __internal_linker_error);
+__strong_alias(__loader_android_set_16kb_appcompat_mode, __internal_linker_error);
 #if defined(__arm__)
 __strong_alias(__loader_dl_unwind_find_exidx, __internal_linker_error);
 #endif
diff --git a/linker/linker.arm.map b/linker/linker.arm.map
index b805cd6..edfa249 100644
--- a/linker/linker.arm.map
+++ b/linker/linker.arm.map
@@ -25,6 +25,7 @@
     __loader_shared_globals;
     rtld_db_dlactivity;
     __loader_android_handle_signal;
+    __loader_android_set_16kb_appcompat_mode;
   local:
     *;
 };
diff --git a/linker/linker.generic.map b/linker/linker.generic.map
index 4d7f236..2beae65 100644
--- a/linker/linker.generic.map
+++ b/linker/linker.generic.map
@@ -24,6 +24,7 @@
     __loader_shared_globals;
     rtld_db_dlactivity;
     __loader_android_handle_signal;
+    __loader_android_set_16kb_appcompat_mode;
   local:
     *;
 };
diff --git a/linker/linker.h b/linker/linker.h
index b696fd9..7afa0d7 100644
--- a/linker/linker.h
+++ b/linker/linker.h
@@ -108,6 +108,9 @@
 
 bool get_transparent_hugepages_supported();
 
+void set_16kb_appcompat_mode(bool enable_app_compat);  // Stores the 16KiB app compat flag.
+bool get_16kb_appcompat_mode();  // Returns the stored flag (false if never set).
+
 enum {
   /* A regular namespace is the namespace with a custom search path that does
    * not impose any restrictions on the location of native libraries.
diff --git a/linker/linker_phdr.cpp b/linker/linker_phdr.cpp
index c37066b..14bf208 100644
--- a/linker/linker_phdr.cpp
+++ b/linker/linker_phdr.cpp
@@ -186,7 +186,8 @@
     // It cannot be cached since the developer may toggle app compat on/off.
     // This check will be removed once app compat is made the default on 16KiB devices.
     should_use_16kib_app_compat_ =
-        ::android::base::GetBoolProperty("bionic.linker.16kb.app_compat.enabled", false);
+        ::android::base::GetBoolProperty("bionic.linker.16kb.app_compat.enabled", false) ||
+        get_16kb_appcompat_mode();
   }
 
   return did_read_;
diff --git a/linker/linker_phdr_16kib_compat.cpp b/linker/linker_phdr_16kib_compat.cpp
index a4d8459..bad20ba 100644
--- a/linker/linker_phdr_16kib_compat.cpp
+++ b/linker/linker_phdr_16kib_compat.cpp
@@ -42,10 +42,20 @@
 
 #include <string>
 
+static bool g_enable_16kb_app_compat;  // 16KiB app compat flag; zero-initialized to false.
+
 static inline bool segment_contains_prefix(const ElfW(Phdr)* segment, const ElfW(Phdr)* prefix) {
   return segment && prefix && segment->p_vaddr == prefix->p_vaddr;
 }
 
+void set_16kb_appcompat_mode(bool enable_app_compat) {
+  g_enable_16kb_app_compat = enable_app_compat;  // Overwrites any previously stored value.
+}
+
+bool get_16kb_appcompat_mode() {
+  return g_enable_16kb_app_compat;  // Last value stored by set_16kb_appcompat_mode().
+}
+
 /*
  * Returns true if the ELF contains at most 1 RELRO segment; and populates @relro_phdr
  * with the relro phdr or nullptr if none.
diff --git a/tests/page_size_16kib_compat_test.cpp b/tests/page_size_16kib_compat_test.cpp
index 9aecfba..a5d91b8 100644
--- a/tests/page_size_16kib_compat_test.cpp
+++ b/tests/page_size_16kib_compat_test.cpp
@@ -30,6 +30,8 @@
 
 #include <android-base/properties.h>
 
+extern "C" void android_set_16kb_appcompat_mode(bool enable_app_compat);
+
 TEST(PageSize16KiBCompatTest, ElfAlignment4KiB_LoadElf) {
   if (getpagesize() != 0x4000) {
     GTEST_SKIP() << "This test is only applicable to 16kB page-size devices";
@@ -44,3 +46,17 @@
 
   if (app_compat_enabled) CallTestFunction(handle);
 }
+
+TEST(PageSize16KiBCompatTest, ElfAlignment4KiB_LoadElf_perAppOption) {
+  if (getpagesize() != 0x4000) {  // 0x4000 == 16KiB.
+    GTEST_SKIP() << "This test is only applicable to 16kB page-size devices";
+  }
+
+  android_set_16kb_appcompat_mode(true);  // Enable app compat via the API before loading.
+  std::string lib = GetTestLibRoot() + "/libtest_elf_max_page_size_4kib.so";
+  void* handle = nullptr;
+
+  OpenTestLibrary(lib, false /*should_fail*/, &handle);  // Load is expected to succeed.
+  CallTestFunction(handle);
+  android_set_16kb_appcompat_mode(false);  // Reset the flag to its initial value (false).
+}