Merge "librust_baremetal: Use cc_baremetal_defaults" into main
diff --git a/libc/Android.bp b/libc/Android.bp
index 8e1ddfc..ae6df52 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -418,20 +418,6 @@
         "upstream-freebsd/lib/libc/string/wmemmove.c",
         "upstream-freebsd/lib/libc/string/wmemset.c",
     ],
-    arch: {
-        x86: {
-            exclude_srcs: [
-                "upstream-freebsd/lib/libc/string/wcschr.c",
-                "upstream-freebsd/lib/libc/string/wcscmp.c",
-                "upstream-freebsd/lib/libc/string/wcslen.c",
-                "upstream-freebsd/lib/libc/string/wcsrchr.c",
-                "upstream-freebsd/lib/libc/string/wmemcmp.c",
-                "upstream-freebsd/lib/libc/string/wcscat.c",
-                "upstream-freebsd/lib/libc/string/wcscpy.c",
-                "upstream-freebsd/lib/libc/string/wmemcmp.c",
-            ],
-        },
-    },
 
     cflags: [
         "-Wno-sign-compare",
@@ -1193,10 +1179,6 @@
                 "arch-x86/string/sse2-strncpy-slm.S",
                 "arch-x86/string/sse2-strnlen-atom.S",
                 "arch-x86/string/sse2-strrchr-atom.S",
-                "arch-x86/string/sse2-wcschr-atom.S",
-                "arch-x86/string/sse2-wcsrchr-atom.S",
-                "arch-x86/string/sse2-wcslen-atom.S",
-                "arch-x86/string/sse2-wcscmp-atom.S",
 
                 "arch-x86/string/ssse3-memcmp-atom.S",
                 "arch-x86/string/ssse3-strcat-atom.S",
@@ -1205,12 +1187,8 @@
                 "arch-x86/string/ssse3-strlcpy-atom.S",
                 "arch-x86/string/ssse3-strncat-atom.S",
                 "arch-x86/string/ssse3-strncmp-atom.S",
-                "arch-x86/string/ssse3-wcscat-atom.S",
-                "arch-x86/string/ssse3-wcscpy-atom.S",
-                "arch-x86/string/ssse3-wmemcmp-atom.S",
 
                 "arch-x86/string/sse4-memcmp-slm.S",
-                "arch-x86/string/sse4-wmemcmp-slm.S",
 
                 "bionic/strchrnul.cpp",
             ],
@@ -1783,6 +1761,7 @@
     name: "libc_uapi_headers",
     visibility: [
         "//external/musl",
+        "//external/rust/crates/v4l2r/android",
     ],
     llndk: {
         llndk_headers: true,
@@ -2018,15 +1997,6 @@
 cc_defaults {
     name: "crt_so_defaults",
     defaults: ["crt_defaults"],
-
-    arch: {
-        x86: {
-            cflags: ["-fPIC"],
-        },
-        x86_64: {
-            cflags: ["-fPIC"],
-        },
-    },
     stl: "none",
 }
 
diff --git a/libc/NOTICE b/libc/NOTICE
index 1a84d3c..1fa036a 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -3862,36 +3862,6 @@
 
 -------------------------------------------------------------------
 
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
--------------------------------------------------------------------
-
 Copyright (c) 2011 Martin Pieuchot <mpi@openbsd.org>
 
 Permission to use, copy, modify, and distribute this software for any
@@ -3937,36 +3907,6 @@
 
 -------------------------------------------------------------------
 
-Copyright (c) 2011, 2012, 2013 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
--------------------------------------------------------------------
-
 Copyright (c) 2011, Intel Corporation
 All rights reserved.
 
diff --git a/libc/arch-common/bionic/crt_pad_segment.S b/libc/arch-common/bionic/crt_pad_segment.S
index 86c730d..2fbe0b9 100644
--- a/libc/arch-common/bionic/crt_pad_segment.S
+++ b/libc/arch-common/bionic/crt_pad_segment.S
@@ -26,6 +26,12 @@
  * SUCH DAMAGE.
  */
 
+#if defined(__aarch64__)
+#include <private/bionic_asm_arm64.h>
+
+__bionic_asm_custom_note_gnu_section()
+#endif
+
 #include <private/bionic_asm_note.h>
 
   .section ".note.android.pad_segment", "a", %note
diff --git a/libc/arch-x86/dynamic_function_dispatch.cpp b/libc/arch-x86/dynamic_function_dispatch.cpp
index 98d7ec2..240fcdf 100644
--- a/libc/arch-x86/dynamic_function_dispatch.cpp
+++ b/libc/arch-x86/dynamic_function_dispatch.cpp
@@ -38,14 +38,4 @@
 }
 MEMCMP_SHIM()
 
-typedef int wmemcmp_func_t(const wchar_t*, const wchar_t*, size_t);
-DEFINE_IFUNC_FOR(wmemcmp) {
-  __builtin_cpu_init();
-  if (__builtin_cpu_supports("sse4.1")) RETURN_FUNC(wmemcmp_func_t, wmemcmp_sse4);
-  RETURN_FUNC(wmemcmp_func_t, wmemcmp_atom);
-}
-DEFINE_STATIC_SHIM(int wmemcmp(const wchar_t* lhs, const wchar_t* rhs, size_t n) {
-  FORWARD(wmemcmp)(lhs, rhs, n);
-})
-
 }  // extern "C"
diff --git a/libc/arch-x86/string/sse2-wcschr-atom.S b/libc/arch-x86/string/sse2-wcschr-atom.S
deleted file mode 100644
index 729302b..0000000
--- a/libc/arch-x86/string/sse2-wcschr-atom.S
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name,	.-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#define PARMS	4
-
-
-#define STR1  PARMS
-#define STR2  STR1+4
-
-	.text
-ENTRY (wcschr)
-
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	mov	%ecx, %eax
-	punpckldq %xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	punpckldq %xmm1, %xmm1
-
-	and	$63, %eax
-	cmp	$48, %eax
-	ja	L(cross_cache)
-
-	movdqu	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	and	$-16, %ecx
-	jmp	L(loop)
-
-	.p2align 4
-L(cross_cache):
-	PUSH	(%edi)
-	mov	%ecx, %edi
-	mov	%eax, %ecx
-	and	$-16, %edi
-	and	$15, %ecx
-	movdqa	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-
-	sarl	%cl, %edx
-	sarl	%cl, %eax
-	test	%eax, %eax
-	jz	L(unaligned_no_match)
-
-	add	%edi, %ecx
-	POP	(%edi)
-
-	test	%edx, %edx
-	jz	L(match_case1)
-	test	%al, %al
-	jz	L(match_higth_case2)
-	test	$15, %al
-	jnz	L(match_case2_4)
-	test	$15, %dl
-	jnz	L(return_null)
-	lea	4(%ecx), %eax
-	ret
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(unaligned_no_match):
-	mov	%edi, %ecx
-	POP	(%edi)
-
-	test	%edx, %edx
-	jnz	L(return_null)
-
-	pxor	%xmm2, %xmm2
-
-/* Loop start on aligned string.  */
-	.p2align 4
-L(loop):
-	add	$16, %ecx
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	add	$16, %ecx
-
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	add	$16, %ecx
-
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	add	$16, %ecx
-
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jz	L(loop)
-
-	.p2align 4
-L(matches):
-	pmovmskb %xmm2, %edx
-	test	%eax, %eax
-	jz	L(return_null)
-	test	%edx, %edx
-	jz	L(match_case1)
-
-	.p2align 4
-L(match_case2):
-	test	%al, %al
-	jz	L(match_higth_case2)
-	test	$15, %al
-	jnz	L(match_case2_4)
-	test	$15, %dl
-	jnz	L(return_null)
-	lea	4(%ecx), %eax
-	ret
-
-	.p2align 4
-L(match_case2_4):
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(match_higth_case2):
-	test	%dl, %dl
-	jnz	L(return_null)
-	test	$15, %ah
-	jnz	L(match_case2_12)
-	test	$15, %dh
-	jnz	L(return_null)
-	lea	12(%ecx), %eax
-	ret
-
-	.p2align 4
-L(match_case2_12):
-	lea	8(%ecx), %eax
-	ret
-
-	.p2align 4
-L(match_case1):
-	test	%al, %al
-	jz	L(match_higth_case1)
-
-	test	$0x01, %al
-	jnz	L(exit0)
-	lea	4(%ecx), %eax
-	ret
-
-	.p2align 4
-L(match_higth_case1):
-	test	$0x01, %ah
-	jnz	L(exit3)
-	lea	12(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit0):
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(exit3):
-	lea	8(%ecx), %eax
-	ret
-
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	ret
-
-END (wcschr)
diff --git a/libc/arch-x86/string/sse2-wcscmp-atom.S b/libc/arch-x86/string/sse2-wcscmp-atom.S
deleted file mode 100644
index 8867d28..0000000
--- a/libc/arch-x86/string/sse2-wcscmp-atom.S
+++ /dev/null
@@ -1,1062 +0,0 @@
-/*
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name, @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-
-#define ENTRANCE PUSH(%esi); PUSH(%edi)
-#define RETURN  POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
-#define PARMS  4
-#define STR1  PARMS
-#define STR2  STR1+4
-
-	.text
-ENTRY (wcscmp)
-/*
-	* This implementation uses SSE to compare up to 16 bytes at a time.
-*/
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %eax
-
-	mov	(%eax), %ecx
-	cmp	%ecx, (%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	mov	4(%eax), %ecx
-	cmp	%ecx, 4(%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	mov	8(%eax), %ecx
-	cmp	%ecx, 8(%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	mov	12(%eax), %ecx
-	cmp	%ecx, 12(%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	ENTRANCE
-	add	$16, %eax
-	add	$16, %edx
-
-	mov	%eax, %esi
-	mov	%edx, %edi
-	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
-	mov	%al, %ch
-	mov	%dl, %cl
-	and	$63, %eax		/* esi alignment in cache line */
-	and	$63, %edx		/* edi alignment in cache line */
-	and	$15, %cl
-	jz	L(continue_00)
-	cmp	$16, %edx
-	jb	L(continue_0)
-	cmp	$32, %edx
-	jb	L(continue_16)
-	cmp	$48, %edx
-	jb	L(continue_32)
-
-L(continue_48):
-	and	$15, %ch
-	jz	L(continue_48_00)
-	cmp	$16, %eax
-	jb	L(continue_0_48)
-	cmp	$32, %eax
-	jb	L(continue_16_48)
-	cmp	$48, %eax
-	jb	L(continue_32_48)
-
-	.p2align 4
-L(continue_48_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%edi), %xmm1
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_48_48)
-
-L(continue_0):
-	and	$15, %ch
-	jz	L(continue_0_00)
-	cmp	$16, %eax
-	jb	L(continue_0_0)
-	cmp	$32, %eax
-	jb	L(continue_0_16)
-	cmp	$48, %eax
-	jb	L(continue_0_32)
-
-	.p2align 4
-L(continue_0_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	mov	48(%esi), %ecx
-	cmp	%ecx, 48(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	52(%esi), %ecx
-	cmp	%ecx, 52(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	56(%esi), %ecx
-	cmp	%ecx, 56(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	60(%esi), %ecx
-	cmp	%ecx, 60(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_0_48)
-
-	.p2align 4
-L(continue_00):
-	and	$15, %ch
-	jz	L(continue_00_00)
-	cmp	$16, %eax
-	jb	L(continue_00_0)
-	cmp	$32, %eax
-	jb	L(continue_00_16)
-	cmp	$48, %eax
-	jb	L(continue_00_32)
-
-	.p2align 4
-L(continue_00_48):
-	pcmpeqd	(%edi), %xmm0
-	mov	(%edi), %eax
-	pmovmskb %xmm0, %ecx
-	test	%ecx, %ecx
-	jnz	L(less4_double_words1)
-
-	cmp	(%esi), %eax
-	jne	L(nequal)
-
-	mov	4(%edi), %eax
-	cmp	4(%esi), %eax
-	jne	L(nequal)
-
-	mov	8(%edi), %eax
-	cmp	8(%esi), %eax
-	jne	L(nequal)
-
-	mov	12(%edi), %eax
-	cmp	12(%esi), %eax
-	jne	L(nequal)
-
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	48(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_32):
-	and	$15, %ch
-	jz	L(continue_32_00)
-	cmp	$16, %eax
-	jb	L(continue_0_32)
-	cmp	$32, %eax
-	jb	L(continue_16_32)
-	cmp	$48, %eax
-	jb	L(continue_32_32)
-
-	.p2align 4
-L(continue_32_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	16(%esi), %ecx
-	cmp	%ecx, 16(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	20(%esi), %ecx
-	cmp	%ecx, 20(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	24(%esi), %ecx
-	cmp	%ecx, 24(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	28(%esi), %ecx
-	cmp	%ecx, 28(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%edi), %xmm1
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_32_48)
-
-	.p2align 4
-L(continue_16):
-	and	$15, %ch
-	jz	L(continue_16_00)
-	cmp	$16, %eax
-	jb	L(continue_0_16)
-	cmp	$32, %eax
-	jb	L(continue_16_16)
-	cmp	$48, %eax
-	jb	L(continue_16_32)
-
-	.p2align 4
-L(continue_16_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	mov	32(%esi), %ecx
-	cmp	%ecx, 32(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	36(%esi), %ecx
-	cmp	%ecx, 36(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	40(%esi), %ecx
-	cmp	%ecx, 40(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	44(%esi), %ecx
-	cmp	%ecx, 44(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	48(%edi), %xmm1
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_16_48)
-
-	.p2align 4
-L(continue_00_00):
-	movdqa	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqa	16(%edi), %xmm3
-	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm3		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
-	pmovmskb %xmm3, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqa	32(%edi), %xmm5
-	pcmpeqd	%xmm5, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%esi), %xmm5		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm5		/* packed sub of comparison results*/
-	pmovmskb %xmm5, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqa	48(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	48(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_00_00)
-
-	.p2align 4
-L(continue_00_32):
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_00_16):
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_00_0):
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	add	$48, %esi
-	add	$48, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_48_00):
-	pcmpeqd	(%esi), %xmm0
-	mov	(%edi), %eax
-	pmovmskb %xmm0, %ecx
-	test	%ecx, %ecx
-	jnz	L(less4_double_words1)
-
-	cmp	(%esi), %eax
-	jne	L(nequal)
-
-	mov	4(%edi), %eax
-	cmp	4(%esi), %eax
-	jne	L(nequal)
-
-	mov	8(%edi), %eax
-	cmp	8(%esi), %eax
-	jne	L(nequal)
-
-	mov	12(%edi), %eax
-	cmp	12(%esi), %eax
-	jne	L(nequal)
-
-	movdqu	16(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	48(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_32_00):
-	movdqu	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_16_00):
-	movdqu	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_0_00):
-	movdqu	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	add	$48, %esi
-	add	$48, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_32_32):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_48_48)
-
-	.p2align 4
-L(continue_16_16):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm3
-	movdqu	16(%esi), %xmm4
-	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm4, %xmm3		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
-	pmovmskb %xmm3, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_48_48)
-
-	.p2align 4
-L(continue_0_0):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm3
-	movdqu	16(%esi), %xmm4
-	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm4, %xmm3		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
-	pmovmskb %xmm3, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	add	$48, %esi
-	add	$48, %edi
-	jmp	L(continue_48_48)
-
-	.p2align 4
-L(continue_0_16):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_32_48)
-
-	.p2align 4
-L(continue_0_32):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_16_48)
-
-	.p2align 4
-L(continue_16_32):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_32_48)
-
-	.p2align 4
-L(less4_double_words1):
-	cmp	(%esi), %eax
-	jne	L(nequal)
-	test	%eax, %eax
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(less4_double_words):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words)
-	and	$15, %dl
-	jz	L(second_double_word)
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word):
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words):
-	and	$15, %dh
-	jz	L(fourth_double_word)
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word):
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(less4_double_words_16):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words_16)
-	and	$15, %dl
-	jz	L(second_double_word_16)
-	mov	16(%esi), %ecx
-	cmp	%ecx, 16(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word_16):
-	mov	20(%esi), %ecx
-	cmp	%ecx, 20(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words_16):
-	and	$15, %dh
-	jz	L(fourth_double_word_16)
-	mov	24(%esi), %ecx
-	cmp	%ecx, 24(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word_16):
-	mov	28(%esi), %ecx
-	cmp	%ecx, 28(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(less4_double_words_32):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words_32)
-	and	$15, %dl
-	jz	L(second_double_word_32)
-	mov	32(%esi), %ecx
-	cmp	%ecx, 32(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word_32):
-	mov	36(%esi), %ecx
-	cmp	%ecx, 36(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words_32):
-	and	$15, %dh
-	jz	L(fourth_double_word_32)
-	mov	40(%esi), %ecx
-	cmp	%ecx, 40(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word_32):
-	mov	44(%esi), %ecx
-	cmp	%ecx, 44(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(less4_double_words_48):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words_48)
-	and	$15, %dl
-	jz	L(second_double_word_48)
-	mov	48(%esi), %ecx
-	cmp	%ecx, 48(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word_48):
-	mov	52(%esi), %ecx
-	cmp	%ecx, 52(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words_48):
-	and	$15, %dh
-	jz	L(fourth_double_word_48)
-	mov	56(%esi), %ecx
-	cmp	%ecx, 56(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word_48):
-	mov	60(%esi), %ecx
-	cmp	%ecx, 60(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(nequal):
-	mov	$1, %eax
-	jg	L(return)
-	neg	%eax
-	RETURN
-
-	.p2align 4
-L(return):
-	RETURN
-
-	.p2align 4
-L(equal):
-	xorl	%eax, %eax
-	RETURN
-
-	CFI_POP (%edi)
-	CFI_POP (%esi)
-
-	.p2align 4
-L(neq):
-	mov	$1, %eax
-	jg	L(neq_bigger)
-	neg	%eax
-
-L(neq_bigger):
-	ret
-
-	.p2align 4
-L(eq):
-	xorl	%eax, %eax
-	ret
-
-END (wcscmp)
-
diff --git a/libc/arch-x86/string/sse2-wcslen-atom.S b/libc/arch-x86/string/sse2-wcslen-atom.S
deleted file mode 100644
index 2f10db4..0000000
--- a/libc/arch-x86/string/sse2-wcslen-atom.S
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef USE_AS_WCSCAT
-
-# ifndef L
-#  define L(label)	.L##label
-# endif
-
-# ifndef cfi_startproc
-#  define cfi_startproc	.cfi_startproc
-# endif
-
-# ifndef cfi_endproc
-#  define cfi_endproc	.cfi_endproc
-# endif
-
-# ifndef ENTRY
-#  define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-# endif
-
-# ifndef END
-#  define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-# endif
-
-# define PARMS	4
-# define STR	PARMS
-# define RETURN ret
-
-	.text
-ENTRY (wcslen)
-	mov	STR(%esp), %edx
-#endif
-	cmpl	$0, (%edx)
-	jz	L(exit_tail0)
-	cmpl	$0, 4(%edx)
-	jz	L(exit_tail1)
-	cmpl	$0, 8(%edx)
-	jz	L(exit_tail2)
-	cmpl	$0, 12(%edx)
-	jz	L(exit_tail3)
-	cmpl	$0, 16(%edx)
-	jz	L(exit_tail4)
-	cmpl	$0, 20(%edx)
-	jz	L(exit_tail5)
-	cmpl	$0, 24(%edx)
-	jz	L(exit_tail6)
-	cmpl	$0, 28(%edx)
-	jz	L(exit_tail7)
-
-	pxor	%xmm0, %xmm0
-
-	lea	32(%edx), %eax
-	lea	-16(%eax), %ecx
-	and	$-16, %eax
-
-	pcmpeqd	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	pxor	%xmm2, %xmm2
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	pxor	%xmm3, %xmm3
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	and	$-0x40, %eax
-
-	.p2align 4
-L(aligned_64_loop):
-	movaps	(%eax), %xmm0
-	movaps	16(%eax), %xmm1
-	movaps	32(%eax), %xmm2
-	movaps	48(%eax), %xmm6
-
-	pminub	%xmm1, %xmm0
-	pminub	%xmm6, %xmm2
-	pminub	%xmm0, %xmm2
-	pcmpeqd	%xmm3, %xmm2
-	pmovmskb %xmm2, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(aligned_64_loop)
-
-	pcmpeqd	-64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	48(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	-32(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	%xmm6, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	jmp	L(aligned_64_loop)
-
-	.p2align 4
-L(exit):
-	sub	%ecx, %eax
-	shr	$2, %eax
-	test	%dl, %dl
-	jz	L(exit_high)
-
-	mov	%dl, %cl
-	and	$15, %cl
-	jz	L(exit_1)
-	RETURN
-
-	.p2align 4
-L(exit_high):
-	mov	%dh, %ch
-	and	$15, %ch
-	jz	L(exit_3)
-	add	$2, %eax
-	RETURN
-
-	.p2align 4
-L(exit_1):
-	add	$1, %eax
-	RETURN
-
-	.p2align 4
-L(exit_3):
-	add	$3, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail0):
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail1):
-	mov	$1, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail2):
-	mov	$2, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail3):
-	mov	$3, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail4):
-	mov	$4, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail5):
-	mov	$5, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail6):
-	mov	$6, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail7):
-	mov	$7, %eax
-#ifndef USE_AS_WCSCAT
-	RETURN
-
-END (wcslen)
-#endif
diff --git a/libc/arch-x86/string/sse2-wcsrchr-atom.S b/libc/arch-x86/string/sse2-wcsrchr-atom.S
deleted file mode 100644
index 1a55df2..0000000
--- a/libc/arch-x86/string/sse2-wcsrchr-atom.S
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name, @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
-#define POP(REG)	popl REG;	CFI_POP (REG)
-
-#define PARMS  8
-#define ENTRANCE PUSH(%edi);
-#define RETURN  POP(%edi);	ret;	CFI_PUSH(%edi);
-
-#define STR1  PARMS
-#define STR2  STR1+4
-
-	.text
-ENTRY (wcsrchr)
-
-	ENTRANCE
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	mov	%ecx, %edi
-	punpckldq %xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	punpckldq %xmm1, %xmm1
-
-/* ECX has OFFSET. */
-	and	$63, %ecx
-	cmp	$48, %ecx
-	ja	L(crosscache)
-
-/* unaligned string. */
-	movdqu	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-/* Find where NULL is.  */
-	pmovmskb %xmm2, %ecx
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	add	$16, %edi
-
-	test	%eax, %eax
-	jnz	L(unaligned_match1)
-
-	test	%ecx, %ecx
-	jnz	L(return_null)
-
-	and	$-16, %edi
-
-	PUSH	(%esi)
-
-	xor	%edx, %edx
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-
-	.p2align 4
-L(unaligned_match1):
-	test	%ecx, %ecx
-	jnz	L(prolog_find_zero_1)
-
-	PUSH	(%esi)
-
-/* Save current match */
-	mov	%eax, %edx
-	mov	%edi, %esi
-	and	$-16, %edi
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-
-	.p2align 4
-L(crosscache):
-/* Hancle unaligned string.  */
-	and	$15, %ecx
-	and	$-16, %edi
-	pxor	%xmm3, %xmm3
-	movdqa	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm3
-	pcmpeqd	%xmm1, %xmm0
-/* Find where NULL is.  */
-	pmovmskb %xmm3, %edx
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-/* Remove the leading bytes.  */
-	shr	%cl, %edx
-	shr	%cl, %eax
-	add	$16, %edi
-
-	test	%eax, %eax
-	jnz	L(unaligned_match)
-
-	test	%edx, %edx
-	jnz	L(return_null)
-
-	PUSH	(%esi)
-
-	xor	%edx, %edx
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-
-	.p2align 4
-L(unaligned_match):
-	test	%edx, %edx
-	jnz	L(prolog_find_zero)
-
-	PUSH	(%esi)
-
-	mov	%eax, %edx
-	lea	(%edi, %ecx), %esi
-
-/* Loop start on aligned string.  */
-	.p2align 4
-L(loop):
-	movdqa	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm3
-	pcmpeqd	%xmm3, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm3
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm3, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm4
-	pcmpeqd	%xmm4, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm4
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm4, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm5
-	pcmpeqd	%xmm5, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm5
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm5, %eax
-	or	%eax, %ecx
-	jz	L(loop)
-
-	.p2align 4
-L(matches):
-	test	%eax, %eax
-	jnz	L(match)
-L(return_value):
-	test	%edx, %edx
-	jz	L(return_null_1)
-	mov	%edx, %eax
-	mov	%esi, %edi
-
-	POP	(%esi)
-
-	test	%ah, %ah
-	jnz	L(match_third_or_fourth_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(return_null_1):
-	POP	(%esi)
-
-	xor	%eax, %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(match):
-	pmovmskb %xmm2, %ecx
-	test	%ecx, %ecx
-	jnz	L(find_zero)
-/* save match info */
-	mov	%eax, %edx
-	mov	%edi, %esi
-	jmp	L(loop)
-
-	.p2align 4
-L(find_zero):
-	test	%cl, %cl
-	jz	L(find_zero_in_third_or_fourth_wchar)
-	test	$15, %cl
-	jz	L(find_zero_in_second_wchar)
-	and	$1, %eax
-	jz	L(return_value)
-
-	POP	(%esi)
-
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_in_second_wchar):
-	and	$(1 << 5) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%esi)
-
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_in_third_or_fourth_wchar):
-	test	$15, %ch
-	jz	L(find_zero_in_fourth_wchar)
-	and	$(1 << 9) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%esi)
-
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_in_fourth_wchar):
-
-	POP	(%esi)
-
-	test	%ah, %ah
-	jnz	L(match_third_or_fourth_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(match_second_wchar):
-	lea	-12(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_third_or_fourth_wchar):
-	test	$15 << 4, %ah
-	jnz	L(match_fourth_wchar)
-	lea	-8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_third_wchar):
-	lea	-8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_fourth_wchar):
-	lea	-4(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero):
-	add	%ecx, %edi
-	mov     %edx, %ecx
-L(prolog_find_zero_1):
-	test	%cl, %cl
-	jz	L(prolog_find_zero_in_third_or_fourth_wchar)
-	test	$15, %cl
-	jz	L(prolog_find_zero_in_second_wchar)
-	and	$1, %eax
-	jz	L(return_null)
-
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_in_second_wchar):
-	and	$(1 << 5) - 1, %eax
-	jz	L(return_null)
-
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_in_third_or_fourth_wchar):
-	test	$15, %ch
-	jz	L(prolog_find_zero_in_fourth_wchar)
-	and	$(1 << 9) - 1, %eax
-	jz	L(return_null)
-
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_in_fourth_wchar):
-	test	%ah, %ah
-	jnz	L(match_third_or_fourth_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-END (wcsrchr)
diff --git a/libc/arch-x86/string/sse4-wmemcmp-slm.S b/libc/arch-x86/string/sse4-wmemcmp-slm.S
deleted file mode 100644
index 2bf92f5..0000000
--- a/libc/arch-x86/string/sse4-wmemcmp-slm.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
-Copyright (c) 2014, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define USE_AS_WMEMCMP
-#define MEMCMP wmemcmp_sse4
-#include "sse4-memcmp-slm.S"
diff --git a/libc/arch-x86/string/ssse3-wcscat-atom.S b/libc/arch-x86/string/ssse3-wcscat-atom.S
deleted file mode 100644
index 8a389a3..0000000
--- a/libc/arch-x86/string/ssse3-wcscat-atom.S
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc                  .cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc                    .cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
-#define POP(REG)	popl REG;	CFI_POP (REG)
-
-#define PARMS  4
-#define STR1  PARMS+4
-#define STR2  STR1+4
-
-#define USE_AS_WCSCAT
-
-.text
-ENTRY (wcscat)
-	PUSH    (%edi)
-	mov	STR1(%esp), %edi
-	mov	%edi, %edx
-
-#define RETURN  jmp L(WcscpyAtom)
-#include "sse2-wcslen-atom.S"
-
-L(WcscpyAtom):
-	shl	$2, %eax
-	mov	STR2(%esp), %ecx
-	lea	(%edi, %eax), %edx
-
-	cmpl	$0, (%ecx)
-	jz	L(Exit4)
-	cmpl	$0, 4(%ecx)
-	jz	L(Exit8)
-	cmpl	$0, 8(%ecx)
-	jz	L(Exit12)
-	cmpl	$0, 12(%ecx)
-	jz	L(Exit16)
-
-#undef RETURN
-#define RETURN  POP(%edi);	ret;	CFI_PUSH(%edi)
-#include "ssse3-wcscpy-atom.S"
-
-END (wcscat)
diff --git a/libc/arch-x86/string/ssse3-wcscpy-atom.S b/libc/arch-x86/string/ssse3-wcscpy-atom.S
deleted file mode 100644
index 27cb61e..0000000
--- a/libc/arch-x86/string/ssse3-wcscpy-atom.S
+++ /dev/null
@@ -1,652 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef USE_AS_WCSCAT
-
-# ifndef L
-#  define L(label)	.L##label
-# endif
-
-# ifndef cfi_startproc
-#  define cfi_startproc	.cfi_startproc
-# endif
-
-# ifndef cfi_endproc
-#  define cfi_endproc	.cfi_endproc
-# endif
-
-# ifndef cfi_rel_offset
-#  define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-# endif
-
-# ifndef cfi_restore
-#  define cfi_restore(reg)	.cfi_restore reg
-# endif
-
-# ifndef cfi_adjust_cfa_offset
-#  define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-# endif
-
-# ifndef ENTRY
-#  define ENTRY(name)	\
-	.type name, @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-# endif
-
-# ifndef END
-#  define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-# endif
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-# define POP(REG)	popl REG; CFI_POP (REG)
-
-# define PARMS	4
-# define RETURN	POP (%edi); ret; CFI_PUSH (%edi)
-
-# define STR1	PARMS
-# define STR2	STR1+4
-# define LEN	STR2+4
-
-.text
-ENTRY (wcscpy)
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %ecx
-
-	cmpl	$0, (%ecx)
-	jz	L(ExitTail4)
-	cmpl	$0, 4(%ecx)
-	jz	L(ExitTail8)
-	cmpl	$0, 8(%ecx)
-	jz	L(ExitTail12)
-	cmpl	$0, 12(%ecx)
-	jz	L(ExitTail16)
-
-	PUSH	(%edi)
-	mov	%edx, %edi
-#endif
-	PUSH	(%esi)
-	lea	16(%ecx), %esi
-
-	and	$-16, %esi
-
-	pxor	%xmm0, %xmm0
-	pcmpeqd	(%esi), %xmm0
-	movdqu	(%ecx), %xmm1
-	movdqu	%xmm1, (%edx)
-
-	pmovmskb %xmm0, %eax
-	sub	%ecx, %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	mov	%edx, %eax
-	lea	16(%edx), %edx
-	and	$-16, %edx
-	sub	%edx, %eax
-
-	sub	%eax, %ecx
-	mov	%ecx, %eax
-	and	$0xf, %eax
-	mov	$0, %esi
-
-	jz	L(Align16Both)
-	cmp	$4, %eax
-	je	L(Shl4)
-	cmp	$8, %eax
-	je	L(Shl8)
-	jmp	L(Shl12)
-
-L(Align16Both):
-	movaps	(%ecx), %xmm1
-	movaps	16(%ecx), %xmm2
-	movaps	%xmm1, (%edx)
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm3
-	movaps	%xmm2, (%edx, %esi)
-	pcmpeqd	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm4
-	movaps	%xmm3, (%edx, %esi)
-	pcmpeqd	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm1
-	movaps	%xmm4, (%edx, %esi)
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm2
-	movaps	%xmm1, (%edx, %esi)
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm3
-	movaps	%xmm2, (%edx, %esi)
-	pcmpeqd	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	%xmm3, (%edx, %esi)
-	mov	%ecx, %eax
-	lea	16(%ecx, %esi), %ecx
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	sub	%eax, %edx
-
-	mov	$-0x40, %esi
-
-L(Aligned64Loop):
-	movaps	(%ecx), %xmm2
-	movaps	32(%ecx), %xmm3
-	movaps	%xmm2, %xmm4
-	movaps	16(%ecx), %xmm5
-	movaps	%xmm3, %xmm6
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm5, %xmm2
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	lea	64(%edx), %edx
-	pcmpeqd	%xmm0, %xmm3
-	lea	64(%ecx), %ecx
-	pmovmskb %xmm3, %eax
-
-	test	%eax, %eax
-	jnz	L(Aligned64Leave)
-	movaps	%xmm4, -64(%edx)
-	movaps	%xmm5, -48(%edx)
-	movaps	%xmm6, -32(%edx)
-	movaps	%xmm7, -16(%edx)
-	jmp	L(Aligned64Loop)
-
-L(Aligned64Leave):
-	pcmpeqd	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqd	%xmm5, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm4, -64(%edx)
-	lea	16(%esi), %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqd	%xmm6, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm5, -48(%edx)
-	lea	16(%esi), %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	%xmm6, -32(%edx)
-	pcmpeqd	%xmm7, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	mov	$-0x40, %esi
-	movaps	%xmm7, -16(%edx)
-	jmp	L(Aligned64Loop)
-
-	.p2align 4
-L(Shl4):
-	movaps	-4(%ecx), %xmm1
-	movaps	12(%ecx), %xmm2
-L(Shl4Start):
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm1
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	28(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-12(%ecx), %ecx
-	sub	%eax, %edx
-
-	movaps	-4(%ecx), %xmm1
-
-L(Shl4LoopStart):
-	movaps	12(%ecx), %xmm2
-	movaps	28(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	44(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	60(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqd	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$4, %xmm4, %xmm5
-	palignr	$4, %xmm3, %xmm4
-	test	%eax, %eax
-	jnz	L(Shl4Start)
-
-	palignr	$4, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl4LoopStart)
-
-L(Shl4LoopExit):
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 8(%edx)
-	POP	(%esi)
-	add	$12, %edx
-	add	$12, %ecx
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit4)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(Shl8):
-	movaps	-8(%ecx), %xmm1
-	movaps	8(%ecx), %xmm2
-L(Shl8Start):
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm1
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	24(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-8(%ecx), %ecx
-	sub	%eax, %edx
-
-	movaps	-8(%ecx), %xmm1
-
-L(Shl8LoopStart):
-	movaps	8(%ecx), %xmm2
-	movaps	24(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	40(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	56(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqd	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$8, %xmm4, %xmm5
-	palignr	$8, %xmm3, %xmm4
-	test	%eax, %eax
-	jnz	L(Shl8Start)
-
-	palignr	$8, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl8LoopStart)
-
-L(Shl8LoopExit):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	POP	(%esi)
-	add	$8, %edx
-	add	$8, %ecx
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit4)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(Shl12):
-	movaps	-12(%ecx), %xmm1
-	movaps	4(%ecx), %xmm2
-L(Shl12Start):
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm1
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	20(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-4(%ecx), %ecx
-	sub	%eax, %edx
-
-	movaps	-12(%ecx), %xmm1
-
-L(Shl12LoopStart):
-	movaps	4(%ecx), %xmm2
-	movaps	20(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	36(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	52(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqd	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$12, %xmm4, %xmm5
-	palignr	$12, %xmm3, %xmm4
-	test	%eax, %eax
-	jnz	L(Shl12Start)
-
-	palignr	$12, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl12LoopStart)
-
-L(Shl12LoopExit):
-	movl	(%ecx), %esi
-	movl	%esi, (%edx)
-	mov	$4, %esi
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit4)
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(ExitHigh):
-	test	$0x01, %ah
-	jnz	L(Exit12)
-L(Exit16):
-	movdqu	(%ecx), %xmm0
-	movdqu	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-	movl	%edi, %eax
-	RETURN
-
-CFI_POP	(%edi)
-
-	.p2align 4
-L(ExitTail4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edx, %eax
-	ret
-
-	.p2align 4
-L(ExitTail8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edx, %eax
-	ret
-
-	.p2align 4
-L(ExitTail12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-	movl	%edx, %eax
-	ret
-
-	.p2align 4
-L(ExitTail16):
-	movdqu	(%ecx), %xmm0
-	movdqu	%xmm0, (%edx)
-	movl	%edx, %eax
-	ret
-
-#ifndef USE_AS_WCSCAT
-END (wcscpy)
-#endif
diff --git a/libc/arch-x86/string/ssse3-wmemcmp-atom.S b/libc/arch-x86/string/ssse3-wmemcmp-atom.S
deleted file mode 100644
index a81b78b..0000000
--- a/libc/arch-x86/string/ssse3-wmemcmp-atom.S
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
-Copyright (c) 2011, 2012, 2013 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define MEMCMP  wmemcmp_atom
-
-#define USE_WCHAR
-#define USE_AS_WMEMCMP 1
-#include "ssse3-memcmp-atom.S"
diff --git a/libc/include/bits/glibc-syscalls.h b/libc/include/bits/glibc-syscalls.h
index 879b110..8c5a91d 100644
--- a/libc/include/bits/glibc-syscalls.h
+++ b/libc/include/bits/glibc-syscalls.h
@@ -36,9 +36,6 @@
 #if defined(__NR_arch_prctl)
   #define SYS_arch_prctl __NR_arch_prctl
 #endif
-#if defined(__NR_arch_specific_syscall)
-  #define SYS_arch_specific_syscall __NR_arch_specific_syscall
-#endif
 #if defined(__NR_arm_fadvise64_64)
   #define SYS_arm_fadvise64_64 __NR_arm_fadvise64_64
 #endif
@@ -1272,9 +1269,6 @@
 #if defined(__NR_syscall)
   #define SYS_syscall __NR_syscall
 #endif
-#if defined(__NR_syscalls)
-  #define SYS_syscalls __NR_syscalls
-#endif
 #if defined(__NR_sysfs)
   #define SYS_sysfs __NR_sysfs
 #endif
diff --git a/libc/kernel/tools/update_all.py b/libc/kernel/tools/update_all.py
index 9e5ed42..331a957 100755
--- a/libc/kernel/tools/update_all.py
+++ b/libc/kernel/tools/update_all.py
@@ -88,16 +88,8 @@
     # Collect the set of all syscalls for all architectures.
     syscalls = set()
     pattern = re.compile(r'^\s*#\s*define\s*__NR_([a-z_]\S+)')
-    for unistd_h in ['kernel/uapi/asm-generic/unistd.h',
-                     'kernel/uapi/asm-arm/asm/unistd.h',
-                     'kernel/uapi/asm-arm/asm/unistd-eabi.h',
-                     'kernel/uapi/asm-arm/asm/unistd-oabi.h',
-                     'kernel/uapi/asm-riscv/asm/unistd_32.h',
-                     'kernel/uapi/asm-riscv/asm/unistd_64.h',
-                     'kernel/uapi/asm-x86/asm/unistd_32.h',
-                     'kernel/uapi/asm-x86/asm/unistd_64.h',
-                     'kernel/uapi/asm-x86/asm/unistd_x32.h']:
-        for line in open(os.path.join(libc_root, unistd_h)):
+    for unistd_h in glob.glob('%s/kernel/uapi/asm-*/asm/unistd*.h' % libc_root):
+        for line in open(unistd_h):
             m = re.search(pattern, line)
             if m:
                 nr_name = m.group(1)
diff --git a/linker/linker_main.cpp b/linker/linker_main.cpp
index 6ccd75b..74cd517 100644
--- a/linker/linker_main.cpp
+++ b/linker/linker_main.cpp
@@ -722,6 +722,8 @@
   tmp_linker_so.set_linker_flag();
 
   if (!tmp_linker_so.prelink_image()) __linker_cannot_link(args.argv[0]);
+  // There is special logic in soinfo::relocate to avoid duplicating the
+  // relocations we did in relocate_linker().
   if (!tmp_linker_so.link_image(SymbolLookupList(&tmp_linker_so), &tmp_linker_so, nullptr, nullptr)) __linker_cannot_link(args.argv[0]);
 
   return __linker_init_post_relocation(args, tmp_linker_so);
diff --git a/tests/prebuilt-elf-files/arm64/libtest_invalid-local-tls.so b/tests/prebuilt-elf-files/arm64/libtest_invalid-local-tls.so
index 20c5765..c902bbe 100755
--- a/tests/prebuilt-elf-files/arm64/libtest_invalid-local-tls.so
+++ b/tests/prebuilt-elf-files/arm64/libtest_invalid-local-tls.so
Binary files differ
diff --git a/tests/prebuilt-elf-files/arm64/libtest_invalid-unaligned_shdr_offset.so b/tests/prebuilt-elf-files/arm64/libtest_invalid-unaligned_shdr_offset.so
index 6e5a6e3..fb86bca 100755
--- a/tests/prebuilt-elf-files/arm64/libtest_invalid-unaligned_shdr_offset.so
+++ b/tests/prebuilt-elf-files/arm64/libtest_invalid-unaligned_shdr_offset.so
Binary files differ
diff --git a/tests/prebuilt-elf-files/arm64/libtest_invalid-zero_shdr_table_content.so b/tests/prebuilt-elf-files/arm64/libtest_invalid-zero_shdr_table_content.so
index 14b80b5..0416db2 100755
--- a/tests/prebuilt-elf-files/arm64/libtest_invalid-zero_shdr_table_content.so
+++ b/tests/prebuilt-elf-files/arm64/libtest_invalid-zero_shdr_table_content.so
Binary files differ
diff --git a/tests/prebuilt-elf-files/gen-libtest_invalid-local-tls.sh b/tests/prebuilt-elf-files/gen-libtest_invalid-local-tls.sh
index 0f3e736..98a2b00 100755
--- a/tests/prebuilt-elf-files/gen-libtest_invalid-local-tls.sh
+++ b/tests/prebuilt-elf-files/gen-libtest_invalid-local-tls.sh
@@ -19,12 +19,18 @@
 build() {
   arch=$1
   target=$2
+
+  if [[ "$arch" == "arm64" || "$arch" == "x86_64" ]]; then
+    alignment="-Wl,-z,max-page-size=16384"
+  fi
+
   $NDK21E/toolchains/llvm/prebuilt/linux-x86_64/bin/clang -O2 --target=$target \
       -fpic -shared -o $arch/libtest_invalid-local-tls.so -fno-emulated-tls \
-      -fuse-ld=gold test.c test2.c
+      $alignment -fuse-ld=gold test.c test2.c
 }
 
 build arm armv7a-linux-androideabi29
 build arm64 aarch64-linux-android29
 build x86 i686-linux-android29
 build x86_64 x86_64-linux-android29
+
diff --git a/tests/prebuilt-elf-files/x86_64/libtest_invalid-local-tls.so b/tests/prebuilt-elf-files/x86_64/libtest_invalid-local-tls.so
index 5b689ba..31d2b37 100755
--- a/tests/prebuilt-elf-files/x86_64/libtest_invalid-local-tls.so
+++ b/tests/prebuilt-elf-files/x86_64/libtest_invalid-local-tls.so
Binary files differ
diff --git a/tests/prebuilt-elf-files/x86_64/libtest_invalid-unaligned_shdr_offset.so b/tests/prebuilt-elf-files/x86_64/libtest_invalid-unaligned_shdr_offset.so
index 87631af..f9c310f 100755
--- a/tests/prebuilt-elf-files/x86_64/libtest_invalid-unaligned_shdr_offset.so
+++ b/tests/prebuilt-elf-files/x86_64/libtest_invalid-unaligned_shdr_offset.so
Binary files differ
diff --git a/tests/prebuilt-elf-files/x86_64/libtest_invalid-zero_shdr_table_content.so b/tests/prebuilt-elf-files/x86_64/libtest_invalid-zero_shdr_table_content.so
index 27d1138..3d1f5d3 100755
--- a/tests/prebuilt-elf-files/x86_64/libtest_invalid-zero_shdr_table_content.so
+++ b/tests/prebuilt-elf-files/x86_64/libtest_invalid-zero_shdr_table_content.so
Binary files differ
diff --git a/tests/string_test.cpp b/tests/string_test.cpp
index 6e1fcfc..289a483 100644
--- a/tests/string_test.cpp
+++ b/tests/string_test.cpp
@@ -748,7 +748,7 @@
       expected_end = copy_len;
     }
 
-    ASSERT_EQ(state.ptr2 + expected_end, stpncpy(state.ptr2, state.ptr1, copy_len));
+    ASSERT_EQ(state.ptr2, strncpy(state.ptr2, state.ptr1, copy_len));
 
     // Verify ptr1 was not modified.
     ASSERT_EQ(0, memcmp(state.ptr1, state.ptr, state.MAX_LEN));