Merge "Allow v4l2r access to libc_uapi_headers" into main
diff --git a/libc/Android.bp b/libc/Android.bp
index 1bf7ac3..ce60cef 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -418,20 +418,6 @@
         "upstream-freebsd/lib/libc/string/wmemmove.c",
         "upstream-freebsd/lib/libc/string/wmemset.c",
     ],
-    arch: {
-        x86: {
-            exclude_srcs: [
-                "upstream-freebsd/lib/libc/string/wcschr.c",
-                "upstream-freebsd/lib/libc/string/wcscmp.c",
-                "upstream-freebsd/lib/libc/string/wcslen.c",
-                "upstream-freebsd/lib/libc/string/wcsrchr.c",
-                "upstream-freebsd/lib/libc/string/wmemcmp.c",
-                "upstream-freebsd/lib/libc/string/wcscat.c",
-                "upstream-freebsd/lib/libc/string/wcscpy.c",
-                "upstream-freebsd/lib/libc/string/wmemcmp.c",
-            ],
-        },
-    },
 
     cflags: [
         "-Wno-sign-compare",
@@ -1181,12 +1167,9 @@
                 "arch-x86/bionic/vfork.S",
                 "arch-x86/bionic/__x86.get_pc_thunk.S",
 
-                "arch-x86/generic/string/memcmp.S",
-
                 "arch-x86/string/sse2-memchr-atom.S",
                 "arch-x86/string/sse2-memmove-slm.S",
                 "arch-x86/string/sse2-memrchr-atom.S",
-                "arch-x86/string/sse2-memset-atom.S",
                 "arch-x86/string/sse2-memset-slm.S",
                 "arch-x86/string/sse2-stpcpy-slm.S",
                 "arch-x86/string/sse2-stpncpy-slm.S",
@@ -1196,28 +1179,16 @@
                 "arch-x86/string/sse2-strncpy-slm.S",
                 "arch-x86/string/sse2-strnlen-atom.S",
                 "arch-x86/string/sse2-strrchr-atom.S",
-                "arch-x86/string/sse2-wcschr-atom.S",
-                "arch-x86/string/sse2-wcsrchr-atom.S",
-                "arch-x86/string/sse2-wcslen-atom.S",
-                "arch-x86/string/sse2-wcscmp-atom.S",
-                "arch-x86/string/sse2-strlen-atom.S",
 
                 "arch-x86/string/ssse3-memcmp-atom.S",
-                "arch-x86/string/ssse3-memmove-atom.S",
                 "arch-x86/string/ssse3-strcat-atom.S",
                 "arch-x86/string/ssse3-strcmp-atom.S",
-                "arch-x86/string/ssse3-strcpy-atom.S",
                 "arch-x86/string/ssse3-strlcat-atom.S",
                 "arch-x86/string/ssse3-strlcpy-atom.S",
                 "arch-x86/string/ssse3-strncat-atom.S",
                 "arch-x86/string/ssse3-strncmp-atom.S",
-                "arch-x86/string/ssse3-strncpy-atom.S",
-                "arch-x86/string/ssse3-wcscat-atom.S",
-                "arch-x86/string/ssse3-wcscpy-atom.S",
-                "arch-x86/string/ssse3-wmemcmp-atom.S",
 
                 "arch-x86/string/sse4-memcmp-slm.S",
-                "arch-x86/string/sse4-wmemcmp-slm.S",
 
                 "bionic/strchrnul.cpp",
             ],
@@ -2026,15 +1997,6 @@
 cc_defaults {
     name: "crt_so_defaults",
     defaults: ["crt_defaults"],
-
-    arch: {
-        x86: {
-            cflags: ["-fPIC"],
-        },
-        x86_64: {
-            cflags: ["-fPIC"],
-        },
-    },
     stl: "none",
 }
 
diff --git a/libc/NOTICE b/libc/NOTICE
index 1a84d3c..1fa036a 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -3862,36 +3862,6 @@
 
 -------------------------------------------------------------------
 
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
--------------------------------------------------------------------
-
 Copyright (c) 2011 Martin Pieuchot <mpi@openbsd.org>
 
 Permission to use, copy, modify, and distribute this software for any
@@ -3937,36 +3907,6 @@
 
 -------------------------------------------------------------------
 
-Copyright (c) 2011, 2012, 2013 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
--------------------------------------------------------------------
-
 Copyright (c) 2011, Intel Corporation
 All rights reserved.
 
diff --git a/libc/arch-x86/dynamic_function_dispatch.cpp b/libc/arch-x86/dynamic_function_dispatch.cpp
index e6cc5fb..240fcdf 100644
--- a/libc/arch-x86/dynamic_function_dispatch.cpp
+++ b/libc/arch-x86/dynamic_function_dispatch.cpp
@@ -33,65 +33,9 @@
 
 DEFINE_IFUNC_FOR(memcmp) {
   __builtin_cpu_init();
-  if (__builtin_cpu_is("atom")) RETURN_FUNC(memcmp_func_t, memcmp_atom);
   if (__builtin_cpu_supports("sse4.1")) RETURN_FUNC(memcmp_func_t, memcmp_sse4);
-  RETURN_FUNC(memcmp_func_t, memcmp_generic);
+  RETURN_FUNC(memcmp_func_t, memcmp_atom);
 }
 MEMCMP_SHIM()
 
-DEFINE_IFUNC_FOR(memset) {
-  __builtin_cpu_init();
-  if (__builtin_cpu_is("atom")) RETURN_FUNC(memset_func_t, memset_atom);
-  RETURN_FUNC(memset_func_t, memset_generic);
-}
-MEMSET_SHIM()
-
-DEFINE_IFUNC_FOR(__memset_chk) {
-  __builtin_cpu_init();
-  if (__builtin_cpu_is("atom")) RETURN_FUNC(__memset_chk_func_t, __memset_chk_atom);
-  RETURN_FUNC(__memset_chk_func_t, __memset_chk_generic);
-}
-__MEMSET_CHK_SHIM()
-
-DEFINE_IFUNC_FOR(memmove) {
-  __builtin_cpu_init();
-  if (__builtin_cpu_is("atom")) RETURN_FUNC(memmove_func_t, memmove_atom);
-  RETURN_FUNC(memmove_func_t, memmove_generic);
-}
-MEMMOVE_SHIM()
-
-DEFINE_IFUNC_FOR(memcpy) { return memmove_resolver(); }
-MEMCPY_SHIM()
-
-DEFINE_IFUNC_FOR(strcpy) {
-  __builtin_cpu_init();
-  if (__builtin_cpu_is("atom")) RETURN_FUNC(strcpy_func_t, strcpy_atom);
-  RETURN_FUNC(strcpy_func_t, strcpy_generic);
-}
-STRCPY_SHIM()
-
-DEFINE_IFUNC_FOR(strncpy) {
-  __builtin_cpu_init();
-  if (__builtin_cpu_is("atom")) RETURN_FUNC(strncpy_func_t, strncpy_atom);
-  RETURN_FUNC(strncpy_func_t, strncpy_generic);
-}
-STRNCPY_SHIM()
-
-DEFINE_IFUNC_FOR(strlen) {
-  __builtin_cpu_init();
-  if (__builtin_cpu_is("atom")) RETURN_FUNC(strlen_func_t, strlen_atom);
-  RETURN_FUNC(strlen_func_t, strlen_generic);
-}
-STRLEN_SHIM()
-
-typedef int wmemcmp_func_t(const wchar_t*, const wchar_t*, size_t);
-DEFINE_IFUNC_FOR(wmemcmp) {
-  __builtin_cpu_init();
-  if (__builtin_cpu_supports("sse4.1")) RETURN_FUNC(wmemcmp_func_t, wmemcmp_sse4);
-  RETURN_FUNC(wmemcmp_func_t, wmemcmp_atom);
-}
-DEFINE_STATIC_SHIM(int wmemcmp(const wchar_t* lhs, const wchar_t* rhs, size_t n) {
-  FORWARD(wmemcmp)(lhs, rhs, n);
-})
-
 }  // extern "C"
diff --git a/libc/arch-x86/generic/string/memcmp.S b/libc/arch-x86/generic/string/memcmp.S
deleted file mode 100644
index 1d327c7..0000000
--- a/libc/arch-x86/generic/string/memcmp.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/*	$OpenBSD: memcmp.S,v 1.4 2005/08/07 11:30:38 espie Exp $ */
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- */
-
-#include <private/bionic_asm.h>
-
-ENTRY(memcmp_generic)
-	pushl	%edi
-	pushl	%esi
-	movl	12(%esp),%edi
-	movl	16(%esp),%esi
-	cld				/* set compare direction forward */
-
-	movl	20(%esp),%ecx		/* compare by words */
-	shrl	$2,%ecx
-	repe
-	cmpsl
-	jne	L5			/* do we match so far? */
-
-	movl	20(%esp),%ecx		/* compare remainder by bytes */
-	andl	$3,%ecx
-	repe
-	cmpsb
-	jne	L6			/* do we match? */
-
-	xorl	%eax,%eax		/* we match, return zero	*/
-	popl	%esi
-	popl	%edi
-	ret
-
-L5:	movl	$4,%ecx			/* We know that one of the next	*/
-	subl	%ecx,%edi		/* four pairs of bytes do not	*/
-	subl	%ecx,%esi		/* match.			*/
-	repe
-	cmpsb
-L6:	movzbl  -1(%edi),%eax		/* Perform unsigned comparison	*/
-	movzbl	-1(%esi),%edx
-	subl	%edx,%eax
-	popl	%esi
-	popl	%edi
-	ret
-END(memcmp_generic)
diff --git a/libc/arch-x86/string/sse2-memmove-slm.S b/libc/arch-x86/string/sse2-memmove-slm.S
index 7f42374..2ed4e7b 100644
--- a/libc/arch-x86/string/sse2-memmove-slm.S
+++ b/libc/arch-x86/string/sse2-memmove-slm.S
@@ -31,7 +31,7 @@
 #define FOR_SILVERMONT
 
 #ifndef MEMMOVE
-# define MEMMOVE	memmove_generic
+# define MEMMOVE	memmove
 #endif
 
 #ifndef L
@@ -551,3 +551,9 @@
 	jmp	L(mm_recalc_len)
 
 END (MEMMOVE)
+
+// N.B., `private/bionic_asm.h` provides ALIAS_SYMBOL, but that file provides
+// conflicting definitions for some macros in this file. Since ALIAS_SYMBOL is
+// small, inline it here.
+.globl memcpy;
+.equ memcpy, MEMMOVE
diff --git a/libc/arch-x86/string/sse2-memset-atom.S b/libc/arch-x86/string/sse2-memset-atom.S
deleted file mode 100644
index e43ead0..0000000
--- a/libc/arch-x86/string/sse2-memset-atom.S
+++ /dev/null
@@ -1,841 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include <private/bionic_asm.h>
-
-#define FOR_ATOM
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef ALIGN
-# define ALIGN(n)	.p2align n
-#endif
-
-#define CFI_PUSH(REG)						\
-  .cfi_adjust_cfa_offset 4;					\
-  .cfi_rel_offset REG, 0
-
-#define CFI_POP(REG)						\
-  .cfi_adjust_cfa_offset -4;					\
-  .cfi_restore REG
-
-#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
-#define POP(REG)	popl REG; CFI_POP(REG)
-
-#define PARMS 8  /* Preserve EBX. */
-#define DST PARMS
-#define CHR (DST+4)
-#define LEN (CHR+4)
-#define CHK_DST_LEN (LEN+4)
-#define SETRTNVAL	movl DST(%esp), %eax
-
-#define ENTRANCE	PUSH(%ebx);
-#define RETURN_END	POP(%ebx); ret
-#define RETURN		RETURN_END; CFI_PUSH(%ebx)
-#define JMPTBL(I, B)	I - B
-
-#define SETUP_PIC_REG(x)	call	__x86.get_pc_thunk.x
-
-/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
-   jump table with relative offsets.   */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
-    /* We first load PC into EBX.  */				\
-    call	__x86.get_pc_thunk.bx;				\
-    /* Get the address of the jump table.  */			\
-    add		$(TABLE - .), %ebx;				\
-    /* Get the entry and convert the relative offset to the	\
-       absolute address.  */					\
-    add		(%ebx,%ecx,4), %ebx;				\
-    add		%ecx, %edx;					\
-    /* We loaded the jump table and adjusted EDX. Go.  */	\
-    jmp		*%ebx
-
-ENTRY(__memset_chk_atom)
-  ENTRANCE
-
-  movl LEN(%esp), %ecx
-  cmpl CHK_DST_LEN(%esp), %ecx
-  jna L(memset_length_loaded)
-
-  POP(%ebx) // Undo ENTRANCE without returning.
-  jmp __memset_chk_fail
-END(__memset_chk_atom)
-
-	.section .text.sse2,"ax",@progbits
-	ALIGN(4)
-ENTRY(memset_atom)
-	ENTRANCE
-
-	movl	LEN(%esp), %ecx
-L(memset_length_loaded):
-	movzbl	CHR(%esp), %eax
-	movb	%al, %ah
-	/* Fill the whole EAX with pattern.  */
-	movl	%eax, %edx
-	shl	$16, %eax
-	or	%edx, %eax
-	movl	DST(%esp), %edx
-	cmp	$32, %ecx
-	jae	L(32bytesormore)
-
-L(write_less32bytes):
-	BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes))
-
-
-	.pushsection .rodata.sse2,"a",@progbits
-	ALIGN(2)
-L(table_less_32bytes):
-	.int	JMPTBL(L(write_0bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_1bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_2bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_3bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_4bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_5bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_6bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_7bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_8bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_9bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_10bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_11bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_12bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_13bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_14bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_15bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_16bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_17bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_18bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_19bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_20bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_21bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_22bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_23bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_24bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_25bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_26bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_27bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_28bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_29bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_30bytes), L(table_less_32bytes))
-	.int	JMPTBL(L(write_31bytes), L(table_less_32bytes))
-	.popsection
-
-	ALIGN(4)
-L(write_28bytes):
-	movl	%eax, -28(%edx)
-L(write_24bytes):
-	movl	%eax, -24(%edx)
-L(write_20bytes):
-	movl	%eax, -20(%edx)
-L(write_16bytes):
-	movl	%eax, -16(%edx)
-L(write_12bytes):
-	movl	%eax, -12(%edx)
-L(write_8bytes):
-	movl	%eax, -8(%edx)
-L(write_4bytes):
-	movl	%eax, -4(%edx)
-L(write_0bytes):
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(write_29bytes):
-	movl	%eax, -29(%edx)
-L(write_25bytes):
-	movl	%eax, -25(%edx)
-L(write_21bytes):
-	movl	%eax, -21(%edx)
-L(write_17bytes):
-	movl	%eax, -17(%edx)
-L(write_13bytes):
-	movl	%eax, -13(%edx)
-L(write_9bytes):
-	movl	%eax, -9(%edx)
-L(write_5bytes):
-	movl	%eax, -5(%edx)
-L(write_1bytes):
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(write_30bytes):
-	movl	%eax, -30(%edx)
-L(write_26bytes):
-	movl	%eax, -26(%edx)
-L(write_22bytes):
-	movl	%eax, -22(%edx)
-L(write_18bytes):
-	movl	%eax, -18(%edx)
-L(write_14bytes):
-	movl	%eax, -14(%edx)
-L(write_10bytes):
-	movl	%eax, -10(%edx)
-L(write_6bytes):
-	movl	%eax, -6(%edx)
-L(write_2bytes):
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(write_31bytes):
-	movl	%eax, -31(%edx)
-L(write_27bytes):
-	movl	%eax, -27(%edx)
-L(write_23bytes):
-	movl	%eax, -23(%edx)
-L(write_19bytes):
-	movl	%eax, -19(%edx)
-L(write_15bytes):
-	movl	%eax, -15(%edx)
-L(write_11bytes):
-	movl	%eax, -11(%edx)
-L(write_7bytes):
-	movl	%eax, -7(%edx)
-L(write_3bytes):
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-/* ECX > 32 and EDX is 4 byte aligned.  */
-L(32bytesormore):
-	/* Fill xmm0 with the pattern.  */
-	movd	%eax, %xmm0
-	pshufd	$0, %xmm0, %xmm0
-	testl	$0xf, %edx
-	jz	L(aligned_16)
-/* ECX > 32 and EDX is not 16 byte aligned.  */
-L(not_aligned_16):
-	movdqu	%xmm0, (%edx)
-	movl	%edx, %eax
-	and	$-16, %edx
-	add	$16, %edx
-	sub	%edx, %eax
-	add	%eax, %ecx
-	movd	%xmm0, %eax
-
-	ALIGN(4)
-L(aligned_16):
-	cmp	$128, %ecx
-	jae	L(128bytesormore)
-
-L(aligned_16_less128bytes):
-	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
-
-	ALIGN(4)
-L(128bytesormore):
-	PUSH(%ebx)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
-	cmp	%ebx, %ecx
-	jae	L(128bytesormore_nt_start)
-
-
-	POP(%ebx)
-# define RESTORE_EBX_STATE CFI_PUSH(%ebx)
-	PUSH(%ebx)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
-	POP(%ebx)
-
-	jae	L(128bytes_L2_normal)
-	subl	$128, %ecx
-L(128bytesormore_normal):
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	lea	128(%edx), %edx
-	jb	L(128bytesless_normal)
-
-
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	lea	128(%edx), %edx
-	jae	L(128bytesormore_normal)
-
-L(128bytesless_normal):
-	add	$128, %ecx
-	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
-
-	ALIGN(4)
-L(128bytes_L2_normal):
-	prefetcht0	0x380(%edx)
-	prefetcht0	0x3c0(%edx)
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movaps	%xmm0, 0x10(%edx)
-	movaps	%xmm0, 0x20(%edx)
-	movaps	%xmm0, 0x30(%edx)
-	movaps	%xmm0, 0x40(%edx)
-	movaps	%xmm0, 0x50(%edx)
-	movaps	%xmm0, 0x60(%edx)
-	movaps	%xmm0, 0x70(%edx)
-	add	$128, %edx
-	cmp	$128, %ecx
-	jae	L(128bytes_L2_normal)
-
-L(128bytesless_L2_normal):
-	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
-
-	RESTORE_EBX_STATE
-L(128bytesormore_nt_start):
-	sub	%ebx, %ecx
-	mov	%ebx, %eax
-	and	$0x7f, %eax
-	add	%eax, %ecx
-	movd	%xmm0, %eax
-	ALIGN(4)
-L(128bytesormore_shared_cache_loop):
-	prefetcht0	0x3c0(%edx)
-	prefetcht0	0x380(%edx)
-	sub	$0x80, %ebx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm0, 0x10(%edx)
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm0, 0x30(%edx)
-	movdqa	%xmm0, 0x40(%edx)
-	movdqa	%xmm0, 0x50(%edx)
-	movdqa	%xmm0, 0x60(%edx)
-	movdqa	%xmm0, 0x70(%edx)
-	add	$0x80, %edx
-	cmp	$0x80, %ebx
-	jae	L(128bytesormore_shared_cache_loop)
-	cmp	$0x80, %ecx
-	jb	L(shared_cache_loop_end)
-	ALIGN(4)
-L(128bytesormore_nt):
-	sub	$0x80, %ecx
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm0, 0x10(%edx)
-	movntdq	%xmm0, 0x20(%edx)
-	movntdq	%xmm0, 0x30(%edx)
-	movntdq	%xmm0, 0x40(%edx)
-	movntdq	%xmm0, 0x50(%edx)
-	movntdq	%xmm0, 0x60(%edx)
-	movntdq	%xmm0, 0x70(%edx)
-	add	$0x80, %edx
-	cmp	$0x80, %ecx
-	jae	L(128bytesormore_nt)
-	sfence
-L(shared_cache_loop_end):
-	POP(%ebx)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
-
-
-	.pushsection .rodata.sse2,"a",@progbits
-	ALIGN(2)
-L(table_16_128bytes):
-	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
-	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
-	.popsection
-
-	ALIGN(4)
-L(aligned_16_112bytes):
-	movdqa	%xmm0, -112(%edx)
-L(aligned_16_96bytes):
-	movdqa	%xmm0, -96(%edx)
-L(aligned_16_80bytes):
-	movdqa	%xmm0, -80(%edx)
-L(aligned_16_64bytes):
-	movdqa	%xmm0, -64(%edx)
-L(aligned_16_48bytes):
-	movdqa	%xmm0, -48(%edx)
-L(aligned_16_32bytes):
-	movdqa	%xmm0, -32(%edx)
-L(aligned_16_16bytes):
-	movdqa	%xmm0, -16(%edx)
-L(aligned_16_0bytes):
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_113bytes):
-	movdqa	%xmm0, -113(%edx)
-L(aligned_16_97bytes):
-	movdqa	%xmm0, -97(%edx)
-L(aligned_16_81bytes):
-	movdqa	%xmm0, -81(%edx)
-L(aligned_16_65bytes):
-	movdqa	%xmm0, -65(%edx)
-L(aligned_16_49bytes):
-	movdqa	%xmm0, -49(%edx)
-L(aligned_16_33bytes):
-	movdqa	%xmm0, -33(%edx)
-L(aligned_16_17bytes):
-	movdqa	%xmm0, -17(%edx)
-L(aligned_16_1bytes):
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_114bytes):
-	movdqa	%xmm0, -114(%edx)
-L(aligned_16_98bytes):
-	movdqa	%xmm0, -98(%edx)
-L(aligned_16_82bytes):
-	movdqa	%xmm0, -82(%edx)
-L(aligned_16_66bytes):
-	movdqa	%xmm0, -66(%edx)
-L(aligned_16_50bytes):
-	movdqa	%xmm0, -50(%edx)
-L(aligned_16_34bytes):
-	movdqa	%xmm0, -34(%edx)
-L(aligned_16_18bytes):
-	movdqa	%xmm0, -18(%edx)
-L(aligned_16_2bytes):
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_115bytes):
-	movdqa	%xmm0, -115(%edx)
-L(aligned_16_99bytes):
-	movdqa	%xmm0, -99(%edx)
-L(aligned_16_83bytes):
-	movdqa	%xmm0, -83(%edx)
-L(aligned_16_67bytes):
-	movdqa	%xmm0, -67(%edx)
-L(aligned_16_51bytes):
-	movdqa	%xmm0, -51(%edx)
-L(aligned_16_35bytes):
-	movdqa	%xmm0, -35(%edx)
-L(aligned_16_19bytes):
-	movdqa	%xmm0, -19(%edx)
-L(aligned_16_3bytes):
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_116bytes):
-	movdqa	%xmm0, -116(%edx)
-L(aligned_16_100bytes):
-	movdqa	%xmm0, -100(%edx)
-L(aligned_16_84bytes):
-	movdqa	%xmm0, -84(%edx)
-L(aligned_16_68bytes):
-	movdqa	%xmm0, -68(%edx)
-L(aligned_16_52bytes):
-	movdqa	%xmm0, -52(%edx)
-L(aligned_16_36bytes):
-	movdqa	%xmm0, -36(%edx)
-L(aligned_16_20bytes):
-	movdqa	%xmm0, -20(%edx)
-L(aligned_16_4bytes):
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_117bytes):
-	movdqa	%xmm0, -117(%edx)
-L(aligned_16_101bytes):
-	movdqa	%xmm0, -101(%edx)
-L(aligned_16_85bytes):
-	movdqa	%xmm0, -85(%edx)
-L(aligned_16_69bytes):
-	movdqa	%xmm0, -69(%edx)
-L(aligned_16_53bytes):
-	movdqa	%xmm0, -53(%edx)
-L(aligned_16_37bytes):
-	movdqa	%xmm0, -37(%edx)
-L(aligned_16_21bytes):
-	movdqa	%xmm0, -21(%edx)
-L(aligned_16_5bytes):
-	movl	%eax, -5(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_118bytes):
-	movdqa	%xmm0, -118(%edx)
-L(aligned_16_102bytes):
-	movdqa	%xmm0, -102(%edx)
-L(aligned_16_86bytes):
-	movdqa	%xmm0, -86(%edx)
-L(aligned_16_70bytes):
-	movdqa	%xmm0, -70(%edx)
-L(aligned_16_54bytes):
-	movdqa	%xmm0, -54(%edx)
-L(aligned_16_38bytes):
-	movdqa	%xmm0, -38(%edx)
-L(aligned_16_22bytes):
-	movdqa	%xmm0, -22(%edx)
-L(aligned_16_6bytes):
-	movl	%eax, -6(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_119bytes):
-	movdqa	%xmm0, -119(%edx)
-L(aligned_16_103bytes):
-	movdqa	%xmm0, -103(%edx)
-L(aligned_16_87bytes):
-	movdqa	%xmm0, -87(%edx)
-L(aligned_16_71bytes):
-	movdqa	%xmm0, -71(%edx)
-L(aligned_16_55bytes):
-	movdqa	%xmm0, -55(%edx)
-L(aligned_16_39bytes):
-	movdqa	%xmm0, -39(%edx)
-L(aligned_16_23bytes):
-	movdqa	%xmm0, -23(%edx)
-L(aligned_16_7bytes):
-	movl	%eax, -7(%edx)
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_120bytes):
-	movdqa	%xmm0, -120(%edx)
-L(aligned_16_104bytes):
-	movdqa	%xmm0, -104(%edx)
-L(aligned_16_88bytes):
-	movdqa	%xmm0, -88(%edx)
-L(aligned_16_72bytes):
-	movdqa	%xmm0, -72(%edx)
-L(aligned_16_56bytes):
-	movdqa	%xmm0, -56(%edx)
-L(aligned_16_40bytes):
-	movdqa	%xmm0, -40(%edx)
-L(aligned_16_24bytes):
-	movdqa	%xmm0, -24(%edx)
-L(aligned_16_8bytes):
-	movq	%xmm0, -8(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_121bytes):
-	movdqa	%xmm0, -121(%edx)
-L(aligned_16_105bytes):
-	movdqa	%xmm0, -105(%edx)
-L(aligned_16_89bytes):
-	movdqa	%xmm0, -89(%edx)
-L(aligned_16_73bytes):
-	movdqa	%xmm0, -73(%edx)
-L(aligned_16_57bytes):
-	movdqa	%xmm0, -57(%edx)
-L(aligned_16_41bytes):
-	movdqa	%xmm0, -41(%edx)
-L(aligned_16_25bytes):
-	movdqa	%xmm0, -25(%edx)
-L(aligned_16_9bytes):
-	movq	%xmm0, -9(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_122bytes):
-	movdqa	%xmm0, -122(%edx)
-L(aligned_16_106bytes):
-	movdqa	%xmm0, -106(%edx)
-L(aligned_16_90bytes):
-	movdqa	%xmm0, -90(%edx)
-L(aligned_16_74bytes):
-	movdqa	%xmm0, -74(%edx)
-L(aligned_16_58bytes):
-	movdqa	%xmm0, -58(%edx)
-L(aligned_16_42bytes):
-	movdqa	%xmm0, -42(%edx)
-L(aligned_16_26bytes):
-	movdqa	%xmm0, -26(%edx)
-L(aligned_16_10bytes):
-	movq	%xmm0, -10(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_123bytes):
-	movdqa	%xmm0, -123(%edx)
-L(aligned_16_107bytes):
-	movdqa	%xmm0, -107(%edx)
-L(aligned_16_91bytes):
-	movdqa	%xmm0, -91(%edx)
-L(aligned_16_75bytes):
-	movdqa	%xmm0, -75(%edx)
-L(aligned_16_59bytes):
-	movdqa	%xmm0, -59(%edx)
-L(aligned_16_43bytes):
-	movdqa	%xmm0, -43(%edx)
-L(aligned_16_27bytes):
-	movdqa	%xmm0, -27(%edx)
-L(aligned_16_11bytes):
-	movq	%xmm0, -11(%edx)
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_124bytes):
-	movdqa	%xmm0, -124(%edx)
-L(aligned_16_108bytes):
-	movdqa	%xmm0, -108(%edx)
-L(aligned_16_92bytes):
-	movdqa	%xmm0, -92(%edx)
-L(aligned_16_76bytes):
-	movdqa	%xmm0, -76(%edx)
-L(aligned_16_60bytes):
-	movdqa	%xmm0, -60(%edx)
-L(aligned_16_44bytes):
-	movdqa	%xmm0, -44(%edx)
-L(aligned_16_28bytes):
-	movdqa	%xmm0, -28(%edx)
-L(aligned_16_12bytes):
-	movq	%xmm0, -12(%edx)
-	movl	%eax, -4(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_125bytes):
-	movdqa	%xmm0, -125(%edx)
-L(aligned_16_109bytes):
-	movdqa	%xmm0, -109(%edx)
-L(aligned_16_93bytes):
-	movdqa	%xmm0, -93(%edx)
-L(aligned_16_77bytes):
-	movdqa	%xmm0, -77(%edx)
-L(aligned_16_61bytes):
-	movdqa	%xmm0, -61(%edx)
-L(aligned_16_45bytes):
-	movdqa	%xmm0, -45(%edx)
-L(aligned_16_29bytes):
-	movdqa	%xmm0, -29(%edx)
-L(aligned_16_13bytes):
-	movq	%xmm0, -13(%edx)
-	movl	%eax, -5(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_126bytes):
-	movdqa	%xmm0, -126(%edx)
-L(aligned_16_110bytes):
-	movdqa	%xmm0, -110(%edx)
-L(aligned_16_94bytes):
-	movdqa	%xmm0, -94(%edx)
-L(aligned_16_78bytes):
-	movdqa	%xmm0, -78(%edx)
-L(aligned_16_62bytes):
-	movdqa	%xmm0, -62(%edx)
-L(aligned_16_46bytes):
-	movdqa	%xmm0, -46(%edx)
-L(aligned_16_30bytes):
-	movdqa	%xmm0, -30(%edx)
-L(aligned_16_14bytes):
-	movq	%xmm0, -14(%edx)
-	movl	%eax, -6(%edx)
-	movw	%ax, -2(%edx)
-	SETRTNVAL
-	RETURN
-
-	ALIGN(4)
-L(aligned_16_127bytes):
-	movdqa	%xmm0, -127(%edx)
-L(aligned_16_111bytes):
-	movdqa	%xmm0, -111(%edx)
-L(aligned_16_95bytes):
-	movdqa	%xmm0, -95(%edx)
-L(aligned_16_79bytes):
-	movdqa	%xmm0, -79(%edx)
-L(aligned_16_63bytes):
-	movdqa	%xmm0, -63(%edx)
-L(aligned_16_47bytes):
-	movdqa	%xmm0, -47(%edx)
-L(aligned_16_31bytes):
-	movdqa	%xmm0, -31(%edx)
-L(aligned_16_15bytes):
-	movq	%xmm0, -15(%edx)
-	movl	%eax, -7(%edx)
-	movw	%ax, -3(%edx)
-	movb	%al, -1(%edx)
-	SETRTNVAL
-	RETURN_END
-
-END(memset_atom)
diff --git a/libc/arch-x86/string/sse2-memset-slm.S b/libc/arch-x86/string/sse2-memset-slm.S
index e4c8fa1..ec2ee52 100644
--- a/libc/arch-x86/string/sse2-memset-slm.S
+++ b/libc/arch-x86/string/sse2-memset-slm.S
@@ -79,7 +79,7 @@
     /* We loaded the jump table and adjusted EDX. Go.  */	\
     jmp		*%ebx
 
-ENTRY(__memset_chk_generic)
+ENTRY(__memset_chk)
   ENTRANCE
 
   movl LEN(%esp), %ecx
@@ -88,11 +88,11 @@
 
   POP(%ebx) // Undo ENTRANCE without returning.
   jmp __memset_chk_fail
-END(__memset_chk_generic)
+END(__memset_chk)
 
 	.section .text.sse2,"ax",@progbits
 	ALIGN(4)
-ENTRY(memset_generic)
+ENTRY(memset)
 	ENTRANCE
 
 	movl	LEN(%esp), %ecx
@@ -755,4 +755,4 @@
 	SETRTNVAL
 	RETURN_END
 
-END(memset_generic)
+END(memset)
diff --git a/libc/arch-x86/string/sse2-stpcpy-slm.S b/libc/arch-x86/string/sse2-stpcpy-slm.S
old mode 100755
new mode 100644
diff --git a/libc/arch-x86/string/sse2-strcpy-slm.S b/libc/arch-x86/string/sse2-strcpy-slm.S
old mode 100755
new mode 100644
index 22ceeab..b5d84b5
--- a/libc/arch-x86/string/sse2-strcpy-slm.S
+++ b/libc/arch-x86/string/sse2-strcpy-slm.S
@@ -79,7 +79,7 @@
 #define POP(REG) popl REG; CFI_POP (REG)
 
 #ifndef STRCPY
-# define STRCPY  strcpy_generic
+# define STRCPY  strcpy
 #endif
 
 #ifdef USE_AS_STPNCPY
diff --git a/libc/arch-x86/string/sse2-strlen-slm.S b/libc/arch-x86/string/sse2-strlen-slm.S
old mode 100755
new mode 100644
index b805ad6..27cc025
--- a/libc/arch-x86/string/sse2-strlen-slm.S
+++ b/libc/arch-x86/string/sse2-strlen-slm.S
@@ -29,7 +29,7 @@
 */
 
 #ifndef STRLEN
-# define STRLEN strlen_generic
+# define STRLEN strlen
 #endif
 
 #ifndef L
diff --git a/libc/arch-x86/string/sse2-strncpy-slm.S b/libc/arch-x86/string/sse2-strncpy-slm.S
old mode 100755
new mode 100644
index aff7fb9..591419f
--- a/libc/arch-x86/string/sse2-strncpy-slm.S
+++ b/libc/arch-x86/string/sse2-strncpy-slm.S
@@ -29,5 +29,5 @@
 */

 

 #define USE_AS_STRNCPY

-#define STRCPY strncpy_generic

+#define STRCPY strncpy

 #include "sse2-strcpy-slm.S"

diff --git a/libc/arch-x86/string/sse2-wcschr-atom.S b/libc/arch-x86/string/sse2-wcschr-atom.S
deleted file mode 100644
index 729302b..0000000
--- a/libc/arch-x86/string/sse2-wcschr-atom.S
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name,	.-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#define PARMS	4
-
-
-#define STR1  PARMS
-#define STR2  STR1+4
-
-	.text
-ENTRY (wcschr)
-
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	mov	%ecx, %eax
-	punpckldq %xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	punpckldq %xmm1, %xmm1
-
-	and	$63, %eax
-	cmp	$48, %eax
-	ja	L(cross_cache)
-
-	movdqu	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	and	$-16, %ecx
-	jmp	L(loop)
-
-	.p2align 4
-L(cross_cache):
-	PUSH	(%edi)
-	mov	%ecx, %edi
-	mov	%eax, %ecx
-	and	$-16, %edi
-	and	$15, %ecx
-	movdqa	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-
-	sarl	%cl, %edx
-	sarl	%cl, %eax
-	test	%eax, %eax
-	jz	L(unaligned_no_match)
-
-	add	%edi, %ecx
-	POP	(%edi)
-
-	test	%edx, %edx
-	jz	L(match_case1)
-	test	%al, %al
-	jz	L(match_higth_case2)
-	test	$15, %al
-	jnz	L(match_case2_4)
-	test	$15, %dl
-	jnz	L(return_null)
-	lea	4(%ecx), %eax
-	ret
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(unaligned_no_match):
-	mov	%edi, %ecx
-	POP	(%edi)
-
-	test	%edx, %edx
-	jnz	L(return_null)
-
-	pxor	%xmm2, %xmm2
-
-/* Loop start on aligned string.  */
-	.p2align 4
-L(loop):
-	add	$16, %ecx
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	add	$16, %ecx
-
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	add	$16, %ecx
-
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jnz	L(matches)
-	add	$16, %ecx
-
-	movdqa	(%ecx), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %edx
-	pmovmskb %xmm0, %eax
-	or	%eax, %edx
-	jz	L(loop)
-
-	.p2align 4
-L(matches):
-	pmovmskb %xmm2, %edx
-	test	%eax, %eax
-	jz	L(return_null)
-	test	%edx, %edx
-	jz	L(match_case1)
-
-	.p2align 4
-L(match_case2):
-	test	%al, %al
-	jz	L(match_higth_case2)
-	test	$15, %al
-	jnz	L(match_case2_4)
-	test	$15, %dl
-	jnz	L(return_null)
-	lea	4(%ecx), %eax
-	ret
-
-	.p2align 4
-L(match_case2_4):
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(match_higth_case2):
-	test	%dl, %dl
-	jnz	L(return_null)
-	test	$15, %ah
-	jnz	L(match_case2_12)
-	test	$15, %dh
-	jnz	L(return_null)
-	lea	12(%ecx), %eax
-	ret
-
-	.p2align 4
-L(match_case2_12):
-	lea	8(%ecx), %eax
-	ret
-
-	.p2align 4
-L(match_case1):
-	test	%al, %al
-	jz	L(match_higth_case1)
-
-	test	$0x01, %al
-	jnz	L(exit0)
-	lea	4(%ecx), %eax
-	ret
-
-	.p2align 4
-L(match_higth_case1):
-	test	$0x01, %ah
-	jnz	L(exit3)
-	lea	12(%ecx), %eax
-	ret
-
-	.p2align 4
-L(exit0):
-	mov	%ecx, %eax
-	ret
-
-	.p2align 4
-L(exit3):
-	lea	8(%ecx), %eax
-	ret
-
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	ret
-
-END (wcschr)
diff --git a/libc/arch-x86/string/sse2-wcscmp-atom.S b/libc/arch-x86/string/sse2-wcscmp-atom.S
deleted file mode 100644
index 8867d28..0000000
--- a/libc/arch-x86/string/sse2-wcscmp-atom.S
+++ /dev/null
@@ -1,1062 +0,0 @@
-/*
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name, @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-
-#define ENTRANCE PUSH(%esi); PUSH(%edi)
-#define RETURN  POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
-#define PARMS  4
-#define STR1  PARMS
-#define STR2  STR1+4
-
-	.text
-ENTRY (wcscmp)
-/*
-	* This implementation uses SSE to compare up to 16 bytes at a time.
-*/
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %eax
-
-	mov	(%eax), %ecx
-	cmp	%ecx, (%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	mov	4(%eax), %ecx
-	cmp	%ecx, 4(%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	mov	8(%eax), %ecx
-	cmp	%ecx, 8(%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	mov	12(%eax), %ecx
-	cmp	%ecx, 12(%edx)
-	jne	L(neq)
-	test	%ecx, %ecx
-	jz	L(eq)
-
-	ENTRANCE
-	add	$16, %eax
-	add	$16, %edx
-
-	mov	%eax, %esi
-	mov	%edx, %edi
-	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
-	mov	%al, %ch
-	mov	%dl, %cl
-	and	$63, %eax		/* esi alignment in cache line */
-	and	$63, %edx		/* edi alignment in cache line */
-	and	$15, %cl
-	jz	L(continue_00)
-	cmp	$16, %edx
-	jb	L(continue_0)
-	cmp	$32, %edx
-	jb	L(continue_16)
-	cmp	$48, %edx
-	jb	L(continue_32)
-
-L(continue_48):
-	and	$15, %ch
-	jz	L(continue_48_00)
-	cmp	$16, %eax
-	jb	L(continue_0_48)
-	cmp	$32, %eax
-	jb	L(continue_16_48)
-	cmp	$48, %eax
-	jb	L(continue_32_48)
-
-	.p2align 4
-L(continue_48_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%edi), %xmm1
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_48_48)
-
-L(continue_0):
-	and	$15, %ch
-	jz	L(continue_0_00)
-	cmp	$16, %eax
-	jb	L(continue_0_0)
-	cmp	$32, %eax
-	jb	L(continue_0_16)
-	cmp	$48, %eax
-	jb	L(continue_0_32)
-
-	.p2align 4
-L(continue_0_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	mov	48(%esi), %ecx
-	cmp	%ecx, 48(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	52(%esi), %ecx
-	cmp	%ecx, 52(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	56(%esi), %ecx
-	cmp	%ecx, 56(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	60(%esi), %ecx
-	cmp	%ecx, 60(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_0_48)
-
-	.p2align 4
-L(continue_00):
-	and	$15, %ch
-	jz	L(continue_00_00)
-	cmp	$16, %eax
-	jb	L(continue_00_0)
-	cmp	$32, %eax
-	jb	L(continue_00_16)
-	cmp	$48, %eax
-	jb	L(continue_00_32)
-
-	.p2align 4
-L(continue_00_48):
-	pcmpeqd	(%edi), %xmm0
-	mov	(%edi), %eax
-	pmovmskb %xmm0, %ecx
-	test	%ecx, %ecx
-	jnz	L(less4_double_words1)
-
-	cmp	(%esi), %eax
-	jne	L(nequal)
-
-	mov	4(%edi), %eax
-	cmp	4(%esi), %eax
-	jne	L(nequal)
-
-	mov	8(%edi), %eax
-	cmp	8(%esi), %eax
-	jne	L(nequal)
-
-	mov	12(%edi), %eax
-	cmp	12(%esi), %eax
-	jne	L(nequal)
-
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	48(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_32):
-	and	$15, %ch
-	jz	L(continue_32_00)
-	cmp	$16, %eax
-	jb	L(continue_0_32)
-	cmp	$32, %eax
-	jb	L(continue_16_32)
-	cmp	$48, %eax
-	jb	L(continue_32_32)
-
-	.p2align 4
-L(continue_32_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	16(%esi), %ecx
-	cmp	%ecx, 16(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	20(%esi), %ecx
-	cmp	%ecx, 20(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	24(%esi), %ecx
-	cmp	%ecx, 24(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	28(%esi), %ecx
-	cmp	%ecx, 28(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%edi), %xmm1
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_32_48)
-
-	.p2align 4
-L(continue_16):
-	and	$15, %ch
-	jz	L(continue_16_00)
-	cmp	$16, %eax
-	jb	L(continue_0_16)
-	cmp	$32, %eax
-	jb	L(continue_16_16)
-	cmp	$48, %eax
-	jb	L(continue_16_32)
-
-	.p2align 4
-L(continue_16_48):
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	mov	32(%esi), %ecx
-	cmp	%ecx, 32(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	36(%esi), %ecx
-	cmp	%ecx, 36(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	40(%esi), %ecx
-	cmp	%ecx, 40(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	44(%esi), %ecx
-	cmp	%ecx, 44(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	movdqu	48(%edi), %xmm1
-	movdqu	48(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_16_48)
-
-	.p2align 4
-L(continue_00_00):
-	movdqa	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqa	16(%edi), %xmm3
-	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm3		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
-	pmovmskb %xmm3, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqa	32(%edi), %xmm5
-	pcmpeqd	%xmm5, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%esi), %xmm5		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm5		/* packed sub of comparison results*/
-	pmovmskb %xmm5, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqa	48(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	48(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_00_00)
-
-	.p2align 4
-L(continue_00_32):
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_00_16):
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_00_0):
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm2, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%edi), %xmm2		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
-	pmovmskb %xmm2, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	add	$48, %esi
-	add	$48, %edi
-	jmp	L(continue_00_48)
-
-	.p2align 4
-L(continue_48_00):
-	pcmpeqd	(%esi), %xmm0
-	mov	(%edi), %eax
-	pmovmskb %xmm0, %ecx
-	test	%ecx, %ecx
-	jnz	L(less4_double_words1)
-
-	cmp	(%esi), %eax
-	jne	L(nequal)
-
-	mov	4(%edi), %eax
-	cmp	4(%esi), %eax
-	jne	L(nequal)
-
-	mov	8(%edi), %eax
-	cmp	8(%esi), %eax
-	jne	L(nequal)
-
-	mov	12(%edi), %eax
-	cmp	12(%esi), %eax
-	jne	L(nequal)
-
-	movdqu	16(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	movdqu	48(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	48(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_48)
-
-	add	$64, %esi
-	add	$64, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_32_00):
-	movdqu	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_16_00):
-	movdqu	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_0_00):
-	movdqu	(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	16(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	32(%esi), %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	add	$48, %esi
-	add	$48, %edi
-	jmp	L(continue_48_00)
-
-	.p2align 4
-L(continue_32_32):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_48_48)
-
-	.p2align 4
-L(continue_16_16):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm3
-	movdqu	16(%esi), %xmm4
-	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm4, %xmm3		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
-	pmovmskb %xmm3, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_48_48)
-
-	.p2align 4
-L(continue_0_0):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm3
-	movdqu	16(%esi), %xmm4
-	pcmpeqd	%xmm3, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm4, %xmm3		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm3		/* packed sub of comparison results*/
-	pmovmskb %xmm3, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	movdqu	32(%edi), %xmm1
-	movdqu	32(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_32)
-
-	add	$48, %esi
-	add	$48, %edi
-	jmp	L(continue_48_48)
-
-	.p2align 4
-L(continue_0_16):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	movdqu	16(%edi), %xmm1
-	movdqu	16(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words_16)
-
-	add	$32, %esi
-	add	$32, %edi
-	jmp	L(continue_32_48)
-
-	.p2align 4
-L(continue_0_32):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_16_48)
-
-	.p2align 4
-L(continue_16_32):
-	movdqu	(%edi), %xmm1
-	movdqu	(%esi), %xmm2
-	pcmpeqd	%xmm1, %xmm0		/* Any null double_word? */
-	pcmpeqd	%xmm2, %xmm1		/* compare first 4 double_words for equality */
-	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
-	pmovmskb %xmm1, %edx
-	sub	$0xffff, %edx		/* if first 4 double_words are same, edx == 0xffff */
-	jnz	L(less4_double_words)
-
-	add	$16, %esi
-	add	$16, %edi
-	jmp	L(continue_32_48)
-
-	.p2align 4
-L(less4_double_words1):
-	cmp	(%esi), %eax
-	jne	L(nequal)
-	test	%eax, %eax
-	jz	L(equal)
-
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	test	%ecx, %ecx
-	jz	L(equal)
-
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(less4_double_words):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words)
-	and	$15, %dl
-	jz	L(second_double_word)
-	mov	(%esi), %ecx
-	cmp	%ecx, (%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word):
-	mov	4(%esi), %ecx
-	cmp	%ecx, 4(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words):
-	and	$15, %dh
-	jz	L(fourth_double_word)
-	mov	8(%esi), %ecx
-	cmp	%ecx, 8(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word):
-	mov	12(%esi), %ecx
-	cmp	%ecx, 12(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(less4_double_words_16):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words_16)
-	and	$15, %dl
-	jz	L(second_double_word_16)
-	mov	16(%esi), %ecx
-	cmp	%ecx, 16(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word_16):
-	mov	20(%esi), %ecx
-	cmp	%ecx, 20(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words_16):
-	and	$15, %dh
-	jz	L(fourth_double_word_16)
-	mov	24(%esi), %ecx
-	cmp	%ecx, 24(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word_16):
-	mov	28(%esi), %ecx
-	cmp	%ecx, 28(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(less4_double_words_32):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words_32)
-	and	$15, %dl
-	jz	L(second_double_word_32)
-	mov	32(%esi), %ecx
-	cmp	%ecx, 32(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word_32):
-	mov	36(%esi), %ecx
-	cmp	%ecx, 36(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words_32):
-	and	$15, %dh
-	jz	L(fourth_double_word_32)
-	mov	40(%esi), %ecx
-	cmp	%ecx, 40(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word_32):
-	mov	44(%esi), %ecx
-	cmp	%ecx, 44(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(less4_double_words_48):
-	xor	%eax, %eax
-	test	%dl, %dl
-	jz	L(next_two_double_words_48)
-	and	$15, %dl
-	jz	L(second_double_word_48)
-	mov	48(%esi), %ecx
-	cmp	%ecx, 48(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(second_double_word_48):
-	mov	52(%esi), %ecx
-	cmp	%ecx, 52(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(next_two_double_words_48):
-	and	$15, %dh
-	jz	L(fourth_double_word_48)
-	mov	56(%esi), %ecx
-	cmp	%ecx, 56(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(fourth_double_word_48):
-	mov	60(%esi), %ecx
-	cmp	%ecx, 60(%edi)
-	jne	L(nequal)
-	RETURN
-
-	.p2align 4
-L(nequal):
-	mov	$1, %eax
-	jg	L(return)
-	neg	%eax
-	RETURN
-
-	.p2align 4
-L(return):
-	RETURN
-
-	.p2align 4
-L(equal):
-	xorl	%eax, %eax
-	RETURN
-
-	CFI_POP (%edi)
-	CFI_POP (%esi)
-
-	.p2align 4
-L(neq):
-	mov	$1, %eax
-	jg	L(neq_bigger)
-	neg	%eax
-
-L(neq_bigger):
-	ret
-
-	.p2align 4
-L(eq):
-	xorl	%eax, %eax
-	ret
-
-END (wcscmp)
-
diff --git a/libc/arch-x86/string/sse2-wcslen-atom.S b/libc/arch-x86/string/sse2-wcslen-atom.S
deleted file mode 100644
index 2f10db4..0000000
--- a/libc/arch-x86/string/sse2-wcslen-atom.S
+++ /dev/null
@@ -1,306 +0,0 @@
-/*
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef USE_AS_WCSCAT
-
-# ifndef L
-#  define L(label)	.L##label
-# endif
-
-# ifndef cfi_startproc
-#  define cfi_startproc	.cfi_startproc
-# endif
-
-# ifndef cfi_endproc
-#  define cfi_endproc	.cfi_endproc
-# endif
-
-# ifndef ENTRY
-#  define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-# endif
-
-# ifndef END
-#  define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-# endif
-
-# define PARMS	4
-# define STR	PARMS
-# define RETURN ret
-
-	.text
-ENTRY (wcslen)
-	mov	STR(%esp), %edx
-#endif
-	cmpl	$0, (%edx)
-	jz	L(exit_tail0)
-	cmpl	$0, 4(%edx)
-	jz	L(exit_tail1)
-	cmpl	$0, 8(%edx)
-	jz	L(exit_tail2)
-	cmpl	$0, 12(%edx)
-	jz	L(exit_tail3)
-	cmpl	$0, 16(%edx)
-	jz	L(exit_tail4)
-	cmpl	$0, 20(%edx)
-	jz	L(exit_tail5)
-	cmpl	$0, 24(%edx)
-	jz	L(exit_tail6)
-	cmpl	$0, 28(%edx)
-	jz	L(exit_tail7)
-
-	pxor	%xmm0, %xmm0
-
-	lea	32(%edx), %eax
-	lea	-16(%eax), %ecx
-	and	$-16, %eax
-
-	pcmpeqd	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	pxor	%xmm1, %xmm1
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	pxor	%xmm2, %xmm2
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	pxor	%xmm3, %xmm3
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm0
-	pmovmskb %xmm0, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm1
-	pmovmskb %xmm1, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm2
-	pmovmskb %xmm2, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	16(%eax), %eax
-	test	%edx, %edx
-	jnz	L(exit)
-
-	and	$-0x40, %eax
-
-	.p2align 4
-L(aligned_64_loop):
-	movaps	(%eax), %xmm0
-	movaps	16(%eax), %xmm1
-	movaps	32(%eax), %xmm2
-	movaps	48(%eax), %xmm6
-
-	pminub	%xmm1, %xmm0
-	pminub	%xmm6, %xmm2
-	pminub	%xmm0, %xmm2
-	pcmpeqd	%xmm3, %xmm2
-	pmovmskb %xmm2, %edx
-	lea	64(%eax), %eax
-	test	%edx, %edx
-	jz	L(aligned_64_loop)
-
-	pcmpeqd	-64(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	48(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	%xmm1, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	-32(%eax), %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	pcmpeqd	%xmm6, %xmm3
-	pmovmskb %xmm3, %edx
-	lea	-16(%ecx), %ecx
-	test	%edx, %edx
-	jnz	L(exit)
-
-	jmp	L(aligned_64_loop)
-
-	.p2align 4
-L(exit):
-	sub	%ecx, %eax
-	shr	$2, %eax
-	test	%dl, %dl
-	jz	L(exit_high)
-
-	mov	%dl, %cl
-	and	$15, %cl
-	jz	L(exit_1)
-	RETURN
-
-	.p2align 4
-L(exit_high):
-	mov	%dh, %ch
-	and	$15, %ch
-	jz	L(exit_3)
-	add	$2, %eax
-	RETURN
-
-	.p2align 4
-L(exit_1):
-	add	$1, %eax
-	RETURN
-
-	.p2align 4
-L(exit_3):
-	add	$3, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail0):
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail1):
-	mov	$1, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail2):
-	mov	$2, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail3):
-	mov	$3, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail4):
-	mov	$4, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail5):
-	mov	$5, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail6):
-	mov	$6, %eax
-	RETURN
-
-	.p2align 4
-L(exit_tail7):
-	mov	$7, %eax
-#ifndef USE_AS_WCSCAT
-	RETURN
-
-END (wcslen)
-#endif
diff --git a/libc/arch-x86/string/sse2-wcsrchr-atom.S b/libc/arch-x86/string/sse2-wcsrchr-atom.S
deleted file mode 100644
index 1a55df2..0000000
--- a/libc/arch-x86/string/sse2-wcsrchr-atom.S
+++ /dev/null
@@ -1,402 +0,0 @@
-/*
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name, @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
-#define POP(REG)	popl REG;	CFI_POP (REG)
-
-#define PARMS  8
-#define ENTRANCE PUSH(%edi);
-#define RETURN  POP(%edi);	ret;	CFI_PUSH(%edi);
-
-#define STR1  PARMS
-#define STR2  STR1+4
-
-	.text
-ENTRY (wcsrchr)
-
-	ENTRANCE
-	mov	STR1(%esp), %ecx
-	movd	STR2(%esp), %xmm1
-
-	mov	%ecx, %edi
-	punpckldq %xmm1, %xmm1
-	pxor	%xmm2, %xmm2
-	punpckldq %xmm1, %xmm1
-
-/* ECX has OFFSET. */
-	and	$63, %ecx
-	cmp	$48, %ecx
-	ja	L(crosscache)
-
-/* unaligned string. */
-	movdqu	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	pcmpeqd	%xmm1, %xmm0
-/* Find where NULL is.  */
-	pmovmskb %xmm2, %ecx
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-	add	$16, %edi
-
-	test	%eax, %eax
-	jnz	L(unaligned_match1)
-
-	test	%ecx, %ecx
-	jnz	L(return_null)
-
-	and	$-16, %edi
-
-	PUSH	(%esi)
-
-	xor	%edx, %edx
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-
-	.p2align 4
-L(unaligned_match1):
-	test	%ecx, %ecx
-	jnz	L(prolog_find_zero_1)
-
-	PUSH	(%esi)
-
-/* Save current match */
-	mov	%eax, %edx
-	mov	%edi, %esi
-	and	$-16, %edi
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-
-	.p2align 4
-L(crosscache):
-/* Hancle unaligned string.  */
-	and	$15, %ecx
-	and	$-16, %edi
-	pxor	%xmm3, %xmm3
-	movdqa	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm3
-	pcmpeqd	%xmm1, %xmm0
-/* Find where NULL is.  */
-	pmovmskb %xmm3, %edx
-/* Check if there is a match.  */
-	pmovmskb %xmm0, %eax
-/* Remove the leading bytes.  */
-	shr	%cl, %edx
-	shr	%cl, %eax
-	add	$16, %edi
-
-	test	%eax, %eax
-	jnz	L(unaligned_match)
-
-	test	%edx, %edx
-	jnz	L(return_null)
-
-	PUSH	(%esi)
-
-	xor	%edx, %edx
-	jmp	L(loop)
-
-	CFI_POP	(%esi)
-
-	.p2align 4
-L(unaligned_match):
-	test	%edx, %edx
-	jnz	L(prolog_find_zero)
-
-	PUSH	(%esi)
-
-	mov	%eax, %edx
-	lea	(%edi, %ecx), %esi
-
-/* Loop start on aligned string.  */
-	.p2align 4
-L(loop):
-	movdqa	(%edi), %xmm0
-	pcmpeqd	%xmm0, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm0, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm3
-	pcmpeqd	%xmm3, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm3
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm3, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm4
-	pcmpeqd	%xmm4, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm4
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm4, %eax
-	or	%eax, %ecx
-	jnz	L(matches)
-
-	movdqa	(%edi), %xmm5
-	pcmpeqd	%xmm5, %xmm2
-	add	$16, %edi
-	pcmpeqd	%xmm1, %xmm5
-	pmovmskb %xmm2, %ecx
-	pmovmskb %xmm5, %eax
-	or	%eax, %ecx
-	jz	L(loop)
-
-	.p2align 4
-L(matches):
-	test	%eax, %eax
-	jnz	L(match)
-L(return_value):
-	test	%edx, %edx
-	jz	L(return_null_1)
-	mov	%edx, %eax
-	mov	%esi, %edi
-
-	POP	(%esi)
-
-	test	%ah, %ah
-	jnz	L(match_third_or_fourth_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(return_null_1):
-	POP	(%esi)
-
-	xor	%eax, %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(match):
-	pmovmskb %xmm2, %ecx
-	test	%ecx, %ecx
-	jnz	L(find_zero)
-/* save match info */
-	mov	%eax, %edx
-	mov	%edi, %esi
-	jmp	L(loop)
-
-	.p2align 4
-L(find_zero):
-	test	%cl, %cl
-	jz	L(find_zero_in_third_or_fourth_wchar)
-	test	$15, %cl
-	jz	L(find_zero_in_second_wchar)
-	and	$1, %eax
-	jz	L(return_value)
-
-	POP	(%esi)
-
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_in_second_wchar):
-	and	$(1 << 5) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%esi)
-
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_in_third_or_fourth_wchar):
-	test	$15, %ch
-	jz	L(find_zero_in_fourth_wchar)
-	and	$(1 << 9) - 1, %eax
-	jz	L(return_value)
-
-	POP	(%esi)
-
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(find_zero_in_fourth_wchar):
-
-	POP	(%esi)
-
-	test	%ah, %ah
-	jnz	L(match_third_or_fourth_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(match_second_wchar):
-	lea	-12(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_third_or_fourth_wchar):
-	test	$15 << 4, %ah
-	jnz	L(match_fourth_wchar)
-	lea	-8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_third_wchar):
-	lea	-8(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(match_fourth_wchar):
-	lea	-4(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(return_null):
-	xor	%eax, %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero):
-	add	%ecx, %edi
-	mov     %edx, %ecx
-L(prolog_find_zero_1):
-	test	%cl, %cl
-	jz	L(prolog_find_zero_in_third_or_fourth_wchar)
-	test	$15, %cl
-	jz	L(prolog_find_zero_in_second_wchar)
-	and	$1, %eax
-	jz	L(return_null)
-
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_in_second_wchar):
-	and	$(1 << 5) - 1, %eax
-	jz	L(return_null)
-
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_in_third_or_fourth_wchar):
-	test	$15, %ch
-	jz	L(prolog_find_zero_in_fourth_wchar)
-	and	$(1 << 9) - 1, %eax
-	jz	L(return_null)
-
-	test	%ah, %ah
-	jnz	L(match_third_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-	.p2align 4
-L(prolog_find_zero_in_fourth_wchar):
-	test	%ah, %ah
-	jnz	L(match_third_or_fourth_wchar)
-	test	$15 << 4, %al
-	jnz	L(match_second_wchar)
-	lea	-16(%edi), %eax
-	RETURN
-
-END (wcsrchr)
diff --git a/libc/arch-x86/string/sse4-memcmp-slm.S b/libc/arch-x86/string/sse4-memcmp-slm.S
old mode 100755
new mode 100644
diff --git a/libc/arch-x86/string/sse4-wmemcmp-slm.S b/libc/arch-x86/string/sse4-wmemcmp-slm.S
deleted file mode 100755
index 2bf92f5..0000000
--- a/libc/arch-x86/string/sse4-wmemcmp-slm.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
-Copyright (c) 2014, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define USE_AS_WMEMCMP
-#define MEMCMP wmemcmp_sse4
-#include "sse4-memcmp-slm.S"
diff --git a/libc/arch-x86/string/ssse3-memcpy-atom.S b/libc/arch-x86/string/ssse3-memcpy-atom.S
deleted file mode 100644
index 83e1985..0000000
--- a/libc/arch-x86/string/ssse3-memcpy-atom.S
+++ /dev/null
@@ -1,3124 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define FOR_ATOM
-
-#ifndef MEMCPY
-# define MEMCPY	memcpy_atom
-#endif
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)		\
-	.type name,  @function;		\
-	.globl name;		\
-	.p2align 4;		\
-name:		\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)		\
-	cfi_endproc;		\
-	.size name, .-name
-#endif
-
-#define DEST		PARMS
-#define SRC		DEST+4
-#define LEN		SRC+4
-
-#define CFI_PUSH(REG)		\
-  cfi_adjust_cfa_offset (4);		\
-  cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)		\
-  cfi_adjust_cfa_offset (-4);		\
-  cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-#define POP(REG)	popl REG; CFI_POP (REG)
-
-#if (defined SHARED || defined __PIC__)
-# define PARMS		8		/* Preserve EBX.  */
-# define ENTRANCE	PUSH (%ebx);
-# define RETURN_END	POP (%ebx); ret
-# define RETURN		RETURN_END; CFI_PUSH (%ebx)
-# define JMPTBL(I, B)	I - B
-
-# define SETUP_PIC_REG(x)	call	__x86.get_pc_thunk.x
-
-/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
-	jump table with relative offsets.  INDEX is a register contains the
-	index into the jump table.   SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
-    /* We first load PC into EBX.  */		\
-	SETUP_PIC_REG(bx);		\
-    /* Get the address of the jump table.  */		\
-	addl	$(TABLE - .), %ebx;		\
-    /* Get the entry and convert the relative offset to the		\
-	absolute	address.  */		\
-	addl	(%ebx, INDEX, SCALE), %ebx;		\
-    /* We loaded the jump table.  Go.  */		\
-	jmp	*%ebx
-#else
-
-# define PARMS		4
-# define ENTRANCE
-# define RETURN_END	ret
-# define RETURN		RETURN_END
-# define JMPTBL(I, B)	I
-
-/* Branch to an entry in a jump table.  TABLE is a jump table with
-	absolute offsets.  INDEX is a register contains the index into the
-	jump table.  SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
-	jmp	*TABLE(, INDEX, SCALE)
-#endif
-
-	.section .text.ssse3,"ax",@progbits
-ENTRY (MEMCPY)
-	ENTRANCE
-	movl	LEN(%esp), %ecx
-	movl	SRC(%esp), %eax
-	movl	DEST(%esp), %edx
-
-#ifdef USE_AS_MEMMOVE
-	cmp	%eax, %edx
-	jb	L(copy_forward)
-	je	L(fwd_write_0bytes)
-	cmp	$32, %ecx
-	jae	L(memmove_bwd)
-	jmp	L(bk_write_less32bytes_2)
-
-	.p2align 4
-L(memmove_bwd):
-	add	%ecx, %eax
-	cmp	%eax, %edx
-	movl	SRC(%esp), %eax
-	jb	L(copy_backward)
-
-L(copy_forward):
-#endif
-	cmp	$48, %ecx
-	jae	L(48bytesormore)
-
-L(fwd_write_less32bytes):
-#ifndef USE_AS_MEMMOVE
-	cmp	%dl, %al
-	jb	L(bk_write)
-#endif
-	add	%ecx, %edx
-	add	%ecx, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-#ifndef USE_AS_MEMMOVE
-	.p2align 4
-L(bk_write):
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-#endif
-
-	.p2align 4
-L(48bytesormore):
-#ifndef USE_AS_MEMMOVE
-	movlpd	(%eax), %xmm0
-	movlpd	8(%eax), %xmm1
-	movlpd	%xmm0, (%edx)
-	movlpd	%xmm1, 8(%edx)
-#else
-	movdqu	(%eax), %xmm0
-#endif
-	PUSH (%edi)
-	movl	%edx, %edi
-	and	$-16, %edx
-	add	$16, %edx
-	sub	%edx, %edi
-	add	%edi, %ecx
-	sub	%edi, %eax
-
-#ifdef SHARED_CACHE_SIZE_HALF
-	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_shared_cache_size_half, %ecx
-# endif
-#endif
-
-	mov	%eax, %edi
-	jae	L(large_page)
-	and	$0xf, %edi
-	jz	L(shl_0)
-	BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
-
-	.p2align 4
-L(shl_0):
-#ifdef USE_AS_MEMMOVE
-	movl	DEST+4(%esp), %edi
-	movdqu	%xmm0, (%edi)
-#endif
-	xor	%edi, %edi
-	cmp	$127, %ecx
-	ja	L(shl_0_gobble)
-	lea	-32(%ecx), %ecx
-
-	.p2align 4
-L(shl_0_loop):
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-	jb	L(shl_0_end)
-
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-	jb	L(shl_0_end)
-
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-	jb	L(shl_0_end)
-
-	movdqa	(%eax, %edi), %xmm0
-	movdqa	16(%eax, %edi), %xmm1
-	sub	$32, %ecx
-	movdqa	%xmm0, (%edx, %edi)
-	movdqa	%xmm1, 16(%edx, %edi)
-	lea	32(%edi), %edi
-
-L(shl_0_end):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	add	%edi, %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_0_gobble):
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	POP	(%edi)
-	lea	-128(%ecx), %ecx
-	jae	L(shl_0_gobble_mem_loop)
-
-	.p2align 4
-L(shl_0_gobble_cache_loop):
-	movdqa	(%eax), %xmm0
-	movdqa	0x10(%eax), %xmm1
-	movdqa	0x20(%eax), %xmm2
-	movdqa	0x30(%eax), %xmm3
-	movdqa	0x40(%eax), %xmm4
-	movdqa	0x50(%eax), %xmm5
-	movdqa	0x60(%eax), %xmm6
-	movdqa	0x70(%eax), %xmm7
-	lea	0x80(%eax), %eax
-	sub	$128, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	movdqa	%xmm2, 0x20(%edx)
-	movdqa	%xmm3, 0x30(%edx)
-	movdqa	%xmm4, 0x40(%edx)
-	movdqa	%xmm5, 0x50(%edx)
-	movdqa	%xmm6, 0x60(%edx)
-	movdqa	%xmm7, 0x70(%edx)
-	lea	0x80(%edx), %edx
-
-	jae	L(shl_0_gobble_cache_loop)
-	cmp	$-0x40, %ecx
-	lea	0x80(%ecx), %ecx
-	jl	L(shl_0_cache_less_64bytes)
-
-	movdqa	(%eax), %xmm0
-	sub	$0x40, %ecx
-	movdqa	0x10(%eax), %xmm1
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	movdqa	0x20(%eax), %xmm0
-	movdqa	0x30(%eax), %xmm1
-	add	$0x40, %eax
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm1, 0x30(%edx)
-	add	$0x40, %edx
-
-L(shl_0_cache_less_64bytes):
-	cmp	$0x20, %ecx
-	jb	L(shl_0_cache_less_32bytes)
-	movdqa	(%eax), %xmm0
-	sub	$0x20, %ecx
-	movdqa	0x10(%eax), %xmm1
-	add	$0x20, %eax
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	add	$0x20, %edx
-
-L(shl_0_cache_less_32bytes):
-	cmp	$0x10, %ecx
-	jb	L(shl_0_cache_less_16bytes)
-	sub	$0x10, %ecx
-	movdqa	(%eax), %xmm0
-	add	$0x10, %eax
-	movdqa	%xmm0, (%edx)
-	add	$0x10, %edx
-
-L(shl_0_cache_less_16bytes):
-	add	%ecx, %edx
-	add	%ecx, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
-	.p2align 4
-L(shl_0_gobble_mem_loop):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x280(%eax)
-	prefetcht0 0x1c0(%edx)
-
-	movdqa	(%eax), %xmm0
-	movdqa	0x10(%eax), %xmm1
-	movdqa	0x20(%eax), %xmm2
-	movdqa	0x30(%eax), %xmm3
-	movdqa	0x40(%eax), %xmm4
-	movdqa	0x50(%eax), %xmm5
-	movdqa	0x60(%eax), %xmm6
-	movdqa	0x70(%eax), %xmm7
-	lea	0x80(%eax), %eax
-	sub	$0x80, %ecx
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	movdqa	%xmm2, 0x20(%edx)
-	movdqa	%xmm3, 0x30(%edx)
-	movdqa	%xmm4, 0x40(%edx)
-	movdqa	%xmm5, 0x50(%edx)
-	movdqa	%xmm6, 0x60(%edx)
-	movdqa	%xmm7, 0x70(%edx)
-	lea	0x80(%edx), %edx
-
-	jae	L(shl_0_gobble_mem_loop)
-	cmp	$-0x40, %ecx
-	lea	0x80(%ecx), %ecx
-	jl	L(shl_0_mem_less_64bytes)
-
-	movdqa	(%eax), %xmm0
-	sub	$0x40, %ecx
-	movdqa	0x10(%eax), %xmm1
-
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-
-	movdqa	0x20(%eax), %xmm0
-	movdqa	0x30(%eax), %xmm1
-	add	$0x40, %eax
-
-	movdqa	%xmm0, 0x20(%edx)
-	movdqa	%xmm1, 0x30(%edx)
-	add	$0x40, %edx
-
-L(shl_0_mem_less_64bytes):
-	cmp	$0x20, %ecx
-	jb	L(shl_0_mem_less_32bytes)
-	movdqa	(%eax), %xmm0
-	sub	$0x20, %ecx
-	movdqa	0x10(%eax), %xmm1
-	add	$0x20, %eax
-	movdqa	%xmm0, (%edx)
-	movdqa	%xmm1, 0x10(%edx)
-	add	$0x20, %edx
-
-L(shl_0_mem_less_32bytes):
-	cmp	$0x10, %ecx
-	jb	L(shl_0_mem_less_16bytes)
-	sub	$0x10, %ecx
-	movdqa	(%eax), %xmm0
-	add	$0x10, %eax
-	movdqa	%xmm0, (%edx)
-	add	$0x10, %edx
-
-L(shl_0_mem_less_16bytes):
-	add	%ecx, %edx
-	add	%ecx, %eax
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
-
-	.p2align 4
-L(shl_1):
-#ifndef USE_AS_MEMMOVE
-	movaps	-1(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-1(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_1_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl1LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	15(%eax), %xmm2
-	movaps	31(%eax), %xmm3
-	movaps	47(%eax), %xmm4
-	movaps	63(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$1, %xmm4, %xmm5
-	palignr	$1, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$1, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl1LoopStart)
-
-L(Shl1LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	15(%eax), %xmm2
-	movaps	31(%eax), %xmm3
-	palignr	$1, %xmm2, %xmm3
-	palignr	$1, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_1_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-1(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_1_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$1, %xmm2, %xmm3
-	palignr	$1, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_1_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$1, %xmm2, %xmm3
-	palignr	$1, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_1_no_prefetch_loop)
-
-L(sh_1_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	1(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_2):
-#ifndef USE_AS_MEMMOVE
-	movaps	-2(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-2(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_2_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl2LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	14(%eax), %xmm2
-	movaps	30(%eax), %xmm3
-	movaps	46(%eax), %xmm4
-	movaps	62(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$2, %xmm4, %xmm5
-	palignr	$2, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$2, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl2LoopStart)
-
-L(Shl2LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	14(%eax), %xmm2
-	movaps	30(%eax), %xmm3
-	palignr	$2, %xmm2, %xmm3
-	palignr	$2, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_2_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-2(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_2_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$2, %xmm2, %xmm3
-	palignr	$2, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_2_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$2, %xmm2, %xmm3
-	palignr	$2, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_2_no_prefetch_loop)
-
-L(sh_2_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	2(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_3):
-#ifndef USE_AS_MEMMOVE
-	movaps	-3(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-3(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_3_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl3LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	13(%eax), %xmm2
-	movaps	29(%eax), %xmm3
-	movaps	45(%eax), %xmm4
-	movaps	61(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$3, %xmm4, %xmm5
-	palignr	$3, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$3, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl3LoopStart)
-
-L(Shl3LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	13(%eax), %xmm2
-	movaps	29(%eax), %xmm3
-	palignr	$3, %xmm2, %xmm3
-	palignr	$3, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_3_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-3(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_3_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$3, %xmm2, %xmm3
-	palignr	$3, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(sh_3_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$3, %xmm2, %xmm3
-	palignr	$3, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(sh_3_no_prefetch_loop)
-
-L(sh_3_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	3(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_4):
-#ifndef USE_AS_MEMMOVE
-	movaps	-4(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-4(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_4_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl4LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	12(%eax), %xmm2
-	movaps	28(%eax), %xmm3
-	movaps	44(%eax), %xmm4
-	movaps	60(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$4, %xmm4, %xmm5
-	palignr	$4, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$4, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl4LoopStart)
-
-L(Shl4LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	12(%eax), %xmm2
-	movaps	28(%eax), %xmm3
-	palignr	$4, %xmm2, %xmm3
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_4_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-4(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_4_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$4, %xmm2, %xmm3
-	palignr	$4, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(sh_4_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$4, %xmm2, %xmm3
-	palignr	$4, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(sh_4_no_prefetch_loop)
-
-L(sh_4_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	4(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_5):
-#ifndef USE_AS_MEMMOVE
-	movaps	-5(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-5(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_5_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl5LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	11(%eax), %xmm2
-	movaps	27(%eax), %xmm3
-	movaps	43(%eax), %xmm4
-	movaps	59(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$5, %xmm4, %xmm5
-	palignr	$5, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$5, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl5LoopStart)
-
-L(Shl5LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	11(%eax), %xmm2
-	movaps	27(%eax), %xmm3
-	palignr	$5, %xmm2, %xmm3
-	palignr	$5, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_5_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-5(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_5_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$5, %xmm2, %xmm3
-	palignr	$5, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(sh_5_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$5, %xmm2, %xmm3
-	palignr	$5, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(sh_5_no_prefetch_loop)
-
-L(sh_5_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	5(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_6):
-#ifndef USE_AS_MEMMOVE
-	movaps	-6(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-6(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_6_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl6LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	10(%eax), %xmm2
-	movaps	26(%eax), %xmm3
-	movaps	42(%eax), %xmm4
-	movaps	58(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$6, %xmm4, %xmm5
-	palignr	$6, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$6, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl6LoopStart)
-
-L(Shl6LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	10(%eax), %xmm2
-	movaps	26(%eax), %xmm3
-	palignr	$6, %xmm2, %xmm3
-	palignr	$6, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_6_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-6(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_6_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$6, %xmm2, %xmm3
-	palignr	$6, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jb	L(sh_6_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$6, %xmm2, %xmm3
-	palignr	$6, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-
-	jae	L(sh_6_no_prefetch_loop)
-
-L(sh_6_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	6(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_7):
-#ifndef USE_AS_MEMMOVE
-	movaps	-7(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-7(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_7_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl7LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	9(%eax), %xmm2
-	movaps	25(%eax), %xmm3
-	movaps	41(%eax), %xmm4
-	movaps	57(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$7, %xmm4, %xmm5
-	palignr	$7, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$7, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl7LoopStart)
-
-L(Shl7LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	9(%eax), %xmm2
-	movaps	25(%eax), %xmm3
-	palignr	$7, %xmm2, %xmm3
-	palignr	$7, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_7_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-7(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_7_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$7, %xmm2, %xmm3
-	palignr	$7, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_7_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$7, %xmm2, %xmm3
-	palignr	$7, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_7_no_prefetch_loop)
-
-L(sh_7_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	7(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_8):
-#ifndef USE_AS_MEMMOVE
-	movaps	-8(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-8(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_8_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl8LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	8(%eax), %xmm2
-	movaps	24(%eax), %xmm3
-	movaps	40(%eax), %xmm4
-	movaps	56(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$8, %xmm4, %xmm5
-	palignr	$8, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$8, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl8LoopStart)
-
-L(LoopLeave8):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	8(%eax), %xmm2
-	movaps	24(%eax), %xmm3
-	palignr	$8, %xmm2, %xmm3
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_8_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-8(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_8_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$8, %xmm2, %xmm3
-	palignr	$8, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_8_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$8, %xmm2, %xmm3
-	palignr	$8, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_8_no_prefetch_loop)
-
-L(sh_8_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	8(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_9):
-#ifndef USE_AS_MEMMOVE
-	movaps	-9(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-9(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_9_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl9LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	7(%eax), %xmm2
-	movaps	23(%eax), %xmm3
-	movaps	39(%eax), %xmm4
-	movaps	55(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$9, %xmm4, %xmm5
-	palignr	$9, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$9, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$9, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl9LoopStart)
-
-L(Shl9LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	7(%eax), %xmm2
-	movaps	23(%eax), %xmm3
-	palignr	$9, %xmm2, %xmm3
-	palignr	$9, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_9_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-9(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_9_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$9, %xmm2, %xmm3
-	palignr	$9, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_9_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$9, %xmm2, %xmm3
-	palignr	$9, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_9_no_prefetch_loop)
-
-L(sh_9_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	9(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_10):
-#ifndef USE_AS_MEMMOVE
-	movaps	-10(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-10(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_10_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl10LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	6(%eax), %xmm2
-	movaps	22(%eax), %xmm3
-	movaps	38(%eax), %xmm4
-	movaps	54(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$10, %xmm4, %xmm5
-	palignr	$10, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$10, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$10, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl10LoopStart)
-
-L(Shl10LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	6(%eax), %xmm2
-	movaps	22(%eax), %xmm3
-	palignr	$10, %xmm2, %xmm3
-	palignr	$10, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_10_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-10(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_10_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$10, %xmm2, %xmm3
-	palignr	$10, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_10_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$10, %xmm2, %xmm3
-	palignr	$10, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_10_no_prefetch_loop)
-
-L(sh_10_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	10(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_11):
-#ifndef USE_AS_MEMMOVE
-	movaps	-11(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-11(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_11_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl11LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	5(%eax), %xmm2
-	movaps	21(%eax), %xmm3
-	movaps	37(%eax), %xmm4
-	movaps	53(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$11, %xmm4, %xmm5
-	palignr	$11, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$11, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$11, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl11LoopStart)
-
-L(Shl11LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	5(%eax), %xmm2
-	movaps	21(%eax), %xmm3
-	palignr	$11, %xmm2, %xmm3
-	palignr	$11, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_11_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-11(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_11_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$11, %xmm2, %xmm3
-	palignr	$11, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_11_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$11, %xmm2, %xmm3
-	palignr	$11, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_11_no_prefetch_loop)
-
-L(sh_11_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	11(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_12):
-#ifndef USE_AS_MEMMOVE
-	movaps	-12(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-12(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_12_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl12LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	4(%eax), %xmm2
-	movaps	20(%eax), %xmm3
-	movaps	36(%eax), %xmm4
-	movaps	52(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$12, %xmm4, %xmm5
-	palignr	$12, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$12, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl12LoopStart)
-
-L(Shl12LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	4(%eax), %xmm2
-	movaps	20(%eax), %xmm3
-	palignr	$12, %xmm2, %xmm3
-	palignr	$12, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_12_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-12(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_12_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$12, %xmm2, %xmm3
-	palignr	$12, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_12_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$12, %xmm2, %xmm3
-	palignr	$12, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_12_no_prefetch_loop)
-
-L(sh_12_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	12(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_13):
-#ifndef USE_AS_MEMMOVE
-	movaps	-13(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-13(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_13_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl13LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	3(%eax), %xmm2
-	movaps	19(%eax), %xmm3
-	movaps	35(%eax), %xmm4
-	movaps	51(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$13, %xmm4, %xmm5
-	palignr	$13, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$13, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$13, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl13LoopStart)
-
-L(Shl13LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	3(%eax), %xmm2
-	movaps	19(%eax), %xmm3
-	palignr	$13, %xmm2, %xmm3
-	palignr	$13, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_13_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-13(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_13_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$13, %xmm2, %xmm3
-	palignr	$13, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_13_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$13, %xmm2, %xmm3
-	palignr	$13, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_13_no_prefetch_loop)
-
-L(sh_13_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	13(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_14):
-#ifndef USE_AS_MEMMOVE
-	movaps	-14(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-14(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_14_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl14LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	2(%eax), %xmm2
-	movaps	18(%eax), %xmm3
-	movaps	34(%eax), %xmm4
-	movaps	50(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$14, %xmm4, %xmm5
-	palignr	$14, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$14, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$14, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl14LoopStart)
-
-L(Shl14LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	2(%eax), %xmm2
-	movaps	18(%eax), %xmm3
-	palignr	$14, %xmm2, %xmm3
-	palignr	$14, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_14_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-14(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_14_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$14, %xmm2, %xmm3
-	palignr	$14, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_14_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$14, %xmm2, %xmm3
-	palignr	$14, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_14_no_prefetch_loop)
-
-L(sh_14_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	14(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_15):
-#ifndef USE_AS_MEMMOVE
-	movaps	-15(%eax), %xmm1
-#else
-	movl	DEST+4(%esp), %edi
-	movaps	-15(%eax), %xmm1
-	movdqu	%xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
-	cmp	$DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
-	SETUP_PIC_REG(bx)
-	add	$_GLOBAL_OFFSET_TABLE_, %ebx
-	cmp	__x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
-	cmp	__x86_data_cache_size_half, %ecx
-# endif
-#endif
-	jb L(sh_15_no_prefetch)
-
-	lea	-64(%ecx), %ecx
-
-	.p2align 4
-L(Shl15LoopStart):
-	prefetcht0 0x1c0(%eax)
-	prefetcht0 0x1c0(%edx)
-	movaps	1(%eax), %xmm2
-	movaps	17(%eax), %xmm3
-	movaps	33(%eax), %xmm4
-	movaps	49(%eax), %xmm5
-	movaps	%xmm5, %xmm7
-	palignr	$15, %xmm4, %xmm5
-	palignr	$15, %xmm3, %xmm4
-	movaps	%xmm5, 48(%edx)
-	palignr	$15, %xmm2, %xmm3
-	lea	64(%eax), %eax
-	palignr	$15, %xmm1, %xmm2
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm7, %xmm1
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	sub	$64, %ecx
-	ja	L(Shl15LoopStart)
-
-L(Shl15LoopLeave):
-	add	$32, %ecx
-	jle	L(shl_end_0)
-
-	movaps	1(%eax), %xmm2
-	movaps	17(%eax), %xmm3
-	palignr	$15, %xmm2, %xmm3
-	palignr	$15, %xmm1, %xmm2
-
-	movaps	%xmm2, (%edx)
-	movaps	%xmm3, 16(%edx)
-	lea	32(%edx, %ecx), %edx
-	lea	32(%eax, %ecx), %eax
-	POP (%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(sh_15_no_prefetch):
-	lea	-32(%ecx), %ecx
-	lea	-15(%eax), %eax
-	xor	%edi, %edi
-
-	.p2align 4
-L(sh_15_no_prefetch_loop):
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm4
-	palignr	$15, %xmm2, %xmm3
-	palignr	$15, %xmm1, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jb	L(sh_15_end_no_prefetch_loop)
-
-	movdqa	16(%eax, %edi), %xmm2
-	sub	$32, %ecx
-	movdqa	32(%eax, %edi), %xmm3
-	movdqa	%xmm3, %xmm1
-	palignr	$15, %xmm2, %xmm3
-	palignr	$15, %xmm4, %xmm2
-	lea	32(%edi), %edi
-	movdqa	%xmm2, -32(%edx, %edi)
-	movdqa	%xmm3, -16(%edx, %edi)
-	jae	L(sh_15_no_prefetch_loop)
-
-L(sh_15_end_no_prefetch_loop):
-	lea	32(%ecx), %ecx
-	add	%ecx, %edi
-	add	%edi, %edx
-	lea	15(%edi, %eax), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(shl_end_0):
-	lea	32(%ecx), %ecx
-	lea	(%edx, %ecx), %edx
-	lea	(%eax, %ecx), %eax
-	POP	(%edi)
-	BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
-	.p2align 4
-L(fwd_write_44bytes):
-	movq	-44(%eax), %xmm0
-	movq	%xmm0, -44(%edx)
-L(fwd_write_36bytes):
-	movq	-36(%eax), %xmm0
-	movq	%xmm0, -36(%edx)
-L(fwd_write_28bytes):
-	movq	-28(%eax), %xmm0
-	movq	%xmm0, -28(%edx)
-L(fwd_write_20bytes):
-	movq	-20(%eax), %xmm0
-	movq	%xmm0, -20(%edx)
-L(fwd_write_12bytes):
-	movq	-12(%eax), %xmm0
-	movq	%xmm0, -12(%edx)
-L(fwd_write_4bytes):
-	movl	-4(%eax), %ecx
-	movl	%ecx, -4(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_40bytes):
-	movq	-40(%eax), %xmm0
-	movq	%xmm0, -40(%edx)
-L(fwd_write_32bytes):
-	movq	-32(%eax), %xmm0
-	movq	%xmm0, -32(%edx)
-L(fwd_write_24bytes):
-	movq	-24(%eax), %xmm0
-	movq	%xmm0, -24(%edx)
-L(fwd_write_16bytes):
-	movq	-16(%eax), %xmm0
-	movq	%xmm0, -16(%edx)
-L(fwd_write_8bytes):
-	movq	-8(%eax), %xmm0
-	movq	%xmm0, -8(%edx)
-L(fwd_write_0bytes):
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_5bytes):
-	movl	-5(%eax), %ecx
-	movl	-4(%eax), %eax
-	movl	%ecx, -5(%edx)
-	movl	%eax, -4(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_45bytes):
-	movq	-45(%eax), %xmm0
-	movq	%xmm0, -45(%edx)
-L(fwd_write_37bytes):
-	movq	-37(%eax), %xmm0
-	movq	%xmm0, -37(%edx)
-L(fwd_write_29bytes):
-	movq	-29(%eax), %xmm0
-	movq	%xmm0, -29(%edx)
-L(fwd_write_21bytes):
-	movq	-21(%eax), %xmm0
-	movq	%xmm0, -21(%edx)
-L(fwd_write_13bytes):
-	movq	-13(%eax), %xmm0
-	movq	%xmm0, -13(%edx)
-	movl	-5(%eax), %ecx
-	movl	%ecx, -5(%edx)
-	movzbl	-1(%eax), %ecx
-	movb	%cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_41bytes):
-	movq	-41(%eax), %xmm0
-	movq	%xmm0, -41(%edx)
-L(fwd_write_33bytes):
-	movq	-33(%eax), %xmm0
-	movq	%xmm0, -33(%edx)
-L(fwd_write_25bytes):
-	movq	-25(%eax), %xmm0
-	movq	%xmm0, -25(%edx)
-L(fwd_write_17bytes):
-	movq	-17(%eax), %xmm0
-	movq	%xmm0, -17(%edx)
-L(fwd_write_9bytes):
-	movq	-9(%eax), %xmm0
-	movq	%xmm0, -9(%edx)
-L(fwd_write_1bytes):
-	movzbl	-1(%eax), %ecx
-	movb	%cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_46bytes):
-	movq	-46(%eax), %xmm0
-	movq	%xmm0, -46(%edx)
-L(fwd_write_38bytes):
-	movq	-38(%eax), %xmm0
-	movq	%xmm0, -38(%edx)
-L(fwd_write_30bytes):
-	movq	-30(%eax), %xmm0
-	movq	%xmm0, -30(%edx)
-L(fwd_write_22bytes):
-	movq	-22(%eax), %xmm0
-	movq	%xmm0, -22(%edx)
-L(fwd_write_14bytes):
-	movq	-14(%eax), %xmm0
-	movq	%xmm0, -14(%edx)
-L(fwd_write_6bytes):
-	movl	-6(%eax), %ecx
-	movl	%ecx, -6(%edx)
-	movzwl	-2(%eax), %ecx
-	movw	%cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_42bytes):
-	movq	-42(%eax), %xmm0
-	movq	%xmm0, -42(%edx)
-L(fwd_write_34bytes):
-	movq	-34(%eax), %xmm0
-	movq	%xmm0, -34(%edx)
-L(fwd_write_26bytes):
-	movq	-26(%eax), %xmm0
-	movq	%xmm0, -26(%edx)
-L(fwd_write_18bytes):
-	movq	-18(%eax), %xmm0
-	movq	%xmm0, -18(%edx)
-L(fwd_write_10bytes):
-	movq	-10(%eax), %xmm0
-	movq	%xmm0, -10(%edx)
-L(fwd_write_2bytes):
-	movzwl	-2(%eax), %ecx
-	movw	%cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_47bytes):
-	movq	-47(%eax), %xmm0
-	movq	%xmm0, -47(%edx)
-L(fwd_write_39bytes):
-	movq	-39(%eax), %xmm0
-	movq	%xmm0, -39(%edx)
-L(fwd_write_31bytes):
-	movq	-31(%eax), %xmm0
-	movq	%xmm0, -31(%edx)
-L(fwd_write_23bytes):
-	movq	-23(%eax), %xmm0
-	movq	%xmm0, -23(%edx)
-L(fwd_write_15bytes):
-	movq	-15(%eax), %xmm0
-	movq	%xmm0, -15(%edx)
-L(fwd_write_7bytes):
-	movl	-7(%eax), %ecx
-	movl	%ecx, -7(%edx)
-	movzwl	-3(%eax), %ecx
-	movzbl	-1(%eax), %eax
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_43bytes):
-	movq	-43(%eax), %xmm0
-	movq	%xmm0, -43(%edx)
-L(fwd_write_35bytes):
-	movq	-35(%eax), %xmm0
-	movq	%xmm0, -35(%edx)
-L(fwd_write_27bytes):
-	movq	-27(%eax), %xmm0
-	movq	%xmm0, -27(%edx)
-L(fwd_write_19bytes):
-	movq	-19(%eax), %xmm0
-	movq	%xmm0, -19(%edx)
-L(fwd_write_11bytes):
-	movq	-11(%eax), %xmm0
-	movq	%xmm0, -11(%edx)
-L(fwd_write_3bytes):
-	movzwl	-3(%eax), %ecx
-	movzbl	-1(%eax), %eax
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_40bytes_align):
-	movdqa	-40(%eax), %xmm0
-	movdqa	%xmm0, -40(%edx)
-L(fwd_write_24bytes_align):
-	movdqa	-24(%eax), %xmm0
-	movdqa	%xmm0, -24(%edx)
-L(fwd_write_8bytes_align):
-	movq	-8(%eax), %xmm0
-	movq	%xmm0, -8(%edx)
-L(fwd_write_0bytes_align):
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_32bytes_align):
-	movdqa	-32(%eax), %xmm0
-	movdqa	%xmm0, -32(%edx)
-L(fwd_write_16bytes_align):
-	movdqa	-16(%eax), %xmm0
-	movdqa	%xmm0, -16(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_5bytes_align):
-	movl	-5(%eax), %ecx
-	movl	-4(%eax), %eax
-	movl	%ecx, -5(%edx)
-	movl	%eax, -4(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_45bytes_align):
-	movdqa	-45(%eax), %xmm0
-	movdqa	%xmm0, -45(%edx)
-L(fwd_write_29bytes_align):
-	movdqa	-29(%eax), %xmm0
-	movdqa	%xmm0, -29(%edx)
-L(fwd_write_13bytes_align):
-	movq	-13(%eax), %xmm0
-	movq	%xmm0, -13(%edx)
-	movl	-5(%eax), %ecx
-	movl	%ecx, -5(%edx)
-	movzbl	-1(%eax), %ecx
-	movb	%cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_37bytes_align):
-	movdqa	-37(%eax), %xmm0
-	movdqa	%xmm0, -37(%edx)
-L(fwd_write_21bytes_align):
-	movdqa	-21(%eax), %xmm0
-	movdqa	%xmm0, -21(%edx)
-	movl	-5(%eax), %ecx
-	movl	%ecx, -5(%edx)
-	movzbl	-1(%eax), %ecx
-	movb	%cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_41bytes_align):
-	movdqa	-41(%eax), %xmm0
-	movdqa	%xmm0, -41(%edx)
-L(fwd_write_25bytes_align):
-	movdqa	-25(%eax), %xmm0
-	movdqa	%xmm0, -25(%edx)
-L(fwd_write_9bytes_align):
-	movq	-9(%eax), %xmm0
-	movq	%xmm0, -9(%edx)
-L(fwd_write_1bytes_align):
-	movzbl	-1(%eax), %ecx
-	movb	%cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_33bytes_align):
-	movdqa	-33(%eax), %xmm0
-	movdqa	%xmm0, -33(%edx)
-L(fwd_write_17bytes_align):
-	movdqa	-17(%eax), %xmm0
-	movdqa	%xmm0, -17(%edx)
-	movzbl	-1(%eax), %ecx
-	movb	%cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_46bytes_align):
-	movdqa	-46(%eax), %xmm0
-	movdqa	%xmm0, -46(%edx)
-L(fwd_write_30bytes_align):
-	movdqa	-30(%eax), %xmm0
-	movdqa	%xmm0, -30(%edx)
-L(fwd_write_14bytes_align):
-	movq	-14(%eax), %xmm0
-	movq	%xmm0, -14(%edx)
-L(fwd_write_6bytes_align):
-	movl	-6(%eax), %ecx
-	movl	%ecx, -6(%edx)
-	movzwl	-2(%eax), %ecx
-	movw	%cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_38bytes_align):
-	movdqa	-38(%eax), %xmm0
-	movdqa	%xmm0, -38(%edx)
-L(fwd_write_22bytes_align):
-	movdqa	-22(%eax), %xmm0
-	movdqa	%xmm0, -22(%edx)
-	movl	-6(%eax), %ecx
-	movl	%ecx, -6(%edx)
-	movzwl	-2(%eax), %ecx
-	movw	%cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_42bytes_align):
-	movdqa	-42(%eax), %xmm0
-	movdqa	%xmm0, -42(%edx)
-L(fwd_write_26bytes_align):
-	movdqa	-26(%eax), %xmm0
-	movdqa	%xmm0, -26(%edx)
-L(fwd_write_10bytes_align):
-	movq	-10(%eax), %xmm0
-	movq	%xmm0, -10(%edx)
-L(fwd_write_2bytes_align):
-	movzwl	-2(%eax), %ecx
-	movw	%cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_34bytes_align):
-	movdqa	-34(%eax), %xmm0
-	movdqa	%xmm0, -34(%edx)
-L(fwd_write_18bytes_align):
-	movdqa	-18(%eax), %xmm0
-	movdqa	%xmm0, -18(%edx)
-	movzwl	-2(%eax), %ecx
-	movw	%cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_47bytes_align):
-	movdqa	-47(%eax), %xmm0
-	movdqa	%xmm0, -47(%edx)
-L(fwd_write_31bytes_align):
-	movdqa	-31(%eax), %xmm0
-	movdqa	%xmm0, -31(%edx)
-L(fwd_write_15bytes_align):
-	movq	-15(%eax), %xmm0
-	movq	%xmm0, -15(%edx)
-L(fwd_write_7bytes_align):
-	movl	-7(%eax), %ecx
-	movl	%ecx, -7(%edx)
-	movzwl	-3(%eax), %ecx
-	movzbl	-1(%eax), %eax
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_39bytes_align):
-	movdqa	-39(%eax), %xmm0
-	movdqa	%xmm0, -39(%edx)
-L(fwd_write_23bytes_align):
-	movdqa	-23(%eax), %xmm0
-	movdqa	%xmm0, -23(%edx)
-	movl	-7(%eax), %ecx
-	movl	%ecx, -7(%edx)
-	movzwl	-3(%eax), %ecx
-	movzbl	-1(%eax), %eax
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_43bytes_align):
-	movdqa	-43(%eax), %xmm0
-	movdqa	%xmm0, -43(%edx)
-L(fwd_write_27bytes_align):
-	movdqa	-27(%eax), %xmm0
-	movdqa	%xmm0, -27(%edx)
-L(fwd_write_11bytes_align):
-	movq	-11(%eax), %xmm0
-	movq	%xmm0, -11(%edx)
-L(fwd_write_3bytes_align):
-	movzwl	-3(%eax), %ecx
-	movzbl	-1(%eax), %eax
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_35bytes_align):
-	movdqa	-35(%eax), %xmm0
-	movdqa	%xmm0, -35(%edx)
-L(fwd_write_19bytes_align):
-	movdqa	-19(%eax), %xmm0
-	movdqa	%xmm0, -19(%edx)
-	movzwl	-3(%eax), %ecx
-	movzbl	-1(%eax), %eax
-	movw	%cx, -3(%edx)
-	movb	%al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_44bytes_align):
-	movdqa	-44(%eax), %xmm0
-	movdqa	%xmm0, -44(%edx)
-L(fwd_write_28bytes_align):
-	movdqa	-28(%eax), %xmm0
-	movdqa	%xmm0, -28(%edx)
-L(fwd_write_12bytes_align):
-	movq	-12(%eax), %xmm0
-	movq	%xmm0, -12(%edx)
-L(fwd_write_4bytes_align):
-	movl	-4(%eax), %ecx
-	movl	%ecx, -4(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(fwd_write_36bytes_align):
-	movdqa	-36(%eax), %xmm0
-	movdqa	%xmm0, -36(%edx)
-L(fwd_write_20bytes_align):
-	movdqa	-20(%eax), %xmm0
-	movdqa	%xmm0, -20(%edx)
-	movl	-4(%eax), %ecx
-	movl	%ecx, -4(%edx)
-#ifdef USE_AS_MEMPCPY
-	movl	%edx, %eax
-#else
-	movl	DEST(%esp), %eax
-#endif
-	RETURN_END
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(large_page):
-	movdqu	(%eax), %xmm1
-#ifdef USE_AS_MEMMOVE
-	movl	DEST+4(%esp), %edi
-	movdqu	%xmm0, (%edi)
-#endif
-	lea	16(%eax), %eax
-	movntdq	%xmm1, (%edx)
-	lea	16(%edx), %edx
-	lea	-0x90(%ecx), %ecx
-	POP (%edi)
-
-	.p2align 4
-L(large_page_loop):
-	movdqu	(%eax), %xmm0
-	movdqu	0x10(%eax), %xmm1
-	movdqu	0x20(%eax), %xmm2
-	movdqu	0x30(%eax), %xmm3
-	movdqu	0x40(%eax), %xmm4
-	movdqu	0x50(%eax), %xmm5
-	movdqu	0x60(%eax), %xmm6
-	movdqu	0x70(%eax), %xmm7
-	lea	0x80(%eax), %eax
-
-	sub	$0x80, %ecx
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm1, 0x10(%edx)
-	movntdq	%xmm2, 0x20(%edx)
-	movntdq	%xmm3, 0x30(%edx)
-	movntdq	%xmm4, 0x40(%edx)
-	movntdq	%xmm5, 0x50(%edx)
-	movntdq	%xmm6, 0x60(%edx)
-	movntdq	%xmm7, 0x70(%edx)
-	lea	0x80(%edx), %edx
-	jae	L(large_page_loop)
-	cmp	$-0x40, %ecx
-	lea	0x80(%ecx), %ecx
-	jl	L(large_page_less_64bytes)
-
-	movdqu	(%eax), %xmm0
-	movdqu	0x10(%eax), %xmm1
-	movdqu	0x20(%eax), %xmm2
-	movdqu	0x30(%eax), %xmm3
-	lea	0x40(%eax), %eax
-
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm1, 0x10(%edx)
-	movntdq	%xmm2, 0x20(%edx)
-	movntdq	%xmm3, 0x30(%edx)
-	lea	0x40(%edx), %edx
-	sub	$0x40, %ecx
-L(large_page_less_64bytes):
-	cmp	$32, %ecx
-	jb	L(large_page_less_32bytes)
-	movdqu	(%eax), %xmm0
-	movdqu	0x10(%eax), %xmm1
-	lea	0x20(%eax), %eax
-	movntdq	%xmm0, (%edx)
-	movntdq	%xmm1, 0x10(%edx)
-	lea	0x20(%edx), %edx
-	sub	$0x20, %ecx
-L(large_page_less_32bytes):
-	add	%ecx, %edx
-	add	%ecx, %eax
-	sfence
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
-	.p2align 4
-L(bk_write_44bytes):
-	movq	36(%eax), %xmm0
-	movq	%xmm0, 36(%edx)
-L(bk_write_36bytes):
-	movq	28(%eax), %xmm0
-	movq	%xmm0, 28(%edx)
-L(bk_write_28bytes):
-	movq	20(%eax), %xmm0
-	movq	%xmm0, 20(%edx)
-L(bk_write_20bytes):
-	movq	12(%eax), %xmm0
-	movq	%xmm0, 12(%edx)
-L(bk_write_12bytes):
-	movq	4(%eax), %xmm0
-	movq	%xmm0, 4(%edx)
-L(bk_write_4bytes):
-	movl	(%eax), %ecx
-	movl	%ecx, (%edx)
-L(bk_write_0bytes):
-	movl	DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(bk_write_40bytes):
-	movq	32(%eax), %xmm0
-	movq	%xmm0, 32(%edx)
-L(bk_write_32bytes):
-	movq	24(%eax), %xmm0
-	movq	%xmm0, 24(%edx)
-L(bk_write_24bytes):
-	movq	16(%eax), %xmm0
-	movq	%xmm0, 16(%edx)
-L(bk_write_16bytes):
-	movq	8(%eax), %xmm0
-	movq	%xmm0, 8(%edx)
-L(bk_write_8bytes):
-	movq	(%eax), %xmm0
-	movq	%xmm0, (%edx)
-	movl	DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(bk_write_45bytes):
-	movq	37(%eax), %xmm0
-	movq	%xmm0, 37(%edx)
-L(bk_write_37bytes):
-	movq	29(%eax), %xmm0
-	movq	%xmm0, 29(%edx)
-L(bk_write_29bytes):
-	movq	21(%eax), %xmm0
-	movq	%xmm0, 21(%edx)
-L(bk_write_21bytes):
-	movq	13(%eax), %xmm0
-	movq	%xmm0, 13(%edx)
-L(bk_write_13bytes):
-	movq	5(%eax), %xmm0
-	movq	%xmm0, 5(%edx)
-L(bk_write_5bytes):
-	movl	1(%eax), %ecx
-	movl	%ecx, 1(%edx)
-L(bk_write_1bytes):
-	movzbl	(%eax), %ecx
-	movb	%cl, (%edx)
-	movl	DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(bk_write_41bytes):
-	movq	33(%eax), %xmm0
-	movq	%xmm0, 33(%edx)
-L(bk_write_33bytes):
-	movq	25(%eax), %xmm0
-	movq	%xmm0, 25(%edx)
-L(bk_write_25bytes):
-	movq	17(%eax), %xmm0
-	movq	%xmm0, 17(%edx)
-L(bk_write_17bytes):
-	movq	9(%eax), %xmm0
-	movq	%xmm0, 9(%edx)
-L(bk_write_9bytes):
-	movq	1(%eax), %xmm0
-	movq	%xmm0, 1(%edx)
-	movzbl	(%eax), %ecx
-	movb	%cl, (%edx)
-	movl	DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(bk_write_46bytes):
-	movq	38(%eax), %xmm0
-	movq	%xmm0, 38(%edx)
-L(bk_write_38bytes):
-	movq	30(%eax), %xmm0
-	movq	%xmm0, 30(%edx)
-L(bk_write_30bytes):
-	movq	22(%eax), %xmm0
-	movq	%xmm0, 22(%edx)
-L(bk_write_22bytes):
-	movq	14(%eax), %xmm0
-	movq	%xmm0, 14(%edx)
-L(bk_write_14bytes):
-	movq	6(%eax), %xmm0
-	movq	%xmm0, 6(%edx)
-L(bk_write_6bytes):
-	movl	2(%eax), %ecx
-	movl	%ecx, 2(%edx)
-	movzwl	(%eax), %ecx
-	movw	%cx, (%edx)
-	movl	DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(bk_write_42bytes):
-	movq	34(%eax), %xmm0
-	movq	%xmm0, 34(%edx)
-L(bk_write_34bytes):
-	movq	26(%eax), %xmm0
-	movq	%xmm0, 26(%edx)
-L(bk_write_26bytes):
-	movq	18(%eax), %xmm0
-	movq	%xmm0, 18(%edx)
-L(bk_write_18bytes):
-	movq	10(%eax), %xmm0
-	movq	%xmm0, 10(%edx)
-L(bk_write_10bytes):
-	movq	2(%eax), %xmm0
-	movq	%xmm0, 2(%edx)
-L(bk_write_2bytes):
-	movzwl	(%eax), %ecx
-	movw	%cx, (%edx)
-	movl	DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(bk_write_47bytes):
-	movq	39(%eax), %xmm0
-	movq	%xmm0, 39(%edx)
-L(bk_write_39bytes):
-	movq	31(%eax), %xmm0
-	movq	%xmm0, 31(%edx)
-L(bk_write_31bytes):
-	movq	23(%eax), %xmm0
-	movq	%xmm0, 23(%edx)
-L(bk_write_23bytes):
-	movq	15(%eax), %xmm0
-	movq	%xmm0, 15(%edx)
-L(bk_write_15bytes):
-	movq	7(%eax), %xmm0
-	movq	%xmm0, 7(%edx)
-L(bk_write_7bytes):
-	movl	3(%eax), %ecx
-	movl	%ecx, 3(%edx)
-	movzwl	1(%eax), %ecx
-	movw	%cx, 1(%edx)
-	movzbl	(%eax), %eax
-	movb	%al, (%edx)
-	movl	DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#endif
-	RETURN
-
-	.p2align 4
-L(bk_write_43bytes):
-	movq	35(%eax), %xmm0
-	movq	%xmm0, 35(%edx)
-L(bk_write_35bytes):
-	movq	27(%eax), %xmm0
-	movq	%xmm0, 27(%edx)
-L(bk_write_27bytes):
-	movq	19(%eax), %xmm0
-	movq	%xmm0, 19(%edx)
-L(bk_write_19bytes):
-	movq	11(%eax), %xmm0
-	movq	%xmm0, 11(%edx)
-L(bk_write_11bytes):
-	movq	3(%eax), %xmm0
-	movq	%xmm0, 3(%edx)
-L(bk_write_3bytes):
-	movzwl	1(%eax), %ecx
-	movw	%cx, 1(%edx)
-	movzbl	(%eax), %eax
-	movb	%al, (%edx)
-	movl	DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
-	movl	LEN(%esp), %ecx
-	add	%ecx, %eax
-#endif
-	RETURN_END
-
-
-	.pushsection .rodata.ssse3,"a",@progbits
-	.p2align 2
-L(table_48bytes_fwd):
-	.int	JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
-	.int	JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
-
-	.p2align 2
-L(table_48bytes_fwd_align):
-	.int	JMPTBL (L(fwd_write_0bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_1bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_2bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_3bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_4bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_5bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_6bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_7bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_8bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_9bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_10bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_11bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_12bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_13bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_14bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_15bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_16bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_17bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_18bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_19bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_20bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_21bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_22bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_23bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_24bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_25bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_26bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_27bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_28bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_29bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_30bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_31bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_32bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_33bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_34bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_35bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_36bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_37bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_38bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_39bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_40bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_41bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_42bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_43bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_44bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_45bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_46bytes_align), L(table_48bytes_fwd_align))
-	.int	JMPTBL (L(fwd_write_47bytes_align), L(table_48bytes_fwd_align))
-
-	.p2align 2
-L(shl_table):
-	.int	JMPTBL (L(shl_0), L(shl_table))
-	.int	JMPTBL (L(shl_1), L(shl_table))
-	.int	JMPTBL (L(shl_2), L(shl_table))
-	.int	JMPTBL (L(shl_3), L(shl_table))
-	.int	JMPTBL (L(shl_4), L(shl_table))
-	.int	JMPTBL (L(shl_5), L(shl_table))
-	.int	JMPTBL (L(shl_6), L(shl_table))
-	.int	JMPTBL (L(shl_7), L(shl_table))
-	.int	JMPTBL (L(shl_8), L(shl_table))
-	.int	JMPTBL (L(shl_9), L(shl_table))
-	.int	JMPTBL (L(shl_10), L(shl_table))
-	.int	JMPTBL (L(shl_11), L(shl_table))
-	.int	JMPTBL (L(shl_12), L(shl_table))
-	.int	JMPTBL (L(shl_13), L(shl_table))
-	.int	JMPTBL (L(shl_14), L(shl_table))
-	.int	JMPTBL (L(shl_15), L(shl_table))
-
-	.p2align 2
-L(table_48_bytes_bwd):
-	.int	JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
-	.int	JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
-
-	.popsection
-
-#ifdef USE_AS_MEMMOVE
-	.p2align 4
-L(copy_backward):
-	PUSH (%edi)
-	movl	%eax, %edi
-	lea	(%ecx,%edx,1),%edx
-	lea	(%ecx,%edi,1),%edi
-	testl	$0x3, %edx
-	jnz	L(bk_align)
-
-L(bk_aligned_4):
-	cmp	$64, %ecx
-	jae	L(bk_write_more64bytes)
-
-L(bk_write_64bytesless):
-	cmp	$32, %ecx
-	jb	L(bk_write_less32bytes)
-
-L(bk_write_more32bytes):
-	/* Copy 32 bytes at a time.  */
-	sub	$32, %ecx
-	movq	-8(%edi), %xmm0
-	movq	%xmm0, -8(%edx)
-	movq	-16(%edi), %xmm0
-	movq	%xmm0, -16(%edx)
-	movq	-24(%edi), %xmm0
-	movq	%xmm0, -24(%edx)
-	movq	-32(%edi), %xmm0
-	movq	%xmm0, -32(%edx)
-	sub	$32, %edx
-	sub	$32, %edi
-
-L(bk_write_less32bytes):
-	movl	%edi, %eax
-	sub	%ecx, %edx
-	sub	%ecx, %eax
-	POP (%edi)
-L(bk_write_less32bytes_2):
-	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-
-	CFI_PUSH (%edi)
-
-	.p2align 4
-L(bk_align):
-	cmp	$8, %ecx
-	jbe	L(bk_write_less32bytes)
-	testl	$1, %edx
-	/* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
-	then	(EDX & 2) must be != 0.  */
-	jz	L(bk_got2)
-	sub	$1, %edi
-	sub	$1, %ecx
-	sub	$1, %edx
-	movzbl	(%edi), %eax
-	movb	%al, (%edx)
-
-	testl	$2, %edx
-	jz	L(bk_aligned_4)
-
-L(bk_got2):
-	sub	$2, %edi
-	sub	$2, %ecx
-	sub	$2, %edx
-	movzwl	(%edi), %eax
-	movw	%ax, (%edx)
-	jmp	L(bk_aligned_4)
-
-	.p2align 4
-L(bk_write_more64bytes):
-	/* Check alignment of last byte.  */
-	testl	$15, %edx
-	jz	L(bk_ssse3_cpy_pre)
-
-/* EDX is aligned 4 bytes, but not 16 bytes.  */
-L(bk_ssse3_align):
-	sub	$4, %edi
-	sub	$4, %ecx
-	sub	$4, %edx
-	movl	(%edi), %eax
-	movl	%eax, (%edx)
-
-	testl	$15, %edx
-	jz	L(bk_ssse3_cpy_pre)
-
-	sub	$4, %edi
-	sub	$4, %ecx
-	sub	$4, %edx
-	movl	(%edi), %eax
-	movl	%eax, (%edx)
-
-	testl	$15, %edx
-	jz	L(bk_ssse3_cpy_pre)
-
-	sub	$4, %edi
-	sub	$4, %ecx
-	sub	$4, %edx
-	movl	(%edi), %eax
-	movl	%eax, (%edx)
-
-L(bk_ssse3_cpy_pre):
-	cmp	$64, %ecx
-	jb	L(bk_write_more32bytes)
-
-	.p2align 4
-L(bk_ssse3_cpy):
-	sub	$64, %edi
-	sub	$64, %ecx
-	sub	$64, %edx
-	movdqu	0x30(%edi), %xmm3
-	movdqa	%xmm3, 0x30(%edx)
-	movdqu	0x20(%edi), %xmm2
-	movdqa	%xmm2, 0x20(%edx)
-	movdqu	0x10(%edi), %xmm1
-	movdqa	%xmm1, 0x10(%edx)
-	movdqu	(%edi), %xmm0
-	movdqa	%xmm0, (%edx)
-	cmp	$64, %ecx
-	jae	L(bk_ssse3_cpy)
-	jmp	L(bk_write_64bytesless)
-
-#endif
-
-END (MEMCPY)
diff --git a/libc/arch-x86/string/ssse3-memmove-atom.S b/libc/arch-x86/string/ssse3-memmove-atom.S
deleted file mode 100644
index 3572eac..0000000
--- a/libc/arch-x86/string/ssse3-memmove-atom.S
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-
-#define MEMCPY memmove_atom
-#define USE_AS_MEMMOVE
-#include "ssse3-memcpy-atom.S"
diff --git a/libc/arch-x86/string/ssse3-strncpy-atom.S b/libc/arch-x86/string/ssse3-strncpy-atom.S
deleted file mode 100644
index 0c27ffe..0000000
--- a/libc/arch-x86/string/ssse3-strncpy-atom.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define USE_AS_STRNCPY
-#define STRCPY strncpy_atom
-#include "ssse3-strcpy-atom.S"
diff --git a/libc/arch-x86/string/ssse3-wcscat-atom.S b/libc/arch-x86/string/ssse3-wcscat-atom.S
deleted file mode 100644
index 8a389a3..0000000
--- a/libc/arch-x86/string/ssse3-wcscat-atom.S
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
-Copyright (c) 2011 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc                  .cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc                    .cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)	\
-	.type name,  @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-#define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
-#define POP(REG)	popl REG;	CFI_POP (REG)
-
-#define PARMS  4
-#define STR1  PARMS+4
-#define STR2  STR1+4
-
-#define USE_AS_WCSCAT
-
-.text
-ENTRY (wcscat)
-	PUSH    (%edi)
-	mov	STR1(%esp), %edi
-	mov	%edi, %edx
-
-#define RETURN  jmp L(WcscpyAtom)
-#include "sse2-wcslen-atom.S"
-
-L(WcscpyAtom):
-	shl	$2, %eax
-	mov	STR2(%esp), %ecx
-	lea	(%edi, %eax), %edx
-
-	cmpl	$0, (%ecx)
-	jz	L(Exit4)
-	cmpl	$0, 4(%ecx)
-	jz	L(Exit8)
-	cmpl	$0, 8(%ecx)
-	jz	L(Exit12)
-	cmpl	$0, 12(%ecx)
-	jz	L(Exit16)
-
-#undef RETURN
-#define RETURN  POP(%edi);	ret;	CFI_PUSH(%edi)
-#include "ssse3-wcscpy-atom.S"
-
-END (wcscat)
diff --git a/libc/arch-x86/string/ssse3-wcscpy-atom.S b/libc/arch-x86/string/ssse3-wcscpy-atom.S
deleted file mode 100644
index 27cb61e..0000000
--- a/libc/arch-x86/string/ssse3-wcscpy-atom.S
+++ /dev/null
@@ -1,652 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef USE_AS_WCSCAT
-
-# ifndef L
-#  define L(label)	.L##label
-# endif
-
-# ifndef cfi_startproc
-#  define cfi_startproc	.cfi_startproc
-# endif
-
-# ifndef cfi_endproc
-#  define cfi_endproc	.cfi_endproc
-# endif
-
-# ifndef cfi_rel_offset
-#  define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-# endif
-
-# ifndef cfi_restore
-#  define cfi_restore(reg)	.cfi_restore reg
-# endif
-
-# ifndef cfi_adjust_cfa_offset
-#  define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-# endif
-
-# ifndef ENTRY
-#  define ENTRY(name)	\
-	.type name, @function;	\
-	.globl name;	\
-	.p2align 4;	\
-name:	\
-	cfi_startproc
-# endif
-
-# ifndef END
-#  define END(name)	\
-	cfi_endproc;	\
-	.size name, .-name
-# endif
-
-# define CFI_PUSH(REG)	\
-	cfi_adjust_cfa_offset (4);	\
-	cfi_rel_offset (REG, 0)
-
-# define CFI_POP(REG)	\
-	cfi_adjust_cfa_offset (-4);	\
-	cfi_restore (REG)
-
-# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
-# define POP(REG)	popl REG; CFI_POP (REG)
-
-# define PARMS	4
-# define RETURN	POP (%edi); ret; CFI_PUSH (%edi)
-
-# define STR1	PARMS
-# define STR2	STR1+4
-# define LEN	STR2+4
-
-.text
-ENTRY (wcscpy)
-	mov	STR1(%esp), %edx
-	mov	STR2(%esp), %ecx
-
-	cmpl	$0, (%ecx)
-	jz	L(ExitTail4)
-	cmpl	$0, 4(%ecx)
-	jz	L(ExitTail8)
-	cmpl	$0, 8(%ecx)
-	jz	L(ExitTail12)
-	cmpl	$0, 12(%ecx)
-	jz	L(ExitTail16)
-
-	PUSH	(%edi)
-	mov	%edx, %edi
-#endif
-	PUSH	(%esi)
-	lea	16(%ecx), %esi
-
-	and	$-16, %esi
-
-	pxor	%xmm0, %xmm0
-	pcmpeqd	(%esi), %xmm0
-	movdqu	(%ecx), %xmm1
-	movdqu	%xmm1, (%edx)
-
-	pmovmskb %xmm0, %eax
-	sub	%ecx, %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	mov	%edx, %eax
-	lea	16(%edx), %edx
-	and	$-16, %edx
-	sub	%edx, %eax
-
-	sub	%eax, %ecx
-	mov	%ecx, %eax
-	and	$0xf, %eax
-	mov	$0, %esi
-
-	jz	L(Align16Both)
-	cmp	$4, %eax
-	je	L(Shl4)
-	cmp	$8, %eax
-	je	L(Shl8)
-	jmp	L(Shl12)
-
-L(Align16Both):
-	movaps	(%ecx), %xmm1
-	movaps	16(%ecx), %xmm2
-	movaps	%xmm1, (%edx)
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm3
-	movaps	%xmm2, (%edx, %esi)
-	pcmpeqd	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm4
-	movaps	%xmm3, (%edx, %esi)
-	pcmpeqd	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm1
-	movaps	%xmm4, (%edx, %esi)
-	pcmpeqd	%xmm1, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm2
-	movaps	%xmm1, (%edx, %esi)
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	16(%ecx, %esi), %xmm3
-	movaps	%xmm2, (%edx, %esi)
-	pcmpeqd	%xmm3, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	%xmm3, (%edx, %esi)
-	mov	%ecx, %eax
-	lea	16(%ecx, %esi), %ecx
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	sub	%eax, %edx
-
-	mov	$-0x40, %esi
-
-L(Aligned64Loop):
-	movaps	(%ecx), %xmm2
-	movaps	32(%ecx), %xmm3
-	movaps	%xmm2, %xmm4
-	movaps	16(%ecx), %xmm5
-	movaps	%xmm3, %xmm6
-	movaps	48(%ecx), %xmm7
-	pminub	%xmm5, %xmm2
-	pminub	%xmm7, %xmm3
-	pminub	%xmm2, %xmm3
-	lea	64(%edx), %edx
-	pcmpeqd	%xmm0, %xmm3
-	lea	64(%ecx), %ecx
-	pmovmskb %xmm3, %eax
-
-	test	%eax, %eax
-	jnz	L(Aligned64Leave)
-	movaps	%xmm4, -64(%edx)
-	movaps	%xmm5, -48(%edx)
-	movaps	%xmm6, -32(%edx)
-	movaps	%xmm7, -16(%edx)
-	jmp	L(Aligned64Loop)
-
-L(Aligned64Leave):
-	pcmpeqd	%xmm4, %xmm0
-	pmovmskb %xmm0, %eax
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqd	%xmm5, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm4, -64(%edx)
-	lea	16(%esi), %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	pcmpeqd	%xmm6, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm5, -48(%edx)
-	lea	16(%esi), %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	movaps	%xmm6, -32(%edx)
-	pcmpeqd	%xmm7, %xmm0
-	pmovmskb %xmm0, %eax
-	lea	16(%esi), %esi
-	test	%eax, %eax
-	jnz	L(CopyFrom1To16Bytes)
-
-	mov	$-0x40, %esi
-	movaps	%xmm7, -16(%edx)
-	jmp	L(Aligned64Loop)
-
-	.p2align 4
-L(Shl4):
-	movaps	-4(%ecx), %xmm1
-	movaps	12(%ecx), %xmm2
-L(Shl4Start):
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm1
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	28(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-
-	test	%eax, %eax
-	jnz	L(Shl4LoopExit)
-
-	palignr	$4, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	28(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-12(%ecx), %ecx
-	sub	%eax, %edx
-
-	movaps	-4(%ecx), %xmm1
-
-L(Shl4LoopStart):
-	movaps	12(%ecx), %xmm2
-	movaps	28(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	44(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	60(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqd	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$4, %xmm4, %xmm5
-	palignr	$4, %xmm3, %xmm4
-	test	%eax, %eax
-	jnz	L(Shl4Start)
-
-	palignr	$4, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$4, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl4LoopStart)
-
-L(Shl4LoopExit):
-	movlpd	(%ecx), %xmm0
-	movl	8(%ecx), %esi
-	movlpd	%xmm0, (%edx)
-	movl	%esi, 8(%edx)
-	POP	(%esi)
-	add	$12, %edx
-	add	$12, %ecx
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit4)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(Shl8):
-	movaps	-8(%ecx), %xmm1
-	movaps	8(%ecx), %xmm2
-L(Shl8Start):
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm1
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	24(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-
-	test	%eax, %eax
-	jnz	L(Shl8LoopExit)
-
-	palignr	$8, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	24(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-8(%ecx), %ecx
-	sub	%eax, %edx
-
-	movaps	-8(%ecx), %xmm1
-
-L(Shl8LoopStart):
-	movaps	8(%ecx), %xmm2
-	movaps	24(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	40(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	56(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqd	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$8, %xmm4, %xmm5
-	palignr	$8, %xmm3, %xmm4
-	test	%eax, %eax
-	jnz	L(Shl8Start)
-
-	palignr	$8, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$8, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl8LoopStart)
-
-L(Shl8LoopExit):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	POP	(%esi)
-	add	$8, %edx
-	add	$8, %ecx
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit4)
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	CFI_PUSH	(%esi)
-
-	.p2align 4
-L(Shl12):
-	movaps	-12(%ecx), %xmm1
-	movaps	4(%ecx), %xmm2
-L(Shl12Start):
-	pcmpeqd	%xmm2, %xmm0
-	pmovmskb %xmm0, %eax
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm1
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-	movaps	%xmm2, %xmm3
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm2, (%edx)
-	movaps	20(%ecx), %xmm2
-
-	pcmpeqd	%xmm2, %xmm0
-	lea	16(%edx), %edx
-	pmovmskb %xmm0, %eax
-	lea	16(%ecx), %ecx
-
-	test	%eax, %eax
-	jnz	L(Shl12LoopExit)
-
-	palignr	$12, %xmm3, %xmm2
-	movaps	%xmm2, (%edx)
-	lea	20(%ecx), %ecx
-	lea	16(%edx), %edx
-
-	mov	%ecx, %eax
-	and	$-0x40, %ecx
-	sub	%ecx, %eax
-	lea	-4(%ecx), %ecx
-	sub	%eax, %edx
-
-	movaps	-12(%ecx), %xmm1
-
-L(Shl12LoopStart):
-	movaps	4(%ecx), %xmm2
-	movaps	20(%ecx), %xmm3
-	movaps	%xmm3, %xmm6
-	movaps	36(%ecx), %xmm4
-	movaps	%xmm4, %xmm7
-	movaps	52(%ecx), %xmm5
-	pminub	%xmm2, %xmm6
-	pminub	%xmm5, %xmm7
-	pminub	%xmm6, %xmm7
-	pcmpeqd	%xmm0, %xmm7
-	pmovmskb %xmm7, %eax
-	movaps	%xmm5, %xmm7
-	palignr	$12, %xmm4, %xmm5
-	palignr	$12, %xmm3, %xmm4
-	test	%eax, %eax
-	jnz	L(Shl12Start)
-
-	palignr	$12, %xmm2, %xmm3
-	lea	64(%ecx), %ecx
-	palignr	$12, %xmm1, %xmm2
-	movaps	%xmm7, %xmm1
-	movaps	%xmm5, 48(%edx)
-	movaps	%xmm4, 32(%edx)
-	movaps	%xmm3, 16(%edx)
-	movaps	%xmm2, (%edx)
-	lea	64(%edx), %edx
-	jmp	L(Shl12LoopStart)
-
-L(Shl12LoopExit):
-	movl	(%ecx), %esi
-	movl	%esi, (%edx)
-	mov	$4, %esi
-
-	.p2align 4
-L(CopyFrom1To16Bytes):
-	add	%esi, %edx
-	add	%esi, %ecx
-
-	POP	(%esi)
-	test	%al, %al
-	jz	L(ExitHigh)
-	test	$0x01, %al
-	jnz	L(Exit4)
-L(Exit8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(ExitHigh):
-	test	$0x01, %ah
-	jnz	L(Exit12)
-L(Exit16):
-	movdqu	(%ecx), %xmm0
-	movdqu	%xmm0, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(Exit4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edi, %eax
-	RETURN
-
-	.p2align 4
-L(Exit12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-	movl	%edi, %eax
-	RETURN
-
-CFI_POP	(%edi)
-
-	.p2align 4
-L(ExitTail4):
-	movl	(%ecx), %eax
-	movl	%eax, (%edx)
-	movl	%edx, %eax
-	ret
-
-	.p2align 4
-L(ExitTail8):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	%edx, %eax
-	ret
-
-	.p2align 4
-L(ExitTail12):
-	movlpd	(%ecx), %xmm0
-	movlpd	%xmm0, (%edx)
-	movl	8(%ecx), %eax
-	movl	%eax, 8(%edx)
-	movl	%edx, %eax
-	ret
-
-	.p2align 4
-L(ExitTail16):
-	movdqu	(%ecx), %xmm0
-	movdqu	%xmm0, (%edx)
-	movl	%edx, %eax
-	ret
-
-#ifndef USE_AS_WCSCAT
-END (wcscpy)
-#endif
diff --git a/libc/arch-x86/string/ssse3-wmemcmp-atom.S b/libc/arch-x86/string/ssse3-wmemcmp-atom.S
deleted file mode 100644
index a81b78b..0000000
--- a/libc/arch-x86/string/ssse3-wmemcmp-atom.S
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
-Copyright (c) 2011, 2012, 2013 Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define MEMCMP  wmemcmp_atom
-
-#define USE_WCHAR
-#define USE_AS_WMEMCMP 1
-#include "ssse3-memcmp-atom.S"
diff --git a/libc/include/android/versioning.h b/libc/include/android/versioning.h
index fe9264d..4411aa1 100644
--- a/libc/include/android/versioning.h
+++ b/libc/include/android/versioning.h
@@ -60,8 +60,8 @@
 #define __INTRODUCED_IN_64(api_level) __BIONIC_AVAILABILITY(introduced=api_level)
 #endif
 
-// Vendor modules do not follow SDK versioning. Ignore NDK guards for vendor modules.
-#if defined(__ANDROID_VENDOR__)
+// Vendor and product modules do not follow SDK versioning. Ignore NDK guards for these modules.
+#if defined(__ANDROID_VNDK__)
 #undef __BIONIC_AVAILABILITY
 #define __BIONIC_AVAILABILITY(api_level, ...)
-#endif // defined(__ANDROID_VENDOR__)
+#endif // defined(__ANDROID_VNDK__)
diff --git a/libc/include/bits/fortify/string.h b/libc/include/bits/fortify/string.h
index 4d32b04..041967b 100644
--- a/libc/include/bits/fortify/string.h
+++ b/libc/include/bits/fortify/string.h
@@ -220,8 +220,13 @@
 }
 
 #if __BIONIC_FORTIFY_RUNTIME_CHECKS_ENABLED
-__BIONIC_FORTIFY_INLINE
-size_t strlen(const char* _Nonnull const s __pass_object_size0) __overloadable {
+/*
+ * Clang, when parsing C, can fold strlen to a constant without LLVM's help.
+ * This doesn't apply to overloads of strlen, so write this differently. We
+ * can't use `__pass_object_size0` here, but that's fine: it doesn't help much
+ * on __always_inline functions.
+ */
+extern __always_inline __inline__ __attribute__((gnu_inline)) size_t strlen(const char* _Nonnull s) {
     return __strlen_chk(s, __bos0(s));
 }
 #endif
diff --git a/libc/include/bits/glibc-syscalls.h b/libc/include/bits/glibc-syscalls.h
index 7b171e8..8c5a91d 100644
--- a/libc/include/bits/glibc-syscalls.h
+++ b/libc/include/bits/glibc-syscalls.h
@@ -36,9 +36,6 @@
 #if defined(__NR_arch_prctl)
   #define SYS_arch_prctl __NR_arch_prctl
 #endif
-#if defined(__NR_arch_specific_syscall)
-  #define SYS_arch_specific_syscall __NR_arch_specific_syscall
-#endif
 #if defined(__NR_arm_fadvise64_64)
   #define SYS_arm_fadvise64_64 __NR_arm_fadvise64_64
 #endif
@@ -1272,9 +1269,6 @@
 #if defined(__NR_syscall)
   #define SYS_syscall __NR_syscall
 #endif
-#if defined(__NR_syscalls)
-  #define SYS_syscalls __NR_syscalls
-#endif
 #if defined(__NR_sysfs)
   #define SYS_sysfs __NR_sysfs
 #endif
@@ -1371,6 +1365,9 @@
 #if defined(__NR_unshare)
   #define SYS_unshare __NR_unshare
 #endif
+#if defined(__NR_uretprobe)
+  #define SYS_uretprobe __NR_uretprobe
+#endif
 #if defined(__NR_uselib)
   #define SYS_uselib __NR_uselib
 #endif
diff --git a/libc/include/unistd.h b/libc/include/unistd.h
index e1c268f..9bc01f0 100644
--- a/libc/include/unistd.h
+++ b/libc/include/unistd.h
@@ -257,8 +257,29 @@
 int linkat(int __old_dir_fd, const char* _Nonnull __old_path, int __new_dir_fd, const char* _Nonnull __new_path, int __flags);
 int unlink(const char* _Nonnull __path);
 int unlinkat(int __dirfd, const char* _Nonnull __path, int __flags);
+
+/**
+ * [chdir(2)](https://man7.org/linux/man-pages/man2/chdir.2.html) changes
+ * the current working directory to the given path.
+ *
+ * This function affects all threads in the process, so is generally a bad idea
+ * on Android where most code will be running in a multi-threaded context.
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
 int chdir(const char* _Nonnull __path);
+
+/**
+ * [fchdir(2)](https://man7.org/linux/man-pages/man2/chdir.2.html) changes
+ * the current working directory to the given fd.
+ *
+ * This function affects all threads in the process, so is generally a bad idea
+ * on Android where most code will be running in a multi-threaded context.
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
 int fchdir(int __fd);
+
 int rmdir(const char* _Nonnull __path);
 int pipe(int __fds[_Nonnull 2]);
 #if defined(__USE_GNU)
diff --git a/libc/kernel/android/scsi/scsi/scsi_proto.h b/libc/kernel/android/scsi/scsi/scsi_proto.h
index d873fad..754e12a 100644
--- a/libc/kernel/android/scsi/scsi/scsi_proto.h
+++ b/libc/kernel/android/scsi/scsi/scsi_proto.h
@@ -109,6 +109,7 @@
 #define WRITE_SAME_16 0x93
 #define ZBC_OUT 0x94
 #define ZBC_IN 0x95
+#define WRITE_ATOMIC_16 0x9c
 #define SERVICE_ACTION_BIDIRECTIONAL 0x9d
 #define SERVICE_ACTION_IN_16 0x9e
 #define SERVICE_ACTION_OUT_16 0x9f
diff --git a/libc/kernel/tools/cpp.py b/libc/kernel/tools/cpp.py
index 08b786a..df50806 100755
--- a/libc/kernel/tools/cpp.py
+++ b/libc/kernel/tools/cpp.py
@@ -1430,7 +1430,7 @@
                         state = VAR_DECL
                     elif state == NORMAL and token_id in ['struct', 'typedef',
                                                           'enum', 'union',
-                                                          '__extension__']:
+                                                          '__extension__', '=']:
                         state = OTHER_DECL
                         state_token = token_id
                     elif block.tokens[i].kind == TokenKind.IDENTIFIER:
@@ -2057,7 +2057,7 @@
   struct timeval val2;
 };
 """
-        self.assertEqual(self.parse(text, {"remove": True}), expected)
+        self.assertEqual(self.parse(text, {"remove": None}), expected)
 
     def test_remove_struct_from_end(self):
         text = """\
@@ -2076,7 +2076,7 @@
   struct timeval val2;
 };
 """
-        self.assertEqual(self.parse(text, {"remove": True}), expected)
+        self.assertEqual(self.parse(text, {"remove": None}), expected)
 
     def test_remove_minimal_struct(self):
         text = """\
@@ -2084,7 +2084,7 @@
 };
 """
         expected = "";
-        self.assertEqual(self.parse(text, {"remove": True}), expected)
+        self.assertEqual(self.parse(text, {"remove": None}), expected)
 
     def test_remove_struct_with_struct_fields(self):
         text = """\
@@ -2104,7 +2104,7 @@
   struct remove val2;
 };
 """
-        self.assertEqual(self.parse(text, {"remove": True}), expected)
+        self.assertEqual(self.parse(text, {"remove": None}), expected)
 
     def test_remove_consecutive_structs(self):
         text = """\
@@ -2136,7 +2136,7 @@
   struct timeval val2;
 };
 """
-        self.assertEqual(self.parse(text, {"remove1": True, "remove2": True}), expected)
+        self.assertEqual(self.parse(text, {"remove1": None, "remove2": None}), expected)
 
     def test_remove_multiple_structs(self):
         text = """\
@@ -2169,7 +2169,7 @@
   int val;
 };
 """
-        self.assertEqual(self.parse(text, {"remove1": True, "remove2": True}), expected)
+        self.assertEqual(self.parse(text, {"remove1": None, "remove2": None}), expected)
 
     def test_remove_struct_with_inline_structs(self):
         text = """\
@@ -2194,7 +2194,7 @@
   struct timeval val2;
 };
 """
-        self.assertEqual(self.parse(text, {"remove": True}), expected)
+        self.assertEqual(self.parse(text, {"remove": None}), expected)
 
     def test_remove_struct_across_blocks(self):
         text = """\
@@ -2219,7 +2219,7 @@
   struct timeval val2;
 };
 """
-        self.assertEqual(self.parse(text, {"remove": True}), expected)
+        self.assertEqual(self.parse(text, {"remove": None}), expected)
 
     def test_remove_struct_across_blocks_multiple_structs(self):
         text = """\
@@ -2246,7 +2246,7 @@
   struct timeval val2;
 };
 """
-        self.assertEqual(self.parse(text, {"remove1": True, "remove2": True}), expected)
+        self.assertEqual(self.parse(text, {"remove1": None, "remove2": None}), expected)
 
     def test_remove_multiple_struct_and_add_includes(self):
         text = """\
@@ -2263,7 +2263,7 @@
 #include <bits/remove1.h>
 #include <bits/remove2.h>
 """
-        self.assertEqual(self.parse(text, {"remove1": False, "remove2": False}), expected)
+        self.assertEqual(self.parse(text, {"remove1": "bits/remove1.h", "remove2": "bits/remove2.h"}), expected)
 
 
 class FullPathTest(unittest.TestCase):
@@ -2580,6 +2580,71 @@
   struct timeval timeval;
   struct itimerval itimerval;
 };
+#include <linux/time.h>
+"""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_var_definition(self):
+        # If we're definining the whole thing, it's probably worth keeping.
+        text = """\
+static const char *kString = "hello world";
+static const int kInteger = 42;
+"""
+        expected = """\
+static const char * kString = "hello world";
+static const int kInteger = 42;
+"""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_struct_array_definition(self):
+        text = """\
+struct descriptor {
+  int args;
+  int size;
+};
+static const struct descriptor[] = {
+  {0, 0},
+  {1, 12},
+  {0, 42},
+};
+"""
+        expected = """\
+struct descriptor {
+  int args;
+  int size;
+};
+static const struct descriptor[] = {
+ {
+    0, 0
+  }
+ , {
+    1, 12
+  }
+ , {
+    0, 42
+  }
+ ,
+};
+"""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_array_definition(self):
+        text = """\
+static const char *arr[] = {
+  "foo",
+  "bar",
+  "baz",
+};
+
+static int another_arr[5] = { 1, 2, 3, 4, 5};
+"""
+        expected = """\
+static const char * arr[] = {
+  "foo", "bar", "baz",
+};
+static int another_arr[5] = {
+  1, 2, 3, 4, 5
+};
 """
         self.assertEqual(self.parse(text), expected)
 
diff --git a/libc/kernel/tools/update_all.py b/libc/kernel/tools/update_all.py
index ae89a80..331a957 100755
--- a/libc/kernel/tools/update_all.py
+++ b/libc/kernel/tools/update_all.py
@@ -88,15 +88,8 @@
     # Collect the set of all syscalls for all architectures.
     syscalls = set()
     pattern = re.compile(r'^\s*#\s*define\s*__NR_([a-z_]\S+)')
-    for unistd_h in ['kernel/uapi/asm-generic/unistd.h',
-                     'kernel/uapi/asm-arm/asm/unistd.h',
-                     'kernel/uapi/asm-arm/asm/unistd-eabi.h',
-                     'kernel/uapi/asm-arm/asm/unistd-oabi.h',
-                     'kernel/uapi/asm-riscv/asm/unistd.h',
-                     'kernel/uapi/asm-x86/asm/unistd_32.h',
-                     'kernel/uapi/asm-x86/asm/unistd_64.h',
-                     'kernel/uapi/asm-x86/asm/unistd_x32.h']:
-        for line in open(os.path.join(libc_root, unistd_h)):
+    for unistd_h in glob.glob('%s/kernel/uapi/asm-*/asm/unistd*.h' % libc_root):
+        for line in open(unistd_h):
             m = re.search(pattern, line)
             if m:
                 nr_name = m.group(1)
diff --git a/libc/kernel/uapi/asm-arm64/asm/unistd.h b/libc/kernel/uapi/asm-arm64/asm/unistd.h
index 7457ebc..178578f 100644
--- a/libc/kernel/uapi/asm-arm64/asm/unistd.h
+++ b/libc/kernel/uapi/asm-arm64/asm/unistd.h
@@ -4,10 +4,4 @@
  * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
  * for more information.
  */
-#define __ARCH_WANT_RENAMEAT
-#define __ARCH_WANT_NEW_STAT
-#define __ARCH_WANT_SET_GET_RLIMIT
-#define __ARCH_WANT_TIME32_SYSCALLS
-#define __ARCH_WANT_SYS_CLONE3
-#define __ARCH_WANT_MEMFD_SECRET
-#include <asm-generic/unistd.h>
+#include <asm/unistd_64.h>
diff --git a/libc/kernel/uapi/asm-arm64/asm/unistd_64.h b/libc/kernel/uapi/asm-arm64/asm/unistd_64.h
new file mode 100644
index 0000000..0a0a1c0
--- /dev/null
+++ b/libc/kernel/uapi/asm-arm64/asm/unistd_64.h
@@ -0,0 +1,327 @@
+/*
+ * This file is auto-generated. Modifications will be lost.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
+ * for more information.
+ */
+#ifndef _UAPI_ASM_UNISTD_64_H
+#define _UAPI_ASM_UNISTD_64_H
+#define __NR_io_setup 0
+#define __NR_io_destroy 1
+#define __NR_io_submit 2
+#define __NR_io_cancel 3
+#define __NR_io_getevents 4
+#define __NR_setxattr 5
+#define __NR_lsetxattr 6
+#define __NR_fsetxattr 7
+#define __NR_getxattr 8
+#define __NR_lgetxattr 9
+#define __NR_fgetxattr 10
+#define __NR_listxattr 11
+#define __NR_llistxattr 12
+#define __NR_flistxattr 13
+#define __NR_removexattr 14
+#define __NR_lremovexattr 15
+#define __NR_fremovexattr 16
+#define __NR_getcwd 17
+#define __NR_lookup_dcookie 18
+#define __NR_eventfd2 19
+#define __NR_epoll_create1 20
+#define __NR_epoll_ctl 21
+#define __NR_epoll_pwait 22
+#define __NR_dup 23
+#define __NR_dup3 24
+#define __NR_fcntl 25
+#define __NR_inotify_init1 26
+#define __NR_inotify_add_watch 27
+#define __NR_inotify_rm_watch 28
+#define __NR_ioctl 29
+#define __NR_ioprio_set 30
+#define __NR_ioprio_get 31
+#define __NR_flock 32
+#define __NR_mknodat 33
+#define __NR_mkdirat 34
+#define __NR_unlinkat 35
+#define __NR_symlinkat 36
+#define __NR_linkat 37
+#define __NR_renameat 38
+#define __NR_umount2 39
+#define __NR_mount 40
+#define __NR_pivot_root 41
+#define __NR_nfsservctl 42
+#define __NR_statfs 43
+#define __NR_fstatfs 44
+#define __NR_truncate 45
+#define __NR_ftruncate 46
+#define __NR_fallocate 47
+#define __NR_faccessat 48
+#define __NR_chdir 49
+#define __NR_fchdir 50
+#define __NR_chroot 51
+#define __NR_fchmod 52
+#define __NR_fchmodat 53
+#define __NR_fchownat 54
+#define __NR_fchown 55
+#define __NR_openat 56
+#define __NR_close 57
+#define __NR_vhangup 58
+#define __NR_pipe2 59
+#define __NR_quotactl 60
+#define __NR_getdents64 61
+#define __NR_lseek 62
+#define __NR_read 63
+#define __NR_write 64
+#define __NR_readv 65
+#define __NR_writev 66
+#define __NR_pread64 67
+#define __NR_pwrite64 68
+#define __NR_preadv 69
+#define __NR_pwritev 70
+#define __NR_sendfile 71
+#define __NR_pselect6 72
+#define __NR_ppoll 73
+#define __NR_signalfd4 74
+#define __NR_vmsplice 75
+#define __NR_splice 76
+#define __NR_tee 77
+#define __NR_readlinkat 78
+#define __NR_newfstatat 79
+#define __NR_fstat 80
+#define __NR_sync 81
+#define __NR_fsync 82
+#define __NR_fdatasync 83
+#define __NR_sync_file_range 84
+#define __NR_timerfd_create 85
+#define __NR_timerfd_settime 86
+#define __NR_timerfd_gettime 87
+#define __NR_utimensat 88
+#define __NR_acct 89
+#define __NR_capget 90
+#define __NR_capset 91
+#define __NR_personality 92
+#define __NR_exit 93
+#define __NR_exit_group 94
+#define __NR_waitid 95
+#define __NR_set_tid_address 96
+#define __NR_unshare 97
+#define __NR_futex 98
+#define __NR_set_robust_list 99
+#define __NR_get_robust_list 100
+#define __NR_nanosleep 101
+#define __NR_getitimer 102
+#define __NR_setitimer 103
+#define __NR_kexec_load 104
+#define __NR_init_module 105
+#define __NR_delete_module 106
+#define __NR_timer_create 107
+#define __NR_timer_gettime 108
+#define __NR_timer_getoverrun 109
+#define __NR_timer_settime 110
+#define __NR_timer_delete 111
+#define __NR_clock_settime 112
+#define __NR_clock_gettime 113
+#define __NR_clock_getres 114
+#define __NR_clock_nanosleep 115
+#define __NR_syslog 116
+#define __NR_ptrace 117
+#define __NR_sched_setparam 118
+#define __NR_sched_setscheduler 119
+#define __NR_sched_getscheduler 120
+#define __NR_sched_getparam 121
+#define __NR_sched_setaffinity 122
+#define __NR_sched_getaffinity 123
+#define __NR_sched_yield 124
+#define __NR_sched_get_priority_max 125
+#define __NR_sched_get_priority_min 126
+#define __NR_sched_rr_get_interval 127
+#define __NR_restart_syscall 128
+#define __NR_kill 129
+#define __NR_tkill 130
+#define __NR_tgkill 131
+#define __NR_sigaltstack 132
+#define __NR_rt_sigsuspend 133
+#define __NR_rt_sigaction 134
+#define __NR_rt_sigprocmask 135
+#define __NR_rt_sigpending 136
+#define __NR_rt_sigtimedwait 137
+#define __NR_rt_sigqueueinfo 138
+#define __NR_rt_sigreturn 139
+#define __NR_setpriority 140
+#define __NR_getpriority 141
+#define __NR_reboot 142
+#define __NR_setregid 143
+#define __NR_setgid 144
+#define __NR_setreuid 145
+#define __NR_setuid 146
+#define __NR_setresuid 147
+#define __NR_getresuid 148
+#define __NR_setresgid 149
+#define __NR_getresgid 150
+#define __NR_setfsuid 151
+#define __NR_setfsgid 152
+#define __NR_times 153
+#define __NR_setpgid 154
+#define __NR_getpgid 155
+#define __NR_getsid 156
+#define __NR_setsid 157
+#define __NR_getgroups 158
+#define __NR_setgroups 159
+#define __NR_uname 160
+#define __NR_sethostname 161
+#define __NR_setdomainname 162
+#define __NR_getrlimit 163
+#define __NR_setrlimit 164
+#define __NR_getrusage 165
+#define __NR_umask 166
+#define __NR_prctl 167
+#define __NR_getcpu 168
+#define __NR_gettimeofday 169
+#define __NR_settimeofday 170
+#define __NR_adjtimex 171
+#define __NR_getpid 172
+#define __NR_getppid 173
+#define __NR_getuid 174
+#define __NR_geteuid 175
+#define __NR_getgid 176
+#define __NR_getegid 177
+#define __NR_gettid 178
+#define __NR_sysinfo 179
+#define __NR_mq_open 180
+#define __NR_mq_unlink 181
+#define __NR_mq_timedsend 182
+#define __NR_mq_timedreceive 183
+#define __NR_mq_notify 184
+#define __NR_mq_getsetattr 185
+#define __NR_msgget 186
+#define __NR_msgctl 187
+#define __NR_msgrcv 188
+#define __NR_msgsnd 189
+#define __NR_semget 190
+#define __NR_semctl 191
+#define __NR_semtimedop 192
+#define __NR_semop 193
+#define __NR_shmget 194
+#define __NR_shmctl 195
+#define __NR_shmat 196
+#define __NR_shmdt 197
+#define __NR_socket 198
+#define __NR_socketpair 199
+#define __NR_bind 200
+#define __NR_listen 201
+#define __NR_accept 202
+#define __NR_connect 203
+#define __NR_getsockname 204
+#define __NR_getpeername 205
+#define __NR_sendto 206
+#define __NR_recvfrom 207
+#define __NR_setsockopt 208
+#define __NR_getsockopt 209
+#define __NR_shutdown 210
+#define __NR_sendmsg 211
+#define __NR_recvmsg 212
+#define __NR_readahead 213
+#define __NR_brk 214
+#define __NR_munmap 215
+#define __NR_mremap 216
+#define __NR_add_key 217
+#define __NR_request_key 218
+#define __NR_keyctl 219
+#define __NR_clone 220
+#define __NR_execve 221
+#define __NR_mmap 222
+#define __NR_fadvise64 223
+#define __NR_swapon 224
+#define __NR_swapoff 225
+#define __NR_mprotect 226
+#define __NR_msync 227
+#define __NR_mlock 228
+#define __NR_munlock 229
+#define __NR_mlockall 230
+#define __NR_munlockall 231
+#define __NR_mincore 232
+#define __NR_madvise 233
+#define __NR_remap_file_pages 234
+#define __NR_mbind 235
+#define __NR_get_mempolicy 236
+#define __NR_set_mempolicy 237
+#define __NR_migrate_pages 238
+#define __NR_move_pages 239
+#define __NR_rt_tgsigqueueinfo 240
+#define __NR_perf_event_open 241
+#define __NR_accept4 242
+#define __NR_recvmmsg 243
+#define __NR_wait4 260
+#define __NR_prlimit64 261
+#define __NR_fanotify_init 262
+#define __NR_fanotify_mark 263
+#define __NR_name_to_handle_at 264
+#define __NR_open_by_handle_at 265
+#define __NR_clock_adjtime 266
+#define __NR_syncfs 267
+#define __NR_setns 268
+#define __NR_sendmmsg 269
+#define __NR_process_vm_readv 270
+#define __NR_process_vm_writev 271
+#define __NR_kcmp 272
+#define __NR_finit_module 273
+#define __NR_sched_setattr 274
+#define __NR_sched_getattr 275
+#define __NR_renameat2 276
+#define __NR_seccomp 277
+#define __NR_getrandom 278
+#define __NR_memfd_create 279
+#define __NR_bpf 280
+#define __NR_execveat 281
+#define __NR_userfaultfd 282
+#define __NR_membarrier 283
+#define __NR_mlock2 284
+#define __NR_copy_file_range 285
+#define __NR_preadv2 286
+#define __NR_pwritev2 287
+#define __NR_pkey_mprotect 288
+#define __NR_pkey_alloc 289
+#define __NR_pkey_free 290
+#define __NR_statx 291
+#define __NR_io_pgetevents 292
+#define __NR_rseq 293
+#define __NR_kexec_file_load 294
+#define __NR_pidfd_send_signal 424
+#define __NR_io_uring_setup 425
+#define __NR_io_uring_enter 426
+#define __NR_io_uring_register 427
+#define __NR_open_tree 428
+#define __NR_move_mount 429
+#define __NR_fsopen 430
+#define __NR_fsconfig 431
+#define __NR_fsmount 432
+#define __NR_fspick 433
+#define __NR_pidfd_open 434
+#define __NR_clone3 435
+#define __NR_close_range 436
+#define __NR_openat2 437
+#define __NR_pidfd_getfd 438
+#define __NR_faccessat2 439
+#define __NR_process_madvise 440
+#define __NR_epoll_pwait2 441
+#define __NR_mount_setattr 442
+#define __NR_quotactl_fd 443
+#define __NR_landlock_create_ruleset 444
+#define __NR_landlock_add_rule 445
+#define __NR_landlock_restrict_self 446
+#define __NR_memfd_secret 447
+#define __NR_process_mrelease 448
+#define __NR_futex_waitv 449
+#define __NR_set_mempolicy_home_node 450
+#define __NR_cachestat 451
+#define __NR_fchmodat2 452
+#define __NR_map_shadow_stack 453
+#define __NR_futex_wake 454
+#define __NR_futex_wait 455
+#define __NR_futex_requeue 456
+#define __NR_statmount 457
+#define __NR_listmount 458
+#define __NR_lsm_get_self_attr 459
+#define __NR_lsm_set_self_attr 460
+#define __NR_lsm_list_modules 461
+#define __NR_mseal 462
+#endif
diff --git a/libc/kernel/uapi/asm-generic/unistd.h b/libc/kernel/uapi/asm-generic/unistd.h
index 7eaa89a..652e7a2 100644
--- a/libc/kernel/uapi/asm-generic/unistd.h
+++ b/libc/kernel/uapi/asm-generic/unistd.h
@@ -381,9 +381,7 @@
 #define __NR_fsmount 432
 #define __NR_fspick 433
 #define __NR_pidfd_open 434
-#ifdef __ARCH_WANT_SYS_CLONE3
 #define __NR_clone3 435
-#endif
 #define __NR_close_range 436
 #define __NR_openat2 437
 #define __NR_pidfd_getfd 438
diff --git a/libc/kernel/uapi/asm-riscv/asm/hwprobe.h b/libc/kernel/uapi/asm-riscv/asm/hwprobe.h
index 3f30c88..2e5f9a4 100644
--- a/libc/kernel/uapi/asm-riscv/asm/hwprobe.h
+++ b/libc/kernel/uapi/asm-riscv/asm/hwprobe.h
@@ -54,6 +54,18 @@
 #define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34)
 #define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35)
 #define RISCV_HWPROBE_EXT_ZIHINTPAUSE (1ULL << 36)
+#define RISCV_HWPROBE_EXT_ZVE32X (1ULL << 37)
+#define RISCV_HWPROBE_EXT_ZVE32F (1ULL << 38)
+#define RISCV_HWPROBE_EXT_ZVE64X (1ULL << 39)
+#define RISCV_HWPROBE_EXT_ZVE64F (1ULL << 40)
+#define RISCV_HWPROBE_EXT_ZVE64D (1ULL << 41)
+#define RISCV_HWPROBE_EXT_ZIMOP (1ULL << 42)
+#define RISCV_HWPROBE_EXT_ZCA (1ULL << 43)
+#define RISCV_HWPROBE_EXT_ZCB (1ULL << 44)
+#define RISCV_HWPROBE_EXT_ZCD (1ULL << 45)
+#define RISCV_HWPROBE_EXT_ZCF (1ULL << 46)
+#define RISCV_HWPROBE_EXT_ZCMOP (1ULL << 47)
+#define RISCV_HWPROBE_EXT_ZAWRS (1ULL << 48)
 #define RISCV_HWPROBE_KEY_CPUPERF_0 5
 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
 #define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
@@ -62,5 +74,13 @@
 #define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
 #define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
 #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
+#define RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS 7
+#define RISCV_HWPROBE_KEY_TIME_CSR_FREQ 8
+#define RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF 9
+#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN 0
+#define RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED 1
+#define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2
+#define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3
+#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4
 #define RISCV_HWPROBE_WHICH_CPUS (1 << 0)
 #endif
diff --git a/libc/kernel/uapi/asm-riscv/asm/kvm.h b/libc/kernel/uapi/asm-riscv/asm/kvm.h
index 12d8f61..51f4977 100644
--- a/libc/kernel/uapi/asm-riscv/asm/kvm.h
+++ b/libc/kernel/uapi/asm-riscv/asm/kvm.h
@@ -128,6 +128,13 @@
   KVM_RISCV_ISA_EXT_ZTSO,
   KVM_RISCV_ISA_EXT_ZACAS,
   KVM_RISCV_ISA_EXT_SSCOFPMF,
+  KVM_RISCV_ISA_EXT_ZIMOP,
+  KVM_RISCV_ISA_EXT_ZCA,
+  KVM_RISCV_ISA_EXT_ZCB,
+  KVM_RISCV_ISA_EXT_ZCD,
+  KVM_RISCV_ISA_EXT_ZCF,
+  KVM_RISCV_ISA_EXT_ZCMOP,
+  KVM_RISCV_ISA_EXT_ZAWRS,
   KVM_RISCV_ISA_EXT_MAX,
 };
 enum KVM_RISCV_SBI_EXT_ID {
diff --git a/libc/kernel/uapi/asm-riscv/asm/unistd.h b/libc/kernel/uapi/asm-riscv/asm/unistd.h
index 0c58887..f395f71 100644
--- a/libc/kernel/uapi/asm-riscv/asm/unistd.h
+++ b/libc/kernel/uapi/asm-riscv/asm/unistd.h
@@ -4,18 +4,9 @@
  * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
  * for more information.
  */
-#if defined(__LP64__) && !defined(__SYSCALL_COMPAT)
-#define __ARCH_WANT_NEW_STAT
-#define __ARCH_WANT_SET_GET_RLIMIT
+#include <asm/bitsperlong.h>
+#if __BITS_PER_LONG == 64
+#include <asm/unistd_64.h>
+#else
+#include <asm/unistd_32.h>
 #endif
-#define __ARCH_WANT_SYS_CLONE3
-#define __ARCH_WANT_MEMFD_SECRET
-#include <asm-generic/unistd.h>
-#ifndef __NR_riscv_flush_icache
-#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
-#endif
-__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
-#ifndef __NR_riscv_hwprobe
-#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14)
-#endif
-__SYSCALL(__NR_riscv_hwprobe, sys_riscv_hwprobe)
diff --git a/libc/kernel/uapi/asm-riscv/asm/unistd_32.h b/libc/kernel/uapi/asm-riscv/asm/unistd_32.h
new file mode 100644
index 0000000..864a556
--- /dev/null
+++ b/libc/kernel/uapi/asm-riscv/asm/unistd_32.h
@@ -0,0 +1,318 @@
+/*
+ * This file is auto-generated. Modifications will be lost.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
+ * for more information.
+ */
+#ifndef _UAPI_ASM_UNISTD_32_H
+#define _UAPI_ASM_UNISTD_32_H
+#define __NR_io_setup 0
+#define __NR_io_destroy 1
+#define __NR_io_submit 2
+#define __NR_io_cancel 3
+#define __NR_setxattr 5
+#define __NR_lsetxattr 6
+#define __NR_fsetxattr 7
+#define __NR_getxattr 8
+#define __NR_lgetxattr 9
+#define __NR_fgetxattr 10
+#define __NR_listxattr 11
+#define __NR_llistxattr 12
+#define __NR_flistxattr 13
+#define __NR_removexattr 14
+#define __NR_lremovexattr 15
+#define __NR_fremovexattr 16
+#define __NR_getcwd 17
+#define __NR_lookup_dcookie 18
+#define __NR_eventfd2 19
+#define __NR_epoll_create1 20
+#define __NR_epoll_ctl 21
+#define __NR_epoll_pwait 22
+#define __NR_dup 23
+#define __NR_dup3 24
+#define __NR_fcntl64 25
+#define __NR_inotify_init1 26
+#define __NR_inotify_add_watch 27
+#define __NR_inotify_rm_watch 28
+#define __NR_ioctl 29
+#define __NR_ioprio_set 30
+#define __NR_ioprio_get 31
+#define __NR_flock 32
+#define __NR_mknodat 33
+#define __NR_mkdirat 34
+#define __NR_unlinkat 35
+#define __NR_symlinkat 36
+#define __NR_linkat 37
+#define __NR_umount2 39
+#define __NR_mount 40
+#define __NR_pivot_root 41
+#define __NR_nfsservctl 42
+#define __NR_statfs64 43
+#define __NR_fstatfs64 44
+#define __NR_truncate64 45
+#define __NR_ftruncate64 46
+#define __NR_fallocate 47
+#define __NR_faccessat 48
+#define __NR_chdir 49
+#define __NR_fchdir 50
+#define __NR_chroot 51
+#define __NR_fchmod 52
+#define __NR_fchmodat 53
+#define __NR_fchownat 54
+#define __NR_fchown 55
+#define __NR_openat 56
+#define __NR_close 57
+#define __NR_vhangup 58
+#define __NR_pipe2 59
+#define __NR_quotactl 60
+#define __NR_getdents64 61
+#define __NR_llseek 62
+#define __NR_read 63
+#define __NR_write 64
+#define __NR_readv 65
+#define __NR_writev 66
+#define __NR_pread64 67
+#define __NR_pwrite64 68
+#define __NR_preadv 69
+#define __NR_pwritev 70
+#define __NR_sendfile64 71
+#define __NR_signalfd4 74
+#define __NR_vmsplice 75
+#define __NR_splice 76
+#define __NR_tee 77
+#define __NR_readlinkat 78
+#define __NR_sync 81
+#define __NR_fsync 82
+#define __NR_fdatasync 83
+#define __NR_sync_file_range 84
+#define __NR_timerfd_create 85
+#define __NR_acct 89
+#define __NR_capget 90
+#define __NR_capset 91
+#define __NR_personality 92
+#define __NR_exit 93
+#define __NR_exit_group 94
+#define __NR_waitid 95
+#define __NR_set_tid_address 96
+#define __NR_unshare 97
+#define __NR_set_robust_list 99
+#define __NR_get_robust_list 100
+#define __NR_getitimer 102
+#define __NR_setitimer 103
+#define __NR_kexec_load 104
+#define __NR_init_module 105
+#define __NR_delete_module 106
+#define __NR_timer_create 107
+#define __NR_timer_getoverrun 109
+#define __NR_timer_delete 111
+#define __NR_syslog 116
+#define __NR_ptrace 117
+#define __NR_sched_setparam 118
+#define __NR_sched_setscheduler 119
+#define __NR_sched_getscheduler 120
+#define __NR_sched_getparam 121
+#define __NR_sched_setaffinity 122
+#define __NR_sched_getaffinity 123
+#define __NR_sched_yield 124
+#define __NR_sched_get_priority_max 125
+#define __NR_sched_get_priority_min 126
+#define __NR_restart_syscall 128
+#define __NR_kill 129
+#define __NR_tkill 130
+#define __NR_tgkill 131
+#define __NR_sigaltstack 132
+#define __NR_rt_sigsuspend 133
+#define __NR_rt_sigaction 134
+#define __NR_rt_sigprocmask 135
+#define __NR_rt_sigpending 136
+#define __NR_rt_sigqueueinfo 138
+#define __NR_rt_sigreturn 139
+#define __NR_setpriority 140
+#define __NR_getpriority 141
+#define __NR_reboot 142
+#define __NR_setregid 143
+#define __NR_setgid 144
+#define __NR_setreuid 145
+#define __NR_setuid 146
+#define __NR_setresuid 147
+#define __NR_getresuid 148
+#define __NR_setresgid 149
+#define __NR_getresgid 150
+#define __NR_setfsuid 151
+#define __NR_setfsgid 152
+#define __NR_times 153
+#define __NR_setpgid 154
+#define __NR_getpgid 155
+#define __NR_getsid 156
+#define __NR_setsid 157
+#define __NR_getgroups 158
+#define __NR_setgroups 159
+#define __NR_uname 160
+#define __NR_sethostname 161
+#define __NR_setdomainname 162
+#define __NR_getrusage 165
+#define __NR_umask 166
+#define __NR_prctl 167
+#define __NR_getcpu 168
+#define __NR_getpid 172
+#define __NR_getppid 173
+#define __NR_getuid 174
+#define __NR_geteuid 175
+#define __NR_getgid 176
+#define __NR_getegid 177
+#define __NR_gettid 178
+#define __NR_sysinfo 179
+#define __NR_mq_open 180
+#define __NR_mq_unlink 181
+#define __NR_mq_notify 184
+#define __NR_mq_getsetattr 185
+#define __NR_msgget 186
+#define __NR_msgctl 187
+#define __NR_msgrcv 188
+#define __NR_msgsnd 189
+#define __NR_semget 190
+#define __NR_semctl 191
+#define __NR_semop 193
+#define __NR_shmget 194
+#define __NR_shmctl 195
+#define __NR_shmat 196
+#define __NR_shmdt 197
+#define __NR_socket 198
+#define __NR_socketpair 199
+#define __NR_bind 200
+#define __NR_listen 201
+#define __NR_accept 202
+#define __NR_connect 203
+#define __NR_getsockname 204
+#define __NR_getpeername 205
+#define __NR_sendto 206
+#define __NR_recvfrom 207
+#define __NR_setsockopt 208
+#define __NR_getsockopt 209
+#define __NR_shutdown 210
+#define __NR_sendmsg 211
+#define __NR_recvmsg 212
+#define __NR_readahead 213
+#define __NR_brk 214
+#define __NR_munmap 215
+#define __NR_mremap 216
+#define __NR_add_key 217
+#define __NR_request_key 218
+#define __NR_keyctl 219
+#define __NR_clone 220
+#define __NR_execve 221
+#define __NR_mmap2 222
+#define __NR_fadvise64_64 223
+#define __NR_swapon 224
+#define __NR_swapoff 225
+#define __NR_mprotect 226
+#define __NR_msync 227
+#define __NR_mlock 228
+#define __NR_munlock 229
+#define __NR_mlockall 230
+#define __NR_munlockall 231
+#define __NR_mincore 232
+#define __NR_madvise 233
+#define __NR_remap_file_pages 234
+#define __NR_mbind 235
+#define __NR_get_mempolicy 236
+#define __NR_set_mempolicy 237
+#define __NR_migrate_pages 238
+#define __NR_move_pages 239
+#define __NR_rt_tgsigqueueinfo 240
+#define __NR_perf_event_open 241
+#define __NR_accept4 242
+#define __NR_riscv_hwprobe 258
+#define __NR_riscv_flush_icache 259
+#define __NR_prlimit64 261
+#define __NR_fanotify_init 262
+#define __NR_fanotify_mark 263
+#define __NR_name_to_handle_at 264
+#define __NR_open_by_handle_at 265
+#define __NR_syncfs 267
+#define __NR_setns 268
+#define __NR_sendmmsg 269
+#define __NR_process_vm_readv 270
+#define __NR_process_vm_writev 271
+#define __NR_kcmp 272
+#define __NR_finit_module 273
+#define __NR_sched_setattr 274
+#define __NR_sched_getattr 275
+#define __NR_renameat2 276
+#define __NR_seccomp 277
+#define __NR_getrandom 278
+#define __NR_memfd_create 279
+#define __NR_bpf 280
+#define __NR_execveat 281
+#define __NR_userfaultfd 282
+#define __NR_membarrier 283
+#define __NR_mlock2 284
+#define __NR_copy_file_range 285
+#define __NR_preadv2 286
+#define __NR_pwritev2 287
+#define __NR_pkey_mprotect 288
+#define __NR_pkey_alloc 289
+#define __NR_pkey_free 290
+#define __NR_statx 291
+#define __NR_rseq 293
+#define __NR_kexec_file_load 294
+#define __NR_clock_gettime64 403
+#define __NR_clock_settime64 404
+#define __NR_clock_adjtime64 405
+#define __NR_clock_getres_time64 406
+#define __NR_clock_nanosleep_time64 407
+#define __NR_timer_gettime64 408
+#define __NR_timer_settime64 409
+#define __NR_timerfd_gettime64 410
+#define __NR_timerfd_settime64 411
+#define __NR_utimensat_time64 412
+#define __NR_pselect6_time64 413
+#define __NR_ppoll_time64 414
+#define __NR_io_pgetevents_time64 416
+#define __NR_recvmmsg_time64 417
+#define __NR_mq_timedsend_time64 418
+#define __NR_mq_timedreceive_time64 419
+#define __NR_semtimedop_time64 420
+#define __NR_rt_sigtimedwait_time64 421
+#define __NR_futex_time64 422
+#define __NR_sched_rr_get_interval_time64 423
+#define __NR_pidfd_send_signal 424
+#define __NR_io_uring_setup 425
+#define __NR_io_uring_enter 426
+#define __NR_io_uring_register 427
+#define __NR_open_tree 428
+#define __NR_move_mount 429
+#define __NR_fsopen 430
+#define __NR_fsconfig 431
+#define __NR_fsmount 432
+#define __NR_fspick 433
+#define __NR_pidfd_open 434
+#define __NR_clone3 435
+#define __NR_close_range 436
+#define __NR_openat2 437
+#define __NR_pidfd_getfd 438
+#define __NR_faccessat2 439
+#define __NR_process_madvise 440
+#define __NR_epoll_pwait2 441
+#define __NR_mount_setattr 442
+#define __NR_quotactl_fd 443
+#define __NR_landlock_create_ruleset 444
+#define __NR_landlock_add_rule 445
+#define __NR_landlock_restrict_self 446
+#define __NR_memfd_secret 447
+#define __NR_process_mrelease 448
+#define __NR_futex_waitv 449
+#define __NR_set_mempolicy_home_node 450
+#define __NR_cachestat 451
+#define __NR_fchmodat2 452
+#define __NR_map_shadow_stack 453
+#define __NR_futex_wake 454
+#define __NR_futex_wait 455
+#define __NR_futex_requeue 456
+#define __NR_statmount 457
+#define __NR_listmount 458
+#define __NR_lsm_get_self_attr 459
+#define __NR_lsm_set_self_attr 460
+#define __NR_lsm_list_modules 461
+#define __NR_mseal 462
+#endif
diff --git a/libc/kernel/uapi/asm-riscv/asm/unistd_64.h b/libc/kernel/uapi/asm-riscv/asm/unistd_64.h
new file mode 100644
index 0000000..f15b65b
--- /dev/null
+++ b/libc/kernel/uapi/asm-riscv/asm/unistd_64.h
@@ -0,0 +1,328 @@
+/*
+ * This file is auto-generated. Modifications will be lost.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
+ * for more information.
+ */
+#ifndef _UAPI_ASM_UNISTD_64_H
+#define _UAPI_ASM_UNISTD_64_H
+#define __NR_io_setup 0
+#define __NR_io_destroy 1
+#define __NR_io_submit 2
+#define __NR_io_cancel 3
+#define __NR_io_getevents 4
+#define __NR_setxattr 5
+#define __NR_lsetxattr 6
+#define __NR_fsetxattr 7
+#define __NR_getxattr 8
+#define __NR_lgetxattr 9
+#define __NR_fgetxattr 10
+#define __NR_listxattr 11
+#define __NR_llistxattr 12
+#define __NR_flistxattr 13
+#define __NR_removexattr 14
+#define __NR_lremovexattr 15
+#define __NR_fremovexattr 16
+#define __NR_getcwd 17
+#define __NR_lookup_dcookie 18
+#define __NR_eventfd2 19
+#define __NR_epoll_create1 20
+#define __NR_epoll_ctl 21
+#define __NR_epoll_pwait 22
+#define __NR_dup 23
+#define __NR_dup3 24
+#define __NR_fcntl 25
+#define __NR_inotify_init1 26
+#define __NR_inotify_add_watch 27
+#define __NR_inotify_rm_watch 28
+#define __NR_ioctl 29
+#define __NR_ioprio_set 30
+#define __NR_ioprio_get 31
+#define __NR_flock 32
+#define __NR_mknodat 33
+#define __NR_mkdirat 34
+#define __NR_unlinkat 35
+#define __NR_symlinkat 36
+#define __NR_linkat 37
+#define __NR_umount2 39
+#define __NR_mount 40
+#define __NR_pivot_root 41
+#define __NR_nfsservctl 42
+#define __NR_statfs 43
+#define __NR_fstatfs 44
+#define __NR_truncate 45
+#define __NR_ftruncate 46
+#define __NR_fallocate 47
+#define __NR_faccessat 48
+#define __NR_chdir 49
+#define __NR_fchdir 50
+#define __NR_chroot 51
+#define __NR_fchmod 52
+#define __NR_fchmodat 53
+#define __NR_fchownat 54
+#define __NR_fchown 55
+#define __NR_openat 56
+#define __NR_close 57
+#define __NR_vhangup 58
+#define __NR_pipe2 59
+#define __NR_quotactl 60
+#define __NR_getdents64 61
+#define __NR_lseek 62
+#define __NR_read 63
+#define __NR_write 64
+#define __NR_readv 65
+#define __NR_writev 66
+#define __NR_pread64 67
+#define __NR_pwrite64 68
+#define __NR_preadv 69
+#define __NR_pwritev 70
+#define __NR_sendfile 71
+#define __NR_pselect6 72
+#define __NR_ppoll 73
+#define __NR_signalfd4 74
+#define __NR_vmsplice 75
+#define __NR_splice 76
+#define __NR_tee 77
+#define __NR_readlinkat 78
+#define __NR_newfstatat 79
+#define __NR_fstat 80
+#define __NR_sync 81
+#define __NR_fsync 82
+#define __NR_fdatasync 83
+#define __NR_sync_file_range 84
+#define __NR_timerfd_create 85
+#define __NR_timerfd_settime 86
+#define __NR_timerfd_gettime 87
+#define __NR_utimensat 88
+#define __NR_acct 89
+#define __NR_capget 90
+#define __NR_capset 91
+#define __NR_personality 92
+#define __NR_exit 93
+#define __NR_exit_group 94
+#define __NR_waitid 95
+#define __NR_set_tid_address 96
+#define __NR_unshare 97
+#define __NR_futex 98
+#define __NR_set_robust_list 99
+#define __NR_get_robust_list 100
+#define __NR_nanosleep 101
+#define __NR_getitimer 102
+#define __NR_setitimer 103
+#define __NR_kexec_load 104
+#define __NR_init_module 105
+#define __NR_delete_module 106
+#define __NR_timer_create 107
+#define __NR_timer_gettime 108
+#define __NR_timer_getoverrun 109
+#define __NR_timer_settime 110
+#define __NR_timer_delete 111
+#define __NR_clock_settime 112
+#define __NR_clock_gettime 113
+#define __NR_clock_getres 114
+#define __NR_clock_nanosleep 115
+#define __NR_syslog 116
+#define __NR_ptrace 117
+#define __NR_sched_setparam 118
+#define __NR_sched_setscheduler 119
+#define __NR_sched_getscheduler 120
+#define __NR_sched_getparam 121
+#define __NR_sched_setaffinity 122
+#define __NR_sched_getaffinity 123
+#define __NR_sched_yield 124
+#define __NR_sched_get_priority_max 125
+#define __NR_sched_get_priority_min 126
+#define __NR_sched_rr_get_interval 127
+#define __NR_restart_syscall 128
+#define __NR_kill 129
+#define __NR_tkill 130
+#define __NR_tgkill 131
+#define __NR_sigaltstack 132
+#define __NR_rt_sigsuspend 133
+#define __NR_rt_sigaction 134
+#define __NR_rt_sigprocmask 135
+#define __NR_rt_sigpending 136
+#define __NR_rt_sigtimedwait 137
+#define __NR_rt_sigqueueinfo 138
+#define __NR_rt_sigreturn 139
+#define __NR_setpriority 140
+#define __NR_getpriority 141
+#define __NR_reboot 142
+#define __NR_setregid 143
+#define __NR_setgid 144
+#define __NR_setreuid 145
+#define __NR_setuid 146
+#define __NR_setresuid 147
+#define __NR_getresuid 148
+#define __NR_setresgid 149
+#define __NR_getresgid 150
+#define __NR_setfsuid 151
+#define __NR_setfsgid 152
+#define __NR_times 153
+#define __NR_setpgid 154
+#define __NR_getpgid 155
+#define __NR_getsid 156
+#define __NR_setsid 157
+#define __NR_getgroups 158
+#define __NR_setgroups 159
+#define __NR_uname 160
+#define __NR_sethostname 161
+#define __NR_setdomainname 162
+#define __NR_getrlimit 163
+#define __NR_setrlimit 164
+#define __NR_getrusage 165
+#define __NR_umask 166
+#define __NR_prctl 167
+#define __NR_getcpu 168
+#define __NR_gettimeofday 169
+#define __NR_settimeofday 170
+#define __NR_adjtimex 171
+#define __NR_getpid 172
+#define __NR_getppid 173
+#define __NR_getuid 174
+#define __NR_geteuid 175
+#define __NR_getgid 176
+#define __NR_getegid 177
+#define __NR_gettid 178
+#define __NR_sysinfo 179
+#define __NR_mq_open 180
+#define __NR_mq_unlink 181
+#define __NR_mq_timedsend 182
+#define __NR_mq_timedreceive 183
+#define __NR_mq_notify 184
+#define __NR_mq_getsetattr 185
+#define __NR_msgget 186
+#define __NR_msgctl 187
+#define __NR_msgrcv 188
+#define __NR_msgsnd 189
+#define __NR_semget 190
+#define __NR_semctl 191
+#define __NR_semtimedop 192
+#define __NR_semop 193
+#define __NR_shmget 194
+#define __NR_shmctl 195
+#define __NR_shmat 196
+#define __NR_shmdt 197
+#define __NR_socket 198
+#define __NR_socketpair 199
+#define __NR_bind 200
+#define __NR_listen 201
+#define __NR_accept 202
+#define __NR_connect 203
+#define __NR_getsockname 204
+#define __NR_getpeername 205
+#define __NR_sendto 206
+#define __NR_recvfrom 207
+#define __NR_setsockopt 208
+#define __NR_getsockopt 209
+#define __NR_shutdown 210
+#define __NR_sendmsg 211
+#define __NR_recvmsg 212
+#define __NR_readahead 213
+#define __NR_brk 214
+#define __NR_munmap 215
+#define __NR_mremap 216
+#define __NR_add_key 217
+#define __NR_request_key 218
+#define __NR_keyctl 219
+#define __NR_clone 220
+#define __NR_execve 221
+#define __NR_mmap 222
+#define __NR_fadvise64 223
+#define __NR_swapon 224
+#define __NR_swapoff 225
+#define __NR_mprotect 226
+#define __NR_msync 227
+#define __NR_mlock 228
+#define __NR_munlock 229
+#define __NR_mlockall 230
+#define __NR_munlockall 231
+#define __NR_mincore 232
+#define __NR_madvise 233
+#define __NR_remap_file_pages 234
+#define __NR_mbind 235
+#define __NR_get_mempolicy 236
+#define __NR_set_mempolicy 237
+#define __NR_migrate_pages 238
+#define __NR_move_pages 239
+#define __NR_rt_tgsigqueueinfo 240
+#define __NR_perf_event_open 241
+#define __NR_accept4 242
+#define __NR_recvmmsg 243
+#define __NR_riscv_hwprobe 258
+#define __NR_riscv_flush_icache 259
+#define __NR_wait4 260
+#define __NR_prlimit64 261
+#define __NR_fanotify_init 262
+#define __NR_fanotify_mark 263
+#define __NR_name_to_handle_at 264
+#define __NR_open_by_handle_at 265
+#define __NR_clock_adjtime 266
+#define __NR_syncfs 267
+#define __NR_setns 268
+#define __NR_sendmmsg 269
+#define __NR_process_vm_readv 270
+#define __NR_process_vm_writev 271
+#define __NR_kcmp 272
+#define __NR_finit_module 273
+#define __NR_sched_setattr 274
+#define __NR_sched_getattr 275
+#define __NR_renameat2 276
+#define __NR_seccomp 277
+#define __NR_getrandom 278
+#define __NR_memfd_create 279
+#define __NR_bpf 280
+#define __NR_execveat 281
+#define __NR_userfaultfd 282
+#define __NR_membarrier 283
+#define __NR_mlock2 284
+#define __NR_copy_file_range 285
+#define __NR_preadv2 286
+#define __NR_pwritev2 287
+#define __NR_pkey_mprotect 288
+#define __NR_pkey_alloc 289
+#define __NR_pkey_free 290
+#define __NR_statx 291
+#define __NR_io_pgetevents 292
+#define __NR_rseq 293
+#define __NR_kexec_file_load 294
+#define __NR_pidfd_send_signal 424
+#define __NR_io_uring_setup 425
+#define __NR_io_uring_enter 426
+#define __NR_io_uring_register 427
+#define __NR_open_tree 428
+#define __NR_move_mount 429
+#define __NR_fsopen 430
+#define __NR_fsconfig 431
+#define __NR_fsmount 432
+#define __NR_fspick 433
+#define __NR_pidfd_open 434
+#define __NR_clone3 435
+#define __NR_close_range 436
+#define __NR_openat2 437
+#define __NR_pidfd_getfd 438
+#define __NR_faccessat2 439
+#define __NR_process_madvise 440
+#define __NR_epoll_pwait2 441
+#define __NR_mount_setattr 442
+#define __NR_quotactl_fd 443
+#define __NR_landlock_create_ruleset 444
+#define __NR_landlock_add_rule 445
+#define __NR_landlock_restrict_self 446
+#define __NR_memfd_secret 447
+#define __NR_process_mrelease 448
+#define __NR_futex_waitv 449
+#define __NR_set_mempolicy_home_node 450
+#define __NR_cachestat 451
+#define __NR_fchmodat2 452
+#define __NR_map_shadow_stack 453
+#define __NR_futex_wake 454
+#define __NR_futex_wait 455
+#define __NR_futex_requeue 456
+#define __NR_statmount 457
+#define __NR_listmount 458
+#define __NR_lsm_get_self_attr 459
+#define __NR_lsm_set_self_attr 460
+#define __NR_lsm_list_modules 461
+#define __NR_mseal 462
+#endif
diff --git a/libc/kernel/uapi/asm-x86/asm/kvm.h b/libc/kernel/uapi/asm-x86/asm/kvm.h
index 17b1c5d..cd647b6 100644
--- a/libc/kernel/uapi/asm-x86/asm/kvm.h
+++ b/libc/kernel/uapi/asm-x86/asm/kvm.h
@@ -94,6 +94,7 @@
 #define KVM_NR_IRQCHIPS 3
 #define KVM_RUN_X86_SMM (1 << 0)
 #define KVM_RUN_X86_BUS_LOCK (1 << 1)
+#define KVM_RUN_X86_GUEST_MODE (1 << 2)
 struct kvm_regs {
   __u64 rax, rbx, rcx, rdx;
   __u64 rsi, rdi, rsp, rbp;
@@ -532,6 +533,9 @@
   KVM_SEV_GET_ATTESTATION_REPORT,
   KVM_SEV_SEND_CANCEL,
   KVM_SEV_INIT2,
+  KVM_SEV_SNP_LAUNCH_START = 100,
+  KVM_SEV_SNP_LAUNCH_UPDATE,
+  KVM_SEV_SNP_LAUNCH_FINISH,
   KVM_SEV_NR_MAX,
 };
 struct kvm_sev_cmd {
@@ -644,6 +648,42 @@
   __u32 trans_len;
   __u32 pad2;
 };
+struct kvm_sev_snp_launch_start {
+  __u64 policy;
+  __u8 gosvw[16];
+  __u16 flags;
+  __u8 pad0[6];
+  __u64 pad1[4];
+};
+#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
+#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
+#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
+#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5
+#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6
+struct kvm_sev_snp_launch_update {
+  __u64 gfn_start;
+  __u64 uaddr;
+  __u64 len;
+  __u8 type;
+  __u8 pad0;
+  __u16 flags;
+  __u32 pad1;
+  __u64 pad2[4];
+};
+#define KVM_SEV_SNP_ID_BLOCK_SIZE 96
+#define KVM_SEV_SNP_ID_AUTH_SIZE 4096
+#define KVM_SEV_SNP_FINISH_DATA_SIZE 32
+struct kvm_sev_snp_launch_finish {
+  __u64 id_block_uaddr;
+  __u64 id_auth_uaddr;
+  __u8 id_block_en;
+  __u8 auth_key_en;
+  __u8 vcek_disabled;
+  __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
+  __u8 pad0[3];
+  __u16 flags;
+  __u64 pad1[4];
+};
 #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
 #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
 struct kvm_hyperv_eventfd {
@@ -667,4 +707,5 @@
 #define KVM_X86_SW_PROTECTED_VM 1
 #define KVM_X86_SEV_VM 2
 #define KVM_X86_SEV_ES_VM 3
+#define KVM_X86_SNP_VM 4
 #endif
diff --git a/libc/kernel/uapi/asm-x86/asm/svm.h b/libc/kernel/uapi/asm-x86/asm/svm.h
index ffbf0b3..4f165fa 100644
--- a/libc/kernel/uapi/asm-x86/asm/svm.h
+++ b/libc/kernel/uapi/asm-x86/asm/svm.h
@@ -117,6 +117,7 @@
 #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
 #define SVM_VMGEXIT_AP_CREATE 1
 #define SVM_VMGEXIT_AP_DESTROY 2
+#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
 #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
 #define SVM_VMGEXIT_TERM_REASON(reason_set,reason_code) (((((u64) reason_set) & 0xf)) | ((((u64) reason_code) & 0xff) << 4))
diff --git a/libc/kernel/uapi/asm-x86/asm/unistd_64.h b/libc/kernel/uapi/asm-x86/asm/unistd_64.h
index 5dd666c..d5408a3 100644
--- a/libc/kernel/uapi/asm-x86/asm/unistd_64.h
+++ b/libc/kernel/uapi/asm-x86/asm/unistd_64.h
@@ -341,6 +341,7 @@
 #define __NR_statx 332
 #define __NR_io_pgetevents 333
 #define __NR_rseq 334
+#define __NR_uretprobe 335
 #define __NR_pidfd_send_signal 424
 #define __NR_io_uring_setup 425
 #define __NR_io_uring_enter 426
diff --git a/libc/kernel/uapi/asm-x86/asm/unistd_x32.h b/libc/kernel/uapi/asm-x86/asm/unistd_x32.h
index a2ff6f4..fdcf7e6 100644
--- a/libc/kernel/uapi/asm-x86/asm/unistd_x32.h
+++ b/libc/kernel/uapi/asm-x86/asm/unistd_x32.h
@@ -294,6 +294,7 @@
 #define __NR_statx (__X32_SYSCALL_BIT + 332)
 #define __NR_io_pgetevents (__X32_SYSCALL_BIT + 333)
 #define __NR_rseq (__X32_SYSCALL_BIT + 334)
+#define __NR_uretprobe (__X32_SYSCALL_BIT + 335)
 #define __NR_pidfd_send_signal (__X32_SYSCALL_BIT + 424)
 #define __NR_io_uring_setup (__X32_SYSCALL_BIT + 425)
 #define __NR_io_uring_enter (__X32_SYSCALL_BIT + 426)
diff --git a/libc/kernel/uapi/drm/amdgpu_drm.h b/libc/kernel/uapi/drm/amdgpu_drm.h
index 0ad0bc2..7bbd5de 100644
--- a/libc/kernel/uapi/drm/amdgpu_drm.h
+++ b/libc/kernel/uapi/drm/amdgpu_drm.h
@@ -65,6 +65,7 @@
 #define AMDGPU_GEM_CREATE_COHERENT (1 << 13)
 #define AMDGPU_GEM_CREATE_UNCACHED (1 << 14)
 #define AMDGPU_GEM_CREATE_EXT_COHERENT (1 << 15)
+#define AMDGPU_GEM_CREATE_GFX12_DCC (1 << 16)
 struct drm_amdgpu_gem_create_in {
   __u64 bo_size;
   __u64 alignment;
@@ -216,6 +217,14 @@
 #define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1
 #define AMDGPU_TILING_SCANOUT_SHIFT 63
 #define AMDGPU_TILING_SCANOUT_MASK 0x1
+#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0
+#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7
+#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3
+#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3
+#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5
+#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7
+#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8
+#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f
 #define AMDGPU_TILING_SET(field,value) (((__u64) (value) & AMDGPU_TILING_ ##field ##_MASK) << AMDGPU_TILING_ ##field ##_SHIFT)
 #define AMDGPU_TILING_GET(value,field) (((__u64) (value) >> AMDGPU_TILING_ ##field ##_SHIFT) & AMDGPU_TILING_ ##field ##_MASK)
 #define AMDGPU_GEM_METADATA_OP_SET_METADATA 1
@@ -744,6 +753,10 @@
 #define AMDGPU_FAMILY_GC_10_3_6 149
 #define AMDGPU_FAMILY_GC_10_3_7 151
 #define AMDGPU_FAMILY_GC_11_5_0 150
+#define AMDGPU_FAMILY_GC_12_0_0 152
+struct drm_color_ctm_3x4 {
+  __u64 matrix[12];
+};
 #ifdef __cplusplus
 }
 #endif
diff --git a/libc/kernel/uapi/drm/drm_fourcc.h b/libc/kernel/uapi/drm/drm_fourcc.h
index 6fd2eb8..4902d6c 100644
--- a/libc/kernel/uapi/drm/drm_fourcc.h
+++ b/libc/kernel/uapi/drm/drm_fourcc.h
@@ -255,12 +255,17 @@
 #define AMD_FMT_MOD_TILE_VER_GFX10 2
 #define AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS 3
 #define AMD_FMT_MOD_TILE_VER_GFX11 4
+#define AMD_FMT_MOD_TILE_VER_GFX12 5
 #define AMD_FMT_MOD_TILE_GFX9_64K_S 9
 #define AMD_FMT_MOD_TILE_GFX9_64K_D 10
 #define AMD_FMT_MOD_TILE_GFX9_64K_S_X 25
 #define AMD_FMT_MOD_TILE_GFX9_64K_D_X 26
 #define AMD_FMT_MOD_TILE_GFX9_64K_R_X 27
 #define AMD_FMT_MOD_TILE_GFX11_256K_R_X 31
+#define AMD_FMT_MOD_TILE_GFX12_256B_2D 1
+#define AMD_FMT_MOD_TILE_GFX12_4K_2D 2
+#define AMD_FMT_MOD_TILE_GFX12_64K_2D 3
+#define AMD_FMT_MOD_TILE_GFX12_256K_2D 4
 #define AMD_FMT_MOD_DCC_BLOCK_64B 0
 #define AMD_FMT_MOD_DCC_BLOCK_128B 1
 #define AMD_FMT_MOD_DCC_BLOCK_256B 2
diff --git a/libc/kernel/uapi/drm/drm_mode.h b/libc/kernel/uapi/drm/drm_mode.h
index 8fccdaf..06c91c5 100644
--- a/libc/kernel/uapi/drm/drm_mode.h
+++ b/libc/kernel/uapi/drm/drm_mode.h
@@ -357,9 +357,6 @@
 struct drm_color_ctm {
   __u64 matrix[9];
 };
-struct drm_color_ctm_3x4 {
-  __u64 matrix[12];
-};
 struct drm_color_lut {
   __u16 red;
   __u16 green;
diff --git a/libc/kernel/uapi/drm/i915_drm.h b/libc/kernel/uapi/drm/i915_drm.h
index 13eda7c..b43d8df 100644
--- a/libc/kernel/uapi/drm/i915_drm.h
+++ b/libc/kernel/uapi/drm/i915_drm.h
@@ -745,6 +745,7 @@
 #define I915_CONTEXT_PARAM_RINGSIZE 0xc
 #define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
 #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe
+#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf
   __u64 value;
 };
 struct drm_i915_gem_context_param_sseu {
@@ -799,6 +800,14 @@
 } __attribute__((packed));
 #define I915_DEFINE_CONTEXT_PARAM_ENGINES(name__,N__) struct { __u64 extensions; struct i915_engine_class_instance engines[N__]; \
 } __attribute__((packed)) name__
+struct i915_gem_context_param_context_image {
+  struct i915_engine_class_instance engine;
+  __u32 flags;
+#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0)
+  __u32 size;
+  __u32 mbz;
+  __u64 image;
+} __attribute__((packed));
 struct drm_i915_gem_context_create_ext_setparam {
   struct i915_user_extension base;
   struct drm_i915_gem_context_param param;
diff --git a/libc/kernel/uapi/drm/ivpu_accel.h b/libc/kernel/uapi/drm/ivpu_accel.h
index fcbf6f7..960bd43 100644
--- a/libc/kernel/uapi/drm/ivpu_accel.h
+++ b/libc/kernel/uapi/drm/ivpu_accel.h
@@ -18,12 +18,20 @@
 #define DRM_IVPU_BO_INFO 0x03
 #define DRM_IVPU_SUBMIT 0x05
 #define DRM_IVPU_BO_WAIT 0x06
+#define DRM_IVPU_METRIC_STREAMER_START 0x07
+#define DRM_IVPU_METRIC_STREAMER_STOP 0x08
+#define DRM_IVPU_METRIC_STREAMER_GET_DATA 0x09
+#define DRM_IVPU_METRIC_STREAMER_GET_INFO 0x0a
 #define DRM_IOCTL_IVPU_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_GET_PARAM, struct drm_ivpu_param)
 #define DRM_IOCTL_IVPU_SET_PARAM DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SET_PARAM, struct drm_ivpu_param)
 #define DRM_IOCTL_IVPU_BO_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_CREATE, struct drm_ivpu_bo_create)
 #define DRM_IOCTL_IVPU_BO_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_INFO, struct drm_ivpu_bo_info)
 #define DRM_IOCTL_IVPU_SUBMIT DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_SUBMIT, struct drm_ivpu_submit)
 #define DRM_IOCTL_IVPU_BO_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_BO_WAIT, struct drm_ivpu_bo_wait)
+#define DRM_IOCTL_IVPU_METRIC_STREAMER_START DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_START, struct drm_ivpu_metric_streamer_start)
+#define DRM_IOCTL_IVPU_METRIC_STREAMER_STOP DRM_IOW(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_STOP, struct drm_ivpu_metric_streamer_stop)
+#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_DATA DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_DATA, struct drm_ivpu_metric_streamer_get_data)
+#define DRM_IOCTL_IVPU_METRIC_STREAMER_GET_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_IVPU_METRIC_STREAMER_GET_INFO, struct drm_ivpu_metric_streamer_get_data)
 #define DRM_IVPU_PARAM_DEVICE_ID 0
 #define DRM_IVPU_PARAM_DEVICE_REVISION 1
 #define DRM_IVPU_PARAM_PLATFORM_TYPE 2
@@ -96,6 +104,22 @@
   __u32 job_status;
   __u32 pad;
 };
+struct drm_ivpu_metric_streamer_start {
+  __u64 metric_group_mask;
+  __u64 sampling_period_ns;
+  __u32 read_period_samples;
+  __u32 sample_size;
+  __u32 max_data_size;
+};
+struct drm_ivpu_metric_streamer_get_data {
+  __u64 metric_group_mask;
+  __u64 buffer_ptr;
+  __u64 buffer_size;
+  __u64 data_size;
+};
+struct drm_ivpu_metric_streamer_stop {
+  __u64 metric_group_mask;
+};
 #ifdef __cplusplus
 }
 #endif
diff --git a/libc/kernel/uapi/drm/msm_drm.h b/libc/kernel/uapi/drm/msm_drm.h
index 4d83744..7ec5ed2 100644
--- a/libc/kernel/uapi/drm/msm_drm.h
+++ b/libc/kernel/uapi/drm/msm_drm.h
@@ -37,6 +37,7 @@
 #define MSM_PARAM_VA_START 0x0e
 #define MSM_PARAM_VA_SIZE 0x0f
 #define MSM_PARAM_HIGHEST_BANK_BIT 0x10
+#define MSM_PARAM_RAYTRACING 0x11
 #define MSM_PARAM_NR_RINGS MSM_PARAM_PRIORITIES
 struct drm_msm_param {
   __u32 pipe;
diff --git a/libc/kernel/uapi/drm/v3d_drm.h b/libc/kernel/uapi/drm/v3d_drm.h
index 4000fd3..b7aca21 100644
--- a/libc/kernel/uapi/drm/v3d_drm.h
+++ b/libc/kernel/uapi/drm/v3d_drm.h
@@ -22,6 +22,7 @@
 #define DRM_V3D_PERFMON_DESTROY 0x09
 #define DRM_V3D_PERFMON_GET_VALUES 0x0a
 #define DRM_V3D_SUBMIT_CPU 0x0b
+#define DRM_V3D_PERFMON_GET_COUNTER 0x0c
 #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl)
 #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo)
 #define DRM_IOCTL_V3D_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_CREATE_BO, struct drm_v3d_create_bo)
@@ -34,6 +35,7 @@
 #define DRM_IOCTL_V3D_PERFMON_DESTROY DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_DESTROY, struct drm_v3d_perfmon_destroy)
 #define DRM_IOCTL_V3D_PERFMON_GET_VALUES DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_VALUES, struct drm_v3d_perfmon_get_values)
 #define DRM_IOCTL_V3D_SUBMIT_CPU DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CPU, struct drm_v3d_submit_cpu)
+#define DRM_IOCTL_V3D_PERFMON_GET_COUNTER DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_PERFMON_GET_COUNTER, struct drm_v3d_perfmon_get_counter)
 #define DRM_V3D_SUBMIT_CL_FLUSH_CACHE 0x01
 #define DRM_V3D_SUBMIT_EXTENSION 0x02
 struct drm_v3d_extension {
@@ -119,6 +121,7 @@
   DRM_V3D_PARAM_SUPPORTS_PERFMON,
   DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT,
   DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE,
+  DRM_V3D_PARAM_MAX_PERF_COUNTERS,
 };
 struct drm_v3d_get_param {
   __u32 param;
@@ -324,6 +327,16 @@
   __u32 pad;
   __u64 values_ptr;
 };
+#define DRM_V3D_PERFCNT_MAX_NAME 64
+#define DRM_V3D_PERFCNT_MAX_CATEGORY 32
+#define DRM_V3D_PERFCNT_MAX_DESCRIPTION 256
+struct drm_v3d_perfmon_get_counter {
+  __u8 counter;
+  __u8 name[DRM_V3D_PERFCNT_MAX_NAME];
+  __u8 category[DRM_V3D_PERFCNT_MAX_CATEGORY];
+  __u8 description[DRM_V3D_PERFCNT_MAX_DESCRIPTION];
+  __u8 reserved[7];
+};
 #ifdef __cplusplus
 }
 #endif
diff --git a/libc/kernel/uapi/drm/xe_drm.h b/libc/kernel/uapi/drm/xe_drm.h
index d1b6dad..a034b29 100644
--- a/libc/kernel/uapi/drm/xe_drm.h
+++ b/libc/kernel/uapi/drm/xe_drm.h
@@ -21,6 +21,7 @@
 #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08
 #define DRM_XE_EXEC 0x09
 #define DRM_XE_WAIT_USER_FENCE 0x0a
+#define DRM_XE_OBSERVATION 0x0b
 #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
 #define DRM_IOCTL_XE_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_CREATE, struct drm_xe_gem_create)
 #define DRM_IOCTL_XE_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_GEM_MMAP_OFFSET, struct drm_xe_gem_mmap_offset)
@@ -32,6 +33,7 @@
 #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property)
 #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
 #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
+#define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param)
 struct drm_xe_user_extension {
   __u64 next_extension;
   __u32 name;
@@ -120,6 +122,7 @@
   __u16 gt_id;
 #define DRM_XE_TOPO_DSS_GEOMETRY 1
 #define DRM_XE_TOPO_DSS_COMPUTE 2
+#define DRM_XE_TOPO_L3_BANK 3
 #define DRM_XE_TOPO_EU_PER_DSS 4
   __u16 type;
   __u32 num_bytes;
@@ -155,6 +158,7 @@
 #define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5
 #define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6
 #define DRM_XE_DEVICE_QUERY_UC_FW_VERSION 7
+#define DRM_XE_DEVICE_QUERY_OA_UNITS 8
   __u32 query;
   __u32 size;
   __u64 data;
@@ -309,6 +313,92 @@
   __u32 pad2;
   __u64 reserved[2];
 };
+enum drm_xe_observation_type {
+  DRM_XE_OBSERVATION_TYPE_OA,
+};
+enum drm_xe_observation_op {
+  DRM_XE_OBSERVATION_OP_STREAM_OPEN,
+  DRM_XE_OBSERVATION_OP_ADD_CONFIG,
+  DRM_XE_OBSERVATION_OP_REMOVE_CONFIG,
+};
+struct drm_xe_observation_param {
+  __u64 extensions;
+  __u64 observation_type;
+  __u64 observation_op;
+  __u64 param;
+};
+enum drm_xe_observation_ioctls {
+  DRM_XE_OBSERVATION_IOCTL_ENABLE = _IO('i', 0x0),
+  DRM_XE_OBSERVATION_IOCTL_DISABLE = _IO('i', 0x1),
+  DRM_XE_OBSERVATION_IOCTL_CONFIG = _IO('i', 0x2),
+  DRM_XE_OBSERVATION_IOCTL_STATUS = _IO('i', 0x3),
+  DRM_XE_OBSERVATION_IOCTL_INFO = _IO('i', 0x4),
+};
+enum drm_xe_oa_unit_type {
+  DRM_XE_OA_UNIT_TYPE_OAG,
+  DRM_XE_OA_UNIT_TYPE_OAM,
+};
+struct drm_xe_oa_unit {
+  __u64 extensions;
+  __u32 oa_unit_id;
+  __u32 oa_unit_type;
+  __u64 capabilities;
+#define DRM_XE_OA_CAPS_BASE (1 << 0)
+  __u64 oa_timestamp_freq;
+  __u64 reserved[4];
+  __u64 num_engines;
+  struct drm_xe_engine_class_instance eci[];
+};
+struct drm_xe_query_oa_units {
+  __u64 extensions;
+  __u32 num_oa_units;
+  __u32 pad;
+  __u64 oa_units[];
+};
+enum drm_xe_oa_format_type {
+  DRM_XE_OA_FMT_TYPE_OAG,
+  DRM_XE_OA_FMT_TYPE_OAR,
+  DRM_XE_OA_FMT_TYPE_OAM,
+  DRM_XE_OA_FMT_TYPE_OAC,
+  DRM_XE_OA_FMT_TYPE_OAM_MPEC,
+  DRM_XE_OA_FMT_TYPE_PEC,
+};
+enum drm_xe_oa_property_id {
+#define DRM_XE_OA_EXTENSION_SET_PROPERTY 0
+  DRM_XE_OA_PROPERTY_OA_UNIT_ID = 1,
+  DRM_XE_OA_PROPERTY_SAMPLE_OA,
+  DRM_XE_OA_PROPERTY_OA_METRIC_SET,
+  DRM_XE_OA_PROPERTY_OA_FORMAT,
+#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0)
+#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8)
+#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16)
+#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24)
+  DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT,
+  DRM_XE_OA_PROPERTY_OA_DISABLED,
+  DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID,
+  DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE,
+  DRM_XE_OA_PROPERTY_NO_PREEMPT,
+};
+struct drm_xe_oa_config {
+  __u64 extensions;
+  char uuid[36];
+  __u32 n_regs;
+  __u64 regs_ptr;
+};
+struct drm_xe_oa_stream_status {
+  __u64 extensions;
+  __u64 oa_status;
+#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL (1 << 3)
+#define DRM_XE_OASTATUS_COUNTER_OVERFLOW (1 << 2)
+#define DRM_XE_OASTATUS_BUFFER_OVERFLOW (1 << 1)
+#define DRM_XE_OASTATUS_REPORT_LOST (1 << 0)
+  __u64 reserved[3];
+};
+struct drm_xe_oa_stream_info {
+  __u64 extensions;
+  __u64 oa_buf_size;
+  __u64 reserved[3];
+};
 #ifdef __cplusplus
 }
 #endif
diff --git a/libc/kernel/uapi/linux/bpf.h b/libc/kernel/uapi/linux/bpf.h
index c732920..8d64816 100644
--- a/libc/kernel/uapi/linux/bpf.h
+++ b/libc/kernel/uapi/linux/bpf.h
@@ -367,6 +367,7 @@
 #define BPF_F_QUERY_EFFECTIVE (1U << 0)
 #define BPF_F_TEST_RUN_ON_CPU (1U << 0)
 #define BPF_F_TEST_XDP_LIVE_FRAMES (1U << 1)
+#define BPF_F_TEST_SKB_CHECKSUM_COMPLETE (1U << 2)
 enum bpf_stats_type {
   BPF_STATS_RUN_TIME = 0,
 };
@@ -773,8 +774,11 @@
 #define __bpf_md_ptr(type,name) union { type name; __u64 : 64; \
 } __attribute__((aligned(8)))
 enum {
-  BPF_SKB_TSTAMP_UNSPEC,
-  BPF_SKB_TSTAMP_DELIVERY_MONO,
+  BPF_SKB_TSTAMP_UNSPEC = 0,
+  BPF_SKB_TSTAMP_DELIVERY_MONO = 1,
+  BPF_SKB_CLOCK_REALTIME = 0,
+  BPF_SKB_CLOCK_MONOTONIC = 1,
+  BPF_SKB_CLOCK_TAI = 2,
 };
 struct __sk_buff {
   __u32 len;
diff --git a/libc/kernel/uapi/linux/btrfs_tree.h b/libc/kernel/uapi/linux/btrfs_tree.h
index ea33eee..88f44d9 100644
--- a/libc/kernel/uapi/linux/btrfs_tree.h
+++ b/libc/kernel/uapi/linux/btrfs_tree.h
@@ -279,18 +279,8 @@
   __le64 devid;
   __le64 physical;
 } __attribute__((__packed__));
-#define BTRFS_STRIPE_RAID0 1
-#define BTRFS_STRIPE_RAID1 2
-#define BTRFS_STRIPE_DUP 3
-#define BTRFS_STRIPE_RAID10 4
-#define BTRFS_STRIPE_RAID5 5
-#define BTRFS_STRIPE_RAID6 6
-#define BTRFS_STRIPE_RAID1C3 7
-#define BTRFS_STRIPE_RAID1C4 8
 struct btrfs_stripe_extent {
-  __u8 encoding;
-  __u8 reserved[7];
-  struct btrfs_raid_stride strides[];
+  __DECLARE_FLEX_ARRAY(struct btrfs_raid_stride, strides);
 } __attribute__((__packed__));
 #define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
 #define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
@@ -300,6 +290,9 @@
 #define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34)
 #define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
 #define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)
+#define BTRFS_SUPER_FLAG_CHANGING_BG_TREE (1ULL << 38)
+#define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM (1ULL << 39)
+#define BTRFS_SUPER_FLAG_CHANGING_META_CSUM (1ULL << 40)
 struct btrfs_extent_item {
   __le64 refs;
   __le64 generation;
diff --git a/libc/kernel/uapi/linux/dlm.h b/libc/kernel/uapi/linux/dlm.h
index 3c73908..1344f9b 100644
--- a/libc/kernel/uapi/linux/dlm.h
+++ b/libc/kernel/uapi/linux/dlm.h
@@ -20,4 +20,5 @@
 };
 #define DLM_LSFL_TIMEWARN 0x00000002
 #define DLM_LSFL_NEWEXCL 0x00000008
+#define __DLM_LSFL_RESERVED0 0x00000010
 #endif
diff --git a/libc/kernel/uapi/linux/dma-heap.h b/libc/kernel/uapi/linux/dma-heap.h
index 467e336..ac86b59 100644
--- a/libc/kernel/uapi/linux/dma-heap.h
+++ b/libc/kernel/uapi/linux/dma-heap.h
@@ -9,7 +9,7 @@
 #include <linux/ioctl.h>
 #include <linux/types.h>
 #define DMA_HEAP_VALID_FD_FLAGS (O_CLOEXEC | O_ACCMODE)
-#define DMA_HEAP_VALID_HEAP_FLAGS (0)
+#define DMA_HEAP_VALID_HEAP_FLAGS (0ULL)
 struct dma_heap_allocation_data {
   __u64 len;
   __u32 fd;
diff --git a/libc/kernel/uapi/linux/ethtool.h b/libc/kernel/uapi/linux/ethtool.h
index 334c788..e213ba1 100644
--- a/libc/kernel/uapi/linux/ethtool.h
+++ b/libc/kernel/uapi/linux/ethtool.h
@@ -270,6 +270,56 @@
   ETHTOOL_MODULE_POWER_MODE_LOW = 1,
   ETHTOOL_MODULE_POWER_MODE_HIGH,
 };
+enum ethtool_c33_pse_ext_state {
+  ETHTOOL_C33_PSE_EXT_STATE_ERROR_CONDITION = 1,
+  ETHTOOL_C33_PSE_EXT_STATE_MR_MPS_VALID,
+  ETHTOOL_C33_PSE_EXT_STATE_MR_PSE_ENABLE,
+  ETHTOOL_C33_PSE_EXT_STATE_OPTION_DETECT_TED,
+  ETHTOOL_C33_PSE_EXT_STATE_OPTION_VPORT_LIM,
+  ETHTOOL_C33_PSE_EXT_STATE_OVLD_DETECTED,
+  ETHTOOL_C33_PSE_EXT_STATE_PD_DLL_POWER_TYPE,
+  ETHTOOL_C33_PSE_EXT_STATE_POWER_NOT_AVAILABLE,
+  ETHTOOL_C33_PSE_EXT_STATE_SHORT_DETECTED,
+};
+enum ethtool_c33_pse_ext_substate_mr_mps_valid {
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_DETECTED_UNDERLOAD = 1,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_MPS_VALID_CONNECTION_OPEN,
+};
+enum ethtool_c33_pse_ext_substate_error_condition {
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_NON_EXISTING_PORT = 1,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNDEFINED_PORT,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_INTERNAL_HW_FAULT,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_COMM_ERROR_AFTER_FORCE_ON,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_UNKNOWN_PORT_STATUS,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_TURN_OFF,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_HOST_CRASH_FORCE_SHUTDOWN,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_CONFIG_CHANGE,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_ERROR_CONDITION_DETECTED_OVER_TEMP,
+};
+enum ethtool_c33_pse_ext_substate_mr_pse_enable {
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_MR_PSE_ENABLE_DISABLE_PIN_ACTIVE = 1,
+};
+enum ethtool_c33_pse_ext_substate_option_detect_ted {
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_DET_IN_PROCESS = 1,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_DETECT_TED_CONNECTION_CHECK_ERROR,
+};
+enum ethtool_c33_pse_ext_substate_option_vport_lim {
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_HIGH_VOLTAGE = 1,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_LOW_VOLTAGE,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_OPTION_VPORT_LIM_VOLTAGE_INJECTION,
+};
+enum ethtool_c33_pse_ext_substate_ovld_detected {
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_OVLD_DETECTED_OVERLOAD = 1,
+};
+enum ethtool_c33_pse_ext_substate_power_not_available {
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_BUDGET_EXCEEDED = 1,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PORT_PW_LIMIT_EXCEEDS_CONTROLLER_BUDGET,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_PD_REQUEST_EXCEEDS_PORT_LIMIT,
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_POWER_NOT_AVAILABLE_HW_PW_LIMIT,
+};
+enum ethtool_c33_pse_ext_substate_short_detected {
+  ETHTOOL_C33_PSE_EXT_SUBSTATE_SHORT_DETECTED_SHORT_CONDITION = 1,
+};
 enum ethtool_pse_types {
   ETHTOOL_PSE_UNKNOWN = 1 << 0,
   ETHTOOL_PSE_PODL = 1 << 1,
@@ -311,6 +361,12 @@
   ETHTOOL_MM_VERIFY_STATUS_FAILED,
   ETHTOOL_MM_VERIFY_STATUS_DISABLED,
 };
+enum ethtool_module_fw_flash_status {
+  ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED = 1,
+  ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS,
+  ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED,
+  ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR,
+};
 struct ethtool_gstrings {
   __u32 cmd;
   __u32 string_set;
@@ -749,6 +805,7 @@
   ETHTOOL_LINK_MODE_10baseT1S_Full_BIT = 99,
   ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100,
   ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101,
+  ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102,
   __ETHTOOL_LINK_MODE_MASK_NBITS
 };
 #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) (1UL << (ETHTOOL_LINK_MODE_ ##base_name ##_BIT))
diff --git a/libc/kernel/uapi/linux/ethtool_netlink.h b/libc/kernel/uapi/linux/ethtool_netlink.h
index 6757ef5..ac6391a 100644
--- a/libc/kernel/uapi/linux/ethtool_netlink.h
+++ b/libc/kernel/uapi/linux/ethtool_netlink.h
@@ -52,6 +52,7 @@
   ETHTOOL_MSG_PLCA_GET_STATUS,
   ETHTOOL_MSG_MM_GET,
   ETHTOOL_MSG_MM_SET,
+  ETHTOOL_MSG_MODULE_FW_FLASH_ACT,
   __ETHTOOL_MSG_USER_CNT,
   ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1
 };
@@ -100,6 +101,7 @@
   ETHTOOL_MSG_PLCA_NTF,
   ETHTOOL_MSG_MM_GET_REPLY,
   ETHTOOL_MSG_MM_NTF,
+  ETHTOOL_MSG_MODULE_FW_FLASH_NTF,
   __ETHTOOL_MSG_KERNEL_CNT,
   ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1
 };
@@ -316,10 +318,26 @@
   ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES,
   ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,
   ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,
+  ETHTOOL_A_COALESCE_RX_PROFILE,
+  ETHTOOL_A_COALESCE_TX_PROFILE,
   __ETHTOOL_A_COALESCE_CNT,
   ETHTOOL_A_COALESCE_MAX = (__ETHTOOL_A_COALESCE_CNT - 1)
 };
 enum {
+  ETHTOOL_A_PROFILE_UNSPEC,
+  ETHTOOL_A_PROFILE_IRQ_MODERATION,
+  __ETHTOOL_A_PROFILE_CNT,
+  ETHTOOL_A_PROFILE_MAX = (__ETHTOOL_A_PROFILE_CNT - 1)
+};
+enum {
+  ETHTOOL_A_IRQ_MODERATION_UNSPEC,
+  ETHTOOL_A_IRQ_MODERATION_USEC,
+  ETHTOOL_A_IRQ_MODERATION_PKTS,
+  ETHTOOL_A_IRQ_MODERATION_COMPS,
+  __ETHTOOL_A_IRQ_MODERATION_CNT,
+  ETHTOOL_A_IRQ_MODERATION_MAX = (__ETHTOOL_A_IRQ_MODERATION_CNT - 1)
+};
+enum {
   ETHTOOL_A_PAUSE_UNSPEC,
   ETHTOOL_A_PAUSE_HEADER,
   ETHTOOL_A_PAUSE_AUTONEG,
@@ -635,6 +653,11 @@
   ETHTOOL_A_MODULE_MAX = (__ETHTOOL_A_MODULE_CNT - 1)
 };
 enum {
+  ETHTOOL_A_C33_PSE_PW_LIMIT_UNSPEC,
+  ETHTOOL_A_C33_PSE_PW_LIMIT_MIN,
+  ETHTOOL_A_C33_PSE_PW_LIMIT_MAX,
+};
+enum {
   ETHTOOL_A_PSE_UNSPEC,
   ETHTOOL_A_PSE_HEADER,
   ETHTOOL_A_PODL_PSE_ADMIN_STATE,
@@ -643,6 +666,12 @@
   ETHTOOL_A_C33_PSE_ADMIN_STATE,
   ETHTOOL_A_C33_PSE_ADMIN_CONTROL,
   ETHTOOL_A_C33_PSE_PW_D_STATUS,
+  ETHTOOL_A_C33_PSE_PW_CLASS,
+  ETHTOOL_A_C33_PSE_ACTUAL_PW,
+  ETHTOOL_A_C33_PSE_EXT_STATE,
+  ETHTOOL_A_C33_PSE_EXT_SUBSTATE,
+  ETHTOOL_A_C33_PSE_AVAIL_PW_LIMIT,
+  ETHTOOL_A_C33_PSE_PW_LIMIT_RANGES,
   __ETHTOOL_A_PSE_CNT,
   ETHTOOL_A_PSE_MAX = (__ETHTOOL_A_PSE_CNT - 1)
 };
@@ -699,6 +728,18 @@
   __ETHTOOL_A_MM_CNT,
   ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1)
 };
+enum {
+  ETHTOOL_A_MODULE_FW_FLASH_UNSPEC,
+  ETHTOOL_A_MODULE_FW_FLASH_HEADER,
+  ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME,
+  ETHTOOL_A_MODULE_FW_FLASH_PASSWORD,
+  ETHTOOL_A_MODULE_FW_FLASH_STATUS,
+  ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG,
+  ETHTOOL_A_MODULE_FW_FLASH_DONE,
+  ETHTOOL_A_MODULE_FW_FLASH_TOTAL,
+  __ETHTOOL_A_MODULE_FW_FLASH_CNT,
+  ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1)
+};
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
 #define ETHTOOL_MCGRP_MONITOR_NAME "monitor"
diff --git a/libc/kernel/uapi/linux/fs.h b/libc/kernel/uapi/linux/fs.h
index 38a2d9e..adab56f 100644
--- a/libc/kernel/uapi/linux/fs.h
+++ b/libc/kernel/uapi/linux/fs.h
@@ -194,8 +194,10 @@
 #define RWF_NOWAIT (( __kernel_rwf_t) 0x00000008)
 #define RWF_APPEND (( __kernel_rwf_t) 0x00000010)
 #define RWF_NOAPPEND (( __kernel_rwf_t) 0x00000020)
-#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT | RWF_APPEND | RWF_NOAPPEND)
-#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
+#define RWF_ATOMIC (( __kernel_rwf_t) 0x00000040)
+#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT | RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
+#define PROCFS_IOCTL_MAGIC 'f'
+#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
 #define PAGE_IS_WPALLOWED (1 << 0)
 #define PAGE_IS_WRITTEN (1 << 1)
 #define PAGE_IS_FILE (1 << 2)
@@ -225,4 +227,30 @@
   __u64 category_anyof_mask;
   __u64 return_mask;
 };
+#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)
+enum procmap_query_flags {
+  PROCMAP_QUERY_VMA_READABLE = 0x01,
+  PROCMAP_QUERY_VMA_WRITABLE = 0x02,
+  PROCMAP_QUERY_VMA_EXECUTABLE = 0x04,
+  PROCMAP_QUERY_VMA_SHARED = 0x08,
+  PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10,
+  PROCMAP_QUERY_FILE_BACKED_VMA = 0x20,
+};
+struct procmap_query {
+  __u64 size;
+  __u64 query_flags;
+  __u64 query_addr;
+  __u64 vma_start;
+  __u64 vma_end;
+  __u64 vma_flags;
+  __u64 vma_page_size;
+  __u64 vma_offset;
+  __u64 inode;
+  __u32 dev_major;
+  __u32 dev_minor;
+  __u32 vma_name_size;
+  __u32 build_id_size;
+  __u64 vma_name_addr;
+  __u64 build_id_addr;
+};
 #endif
diff --git a/libc/kernel/uapi/linux/if_xdp.h b/libc/kernel/uapi/linux/if_xdp.h
index b7eec87..7201e06 100644
--- a/libc/kernel/uapi/linux/if_xdp.h
+++ b/libc/kernel/uapi/linux/if_xdp.h
@@ -14,6 +14,7 @@
 #define XDP_USE_SG (1 << 4)
 #define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0)
 #define XDP_UMEM_TX_SW_CSUM (1 << 1)
+#define XDP_UMEM_TX_METADATA_LEN (1 << 2)
 struct sockaddr_xdp {
   __u16 sxdp_family;
   __u16 sxdp_flags;
diff --git a/libc/kernel/uapi/linux/iio/buffer.h b/libc/kernel/uapi/linux/iio/buffer.h
index 45c6f65..7e03a8c 100644
--- a/libc/kernel/uapi/linux/iio/buffer.h
+++ b/libc/kernel/uapi/linux/iio/buffer.h
@@ -6,5 +6,16 @@
  */
 #ifndef _UAPI_IIO_BUFFER_H_
 #define _UAPI_IIO_BUFFER_H_
+#include <linux/types.h>
+#define IIO_BUFFER_DMABUF_CYCLIC (1 << 0)
+#define IIO_BUFFER_DMABUF_SUPPORTED_FLAGS 0x00000001
+struct iio_dmabuf {
+  __u32 fd;
+  __u32 flags;
+  __u64 bytes_used;
+};
 #define IIO_BUFFER_GET_FD_IOCTL _IOWR('i', 0x91, int)
+#define IIO_BUFFER_DMABUF_ATTACH_IOCTL _IOW('i', 0x92, int)
+#define IIO_BUFFER_DMABUF_DETACH_IOCTL _IOW('i', 0x93, int)
+#define IIO_BUFFER_DMABUF_ENQUEUE_IOCTL _IOW('i', 0x94, struct iio_dmabuf)
 #endif
diff --git a/libc/kernel/uapi/linux/in.h b/libc/kernel/uapi/linux/in.h
index 44efdd8..97bf493 100644
--- a/libc/kernel/uapi/linux/in.h
+++ b/libc/kernel/uapi/linux/in.h
@@ -69,6 +69,8 @@
 #define IPPROTO_ETHERNET IPPROTO_ETHERNET
   IPPROTO_RAW = 255,
 #define IPPROTO_RAW IPPROTO_RAW
+  IPPROTO_SMC = 256,
+#define IPPROTO_SMC IPPROTO_SMC
   IPPROTO_MPTCP = 262,
 #define IPPROTO_MPTCP IPPROTO_MPTCP
   IPPROTO_MAX
diff --git a/libc/kernel/uapi/linux/io_uring.h b/libc/kernel/uapi/linux/io_uring.h
index 5505963..6b4f2ea 100644
--- a/libc/kernel/uapi/linux/io_uring.h
+++ b/libc/kernel/uapi/linux/io_uring.h
@@ -176,6 +176,8 @@
   IORING_OP_FUTEX_WAITV,
   IORING_OP_FIXED_FD_INSTALL,
   IORING_OP_FTRUNCATE,
+  IORING_OP_BIND,
+  IORING_OP_LISTEN,
   IORING_OP_LAST,
 };
 #define IORING_URING_CMD_FIXED (1U << 0)
diff --git a/libc/kernel/uapi/linux/iommufd.h b/libc/kernel/uapi/linux/iommufd.h
index 2570628..6f663b4 100644
--- a/libc/kernel/uapi/linux/iommufd.h
+++ b/libc/kernel/uapi/linux/iommufd.h
@@ -12,19 +12,20 @@
 enum {
   IOMMUFD_CMD_BASE = 0x80,
   IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
-  IOMMUFD_CMD_IOAS_ALLOC,
-  IOMMUFD_CMD_IOAS_ALLOW_IOVAS,
-  IOMMUFD_CMD_IOAS_COPY,
-  IOMMUFD_CMD_IOAS_IOVA_RANGES,
-  IOMMUFD_CMD_IOAS_MAP,
-  IOMMUFD_CMD_IOAS_UNMAP,
-  IOMMUFD_CMD_OPTION,
-  IOMMUFD_CMD_VFIO_IOAS,
-  IOMMUFD_CMD_HWPT_ALLOC,
-  IOMMUFD_CMD_GET_HW_INFO,
-  IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
-  IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
-  IOMMUFD_CMD_HWPT_INVALIDATE,
+  IOMMUFD_CMD_IOAS_ALLOC = 0x81,
+  IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
+  IOMMUFD_CMD_IOAS_COPY = 0x83,
+  IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
+  IOMMUFD_CMD_IOAS_MAP = 0x85,
+  IOMMUFD_CMD_IOAS_UNMAP = 0x86,
+  IOMMUFD_CMD_OPTION = 0x87,
+  IOMMUFD_CMD_VFIO_IOAS = 0x88,
+  IOMMUFD_CMD_HWPT_ALLOC = 0x89,
+  IOMMUFD_CMD_GET_HW_INFO = 0x8a,
+  IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
+  IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
+  IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
+  IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
 };
 struct iommu_destroy {
   __u32 size;
@@ -122,6 +123,7 @@
 enum iommufd_hwpt_alloc_flags {
   IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
   IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
+  IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
 };
 enum iommu_hwpt_vtd_s1_flags {
   IOMMU_VTD_S1_SRE = 1 << 0,
@@ -135,8 +137,8 @@
   __u32 __reserved;
 };
 enum iommu_hwpt_data_type {
-  IOMMU_HWPT_DATA_NONE,
-  IOMMU_HWPT_DATA_VTD_S1,
+  IOMMU_HWPT_DATA_NONE = 0,
+  IOMMU_HWPT_DATA_VTD_S1 = 1,
 };
 struct iommu_hwpt_alloc {
   __u32 size;
@@ -148,6 +150,8 @@
   __u32 data_type;
   __u32 data_len;
   __aligned_u64 data_uptr;
+  __u32 fault_id;
+  __u32 __reserved2;
 };
 #define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
 enum iommu_hw_info_vtd_flags {
@@ -160,8 +164,8 @@
   __aligned_u64 ecap_reg;
 };
 enum iommu_hw_info_type {
-  IOMMU_HW_INFO_TYPE_NONE,
-  IOMMU_HW_INFO_TYPE_INTEL_VTD,
+  IOMMU_HW_INFO_TYPE_NONE = 0,
+  IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
 };
 enum iommufd_hw_capabilities {
   IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
@@ -202,7 +206,7 @@
 };
 #define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
 enum iommu_hwpt_invalidate_data_type {
-  IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
+  IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
 };
 enum iommu_hwpt_vtd_s1_invalidate_flags {
   IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
@@ -223,4 +227,39 @@
   __u32 __reserved;
 };
 #define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
+enum iommu_hwpt_pgfault_flags {
+  IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0),
+  IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1),
+};
+enum iommu_hwpt_pgfault_perm {
+  IOMMU_PGFAULT_PERM_READ = (1 << 0),
+  IOMMU_PGFAULT_PERM_WRITE = (1 << 1),
+  IOMMU_PGFAULT_PERM_EXEC = (1 << 2),
+  IOMMU_PGFAULT_PERM_PRIV = (1 << 3),
+};
+struct iommu_hwpt_pgfault {
+  __u32 flags;
+  __u32 dev_id;
+  __u32 pasid;
+  __u32 grpid;
+  __u32 perm;
+  __u64 addr;
+  __u32 length;
+  __u32 cookie;
+};
+enum iommufd_page_response_code {
+  IOMMUFD_PAGE_RESP_SUCCESS = 0,
+  IOMMUFD_PAGE_RESP_INVALID = 1,
+};
+struct iommu_hwpt_page_response {
+  __u32 cookie;
+  __u32 code;
+};
+struct iommu_fault_alloc {
+  __u32 size;
+  __u32 flags;
+  __u32 out_fault_id;
+  __u32 out_fault_fd;
+};
+#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
 #endif
diff --git a/libc/kernel/uapi/linux/kfd_ioctl.h b/libc/kernel/uapi/linux/kfd_ioctl.h
index 62c9872..193dd8e 100644
--- a/libc/kernel/uapi/linux/kfd_ioctl.h
+++ b/libc/kernel/uapi/linux/kfd_ioctl.h
@@ -9,7 +9,7 @@
 #include <drm/drm.h>
 #include <linux/ioctl.h>
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 15
+#define KFD_IOCTL_MINOR_VERSION 16
 struct kfd_ioctl_get_version_args {
   __u32 major_version;
   __u32 minor_version;
@@ -269,6 +269,7 @@
 #define KFD_IOC_ALLOC_MEM_FLAGS_COHERENT (1 << 26)
 #define KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED (1 << 25)
 #define KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT (1 << 24)
+#define KFD_IOC_ALLOC_MEM_FLAGS_CONTIGUOUS (1 << 23)
 struct kfd_ioctl_alloc_memory_of_gpu_args {
   __u64 va_addr;
   __u64 size;
@@ -465,6 +466,7 @@
 };
 enum kfd_dbg_trap_flags {
   KFD_DBG_TRAP_FLAG_SINGLE_MEM_OP = 1,
+  KFD_DBG_TRAP_FLAG_SINGLE_ALU_OP = 2,
 };
 enum kfd_dbg_trap_exception_code {
   EC_NONE = 0,
diff --git a/libc/kernel/uapi/linux/kfd_sysfs.h b/libc/kernel/uapi/linux/kfd_sysfs.h
index e538cf2..7771582 100644
--- a/libc/kernel/uapi/linux/kfd_sysfs.h
+++ b/libc/kernel/uapi/linux/kfd_sysfs.h
@@ -35,7 +35,8 @@
 #define HSA_CAP_SVMAPI_SUPPORTED 0x08000000
 #define HSA_CAP_FLAGS_COHERENTHOSTACCESS 0x10000000
 #define HSA_CAP_TRAP_DEBUG_FIRMWARE_SUPPORTED 0x20000000
-#define HSA_CAP_RESERVED 0xe00f8000
+#define HSA_CAP_TRAP_DEBUG_PRECISE_ALU_OPERATIONS_SUPPORTED 0x40000000
+#define HSA_CAP_RESERVED 0x800f8000
 #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_MASK 0x0000000f
 #define HSA_DBG_WATCH_ADDR_MASK_LO_BIT_SHIFT 0
 #define HSA_DBG_WATCH_ADDR_MASK_HI_BIT_MASK 0x000003f0
diff --git a/libc/kernel/uapi/linux/kvm.h b/libc/kernel/uapi/linux/kvm.h
index ffaf5e6..297a09d 100644
--- a/libc/kernel/uapi/linux/kvm.h
+++ b/libc/kernel/uapi/linux/kvm.h
@@ -149,9 +149,10 @@
 #define KVM_INTERNAL_ERROR_DELIVERY_EV 3
 #define KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON 4
 #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol
 struct kvm_run {
   __u8 request_interrupt_window;
-  __u8 immediate_exit;
+  __u8 HINT_UNSAFE_IN_KVM(immediate_exit);
   __u8 padding1[6];
   __u32 exit_reason;
   __u8 ready_for_interrupt_injection;
@@ -719,6 +720,9 @@
 #define KVM_CAP_MEMORY_ATTRIBUTES 233
 #define KVM_CAP_GUEST_MEMFD 234
 #define KVM_CAP_VM_TYPES 235
+#define KVM_CAP_PRE_FAULT_MEMORY 236
+#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
+#define KVM_CAP_X86_GUEST_MODE 238
 struct kvm_irq_routing_irqchip {
   __u32 irqchip;
   __u32 pin;
@@ -1109,4 +1113,11 @@
   __u64 flags;
   __u64 reserved[6];
 };
+#define KVM_PRE_FAULT_MEMORY _IOWR(KVMIO, 0xd5, struct kvm_pre_fault_memory)
+struct kvm_pre_fault_memory {
+  __u64 gpa;
+  __u64 size;
+  __u64 flags;
+  __u64 padding[5];
+};
 #endif
diff --git a/libc/kernel/uapi/linux/media/raspberrypi/pisp_be_config.h b/libc/kernel/uapi/linux/media/raspberrypi/pisp_be_config.h
new file mode 100644
index 0000000..2e981ad
--- /dev/null
+++ b/libc/kernel/uapi/linux/media/raspberrypi/pisp_be_config.h
@@ -0,0 +1,418 @@
+/*
+ * This file is auto-generated. Modifications will be lost.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
+ * for more information.
+ */
+#ifndef _UAPI_PISP_BE_CONFIG_H_
+#define _UAPI_PISP_BE_CONFIG_H_
+#include <linux/types.h>
+#include "pisp_common.h"
+#define PISP_BACK_END_INPUT_ALIGN 4u
+#define PISP_BACK_END_COMPRESSED_ALIGN 8u
+#define PISP_BACK_END_OUTPUT_MIN_ALIGN 16u
+#define PISP_BACK_END_OUTPUT_MAX_ALIGN 64u
+#define PISP_BACK_END_MIN_TILE_WIDTH 16u
+#define PISP_BACK_END_MIN_TILE_HEIGHT 16u
+#define PISP_BACK_END_NUM_OUTPUTS 2
+#define PISP_BACK_END_HOG_OUTPUT 1
+#define PISP_BACK_END_NUM_TILES 64
+enum pisp_be_bayer_enable {
+  PISP_BE_BAYER_ENABLE_INPUT = 0x000001,
+  PISP_BE_BAYER_ENABLE_DECOMPRESS = 0x000002,
+  PISP_BE_BAYER_ENABLE_DPC = 0x000004,
+  PISP_BE_BAYER_ENABLE_GEQ = 0x000008,
+  PISP_BE_BAYER_ENABLE_TDN_INPUT = 0x000010,
+  PISP_BE_BAYER_ENABLE_TDN_DECOMPRESS = 0x000020,
+  PISP_BE_BAYER_ENABLE_TDN = 0x000040,
+  PISP_BE_BAYER_ENABLE_TDN_COMPRESS = 0x000080,
+  PISP_BE_BAYER_ENABLE_TDN_OUTPUT = 0x000100,
+  PISP_BE_BAYER_ENABLE_SDN = 0x000200,
+  PISP_BE_BAYER_ENABLE_BLC = 0x000400,
+  PISP_BE_BAYER_ENABLE_STITCH_INPUT = 0x000800,
+  PISP_BE_BAYER_ENABLE_STITCH_DECOMPRESS = 0x001000,
+  PISP_BE_BAYER_ENABLE_STITCH = 0x002000,
+  PISP_BE_BAYER_ENABLE_STITCH_COMPRESS = 0x004000,
+  PISP_BE_BAYER_ENABLE_STITCH_OUTPUT = 0x008000,
+  PISP_BE_BAYER_ENABLE_WBG = 0x010000,
+  PISP_BE_BAYER_ENABLE_CDN = 0x020000,
+  PISP_BE_BAYER_ENABLE_LSC = 0x040000,
+  PISP_BE_BAYER_ENABLE_TONEMAP = 0x080000,
+  PISP_BE_BAYER_ENABLE_CAC = 0x100000,
+  PISP_BE_BAYER_ENABLE_DEBIN = 0x200000,
+  PISP_BE_BAYER_ENABLE_DEMOSAIC = 0x400000,
+};
+enum pisp_be_rgb_enable {
+  PISP_BE_RGB_ENABLE_INPUT = 0x000001,
+  PISP_BE_RGB_ENABLE_CCM = 0x000002,
+  PISP_BE_RGB_ENABLE_SAT_CONTROL = 0x000004,
+  PISP_BE_RGB_ENABLE_YCBCR = 0x000008,
+  PISP_BE_RGB_ENABLE_FALSE_COLOUR = 0x000010,
+  PISP_BE_RGB_ENABLE_SHARPEN = 0x000020,
+  PISP_BE_RGB_ENABLE_YCBCR_INVERSE = 0x000080,
+  PISP_BE_RGB_ENABLE_GAMMA = 0x000100,
+  PISP_BE_RGB_ENABLE_CSC0 = 0x000200,
+  PISP_BE_RGB_ENABLE_CSC1 = 0x000400,
+  PISP_BE_RGB_ENABLE_DOWNSCALE0 = 0x001000,
+  PISP_BE_RGB_ENABLE_DOWNSCALE1 = 0x002000,
+  PISP_BE_RGB_ENABLE_RESAMPLE0 = 0x008000,
+  PISP_BE_RGB_ENABLE_RESAMPLE1 = 0x010000,
+  PISP_BE_RGB_ENABLE_OUTPUT0 = 0x040000,
+  PISP_BE_RGB_ENABLE_OUTPUT1 = 0x080000,
+  PISP_BE_RGB_ENABLE_HOG = 0x200000
+};
+#define PISP_BE_RGB_ENABLE_CSC(i) (PISP_BE_RGB_ENABLE_CSC0 << (i))
+#define PISP_BE_RGB_ENABLE_DOWNSCALE(i) (PISP_BE_RGB_ENABLE_DOWNSCALE0 << (i))
+#define PISP_BE_RGB_ENABLE_RESAMPLE(i) (PISP_BE_RGB_ENABLE_RESAMPLE0 << (i))
+#define PISP_BE_RGB_ENABLE_OUTPUT(i) (PISP_BE_RGB_ENABLE_OUTPUT0 << (i))
+enum pisp_be_dirty {
+  PISP_BE_DIRTY_GLOBAL = 0x0001,
+  PISP_BE_DIRTY_SH_FC_COMBINE = 0x0002,
+  PISP_BE_DIRTY_CROP = 0x0004
+};
+struct pisp_be_global_config {
+  __u32 bayer_enables;
+  __u32 rgb_enables;
+  __u8 bayer_order;
+  __u8 pad[3];
+} __attribute__((packed));
+struct pisp_be_input_buffer_config {
+  __u32 addr[3][2];
+} __attribute__((packed));
+struct pisp_be_dpc_config {
+  __u8 coeff_level;
+  __u8 coeff_range;
+  __u8 pad;
+#define PISP_BE_DPC_FLAG_FOLDBACK 1
+  __u8 flags;
+} __attribute__((packed));
+struct pisp_be_geq_config {
+  __u16 offset;
+#define PISP_BE_GEQ_SHARPER (1U << 15)
+#define PISP_BE_GEQ_SLOPE ((1 << 10) - 1)
+  __u16 slope_sharper;
+  __u16 min;
+  __u16 max;
+} __attribute__((packed));
+struct pisp_be_tdn_input_buffer_config {
+  __u32 addr[2];
+} __attribute__((packed));
+struct pisp_be_tdn_config {
+  __u16 black_level;
+  __u16 ratio;
+  __u16 noise_constant;
+  __u16 noise_slope;
+  __u16 threshold;
+  __u8 reset;
+  __u8 pad;
+} __attribute__((packed));
+struct pisp_be_tdn_output_buffer_config {
+  __u32 addr[2];
+} __attribute__((packed));
+struct pisp_be_sdn_config {
+  __u16 black_level;
+  __u8 leakage;
+  __u8 pad;
+  __u16 noise_constant;
+  __u16 noise_slope;
+  __u16 noise_constant2;
+  __u16 noise_slope2;
+} __attribute__((packed));
+struct pisp_be_stitch_input_buffer_config {
+  __u32 addr[2];
+} __attribute__((packed));
+#define PISP_BE_STITCH_STREAMING_LONG 0x8000
+#define PISP_BE_STITCH_EXPOSURE_RATIO_MASK 0x7fff
+struct pisp_be_stitch_config {
+  __u16 threshold_lo;
+  __u8 threshold_diff_power;
+  __u8 pad;
+  __u16 exposure_ratio;
+  __u8 motion_threshold_256;
+  __u8 motion_threshold_recip;
+} __attribute__((packed));
+struct pisp_be_stitch_output_buffer_config {
+  __u32 addr[2];
+} __attribute__((packed));
+struct pisp_be_cdn_config {
+  __u16 thresh;
+  __u8 iir_strength;
+  __u8 g_adjust;
+} __attribute__((packed));
+#define PISP_BE_LSC_LOG_GRID_SIZE 5
+#define PISP_BE_LSC_GRID_SIZE (1 << PISP_BE_LSC_LOG_GRID_SIZE)
+#define PISP_BE_LSC_STEP_PRECISION 18
+struct pisp_be_lsc_config {
+  __u16 grid_step_x;
+  __u16 grid_step_y;
+#define PISP_BE_LSC_LUT_SIZE (PISP_BE_LSC_GRID_SIZE + 1)
+  __u32 lut_packed[PISP_BE_LSC_LUT_SIZE][PISP_BE_LSC_LUT_SIZE];
+} __attribute__((packed));
+struct pisp_be_lsc_extra {
+  __u16 offset_x;
+  __u16 offset_y;
+} __attribute__((packed));
+#define PISP_BE_CAC_LOG_GRID_SIZE 3
+#define PISP_BE_CAC_GRID_SIZE (1 << PISP_BE_CAC_LOG_GRID_SIZE)
+#define PISP_BE_CAC_STEP_PRECISION 20
+struct pisp_be_cac_config {
+  __u16 grid_step_x;
+  __u16 grid_step_y;
+#define PISP_BE_CAC_LUT_SIZE (PISP_BE_CAC_GRID_SIZE + 1)
+  __s8 lut[PISP_BE_CAC_LUT_SIZE][PISP_BE_CAC_LUT_SIZE][2][2];
+} __attribute__((packed));
+struct pisp_be_cac_extra {
+  __u16 offset_x;
+  __u16 offset_y;
+} __attribute__((packed));
+#define PISP_BE_DEBIN_NUM_COEFFS 4
+struct pisp_be_debin_config {
+  __s8 coeffs[PISP_BE_DEBIN_NUM_COEFFS];
+  __s8 h_enable;
+  __s8 v_enable;
+  __s8 pad[2];
+} __attribute__((packed));
+#define PISP_BE_TONEMAP_LUT_SIZE 64
+struct pisp_be_tonemap_config {
+  __u16 detail_constant;
+  __u16 detail_slope;
+  __u16 iir_strength;
+  __u16 strength;
+  __u32 lut[PISP_BE_TONEMAP_LUT_SIZE];
+} __attribute__((packed));
+struct pisp_be_demosaic_config {
+  __u8 sharper;
+  __u8 fc_mode;
+  __u8 pad[2];
+} __attribute__((packed));
+struct pisp_be_ccm_config {
+  __s16 coeffs[9];
+  __u8 pad[2];
+  __s32 offsets[3];
+} __attribute__((packed));
+struct pisp_be_sat_control_config {
+  __u8 shift_r;
+  __u8 shift_g;
+  __u8 shift_b;
+  __u8 pad;
+} __attribute__((packed));
+struct pisp_be_false_colour_config {
+  __u8 distance;
+  __u8 pad[3];
+} __attribute__((packed));
+#define PISP_BE_SHARPEN_SIZE 5
+#define PISP_BE_SHARPEN_FUNC_NUM_POINTS 9
+struct pisp_be_sharpen_config {
+  __s8 kernel0[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE];
+  __s8 pad0[3];
+  __s8 kernel1[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE];
+  __s8 pad1[3];
+  __s8 kernel2[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE];
+  __s8 pad2[3];
+  __s8 kernel3[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE];
+  __s8 pad3[3];
+  __s8 kernel4[PISP_BE_SHARPEN_SIZE * PISP_BE_SHARPEN_SIZE];
+  __s8 pad4[3];
+  __u16 threshold_offset0;
+  __u16 threshold_slope0;
+  __u16 scale0;
+  __u16 pad5;
+  __u16 threshold_offset1;
+  __u16 threshold_slope1;
+  __u16 scale1;
+  __u16 pad6;
+  __u16 threshold_offset2;
+  __u16 threshold_slope2;
+  __u16 scale2;
+  __u16 pad7;
+  __u16 threshold_offset3;
+  __u16 threshold_slope3;
+  __u16 scale3;
+  __u16 pad8;
+  __u16 threshold_offset4;
+  __u16 threshold_slope4;
+  __u16 scale4;
+  __u16 pad9;
+  __u16 positive_strength;
+  __u16 positive_pre_limit;
+  __u16 positive_func[PISP_BE_SHARPEN_FUNC_NUM_POINTS];
+  __u16 positive_limit;
+  __u16 negative_strength;
+  __u16 negative_pre_limit;
+  __u16 negative_func[PISP_BE_SHARPEN_FUNC_NUM_POINTS];
+  __u16 negative_limit;
+  __u8 enables;
+  __u8 white;
+  __u8 black;
+  __u8 grey;
+} __attribute__((packed));
+struct pisp_be_sh_fc_combine_config {
+  __u8 y_factor;
+  __u8 c1_factor;
+  __u8 c2_factor;
+  __u8 pad;
+} __attribute__((packed));
+#define PISP_BE_GAMMA_LUT_SIZE 64
+struct pisp_be_gamma_config {
+  __u32 lut[PISP_BE_GAMMA_LUT_SIZE];
+} __attribute__((packed));
+struct pisp_be_crop_config {
+  __u16 offset_x, offset_y;
+  __u16 width, height;
+} __attribute__((packed));
+#define PISP_BE_RESAMPLE_FILTER_SIZE 96
+struct pisp_be_resample_config {
+  __u16 scale_factor_h, scale_factor_v;
+  __s16 coef[PISP_BE_RESAMPLE_FILTER_SIZE];
+} __attribute__((packed));
+struct pisp_be_resample_extra {
+  __u16 scaled_width;
+  __u16 scaled_height;
+  __s16 initial_phase_h[3];
+  __s16 initial_phase_v[3];
+} __attribute__((packed));
+struct pisp_be_downscale_config {
+  __u16 scale_factor_h;
+  __u16 scale_factor_v;
+  __u16 scale_recip_h;
+  __u16 scale_recip_v;
+} __attribute__((packed));
+struct pisp_be_downscale_extra {
+  __u16 scaled_width;
+  __u16 scaled_height;
+} __attribute__((packed));
+struct pisp_be_hog_config {
+  __u8 compute_signed;
+  __u8 channel_mix[3];
+  __u32 stride;
+} __attribute__((packed));
+struct pisp_be_axi_config {
+  __u8 r_qos;
+  __u8 r_cache_prot;
+  __u8 w_qos;
+  __u8 w_cache_prot;
+} __attribute__((packed));
+enum pisp_be_transform {
+  PISP_BE_TRANSFORM_NONE = 0x0,
+  PISP_BE_TRANSFORM_HFLIP = 0x1,
+  PISP_BE_TRANSFORM_VFLIP = 0x2,
+  PISP_BE_TRANSFORM_ROT180 = (PISP_BE_TRANSFORM_HFLIP | PISP_BE_TRANSFORM_VFLIP)
+};
+struct pisp_be_output_format_config {
+  struct pisp_image_format_config image;
+  __u8 transform;
+  __u8 pad[3];
+  __u16 lo;
+  __u16 hi;
+  __u16 lo2;
+  __u16 hi2;
+} __attribute__((packed));
+struct pisp_be_output_buffer_config {
+  __u32 addr[3][2];
+} __attribute__((packed));
+struct pisp_be_hog_buffer_config {
+  __u32 addr[2];
+} __attribute__((packed));
+struct pisp_be_config {
+  struct pisp_be_input_buffer_config input_buffer;
+  struct pisp_be_tdn_input_buffer_config tdn_input_buffer;
+  struct pisp_be_stitch_input_buffer_config stitch_input_buffer;
+  struct pisp_be_tdn_output_buffer_config tdn_output_buffer;
+  struct pisp_be_stitch_output_buffer_config stitch_output_buffer;
+  struct pisp_be_output_buffer_config output_buffer[PISP_BACK_END_NUM_OUTPUTS];
+  struct pisp_be_hog_buffer_config hog_buffer;
+  struct pisp_be_global_config global;
+  struct pisp_image_format_config input_format;
+  struct pisp_decompress_config decompress;
+  struct pisp_be_dpc_config dpc;
+  struct pisp_be_geq_config geq;
+  struct pisp_image_format_config tdn_input_format;
+  struct pisp_decompress_config tdn_decompress;
+  struct pisp_be_tdn_config tdn;
+  struct pisp_compress_config tdn_compress;
+  struct pisp_image_format_config tdn_output_format;
+  struct pisp_be_sdn_config sdn;
+  struct pisp_bla_config blc;
+  struct pisp_compress_config stitch_compress;
+  struct pisp_image_format_config stitch_output_format;
+  struct pisp_image_format_config stitch_input_format;
+  struct pisp_decompress_config stitch_decompress;
+  struct pisp_be_stitch_config stitch;
+  struct pisp_be_lsc_config lsc;
+  struct pisp_wbg_config wbg;
+  struct pisp_be_cdn_config cdn;
+  struct pisp_be_cac_config cac;
+  struct pisp_be_debin_config debin;
+  struct pisp_be_tonemap_config tonemap;
+  struct pisp_be_demosaic_config demosaic;
+  struct pisp_be_ccm_config ccm;
+  struct pisp_be_sat_control_config sat_control;
+  struct pisp_be_ccm_config ycbcr;
+  struct pisp_be_sharpen_config sharpen;
+  struct pisp_be_false_colour_config false_colour;
+  struct pisp_be_sh_fc_combine_config sh_fc_combine;
+  struct pisp_be_ccm_config ycbcr_inverse;
+  struct pisp_be_gamma_config gamma;
+  struct pisp_be_ccm_config csc[PISP_BACK_END_NUM_OUTPUTS];
+  struct pisp_be_downscale_config downscale[PISP_BACK_END_NUM_OUTPUTS];
+  struct pisp_be_resample_config resample[PISP_BACK_END_NUM_OUTPUTS];
+  struct pisp_be_output_format_config output_format[PISP_BACK_END_NUM_OUTPUTS];
+  struct pisp_be_hog_config hog;
+  struct pisp_be_axi_config axi;
+  struct pisp_be_lsc_extra lsc_extra;
+  struct pisp_be_cac_extra cac_extra;
+  struct pisp_be_downscale_extra downscale_extra[PISP_BACK_END_NUM_OUTPUTS];
+  struct pisp_be_resample_extra resample_extra[PISP_BACK_END_NUM_OUTPUTS];
+  struct pisp_be_crop_config crop;
+  struct pisp_image_format_config hog_format;
+  __u32 dirty_flags_bayer;
+  __u32 dirty_flags_rgb;
+  __u32 dirty_flags_extra;
+} __attribute__((packed));
+enum pisp_tile_edge {
+  PISP_LEFT_EDGE = (1 << 0),
+  PISP_RIGHT_EDGE = (1 << 1),
+  PISP_TOP_EDGE = (1 << 2),
+  PISP_BOTTOM_EDGE = (1 << 3)
+};
+struct pisp_tile {
+  __u8 edge;
+  __u8 pad0[3];
+  __u32 input_addr_offset;
+  __u32 input_addr_offset2;
+  __u16 input_offset_x;
+  __u16 input_offset_y;
+  __u16 input_width;
+  __u16 input_height;
+  __u32 tdn_input_addr_offset;
+  __u32 tdn_output_addr_offset;
+  __u32 stitch_input_addr_offset;
+  __u32 stitch_output_addr_offset;
+  __u32 lsc_grid_offset_x;
+  __u32 lsc_grid_offset_y;
+  __u32 cac_grid_offset_x;
+  __u32 cac_grid_offset_y;
+  __u16 crop_x_start[PISP_BACK_END_NUM_OUTPUTS];
+  __u16 crop_x_end[PISP_BACK_END_NUM_OUTPUTS];
+  __u16 crop_y_start[PISP_BACK_END_NUM_OUTPUTS];
+  __u16 crop_y_end[PISP_BACK_END_NUM_OUTPUTS];
+  __u16 downscale_phase_x[3 * PISP_BACK_END_NUM_OUTPUTS];
+  __u16 downscale_phase_y[3 * PISP_BACK_END_NUM_OUTPUTS];
+  __u16 resample_in_width[PISP_BACK_END_NUM_OUTPUTS];
+  __u16 resample_in_height[PISP_BACK_END_NUM_OUTPUTS];
+  __u16 resample_phase_x[3 * PISP_BACK_END_NUM_OUTPUTS];
+  __u16 resample_phase_y[3 * PISP_BACK_END_NUM_OUTPUTS];
+  __u16 output_offset_x[PISP_BACK_END_NUM_OUTPUTS];
+  __u16 output_offset_y[PISP_BACK_END_NUM_OUTPUTS];
+  __u16 output_width[PISP_BACK_END_NUM_OUTPUTS];
+  __u16 output_height[PISP_BACK_END_NUM_OUTPUTS];
+  __u32 output_addr_offset[PISP_BACK_END_NUM_OUTPUTS];
+  __u32 output_addr_offset2[PISP_BACK_END_NUM_OUTPUTS];
+  __u32 output_hog_addr_offset;
+} __attribute__((packed));
+struct pisp_be_tiles_config {
+  struct pisp_be_config config;
+  struct pisp_tile tiles[PISP_BACK_END_NUM_TILES];
+  __u32 num_tiles;
+} __attribute__((packed));
+#endif
diff --git a/libc/kernel/uapi/linux/media/raspberrypi/pisp_common.h b/libc/kernel/uapi/linux/media/raspberrypi/pisp_common.h
new file mode 100644
index 0000000..0e0b23f
--- /dev/null
+++ b/libc/kernel/uapi/linux/media/raspberrypi/pisp_common.h
@@ -0,0 +1,119 @@
+/*
+ * This file is auto-generated. Modifications will be lost.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
+ * for more information.
+ */
+#ifndef _UAPI_PISP_COMMON_H_
+#define _UAPI_PISP_COMMON_H_
+#include <linux/types.h>
+struct pisp_image_format_config {
+  __u16 width;
+  __u16 height;
+  __u32 format;
+  __s32 stride;
+  __s32 stride2;
+} __attribute__((packed));
+enum pisp_bayer_order {
+  PISP_BAYER_ORDER_RGGB = 0,
+  PISP_BAYER_ORDER_GBRG = 1,
+  PISP_BAYER_ORDER_BGGR = 2,
+  PISP_BAYER_ORDER_GRBG = 3,
+  PISP_BAYER_ORDER_GREYSCALE = 128
+};
+enum pisp_image_format {
+  PISP_IMAGE_FORMAT_BPS_8 = 0x00000000,
+  PISP_IMAGE_FORMAT_BPS_10 = 0x00000001,
+  PISP_IMAGE_FORMAT_BPS_12 = 0x00000002,
+  PISP_IMAGE_FORMAT_BPS_16 = 0x00000003,
+  PISP_IMAGE_FORMAT_BPS_MASK = 0x00000003,
+  PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED = 0x00000000,
+  PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR = 0x00000010,
+  PISP_IMAGE_FORMAT_PLANARITY_PLANAR = 0x00000020,
+  PISP_IMAGE_FORMAT_PLANARITY_MASK = 0x00000030,
+  PISP_IMAGE_FORMAT_SAMPLING_444 = 0x00000000,
+  PISP_IMAGE_FORMAT_SAMPLING_422 = 0x00000100,
+  PISP_IMAGE_FORMAT_SAMPLING_420 = 0x00000200,
+  PISP_IMAGE_FORMAT_SAMPLING_MASK = 0x00000300,
+  PISP_IMAGE_FORMAT_ORDER_NORMAL = 0x00000000,
+  PISP_IMAGE_FORMAT_ORDER_SWAPPED = 0x00001000,
+  PISP_IMAGE_FORMAT_SHIFT_0 = 0x00000000,
+  PISP_IMAGE_FORMAT_SHIFT_1 = 0x00010000,
+  PISP_IMAGE_FORMAT_SHIFT_2 = 0x00020000,
+  PISP_IMAGE_FORMAT_SHIFT_3 = 0x00030000,
+  PISP_IMAGE_FORMAT_SHIFT_4 = 0x00040000,
+  PISP_IMAGE_FORMAT_SHIFT_5 = 0x00050000,
+  PISP_IMAGE_FORMAT_SHIFT_6 = 0x00060000,
+  PISP_IMAGE_FORMAT_SHIFT_7 = 0x00070000,
+  PISP_IMAGE_FORMAT_SHIFT_8 = 0x00080000,
+  PISP_IMAGE_FORMAT_SHIFT_MASK = 0x000f0000,
+  PISP_IMAGE_FORMAT_BPP_32 = 0x00100000,
+  PISP_IMAGE_FORMAT_UNCOMPRESSED = 0x00000000,
+  PISP_IMAGE_FORMAT_COMPRESSION_MODE_1 = 0x01000000,
+  PISP_IMAGE_FORMAT_COMPRESSION_MODE_2 = 0x02000000,
+  PISP_IMAGE_FORMAT_COMPRESSION_MODE_3 = 0x03000000,
+  PISP_IMAGE_FORMAT_COMPRESSION_MASK = 0x03000000,
+  PISP_IMAGE_FORMAT_HOG_SIGNED = 0x04000000,
+  PISP_IMAGE_FORMAT_HOG_UNSIGNED = 0x08000000,
+  PISP_IMAGE_FORMAT_INTEGRAL_IMAGE = 0x10000000,
+  PISP_IMAGE_FORMAT_WALLPAPER_ROLL = 0x20000000,
+  PISP_IMAGE_FORMAT_THREE_CHANNEL = 0x40000000,
+  PISP_IMAGE_FORMAT_SINGLE_16 = PISP_IMAGE_FORMAT_BPS_16,
+  PISP_IMAGE_FORMAT_THREE_16 = PISP_IMAGE_FORMAT_BPS_16 | PISP_IMAGE_FORMAT_THREE_CHANNEL
+};
+#define PISP_IMAGE_FORMAT_BPS_8(fmt) (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_8)
+#define PISP_IMAGE_FORMAT_BPS_10(fmt) (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_10)
+#define PISP_IMAGE_FORMAT_BPS_12(fmt) (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_12)
+#define PISP_IMAGE_FORMAT_BPS_16(fmt) (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) == PISP_IMAGE_FORMAT_BPS_16)
+#define PISP_IMAGE_FORMAT_BPS(fmt) (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) ? 8 + (2 << (((fmt) & PISP_IMAGE_FORMAT_BPS_MASK) - 1)) : 8)
+#define PISP_IMAGE_FORMAT_SHIFT(fmt) (((fmt) & PISP_IMAGE_FORMAT_SHIFT_MASK) / PISP_IMAGE_FORMAT_SHIFT_1)
+#define PISP_IMAGE_FORMAT_THREE_CHANNEL(fmt) ((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL)
+#define PISP_IMAGE_FORMAT_SINGLE_CHANNEL(fmt) (! ((fmt) & PISP_IMAGE_FORMAT_THREE_CHANNEL))
+#define PISP_IMAGE_FORMAT_COMPRESSED(fmt) (((fmt) & PISP_IMAGE_FORMAT_COMPRESSION_MASK) != PISP_IMAGE_FORMAT_UNCOMPRESSED)
+#define PISP_IMAGE_FORMAT_SAMPLING_444(fmt) (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == PISP_IMAGE_FORMAT_SAMPLING_444)
+#define PISP_IMAGE_FORMAT_SAMPLING_422(fmt) (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == PISP_IMAGE_FORMAT_SAMPLING_422)
+#define PISP_IMAGE_FORMAT_SAMPLING_420(fmt) (((fmt) & PISP_IMAGE_FORMAT_SAMPLING_MASK) == PISP_IMAGE_FORMAT_SAMPLING_420)
+#define PISP_IMAGE_FORMAT_ORDER_NORMAL(fmt) (! ((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED))
+#define PISP_IMAGE_FORMAT_ORDER_SWAPPED(fmt) ((fmt) & PISP_IMAGE_FORMAT_ORDER_SWAPPED)
+#define PISP_IMAGE_FORMAT_INTERLEAVED(fmt) (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == PISP_IMAGE_FORMAT_PLANARITY_INTERLEAVED)
+#define PISP_IMAGE_FORMAT_SEMIPLANAR(fmt) (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == PISP_IMAGE_FORMAT_PLANARITY_SEMI_PLANAR)
+#define PISP_IMAGE_FORMAT_PLANAR(fmt) (((fmt) & PISP_IMAGE_FORMAT_PLANARITY_MASK) == PISP_IMAGE_FORMAT_PLANARITY_PLANAR)
+#define PISP_IMAGE_FORMAT_WALLPAPER(fmt) ((fmt) & PISP_IMAGE_FORMAT_WALLPAPER_ROLL)
+#define PISP_IMAGE_FORMAT_BPP_32(fmt) ((fmt) & PISP_IMAGE_FORMAT_BPP_32)
+#define PISP_IMAGE_FORMAT_HOG(fmt) ((fmt) & (PISP_IMAGE_FORMAT_HOG_SIGNED | PISP_IMAGE_FORMAT_HOG_UNSIGNED))
+#define PISP_WALLPAPER_WIDTH 128
+struct pisp_bla_config {
+  __u16 black_level_r;
+  __u16 black_level_gr;
+  __u16 black_level_gb;
+  __u16 black_level_b;
+  __u16 output_black_level;
+  __u8 pad[2];
+} __attribute__((packed));
+struct pisp_wbg_config {
+  __u16 gain_r;
+  __u16 gain_g;
+  __u16 gain_b;
+  __u8 pad[2];
+} __attribute__((packed));
+struct pisp_compress_config {
+  __u16 offset;
+  __u8 pad;
+  __u8 mode;
+} __attribute__((packed));
+struct pisp_decompress_config {
+  __u16 offset;
+  __u8 pad;
+  __u8 mode;
+} __attribute__((packed));
+enum pisp_axi_flags {
+  PISP_AXI_FLAG_ALIGN = 128,
+  PISP_AXI_FLAG_PAD = 64,
+  PISP_AXI_FLAG_PANIC = 32,
+};
+struct pisp_axi_config {
+  __u8 maxlen_flags;
+  __u8 cache_prot;
+  __u16 qos;
+} __attribute__((packed));
+#endif
diff --git a/libc/kernel/uapi/linux/mman.h b/libc/kernel/uapi/linux/mman.h
index cf1e978..f50b51c 100644
--- a/libc/kernel/uapi/linux/mman.h
+++ b/libc/kernel/uapi/linux/mman.h
@@ -18,6 +18,7 @@
 #define MAP_SHARED 0x01
 #define MAP_PRIVATE 0x02
 #define MAP_SHARED_VALIDATE 0x03
+#define MAP_DROPPABLE 0x08
 #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
 #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
 #define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB
diff --git a/libc/kernel/uapi/linux/mount.h b/libc/kernel/uapi/linux/mount.h
index 1166a7a..c4278b5 100644
--- a/libc/kernel/uapi/linux/mount.h
+++ b/libc/kernel/uapi/linux/mount.h
@@ -90,7 +90,7 @@
 #define MOUNT_ATTR_SIZE_VER0 32
 struct statmount {
   __u32 size;
-  __u32 __spare1;
+  __u32 mnt_opts;
   __u64 mask;
   __u32 sb_dev_major;
   __u32 sb_dev_minor;
@@ -108,7 +108,8 @@
   __u64 propagate_from;
   __u32 mnt_root;
   __u32 mnt_point;
-  __u64 __spare2[50];
+  __u64 mnt_ns_id;
+  __u64 __spare2[49];
   char str[];
 };
 struct mnt_id_req {
@@ -116,13 +117,18 @@
   __u32 spare;
   __u64 mnt_id;
   __u64 param;
+  __u64 mnt_ns_id;
 };
 #define MNT_ID_REQ_SIZE_VER0 24
+#define MNT_ID_REQ_SIZE_VER1 32
 #define STATMOUNT_SB_BASIC 0x00000001U
 #define STATMOUNT_MNT_BASIC 0x00000002U
 #define STATMOUNT_PROPAGATE_FROM 0x00000004U
 #define STATMOUNT_MNT_ROOT 0x00000008U
 #define STATMOUNT_MNT_POINT 0x00000010U
 #define STATMOUNT_FS_TYPE 0x00000020U
+#define STATMOUNT_MNT_NS_ID 0x00000040U
+#define STATMOUNT_MNT_OPTS 0x00000080U
 #define LSMT_ROOT 0xffffffffffffffff
+#define LISTMOUNT_REVERSE (1 << 0)
 #endif
diff --git a/libc/kernel/uapi/linux/netfilter/nf_tables.h b/libc/kernel/uapi/linux/netfilter/nf_tables.h
index 7922147..bfc6e25 100644
--- a/libc/kernel/uapi/linux/netfilter/nf_tables.h
+++ b/libc/kernel/uapi/linux/netfilter/nf_tables.h
@@ -695,7 +695,7 @@
   __NFTA_SECMARK_MAX,
 };
 #define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1)
-#define NFT_SECMARK_CTX_MAXLEN 256
+#define NFT_SECMARK_CTX_MAXLEN 4096
 enum nft_reject_types {
   NFT_REJECT_ICMP_UNREACH,
   NFT_REJECT_TCP_RST,
diff --git a/libc/kernel/uapi/linux/nfs4.h b/libc/kernel/uapi/linux/nfs4.h
index 21f1103..6512901 100644
--- a/libc/kernel/uapi/linux/nfs4.h
+++ b/libc/kernel/uapi/linux/nfs4.h
@@ -34,6 +34,7 @@
 #define NFS4_OPEN_RESULT_CONFIRM 0x0002
 #define NFS4_OPEN_RESULT_LOCKTYPE_POSIX 0x0004
 #define NFS4_OPEN_RESULT_PRESERVE_UNLINKED 0x0008
+#define NFS4_OPEN_RESULT_NO_OPEN_STATEID 0x0010
 #define NFS4_OPEN_RESULT_MAY_NOTIFY_LOCK 0x0020
 #define NFS4_SHARE_ACCESS_MASK 0x000F
 #define NFS4_SHARE_ACCESS_READ 0x0001
@@ -52,6 +53,8 @@
 #define NFS4_SHARE_WHEN_MASK 0xF0000
 #define NFS4_SHARE_SIGNAL_DELEG_WHEN_RESRC_AVAIL 0x10000
 #define NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED 0x20000
+#define NFS4_SHARE_WANT_DELEG_TIMESTAMPS 0x100000
+#define NFS4_SHARE_WANT_OPEN_XOR_DELEGATION 0x200000
 #define NFS4_CDFC4_FORE 0x1
 #define NFS4_CDFC4_BACK 0x2
 #define NFS4_CDFC4_BOTH 0x3
diff --git a/libc/kernel/uapi/linux/nfsd_netlink.h b/libc/kernel/uapi/linux/nfsd_netlink.h
index 45cb504..bd3f02c 100644
--- a/libc/kernel/uapi/linux/nfsd_netlink.h
+++ b/libc/kernel/uapi/linux/nfsd_netlink.h
@@ -58,6 +58,12 @@
   NFSD_A_SERVER_SOCK_MAX = (__NFSD_A_SERVER_SOCK_MAX - 1)
 };
 enum {
+  NFSD_A_POOL_MODE_MODE = 1,
+  NFSD_A_POOL_MODE_NPOOLS,
+  __NFSD_A_POOL_MODE_MAX,
+  NFSD_A_POOL_MODE_MAX = (__NFSD_A_POOL_MODE_MAX - 1)
+};
+enum {
   NFSD_CMD_RPC_STATUS_GET = 1,
   NFSD_CMD_THREADS_SET,
   NFSD_CMD_THREADS_GET,
@@ -65,6 +71,8 @@
   NFSD_CMD_VERSION_GET,
   NFSD_CMD_LISTENER_SET,
   NFSD_CMD_LISTENER_GET,
+  NFSD_CMD_POOL_MODE_SET,
+  NFSD_CMD_POOL_MODE_GET,
   __NFSD_CMD_MAX,
   NFSD_CMD_MAX = (__NFSD_CMD_MAX - 1)
 };
diff --git a/libc/kernel/uapi/linux/nl80211.h b/libc/kernel/uapi/linux/nl80211.h
index 98180c2..1bad2f2 100644
--- a/libc/kernel/uapi/linux/nl80211.h
+++ b/libc/kernel/uapi/linux/nl80211.h
@@ -528,6 +528,8 @@
   NL80211_ATTR_MLO_TTLM_DLINK,
   NL80211_ATTR_MLO_TTLM_ULINK,
   NL80211_ATTR_ASSOC_SPP_AMSDU,
+  NL80211_ATTR_WIPHY_RADIOS,
+  NL80211_ATTR_WIPHY_INTERFACE_COMBINATIONS,
   __NL80211_ATTR_AFTER_LAST,
   NUM_NL80211_ATTR = __NL80211_ATTR_AFTER_LAST,
   NL80211_ATTR_MAX = __NL80211_ATTR_AFTER_LAST - 1
@@ -878,6 +880,7 @@
   NL80211_FREQUENCY_ATTR_NO_6GHZ_VLP_CLIENT,
   NL80211_FREQUENCY_ATTR_NO_6GHZ_AFC_CLIENT,
   NL80211_FREQUENCY_ATTR_CAN_MONITOR,
+  NL80211_FREQUENCY_ATTR_ALLOW_6GHZ_VLP_AP,
   __NL80211_FREQUENCY_ATTR_AFTER_LAST,
   NL80211_FREQUENCY_ATTR_MAX = __NL80211_FREQUENCY_ATTR_AFTER_LAST - 1
 };
@@ -955,6 +958,7 @@
   NL80211_RRF_DFS_CONCURRENT = 1 << 21,
   NL80211_RRF_NO_6GHZ_VLP_CLIENT = 1 << 22,
   NL80211_RRF_NO_6GHZ_AFC_CLIENT = 1 << 23,
+  NL80211_RRF_ALLOW_6GHZ_VLP_AP = 1 << 24,
 };
 #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR
 #define NL80211_RRF_NO_IBSS NL80211_RRF_NO_IR
@@ -1973,4 +1977,19 @@
   NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT = 1 << 0,
   NL80211_AP_SETTINGS_SA_QUERY_OFFLOAD_SUPPORT = 1 << 1,
 };
+enum nl80211_wiphy_radio_attrs {
+  __NL80211_WIPHY_RADIO_ATTR_INVALID,
+  NL80211_WIPHY_RADIO_ATTR_INDEX,
+  NL80211_WIPHY_RADIO_ATTR_FREQ_RANGE,
+  NL80211_WIPHY_RADIO_ATTR_INTERFACE_COMBINATION,
+  __NL80211_WIPHY_RADIO_ATTR_LAST,
+  NL80211_WIPHY_RADIO_ATTR_MAX = __NL80211_WIPHY_RADIO_ATTR_LAST - 1,
+};
+enum nl80211_wiphy_radio_freq_range {
+  __NL80211_WIPHY_RADIO_FREQ_ATTR_INVALID,
+  NL80211_WIPHY_RADIO_FREQ_ATTR_START,
+  NL80211_WIPHY_RADIO_FREQ_ATTR_END,
+  __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST,
+  NL80211_WIPHY_RADIO_FREQ_ATTR_MAX = __NL80211_WIPHY_RADIO_FREQ_ATTR_LAST - 1,
+};
 #endif
diff --git a/libc/kernel/uapi/linux/nsfs.h b/libc/kernel/uapi/linux/nsfs.h
index 61a5797..c8f2208 100644
--- a/libc/kernel/uapi/linux/nsfs.h
+++ b/libc/kernel/uapi/linux/nsfs.h
@@ -7,9 +7,15 @@
 #ifndef __LINUX_NSFS_H
 #define __LINUX_NSFS_H
 #include <linux/ioctl.h>
+#include <linux/types.h>
 #define NSIO 0xb7
 #define NS_GET_USERNS _IO(NSIO, 0x1)
 #define NS_GET_PARENT _IO(NSIO, 0x2)
 #define NS_GET_NSTYPE _IO(NSIO, 0x3)
 #define NS_GET_OWNER_UID _IO(NSIO, 0x4)
+#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64)
+#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
+#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int)
+#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int)
+#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int)
 #endif
diff --git a/libc/kernel/uapi/linux/openvswitch.h b/libc/kernel/uapi/linux/openvswitch.h
index d45f4fa..98c8037 100644
--- a/libc/kernel/uapi/linux/openvswitch.h
+++ b/libc/kernel/uapi/linux/openvswitch.h
@@ -429,6 +429,13 @@
   __OVS_CHECK_PKT_LEN_ATTR_MAX,
 };
 #define OVS_CHECK_PKT_LEN_ATTR_MAX (__OVS_CHECK_PKT_LEN_ATTR_MAX - 1)
+#define OVS_PSAMPLE_COOKIE_MAX_SIZE 16
+enum ovs_psample_attr {
+  OVS_PSAMPLE_ATTR_GROUP = 1,
+  OVS_PSAMPLE_ATTR_COOKIE,
+  __OVS_PSAMPLE_ATTR_MAX
+};
+#define OVS_PSAMPLE_ATTR_MAX (__OVS_PSAMPLE_ATTR_MAX - 1)
 enum ovs_action_attr {
   OVS_ACTION_ATTR_UNSPEC,
   OVS_ACTION_ATTR_OUTPUT,
@@ -455,6 +462,7 @@
   OVS_ACTION_ATTR_ADD_MPLS,
   OVS_ACTION_ATTR_DEC_TTL,
   OVS_ACTION_ATTR_DROP,
+  OVS_ACTION_ATTR_PSAMPLE,
   __OVS_ACTION_ATTR_MAX,
 };
 #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
diff --git a/libc/kernel/uapi/linux/perf_event.h b/libc/kernel/uapi/linux/perf_event.h
index 16a1a2e..ec9b856 100644
--- a/libc/kernel/uapi/linux/perf_event.h
+++ b/libc/kernel/uapi/linux/perf_event.h
@@ -460,6 +460,8 @@
 #define PERF_MEM_LVLNUM_L2 0x02
 #define PERF_MEM_LVLNUM_L3 0x03
 #define PERF_MEM_LVLNUM_L4 0x04
+#define PERF_MEM_LVLNUM_L2_MHB 0x05
+#define PERF_MEM_LVLNUM_MSC 0x06
 #define PERF_MEM_LVLNUM_UNC 0x08
 #define PERF_MEM_LVLNUM_CXL 0x09
 #define PERF_MEM_LVLNUM_IO 0x0a
diff --git a/libc/kernel/uapi/linux/pidfd.h b/libc/kernel/uapi/linux/pidfd.h
index 082b4a0..9068727 100644
--- a/libc/kernel/uapi/linux/pidfd.h
+++ b/libc/kernel/uapi/linux/pidfd.h
@@ -8,9 +8,21 @@
 #define _UAPI_LINUX_PIDFD_H
 #include <linux/types.h>
 #include <linux/fcntl.h>
+#include <linux/ioctl.h>
 #define PIDFD_NONBLOCK O_NONBLOCK
 #define PIDFD_THREAD O_EXCL
 #define PIDFD_SIGNAL_THREAD (1UL << 0)
 #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1)
 #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2)
+#define PIDFS_IOCTL_MAGIC 0xFF
+#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
+#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
+#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
+#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
+#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
+#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
+#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
+#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
+#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
+#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
 #endif
diff --git a/libc/kernel/uapi/linux/pkt_cls.h b/libc/kernel/uapi/linux/pkt_cls.h
index 6b5143c..bdca553 100644
--- a/libc/kernel/uapi/linux/pkt_cls.h
+++ b/libc/kernel/uapi/linux/pkt_cls.h
@@ -419,6 +419,8 @@
   TCA_FLOWER_KEY_CFM,
   TCA_FLOWER_KEY_SPI,
   TCA_FLOWER_KEY_SPI_MASK,
+  TCA_FLOWER_KEY_ENC_FLAGS,
+  TCA_FLOWER_KEY_ENC_FLAGS_MASK,
   __TCA_FLOWER_MAX,
 };
 #define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
@@ -497,7 +499,13 @@
 enum {
   TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
   TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
+  TCA_FLOWER_KEY_FLAGS_TUNNEL_CSUM = (1 << 2),
+  TCA_FLOWER_KEY_FLAGS_TUNNEL_DONT_FRAGMENT = (1 << 3),
+  TCA_FLOWER_KEY_FLAGS_TUNNEL_OAM = (1 << 4),
+  TCA_FLOWER_KEY_FLAGS_TUNNEL_CRIT_OPT = (1 << 5),
+  __TCA_FLOWER_KEY_FLAGS_MAX,
 };
+#define TCA_FLOWER_KEY_FLAGS_MAX (__TCA_FLOWER_KEY_FLAGS_MAX - 1)
 enum {
   TCA_FLOWER_KEY_CFM_OPT_UNSPEC,
   TCA_FLOWER_KEY_CFM_MD_LEVEL,
diff --git a/libc/kernel/uapi/linux/psample.h b/libc/kernel/uapi/linux/psample.h
index c82e76e..f9f979c 100644
--- a/libc/kernel/uapi/linux/psample.h
+++ b/libc/kernel/uapi/linux/psample.h
@@ -22,6 +22,8 @@
   PSAMPLE_ATTR_LATENCY,
   PSAMPLE_ATTR_TIMESTAMP,
   PSAMPLE_ATTR_PROTO,
+  PSAMPLE_ATTR_USER_COOKIE,
+  PSAMPLE_ATTR_SAMPLE_PROBABILITY,
   __PSAMPLE_ATTR_MAX
 };
 enum psample_command {
diff --git a/libc/kernel/uapi/linux/psp-sev.h b/libc/kernel/uapi/linux/psp-sev.h
index 82fcbf1..7274081 100644
--- a/libc/kernel/uapi/linux/psp-sev.h
+++ b/libc/kernel/uapi/linux/psp-sev.h
@@ -20,6 +20,7 @@
   SNP_PLATFORM_STATUS,
   SNP_COMMIT,
   SNP_SET_CONFIG,
+  SNP_VLEK_LOAD,
   SEV_MAX,
 };
 typedef enum {
@@ -28,6 +29,7 @@
   SEV_RET_INVALID_PLATFORM_STATE,
   SEV_RET_INVALID_GUEST_STATE,
   SEV_RET_INAVLID_CONFIG,
+  SEV_RET_INVALID_CONFIG = SEV_RET_INAVLID_CONFIG,
   SEV_RET_INVALID_LEN,
   SEV_RET_ALREADY_OWNED,
   SEV_RET_INVALID_CERTIFICATE,
@@ -113,6 +115,15 @@
   __u32 rsvd : 30;
   __u8 rsvd1[52];
 } __attribute__((__packed__));
+struct sev_user_data_snp_vlek_load {
+  __u32 len;
+  __u8 vlek_wrapped_version;
+  __u8 rsvd[3];
+  __u64 vlek_wrapped_address;
+} __attribute__((__packed__));
+struct sev_user_data_snp_wrapped_vlek_hashstick {
+  __u8 data[432];
+} __attribute__((__packed__));
 struct sev_issue_cmd {
   __u32 cmd;
   __u64 data;
diff --git a/libc/kernel/uapi/linux/random.h b/libc/kernel/uapi/linux/random.h
index d1fd998..64f62d9 100644
--- a/libc/kernel/uapi/linux/random.h
+++ b/libc/kernel/uapi/linux/random.h
@@ -24,4 +24,10 @@
 #define GRND_NONBLOCK 0x0001
 #define GRND_RANDOM 0x0002
 #define GRND_INSECURE 0x0004
+struct vgetrandom_opaque_params {
+  __u32 size_of_opaque_state;
+  __u32 mmap_prot;
+  __u32 mmap_flags;
+  __u32 reserved[13];
+};
 #endif
diff --git a/libc/kernel/uapi/linux/sev-guest.h b/libc/kernel/uapi/linux/sev-guest.h
index a822bed..a722641 100644
--- a/libc/kernel/uapi/linux/sev-guest.h
+++ b/libc/kernel/uapi/linux/sev-guest.h
@@ -51,6 +51,8 @@
 #define SNP_GUEST_FW_ERR_MASK GENMASK_ULL(31, 0)
 #define SNP_GUEST_VMM_ERR_SHIFT 32
 #define SNP_GUEST_VMM_ERR(x) (((u64) x) << SNP_GUEST_VMM_ERR_SHIFT)
+#define SNP_GUEST_FW_ERR(x) ((x) & SNP_GUEST_FW_ERR_MASK)
+#define SNP_GUEST_ERR(vmm_err,fw_err) (SNP_GUEST_VMM_ERR(vmm_err) | SNP_GUEST_FW_ERR(fw_err))
 #define SNP_GUEST_VMM_ERR_INVALID_LEN 1
 #define SNP_GUEST_VMM_ERR_BUSY 2
 #endif
diff --git a/libc/kernel/uapi/linux/stat.h b/libc/kernel/uapi/linux/stat.h
index ff98fd2..aae9ed4 100644
--- a/libc/kernel/uapi/linux/stat.h
+++ b/libc/kernel/uapi/linux/stat.h
@@ -69,7 +69,11 @@
   __u32 stx_dio_mem_align;
   __u32 stx_dio_offset_align;
   __u64 stx_subvol;
-  __u64 __spare3[11];
+  __u32 stx_atomic_write_unit_min;
+  __u32 stx_atomic_write_unit_max;
+  __u32 stx_atomic_write_segments_max;
+  __u32 __spare1[1];
+  __u64 __spare3[9];
 };
 #define STATX_TYPE 0x00000001U
 #define STATX_MODE 0x00000002U
@@ -88,6 +92,7 @@
 #define STATX_DIOALIGN 0x00002000U
 #define STATX_MNT_ID_UNIQUE 0x00004000U
 #define STATX_SUBVOL 0x00008000U
+#define STATX_WRITE_ATOMIC 0x00010000U
 #define STATX__RESERVED 0x80000000U
 #define STATX_ALL 0x00000fffU
 #define STATX_ATTR_COMPRESSED 0x00000004
@@ -99,4 +104,5 @@
 #define STATX_ATTR_MOUNT_ROOT 0x00002000
 #define STATX_ATTR_VERITY 0x00100000
 #define STATX_ATTR_DAX 0x00200000
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000
 #endif
diff --git a/libc/kernel/uapi/linux/tcp_metrics.h b/libc/kernel/uapi/linux/tcp_metrics.h
index 931f50c..46ca141 100644
--- a/libc/kernel/uapi/linux/tcp_metrics.h
+++ b/libc/kernel/uapi/linux/tcp_metrics.h
@@ -4,8 +4,8 @@
  * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
  * for more information.
  */
-#ifndef _LINUX_TCP_METRICS_H
-#define _LINUX_TCP_METRICS_H
+#ifndef _UAPI_LINUX_TCP_METRICS_H
+#define _UAPI_LINUX_TCP_METRICS_H
 #include <linux/types.h>
 #define TCP_METRICS_GENL_NAME "tcp_metrics"
 #define TCP_METRICS_GENL_VERSION 0x1
@@ -21,6 +21,17 @@
 };
 #define TCP_METRIC_MAX (__TCP_METRIC_MAX - 1)
 enum {
+  TCP_METRICS_A_METRICS_RTT = 1,
+  TCP_METRICS_A_METRICS_RTTVAR,
+  TCP_METRICS_A_METRICS_SSTHRESH,
+  TCP_METRICS_A_METRICS_CWND,
+  TCP_METRICS_A_METRICS_REODERING,
+  TCP_METRICS_A_METRICS_RTT_US,
+  TCP_METRICS_A_METRICS_RTTVAR_US,
+  __TCP_METRICS_A_METRICS_MAX
+};
+#define TCP_METRICS_A_METRICS_MAX (__TCP_METRICS_A_METRICS_MAX - 1)
+enum {
   TCP_METRICS_ATTR_UNSPEC,
   TCP_METRICS_ATTR_ADDR_IPV4,
   TCP_METRICS_ATTR_ADDR_IPV6,
diff --git a/libc/kernel/uapi/linux/um_timetravel.h b/libc/kernel/uapi/linux/um_timetravel.h
index 8706017..49eeb28 100644
--- a/libc/kernel/uapi/linux/um_timetravel.h
+++ b/libc/kernel/uapi/linux/um_timetravel.h
@@ -12,6 +12,15 @@
   __u32 seq;
   __u64 time;
 };
+#define UM_TIMETRAVEL_MAX_FDS 2
+enum um_timetravel_shared_mem_fds {
+  UM_TIMETRAVEL_SHARED_MEMFD,
+  UM_TIMETRAVEL_SHARED_LOGFD,
+  UM_TIMETRAVEL_SHARED_MAX_FDS,
+};
+enum um_timetravel_start_ack {
+  UM_TIMETRAVEL_START_ACK_ID = 0xffff,
+};
 enum um_timetravel_ops {
   UM_TIMETRAVEL_ACK = 0,
   UM_TIMETRAVEL_START = 1,
@@ -22,5 +31,36 @@
   UM_TIMETRAVEL_RUN = 6,
   UM_TIMETRAVEL_FREE_UNTIL = 7,
   UM_TIMETRAVEL_GET_TOD = 8,
+  UM_TIMETRAVEL_BROADCAST = 9,
+};
+#define UM_TIMETRAVEL_SCHEDSHM_VERSION 2
+enum um_timetravel_schedshm_cap {
+  UM_TIMETRAVEL_SCHEDSHM_CAP_TIME_SHARE = 0x1,
+};
+enum um_timetravel_schedshm_flags {
+  UM_TIMETRAVEL_SCHEDSHM_FLAGS_REQ_RUN = 0x1,
+};
+union um_timetravel_schedshm_client {
+  struct {
+    __u32 capa;
+    __u32 flags;
+    __u64 req_time;
+    __u64 name;
+  };
+  char reserve[128];
+};
+struct um_timetravel_schedshm {
+  union {
+    struct {
+      __u32 version;
+      __u32 len;
+      __u64 free_until;
+      __u64 current_time;
+      __u16 running_id;
+      __u16 max_clients;
+    };
+    char hdr[4096];
+  };
+  union um_timetravel_schedshm_client clients[];
 };
 #endif
diff --git a/libc/kernel/uapi/linux/v4l2-controls.h b/libc/kernel/uapi/linux/v4l2-controls.h
index 23158dc..f6ef26c 100644
--- a/libc/kernel/uapi/linux/v4l2-controls.h
+++ b/libc/kernel/uapi/linux/v4l2-controls.h
@@ -714,6 +714,7 @@
   V4L2_MPEG_VIDEO_AV1_LEVEL_7_2 = 22,
   V4L2_MPEG_VIDEO_AV1_LEVEL_7_3 = 23
 };
+#define V4L2_CID_MPEG_VIDEO_AVERAGE_QP (V4L2_CID_CODEC_BASE + 657)
 #define V4L2_CID_CODEC_CX2341X_BASE (V4L2_CTRL_CLASS_CODEC | 0x1000)
 #define V4L2_CID_MPEG_CX2341X_VIDEO_SPATIAL_FILTER_MODE (V4L2_CID_CODEC_CX2341X_BASE + 0)
 enum v4l2_mpeg_cx2341x_video_spatial_filter_mode {
diff --git a/libc/kernel/uapi/linux/version.h b/libc/kernel/uapi/linux/version.h
index 7faa30f..0cc45cf 100644
--- a/libc/kernel/uapi/linux/version.h
+++ b/libc/kernel/uapi/linux/version.h
@@ -4,8 +4,8 @@
  * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
  * for more information.
  */
-#define LINUX_VERSION_CODE 395776
+#define LINUX_VERSION_CODE 396032
 #define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))
 #define LINUX_VERSION_MAJOR 6
-#define LINUX_VERSION_PATCHLEVEL 10
+#define LINUX_VERSION_PATCHLEVEL 11
 #define LINUX_VERSION_SUBLEVEL 0
diff --git a/libc/kernel/uapi/linux/videodev2.h b/libc/kernel/uapi/linux/videodev2.h
index 3905949..fb69a8a 100644
--- a/libc/kernel/uapi/linux/videodev2.h
+++ b/libc/kernel/uapi/linux/videodev2.h
@@ -236,6 +236,8 @@
 #define V4L2_PIX_FMT_RGBA1010102 v4l2_fourcc('R', 'A', '3', '0')
 #define V4L2_PIX_FMT_ARGB2101010 v4l2_fourcc('A', 'R', '3', '0')
 #define V4L2_PIX_FMT_BGR48_12 v4l2_fourcc('B', '3', '1', '2')
+#define V4L2_PIX_FMT_BGR48 v4l2_fourcc('B', 'G', 'R', '6')
+#define V4L2_PIX_FMT_RGB48 v4l2_fourcc('R', 'G', 'B', '6')
 #define V4L2_PIX_FMT_ABGR64_12 v4l2_fourcc('B', '4', '1', '2')
 #define V4L2_PIX_FMT_GREY v4l2_fourcc('G', 'R', 'E', 'Y')
 #define V4L2_PIX_FMT_Y4 v4l2_fourcc('Y', '0', '4', ' ')
@@ -425,6 +427,16 @@
 #define V4L2_PIX_FMT_IPU3_SGBRG10 v4l2_fourcc('i', 'p', '3', 'g')
 #define V4L2_PIX_FMT_IPU3_SGRBG10 v4l2_fourcc('i', 'p', '3', 'G')
 #define V4L2_PIX_FMT_IPU3_SRGGB10 v4l2_fourcc('i', 'p', '3', 'r')
+#define V4L2_PIX_FMT_PISP_COMP1_RGGB v4l2_fourcc('P', 'C', '1', 'R')
+#define V4L2_PIX_FMT_PISP_COMP1_GRBG v4l2_fourcc('P', 'C', '1', 'G')
+#define V4L2_PIX_FMT_PISP_COMP1_GBRG v4l2_fourcc('P', 'C', '1', 'g')
+#define V4L2_PIX_FMT_PISP_COMP1_BGGR v4l2_fourcc('P', 'C', '1', 'B')
+#define V4L2_PIX_FMT_PISP_COMP1_MONO v4l2_fourcc('P', 'C', '1', 'M')
+#define V4L2_PIX_FMT_PISP_COMP2_RGGB v4l2_fourcc('P', 'C', '2', 'R')
+#define V4L2_PIX_FMT_PISP_COMP2_GRBG v4l2_fourcc('P', 'C', '2', 'G')
+#define V4L2_PIX_FMT_PISP_COMP2_GBRG v4l2_fourcc('P', 'C', '2', 'g')
+#define V4L2_PIX_FMT_PISP_COMP2_BGGR v4l2_fourcc('P', 'C', '2', 'B')
+#define V4L2_PIX_FMT_PISP_COMP2_MONO v4l2_fourcc('P', 'C', '2', 'M')
 #define V4L2_SDR_FMT_CU8 v4l2_fourcc('C', 'U', '0', '8')
 #define V4L2_SDR_FMT_CU16LE v4l2_fourcc('C', 'U', '1', '6')
 #define V4L2_SDR_FMT_CS8 v4l2_fourcc('C', 'S', '0', '8')
@@ -444,6 +456,7 @@
 #define V4L2_META_FMT_VIVID v4l2_fourcc('V', 'I', 'V', 'D')
 #define V4L2_META_FMT_RK_ISP1_PARAMS v4l2_fourcc('R', 'K', '1', 'P')
 #define V4L2_META_FMT_RK_ISP1_STAT_3A v4l2_fourcc('R', 'K', '1', 'S')
+#define V4L2_META_FMT_RPI_BE_CFG v4l2_fourcc('R', 'P', 'B', 'C')
 #define V4L2_PIX_FMT_PRIV_MAGIC 0xfeedcafe
 #define V4L2_PIX_FMT_FLAG_PREMUL_ALPHA 0x00000001
 #define V4L2_PIX_FMT_FLAG_SET_CSC 0x00000002
diff --git a/libc/kernel/uapi/linux/xfrm.h b/libc/kernel/uapi/linux/xfrm.h
index b8e79e8..9509efe 100644
--- a/libc/kernel/uapi/linux/xfrm.h
+++ b/libc/kernel/uapi/linux/xfrm.h
@@ -260,6 +260,7 @@
   XFRMA_IF_ID,
   XFRMA_MTIMER_THRESH,
   XFRMA_SA_DIR,
+  XFRMA_NAT_KEEPALIVE_INTERVAL,
   __XFRMA_MAX
 #define XFRMA_OUTPUT_MARK XFRMA_SET_MARK
 #define XFRMA_MAX (__XFRMA_MAX - 1)
diff --git a/libc/kernel/uapi/linux/zorro_ids.h b/libc/kernel/uapi/linux/zorro_ids.h
index f47c899..05239cd 100644
--- a/libc/kernel/uapi/linux/zorro_ids.h
+++ b/libc/kernel/uapi/linux/zorro_ids.h
@@ -354,6 +354,8 @@
 #define ZORRO_MANUF_VMC 0x1389
 #define ZORRO_PROD_VMC_ISDN_BLASTER_Z2 ZORRO_ID(VMC, 0x01, 0)
 #define ZORRO_PROD_VMC_HYPERCOM_4 ZORRO_ID(VMC, 0x02, 0)
+#define ZORRO_MANUF_CSLAB 0x1400
+#define ZORRO_PROD_CSLAB_WARP_1260 ZORRO_ID(CSLAB, 0x65, 0)
 #define ZORRO_MANUF_INFORMATION 0x157C
 #define ZORRO_PROD_INFORMATION_ISDN_ENGINE_I ZORRO_ID(INFORMATION, 0x64, 0)
 #define ZORRO_MANUF_VORTEX 0x2017
diff --git a/libc/kernel/uapi/misc/mrvl_cn10k_dpi.h b/libc/kernel/uapi/misc/mrvl_cn10k_dpi.h
new file mode 100644
index 0000000..7d8671f
--- /dev/null
+++ b/libc/kernel/uapi/misc/mrvl_cn10k_dpi.h
@@ -0,0 +1,26 @@
+/*
+ * This file is auto-generated. Modifications will be lost.
+ *
+ * See https://android.googlesource.com/platform/bionic/+/master/libc/kernel/
+ * for more information.
+ */
+#ifndef __MRVL_CN10K_DPI_H__
+#define __MRVL_CN10K_DPI_H__
+#include <linux/types.h>
+#define DPI_MAX_ENGINES 6
+struct dpi_mps_mrrs_cfg {
+  __u16 max_read_req_sz;
+  __u16 max_payload_sz;
+  __u16 port;
+  __u16 reserved;
+};
+struct dpi_engine_cfg {
+  __u64 fifo_mask;
+  __u16 molr[DPI_MAX_ENGINES];
+  __u16 update_molr;
+  __u16 reserved;
+};
+#define DPI_MAGIC_NUM 0xB8
+#define DPI_MPS_MRRS_CFG _IOW(DPI_MAGIC_NUM, 1, struct dpi_mps_mrrs_cfg)
+#define DPI_ENGINE_CFG _IOW(DPI_MAGIC_NUM, 2, struct dpi_engine_cfg)
+#endif
diff --git a/libc/kernel/uapi/rdma/bnxt_re-abi.h b/libc/kernel/uapi/rdma/bnxt_re-abi.h
index 3dceafd..50f8b8a 100644
--- a/libc/kernel/uapi/rdma/bnxt_re-abi.h
+++ b/libc/kernel/uapi/rdma/bnxt_re-abi.h
@@ -18,7 +18,7 @@
   BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL,
   BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL,
   BNXT_RE_UCNTX_CMASK_POW2_DISABLED = 0x10ULL,
-  BNXT_RE_COMP_MASK_UCNTX_HW_RETX_ENABLED = 0x40,
+  BNXT_RE_UCNTX_CMASK_MSN_TABLE_ENABLED = 0x40,
 };
 enum bnxt_re_wqe_mode {
   BNXT_QPLIB_WQE_MODE_STATIC = 0x00,
diff --git a/libc/kernel/uapi/rdma/ib_user_ioctl_cmds.h b/libc/kernel/uapi/rdma/ib_user_ioctl_cmds.h
index d774fba..5f5c426 100644
--- a/libc/kernel/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/libc/kernel/uapi/rdma/ib_user_ioctl_cmds.h
@@ -8,8 +8,6 @@
 #define IB_USER_IOCTL_CMDS_H
 #define UVERBS_ID_NS_MASK 0xF000
 #define UVERBS_ID_NS_SHIFT 12
-#define UVERBS_UDATA_DRIVER_DATA_NS 1
-#define UVERBS_UDATA_DRIVER_DATA_FLAG (1UL << UVERBS_ID_NS_SHIFT)
 enum uverbs_default_objects {
   UVERBS_OBJECT_DEVICE,
   UVERBS_OBJECT_PD,
@@ -30,8 +28,10 @@
   UVERBS_OBJECT_ASYNC_EVENT,
 };
 enum {
-  UVERBS_ATTR_UHW_IN = UVERBS_UDATA_DRIVER_DATA_FLAG,
+  UVERBS_ID_DRIVER_NS = 1UL << UVERBS_ID_NS_SHIFT,
+  UVERBS_ATTR_UHW_IN = UVERBS_ID_DRIVER_NS,
   UVERBS_ATTR_UHW_OUT,
+  UVERBS_ID_DRIVER_NS_WITH_UHW,
 };
 enum uverbs_methods_device {
   UVERBS_METHOD_INVOKE_WRITE,
diff --git a/libc/kernel/uapi/rdma/mana-abi.h b/libc/kernel/uapi/rdma/mana-abi.h
index 7347175..02bb061 100644
--- a/libc/kernel/uapi/rdma/mana-abi.h
+++ b/libc/kernel/uapi/rdma/mana-abi.h
@@ -33,6 +33,13 @@
   __u32 tx_vp_offset;
   __u32 reserved;
 };
+struct mana_ib_create_rc_qp {
+  __aligned_u64 queue_buf[4];
+  __u32 queue_size[4];
+};
+struct mana_ib_create_rc_qp_resp {
+  __u32 queue_id[4];
+};
 struct mana_ib_create_wq {
   __aligned_u64 wq_buf_addr;
   __u32 wq_buf_size;
diff --git a/libc/kernel/uapi/rdma/mlx5_user_ioctl_cmds.h b/libc/kernel/uapi/rdma/mlx5_user_ioctl_cmds.h
index 69d5250..ebafb00 100644
--- a/libc/kernel/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/libc/kernel/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -201,6 +201,9 @@
 enum mlx5_ib_device_query_context_attrs {
   MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX = (1U << UVERBS_ID_NS_SHIFT),
 };
+enum mlx5_ib_create_cq_attrs {
+  MLX5_IB_ATTR_CREATE_CQ_UAR_INDEX = UVERBS_ID_DRIVER_NS_WITH_UHW,
+};
 #define MLX5_IB_DW_MATCH_PARAM 0xA0
 struct mlx5_ib_match_params {
   __u32 match_params[MLX5_IB_DW_MATCH_PARAM];
diff --git a/libc/kernel/uapi/rdma/rdma_netlink.h b/libc/kernel/uapi/rdma/rdma_netlink.h
index 912a3c0..ac027ac 100644
--- a/libc/kernel/uapi/rdma/rdma_netlink.h
+++ b/libc/kernel/uapi/rdma/rdma_netlink.h
@@ -199,6 +199,8 @@
   RDMA_NLDEV_CMD_RES_SRQ_GET,
   RDMA_NLDEV_CMD_STAT_GET_STATUS,
   RDMA_NLDEV_CMD_RES_SRQ_GET_RAW,
+  RDMA_NLDEV_CMD_NEWDEV,
+  RDMA_NLDEV_CMD_DELDEV,
   RDMA_NLDEV_NUM_OPS
 };
 enum rdma_nldev_print_type {
@@ -306,6 +308,9 @@
   RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
   RDMA_NLDEV_ATTR_DRIVER_DETAILS,
   RDMA_NLDEV_ATTR_RES_SUBTYPE,
+  RDMA_NLDEV_ATTR_DEV_TYPE,
+  RDMA_NLDEV_ATTR_PARENT_NAME,
+  RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
   RDMA_NLDEV_ATTR_MAX
 };
 enum rdma_nl_counter_mode {
@@ -318,4 +323,11 @@
   RDMA_COUNTER_MASK_QP_TYPE = 1,
   RDMA_COUNTER_MASK_PID = 1 << 1,
 };
+enum rdma_nl_dev_type {
+  RDMA_DEVICE_TYPE_SMI = 1,
+};
+enum rdma_nl_name_assign_type {
+  RDMA_NAME_ASSIGN_TYPE_UNKNOWN = 0,
+  RDMA_NAME_ASSIGN_TYPE_USER = 1,
+};
 #endif
diff --git a/libc/kernel/uapi/scsi/scsi_bsg_mpi3mr.h b/libc/kernel/uapi/scsi/scsi_bsg_mpi3mr.h
index d98e8fa..fe3094e 100644
--- a/libc/kernel/uapi/scsi/scsi_bsg_mpi3mr.h
+++ b/libc/kernel/uapi/scsi/scsi_bsg_mpi3mr.h
@@ -144,7 +144,7 @@
 };
 struct mpi3mr_bsg_in_hdb_status {
   __u8 num_hdb_types;
-  __u8 rsvd1;
+  __u8 element_trigger_format;
   __u16 rsvd2;
   __u32 rsvd3;
   struct mpi3mr_hdb_entry entry[1];
diff --git a/libc/kernel/uapi/sound/asequencer.h b/libc/kernel/uapi/sound/asequencer.h
index 7918528..a3826a5 100644
--- a/libc/kernel/uapi/sound/asequencer.h
+++ b/libc/kernel/uapi/sound/asequencer.h
@@ -7,7 +7,7 @@
 #ifndef _UAPI__SOUND_ASEQUENCER_H
 #define _UAPI__SOUND_ASEQUENCER_H
 #include <sound/asound.h>
-#define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 3)
+#define SNDRV_SEQ_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 4)
 #define SNDRV_SEQ_EVENT_SYSTEM 0
 #define SNDRV_SEQ_EVENT_RESULT 1
 #define SNDRV_SEQ_EVENT_NOTE 5
@@ -343,7 +343,8 @@
   int ppq;
   unsigned int skew_value;
   unsigned int skew_base;
-  char reserved[24];
+  unsigned short tempo_base;
+  char reserved[22];
 };
 #define SNDRV_SEQ_TIMER_ALSA 0
 #define SNDRV_SEQ_TIMER_MIDI_CLOCK 1
diff --git a/libc/kernel/uapi/sound/asound.h b/libc/kernel/uapi/sound/asound.h
index b608ed5..cfe9f66 100644
--- a/libc/kernel/uapi/sound/asound.h
+++ b/libc/kernel/uapi/sound/asound.h
@@ -92,7 +92,7 @@
 #define SNDRV_HWDEP_IOCTL_INFO _IOR('H', 0x01, struct snd_hwdep_info)
 #define SNDRV_HWDEP_IOCTL_DSP_STATUS _IOR('H', 0x02, struct snd_hwdep_dsp_status)
 #define SNDRV_HWDEP_IOCTL_DSP_LOAD _IOW('H', 0x03, struct snd_hwdep_dsp_image)
-#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17)
+#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 18)
 typedef unsigned long snd_pcm_uframes_t;
 typedef signed long snd_pcm_sframes_t;
 enum {
@@ -263,7 +263,7 @@
   unsigned char id[16];
   unsigned short id16[8];
   unsigned int id32[4];
-};
+} __attribute__((deprecated));
 struct snd_pcm_info {
   unsigned int device;
   unsigned int subdevice;
@@ -276,7 +276,7 @@
   int dev_subclass;
   unsigned int subdevices_count;
   unsigned int subdevices_avail;
-  union snd_pcm_sync_id sync;
+  unsigned char pad1[16];
   unsigned char reserved[64];
 };
 typedef int snd_pcm_hw_param_t;
@@ -324,7 +324,8 @@
   unsigned int rate_num;
   unsigned int rate_den;
   snd_pcm_uframes_t fifo_size;
-  unsigned char reserved[64];
+  unsigned char sync[16];
+  unsigned char reserved[48];
 };
 enum {
   SNDRV_PCM_TSTAMP_NONE = 0,
diff --git a/libc/kernel/uapi/sound/sof/abi.h b/libc/kernel/uapi/sound/sof/abi.h
index fe1fe47..ed66131 100644
--- a/libc/kernel/uapi/sound/sof/abi.h
+++ b/libc/kernel/uapi/sound/sof/abi.h
@@ -9,7 +9,7 @@
 #include <linux/types.h>
 #define SOF_ABI_MAJOR 3
 #define SOF_ABI_MINOR 23
-#define SOF_ABI_PATCH 0
+#define SOF_ABI_PATCH 1
 #define SOF_ABI_MAJOR_SHIFT 24
 #define SOF_ABI_MAJOR_MASK 0xff
 #define SOF_ABI_MINOR_SHIFT 12
diff --git a/linker/linker_main.cpp b/linker/linker_main.cpp
index 6ccd75b..74cd517 100644
--- a/linker/linker_main.cpp
+++ b/linker/linker_main.cpp
@@ -722,6 +722,8 @@
   tmp_linker_so.set_linker_flag();
 
   if (!tmp_linker_so.prelink_image()) __linker_cannot_link(args.argv[0]);
+  // There is special logic in soinfo::relocate to avoid duplicating the
+  // relocations we did in relocate_linker().
   if (!tmp_linker_so.link_image(SymbolLookupList(&tmp_linker_so), &tmp_linker_so, nullptr, nullptr)) __linker_cannot_link(args.argv[0]);
 
   return __linker_init_post_relocation(args, tmp_linker_so);
diff --git a/linker/linker_phdr.cpp b/linker/linker_phdr.cpp
index 48206be..b7db4cd 100644
--- a/linker/linker_phdr.cpp
+++ b/linker/linker_phdr.cpp
@@ -976,7 +976,6 @@
     ElfW(Addr) seg_start = phdr->p_vaddr + load_bias_;
     ElfW(Addr) seg_end = seg_start + p_memsz;
 
-    ElfW(Addr) seg_page_start = page_start(seg_start);
     ElfW(Addr) seg_page_end = page_end(seg_end);
 
     ElfW(Addr) seg_file_end = seg_start + p_filesz;
diff --git a/tests/Android.bp b/tests/Android.bp
index 6aecb40..0bd4a32 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -746,6 +746,40 @@
     },
 }
 
+cc_defaults {
+    name: "bionic_fortify_c_tests_defaults",
+    defaults: [
+        "bionic_clang_fortify_tests_w_flags",
+        "bionic_tests_defaults",
+    ],
+    cflags: [
+        "-U_FORTIFY_SOURCE",
+        // -fbuiltin is required here to counteract -fno-builtin from
+        // `bionic_tests_defaults`. With `-fno-builtin`, Clang won't
+        // const-evaluate calls to `strlen`, which is tested for here.
+        "-fbuiltin",
+    ],
+    srcs: [
+        "clang_fortify_c_only_tests.c",
+    ],
+    tidy: false,
+    shared: {
+        enabled: false,
+    },
+}
+
+cc_test_library {
+    name: "libfortify1-c-tests-clang",
+    defaults: ["bionic_fortify_c_tests_defaults"],
+    cflags: ["-D_FORTIFY_SOURCE=1"],
+}
+
+cc_test_library {
+    name: "libfortify2-c-tests-clang",
+    defaults: ["bionic_fortify_c_tests_defaults"],
+    cflags: ["-D_FORTIFY_SOURCE=2"],
+}
+
 // -----------------------------------------------------------------------------
 // Library of all tests (excluding the dynamic linker tests).
 // -----------------------------------------------------------------------------
@@ -757,8 +791,10 @@
         "libBionicStandardTests",
         "libBionicElfTlsTests",
         "libBionicFramePointerTests",
+        "libfortify1-c-tests-clang",
         "libfortify1-tests-clang",
         "libfortify1-new-tests-clang",
+        "libfortify2-c-tests-clang",
         "libfortify2-tests-clang",
         "libfortify2-new-tests-clang",
     ],
diff --git a/tests/DoNotOptimize.h b/tests/DoNotOptimize.h
new file mode 100644
index 0000000..711d339
--- /dev/null
+++ b/tests/DoNotOptimize.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+// From <benchmark/benchmark.h>.
+template <class Tp>
+static inline void DoNotOptimize(Tp const& value) {
+  asm volatile("" : : "r,m"(value) : "memory");
+}
+template <class Tp>
+static inline void DoNotOptimize(Tp& value) {
+  asm volatile("" : "+r,m"(value) : : "memory");
+}
diff --git a/tests/clang_fortify_c_only_tests.c b/tests/clang_fortify_c_only_tests.c
new file mode 100644
index 0000000..3bec848
--- /dev/null
+++ b/tests/clang_fortify_c_only_tests.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+// This is a test specifically of bionic's FORTIFY machinery. Other stdlibs need not apply.
+#ifdef __BIONIC__
+
+// Ensure that strlen can be evaluated at compile-time. Clang doesn't support
+// this in C++, but does in C.
+_Static_assert(strlen("foo") == 3, "");
+
+#endif  // __BIONIC__
diff --git a/tests/clang_fortify_tests.cpp b/tests/clang_fortify_tests.cpp
index f08fd1f..da7926d 100644
--- a/tests/clang_fortify_tests.cpp
+++ b/tests/clang_fortify_tests.cpp
@@ -89,6 +89,10 @@
 #include <unistd.h>
 #include <wchar.h>
 
+#include <array>
+
+#include "DoNotOptimize.h"
+
 #ifndef COMPILATION_TESTS
 #include <android-base/silent_death_test.h>
 #include <gtest/gtest.h>
@@ -133,6 +137,24 @@
 
 const static int kBogusFD = -1;
 
+FORTIFY_TEST(strlen) {
+  auto run_strlen_with_contents = [&](std::array<char, 3> contents) {
+    // A lot of cruft is necessary to make this test DTRT. LLVM and Clang love to fold/optimize
+    // strlen calls, and that's the opposite of what we want to happen.
+
+    // Loop to convince LLVM that `contents` can never be known (since `xor volatile_value` can flip
+    // any bit in each elem of `contents`).
+    volatile char always_zero = 0;
+    for (char& c : contents) {
+      c ^= always_zero;
+    }
+    DoNotOptimize(strlen(&contents.front()));
+  };
+
+  EXPECT_NO_DEATH(run_strlen_with_contents({'f', 'o', '\0'}));
+  EXPECT_FORTIFY_DEATH(run_strlen_with_contents({'f', 'o', 'o'}));
+}
+
 FORTIFY_TEST(string) {
   char small_buffer[8] = {};
 
diff --git a/tests/cpu_target_features_test.cpp b/tests/cpu_target_features_test.cpp
index 3458bca..d773772 100644
--- a/tests/cpu_target_features_test.cpp
+++ b/tests/cpu_target_features_test.cpp
@@ -54,15 +54,3 @@
   GTEST_SKIP() << "Not targeting an aarch64 architecture.";
 #endif
 }
-
-TEST(cpu_target_features, has_expected_arm_compiler_values) {
-#if defined(__arm__)
-  ExecTestHelper eth;
-  char* const argv[] = {nullptr};
-  const auto invocation = [&] { execvp("cpu-target-features", argv); };
-  eth.Run(invocation, 0, "(^|\n)__ARM_FEATURE_AES=1($|\n)");
-  eth.Run(invocation, 0, "(^|\n)__ARM_FEATURE_CRC32=1($|\n)");
-#else
-  GTEST_SKIP() << "Not targeting an arm architecture.";
-#endif
-}
diff --git a/tests/fenv_test.cpp b/tests/fenv_test.cpp
index 9cf9d98..bbf339f 100644
--- a/tests/fenv_test.cpp
+++ b/tests/fenv_test.cpp
@@ -16,6 +16,7 @@
 
 #include <gtest/gtest.h>
 
+#include "DoNotOptimize.h"
 #include "utils.h"
 
 #include <fenv.h>
diff --git a/tests/malloc_test.cpp b/tests/malloc_test.cpp
index 813f348..3f1ba79 100644
--- a/tests/malloc_test.cpp
+++ b/tests/malloc_test.cpp
@@ -47,6 +47,7 @@
 #include <android-base/file.h>
 #include <android-base/test_utils.h>
 
+#include "DoNotOptimize.h"
 #include "utils.h"
 
 #if defined(__BIONIC__)
diff --git a/tests/prebuilt-elf-files/arm64/libtest_invalid-empty_shdr_table.so b/tests/prebuilt-elf-files/arm64/libtest_invalid-empty_shdr_table.so
index c8b5430..21a8f26 100755
--- a/tests/prebuilt-elf-files/arm64/libtest_invalid-empty_shdr_table.so
+++ b/tests/prebuilt-elf-files/arm64/libtest_invalid-empty_shdr_table.so
Binary files differ
diff --git a/tests/prebuilt-elf-files/arm64/libtest_invalid-local-tls.so b/tests/prebuilt-elf-files/arm64/libtest_invalid-local-tls.so
index 20c5765..c902bbe 100755
--- a/tests/prebuilt-elf-files/arm64/libtest_invalid-local-tls.so
+++ b/tests/prebuilt-elf-files/arm64/libtest_invalid-local-tls.so
Binary files differ
diff --git a/tests/prebuilt-elf-files/arm64/libtest_invalid-unaligned_shdr_offset.so b/tests/prebuilt-elf-files/arm64/libtest_invalid-unaligned_shdr_offset.so
index 6e5a6e3..fb86bca 100755
--- a/tests/prebuilt-elf-files/arm64/libtest_invalid-unaligned_shdr_offset.so
+++ b/tests/prebuilt-elf-files/arm64/libtest_invalid-unaligned_shdr_offset.so
Binary files differ
diff --git a/tests/prebuilt-elf-files/gen-libtest_invalid-local-tls.sh b/tests/prebuilt-elf-files/gen-libtest_invalid-local-tls.sh
index 0f3e736..98a2b00 100755
--- a/tests/prebuilt-elf-files/gen-libtest_invalid-local-tls.sh
+++ b/tests/prebuilt-elf-files/gen-libtest_invalid-local-tls.sh
@@ -19,12 +19,18 @@
 build() {
   arch=$1
   target=$2
+
+  if [[ "$arch" == "arm64" || "$arch" == "x86_64" ]]; then
+    alignment="-Wl,-z,max-page-size=16384"
+  fi
+
   $NDK21E/toolchains/llvm/prebuilt/linux-x86_64/bin/clang -O2 --target=$target \
       -fpic -shared -o $arch/libtest_invalid-local-tls.so -fno-emulated-tls \
-      -fuse-ld=gold test.c test2.c
+      $alignment -fuse-ld=gold test.c test2.c
 }
 
 build arm armv7a-linux-androideabi29
 build arm64 aarch64-linux-android29
 build x86 i686-linux-android29
 build x86_64 x86_64-linux-android29
+
diff --git a/tests/prebuilt-elf-files/x86_64/libtest_invalid-empty_shdr_table.so b/tests/prebuilt-elf-files/x86_64/libtest_invalid-empty_shdr_table.so
index af1538d..00fefd4 100755
--- a/tests/prebuilt-elf-files/x86_64/libtest_invalid-empty_shdr_table.so
+++ b/tests/prebuilt-elf-files/x86_64/libtest_invalid-empty_shdr_table.so
Binary files differ
diff --git a/tests/prebuilt-elf-files/x86_64/libtest_invalid-local-tls.so b/tests/prebuilt-elf-files/x86_64/libtest_invalid-local-tls.so
index 5b689ba..31d2b37 100755
--- a/tests/prebuilt-elf-files/x86_64/libtest_invalid-local-tls.so
+++ b/tests/prebuilt-elf-files/x86_64/libtest_invalid-local-tls.so
Binary files differ
diff --git a/tests/prebuilt-elf-files/x86_64/libtest_invalid-unaligned_shdr_offset.so b/tests/prebuilt-elf-files/x86_64/libtest_invalid-unaligned_shdr_offset.so
index 87631af..f9c310f 100755
--- a/tests/prebuilt-elf-files/x86_64/libtest_invalid-unaligned_shdr_offset.so
+++ b/tests/prebuilt-elf-files/x86_64/libtest_invalid-unaligned_shdr_offset.so
Binary files differ
diff --git a/tests/string_test.cpp b/tests/string_test.cpp
index 6e1fcfc..289a483 100644
--- a/tests/string_test.cpp
+++ b/tests/string_test.cpp
@@ -748,7 +748,7 @@
       expected_end = copy_len;
     }
 
-    ASSERT_EQ(state.ptr2 + expected_end, stpncpy(state.ptr2, state.ptr1, copy_len));
+    ASSERT_EQ(state.ptr2, strncpy(state.ptr2, state.ptr1, copy_len));
 
     // Verify ptr1 was not modified.
     ASSERT_EQ(0, memcmp(state.ptr1, state.ptr, state.MAX_LEN));
diff --git a/tests/unistd_test.cpp b/tests/unistd_test.cpp
index 78b55c1..9ad3b6d 100644
--- a/tests/unistd_test.cpp
+++ b/tests/unistd_test.cpp
@@ -16,6 +16,7 @@
 
 #include <gtest/gtest.h>
 
+#include "DoNotOptimize.h"
 #include "SignalUtils.h"
 #include "utils.h"
 
diff --git a/tests/utils.h b/tests/utils.h
index 3c83b73..4740e59 100644
--- a/tests/utils.h
+++ b/tests/utils.h
@@ -295,16 +295,6 @@
   size_t start_count_ = CountOpenFds();
 };
 
-// From <benchmark/benchmark.h>.
-template <class Tp>
-static inline void DoNotOptimize(Tp const& value) {
-  asm volatile("" : : "r,m"(value) : "memory");
-}
-template <class Tp>
-static inline void DoNotOptimize(Tp& value) {
-  asm volatile("" : "+r,m"(value) : : "memory");
-}
-
 static inline bool running_with_mte() {
 #ifdef __aarch64__
   int level = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);