Merge "avx2 implementation for memset."
diff --git a/libc/Android.bp b/libc/Android.bp
index 3a8948b..53db888 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -376,7 +376,6 @@
                 "upstream-freebsd/lib/libc/string/wcscat.c",
                 "upstream-freebsd/lib/libc/string/wcscpy.c",
                 "upstream-freebsd/lib/libc/string/wmemcmp.c",
-                "upstream-freebsd/lib/libc/string/wmemset.c",
             ],
         },
     },
@@ -927,7 +926,6 @@
                 "arch-x86/generic/string/wcscat.c",
                 "arch-x86/generic/string/wcscpy.c",
                 "arch-x86/generic/string/wmemcmp.c",
-                "arch-x86/generic/string/wmemset.c",
 
                 "arch-x86/atom/string/sse2-memchr-atom.S",
                 "arch-x86/atom/string/sse2-memrchr-atom.S",
@@ -977,9 +975,6 @@
                 "arch-x86/atom/string/ssse3-strcpy-atom.S",
                 "arch-x86/atom/string/ssse3-strncpy-atom.S",
                 "arch-x86/atom/string/ssse3-wmemcmp-atom.S",
-
-                // avx2 functions
-                "arch-x86/kabylake/string/avx2-wmemset-kbl.S",
             ],
 
             exclude_srcs: [
@@ -990,6 +985,7 @@
         },
         x86_64: {
             srcs: [
+                "arch-x86_64/string/avx2-memset-kbl.S",
                 "arch-x86_64/string/sse2-memmove-slm.S",
                 "arch-x86_64/string/sse2-memset-slm.S",
                 "arch-x86_64/string/sse2-stpcpy-slm.S",
@@ -1002,7 +998,6 @@
                 "arch-x86_64/string/sse4-memcmp-slm.S",
                 "arch-x86_64/string/ssse3-strcmp-slm.S",
                 "arch-x86_64/string/ssse3-strncmp-slm.S",
-                "arch-x86_64/string/avx2-wmemset-kbl.S",
 
                 "arch-x86_64/bionic/__bionic_clone.S",
                 "arch-x86_64/bionic/_exit_with_stack_teardown.S",
@@ -1515,6 +1510,9 @@
     name: "libc_static_dispatch",
 
     arch: {
+        x86_64: {
+            srcs: ["arch-x86_64/static_function_dispatch.S"],
+        },
         x86: {
             srcs: ["arch-x86/static_function_dispatch.S"],
         },
@@ -1540,6 +1538,9 @@
         "-fno-jump-tables",
     ],
     arch: {
+        x86_64: {
+            srcs: ["arch-x86_64/dynamic_function_dispatch.cpp"],
+        },
         x86: {
             srcs: ["arch-x86/dynamic_function_dispatch.cpp"],
         },
diff --git a/libc/NOTICE b/libc/NOTICE
index 9cbbde2..fa3dd2c 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -783,22 +783,6 @@
 -------------------------------------------------------------------
 
 Copyright (C) 2019 The Android Open Source Project
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
--------------------------------------------------------------------
-
-Copyright (C) 2019 The Android Open Source Project
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -826,34 +810,6 @@
 
 -------------------------------------------------------------------
 
-Copyright (C) 2019 The Android Open Source Project
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
-Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in
-   the documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-
--------------------------------------------------------------------
-
 Copyright (C) 2020 The Android Open Source Project
 All rights reserved.
 
diff --git a/libc/arch-x86/dynamic_function_dispatch.cpp b/libc/arch-x86/dynamic_function_dispatch.cpp
index e94fa1f..38d8a0a 100644
--- a/libc/arch-x86/dynamic_function_dispatch.cpp
+++ b/libc/arch-x86/dynamic_function_dispatch.cpp
@@ -95,13 +95,6 @@
     RETURN_FUNC(wmemcmp_func, wmemcmp_freebsd);
 }
 
-typedef int wmemset_func(const wchar_t* __lhs, const wchar_t* __rhs, size_t __n);
-DEFINE_IFUNC_FOR(wmemset) {
-    __builtin_cpu_init();
-    if (__builtin_cpu_supports("avx2")) RETURN_FUNC(wmemset_func, wmemset_avx2);
-    RETURN_FUNC(wmemset_func, wmemset_freebsd);
-}
-
 typedef int strcmp_func(const char* __lhs, const char* __rhs);
 DEFINE_IFUNC_FOR(strcmp) {
     __builtin_cpu_init();
diff --git a/libc/arch-x86/generic/string/wmemset.c b/libc/arch-x86/generic/string/wmemset.c
deleted file mode 100644
index 35d489f..0000000
--- a/libc/arch-x86/generic/string/wmemset.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (C) 2019 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-#define wmemset wmemset_freebsd
-
-#include <upstream-freebsd/lib/libc/string/wmemset.c>
diff --git a/libc/arch-x86/kabylake/string/avx2-wmemset-kbl.S b/libc/arch-x86/kabylake/string/avx2-wmemset-kbl.S
deleted file mode 100644
index 69b66c7..0000000
--- a/libc/arch-x86/kabylake/string/avx2-wmemset-kbl.S
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
-Copyright (C) 2019 The Android Open Source Project
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
- * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in
-   the documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-*/
-
-#include <private/bionic_asm.h>
-
-#ifndef WMEMSET
- #define WMEMSET wmemset_avx2
-#endif
-
-ENTRY(WMEMSET)
-# BB#0:
-	pushl	%ebp
-	pushl	%ebx
-	pushl	%edi
-	pushl	%esi
-	pushl	%eax
-	movl	32(%esp), %ecx
-	movl	24(%esp), %eax
-	testl	%ecx, %ecx
-	je	.LBB0_12
-# BB#1:
-	movl	28(%esp), %edx
-	xorl	%edi, %edi
-	movl	%eax, %esi
-	cmpl	$32, %ecx
-	jb	.LBB0_10
-# BB#2:
-	movl	%ecx, %eax
-	andl	$-32, %eax
-	vmovd	%edx, %xmm0
-	vpbroadcastd	%xmm0, %ymm0
-	movl	%eax, (%esp)            # 4-byte Spill
-	leal	-32(%eax), %esi
-	movl	%esi, %eax
-	shrl	$5, %eax
-	leal	1(%eax), %edi
-	andl	$7, %edi
-	xorl	%ebx, %ebx
-	cmpl	$224, %esi
-	jb	.LBB0_5
-# BB#3:
-	movl	24(%esp), %esi
-	leal	992(%esi), %ebp
-	leal	-1(%edi), %esi
-	subl	%eax, %esi
-	xorl	%ebx, %ebx
-	.p2align	4, 0x90
-.LBB0_4:                                # =>This Inner Loop Header: Depth=1
-	vmovdqu	%ymm0, -992(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -960(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -928(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -896(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -864(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -832(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -800(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -768(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -736(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -704(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -672(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -640(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -608(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -576(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -544(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -512(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -480(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -448(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -416(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -384(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -352(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -320(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -288(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -256(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -224(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -192(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -160(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -128(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -96(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -64(%ebp,%ebx,4)
-	vmovdqu	%ymm0, -32(%ebp,%ebx,4)
-	vmovdqu	%ymm0, (%ebp,%ebx,4)
-	addl	$256, %ebx              # imm = 0x100
-	addl	$8, %esi
-	jne	.LBB0_4
-.LBB0_5:
-	testl	%edi, %edi
-	movl	24(%esp), %eax
-	je	.LBB0_8
-# BB#6:
-	leal	(%eax,%ebx,4), %esi
-	addl	$96, %esi
-	negl	%edi
-	.p2align	4, 0x90
-.LBB0_7:                                # =>This Inner Loop Header: Depth=1
-	vmovdqu	%ymm0, -96(%esi)
-	vmovdqu	%ymm0, -64(%esi)
-	vmovdqu	%ymm0, -32(%esi)
-	vmovdqu	%ymm0, (%esi)
-	subl	$-128, %esi
-	addl	$1, %edi
-	jne	.LBB0_7
-.LBB0_8:
-	movl	(%esp), %edi            # 4-byte Reload
-	cmpl	%ecx, %edi
-	je	.LBB0_12
-# BB#9:
-	leal	(%eax,%edi,4), %esi
-.LBB0_10:
-	subl	%edi, %ecx
-	.p2align	4, 0x90
-.LBB0_11:                               # =>This Inner Loop Header: Depth=1
-	movl	%edx, (%esi)
-	addl	$4, %esi
-	addl	$-1, %ecx
-	jne	.LBB0_11
-.LBB0_12:
-	addl	$4, %esp
-	popl	%esi
-	popl	%edi
-	popl	%ebx
-	popl	%ebp
-	vzeroupper
-	retl
-END(WMEMSET)
diff --git a/libc/arch-x86/static_function_dispatch.S b/libc/arch-x86/static_function_dispatch.S
index 1560c04..7e8e63d 100644
--- a/libc/arch-x86/static_function_dispatch.S
+++ b/libc/arch-x86/static_function_dispatch.S
@@ -45,7 +45,6 @@
 FUNCTION_DELEGATE(strncmp, strncmp_generic)
 FUNCTION_DELEGATE(strcat, strcat_generic)
 FUNCTION_DELEGATE(wmemcmp, wmemcmp_freebsd)
-FUNCTION_DELEGATE(wmemset, wmemset_freebsd)
 FUNCTION_DELEGATE(wcscat, wcscat_freebsd)
 FUNCTION_DELEGATE(strncat, strncat_openbsd)
 FUNCTION_DELEGATE(strlcat, strlcat_openbsd)
diff --git a/libc/arch-x86_64/dynamic_function_dispatch.cpp b/libc/arch-x86_64/dynamic_function_dispatch.cpp
new file mode 100644
index 0000000..c846ded
--- /dev/null
+++ b/libc/arch-x86_64/dynamic_function_dispatch.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+
+#include <private/bionic_ifuncs.h>
+
+extern "C" {
+
+typedef void* memset_func(void* __dst, int __ch, size_t __n);  // memset() returns __dst.
+DEFINE_IFUNC_FOR(memset) {  // Chooses the memset implementation for the running CPU.
+  __builtin_cpu_init();  // Must run before __builtin_cpu_supports() in an ifunc resolver.
+  if (__builtin_cpu_supports("avx2")) RETURN_FUNC(memset_func, memset_avx2);
+  RETURN_FUNC(memset_func, memset_generic);  // No AVX2 reported: use the generic version.
+}
+
+typedef void* __memset_chk_func(void* s, int c, size_t n, size_t n2);  // n2: size of buffer s.
+DEFINE_IFUNC_FOR(__memset_chk) {  // Same selection rule as memset, for the checked entry.
+  __builtin_cpu_init();  // Initialize CPU feature data before querying it.
+  if (__builtin_cpu_supports("avx2")) RETURN_FUNC(__memset_chk_func, __memset_chk_avx2);
+  RETURN_FUNC(__memset_chk_func, __memset_chk_generic);  // Fallback when AVX2 is not reported.
+}
+
+}  // extern "C"
diff --git a/libc/arch-x86_64/static_function_dispatch.S b/libc/arch-x86_64/static_function_dispatch.S
new file mode 100644
index 0000000..93ff5f2
--- /dev/null
+++ b/libc/arch-x86_64/static_function_dispatch.S
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+
+#define FUNCTION_DELEGATE(name, impl) /* Define name as a stub that jumps to impl. */ \
+ENTRY(name); \
+    jmp impl; \
+END(name)
+
+FUNCTION_DELEGATE(memset, memset_generic)  // No CPU check here: always the generic memset.
+FUNCTION_DELEGATE(__memset_chk, __memset_chk_generic)  // Likewise for __memset_chk.
diff --git a/libc/arch-x86_64/string/avx2-memset-kbl.S b/libc/arch-x86_64/string/avx2-memset-kbl.S
new file mode 100644
index 0000000..09dd07d
--- /dev/null
+++ b/libc/arch-x86_64/string/avx2-memset-kbl.S
@@ -0,0 +1,160 @@
+/*
+Copyright (c) 2014, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    * this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+    * this list of conditions and the following disclaimer in the documentation
+    * and/or other materials provided with the distribution.
+
+    * Neither the name of Intel Corporation nor the names of its contributors
+    * may be used to endorse or promote products derived from this software
+    * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <private/bionic_asm.h>
+
+#include "cache.h"
+
+#ifndef L
+# define L(label)	.L##label
+#endif
+
+#ifndef ALIGN
+# define ALIGN(n)	.p2align n
+#endif
+
+	.section .text.avx2,"ax",@progbits
+
+ENTRY(__memset_chk_avx2)
+	// In: %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len.
+	cmpq	%rcx, %rdx
+	ja	__memset_chk_fail	// n > dst_len (unsigned): hand off to the failure handler.
+	// n fits in dst: run straight on into memset_avx2, which follows immediately.
+END(__memset_chk_avx2)
+
+ENTRY(memset_avx2)
+	// void* memset_avx2(void* dst, int ch, size_t n): %rdi = dst, %rsi = ch, %rdx = n.
+	// Returns dst in %rax; scratch: %rcx, %r8, %xmm0/%ymm0, flags.
+	// Paths that widen the fill to %ymm0 exit through vzeroupper; the rest never touch AVX.
+	movq	%rdi, %rax		// Return value is always dst.
+	and	$0xff, %rsi
+	mov	$0x0101010101010101, %rcx
+	imul	%rsi, %rcx		// %rcx = fill byte replicated into all 8 bytes.
+	cmpq	$16, %rdx
+	jae	L(16bytesormore)
+	testb	$8, %dl			// n < 16: dispatch on the highest set bit of n.
+	jnz	L(8_15bytes)
+	testb	$4, %dl
+	jnz	L(4_7bytes)
+	testb	$2, %dl
+	jnz	L(2_3bytes)
+	testb	$1, %dl
+	jz	L(return)		// n == 0: nothing to store.
+	movb	%cl, (%rdi)
+L(return):
+	ret
+
+L(8_15bytes):
+	movq	%rcx, (%rdi)		// Head and tail stores may overlap; together they cover n.
+	movq	%rcx, -8(%rdi, %rdx)
+	ret
+
+L(4_7bytes):
+	movl	%ecx, (%rdi)
+	movl	%ecx, -4(%rdi, %rdx)
+	ret
+
+L(2_3bytes):
+	movw	%cx, (%rdi)
+	movw	%cx, -2(%rdi, %rdx)
+	ret
+
+	ALIGN (4)
+L(16bytesormore):
+	movd	%rcx, %xmm0
+	pshufd	$0, %xmm0, %xmm0	// All 16 bytes of %xmm0 = fill byte.
+	movdqu	%xmm0, (%rdi)
+	movdqu	%xmm0, -16(%rdi, %rdx)
+	cmpq	$32, %rdx
+	jbe	L(return)		// n <= 32: done, no AVX instruction was executed.
+	movdqu	%xmm0, 16(%rdi)
+	movdqu	%xmm0, -32(%rdi, %rdx)
+	cmpq	$64, %rdx
+	jbe	L(return)
+	movdqu	%xmm0, 32(%rdi)
+	movdqu	%xmm0, 48(%rdi)
+	movdqu	%xmm0, -64(%rdi, %rdx)
+	movdqu	%xmm0, -48(%rdi, %rdx)
+	cmpq	$128, %rdx
+	jbe	L(return)
+	vpbroadcastb %xmm0, %ymm0	// From here on the upper half of %ymm0 is in use.
+	vmovdqu	%ymm0, 64(%rdi)
+	vmovdqu	%ymm0, 96(%rdi)
+	vmovdqu	%ymm0, -128(%rdi, %rdx)
+	vmovdqu	%ymm0, -96(%rdi, %rdx)
+	cmpq	$256, %rdx
+	ja	L(256bytesmore)
+	vzeroupper			// Clear upper YMM state before returning to the caller.
+	ret
+
+	ALIGN (4)
+L(256bytesmore):
+	leaq	128(%rdi), %rcx
+	andq	$-128, %rcx		// %rcx = first 128-byte boundary strictly above dst.
+	movq	%rdx, %r8		// %r8 = n, kept for the cache-size comparison.
+	addq	%rdi, %rdx
+	andq	$-128, %rdx		// %rdx = dst + n rounded down to 128: loop end.
+	cmpq	%rcx, %rdx
+	je	L(return_vzeroupper)	// Head/tail stores already covered the whole range.
+
+#ifdef SHARED_CACHE_SIZE
+	cmp	$SHARED_CACHE_SIZE, %r8
+#else
+	cmp	__x86_64_shared_cache_size(%rip), %r8
+#endif
+	ja	L(256bytesmore_nt)	// n above the shared cache size: non-temporal stores.
+
+	ALIGN (4)
+L(256bytesmore_normal):
+	vmovdqa	%ymm0, (%rcx)
+	vmovdqa	%ymm0, 32(%rcx)
+	vmovdqa	%ymm0, 64(%rcx)
+	vmovdqa	%ymm0, 96(%rcx)
+	addq	$128, %rcx
+	cmpq	%rcx, %rdx
+	jne	L(256bytesmore_normal)
+L(return_vzeroupper):
+	vzeroupper
+	ret
+
+	ALIGN (4)
+L(256bytesmore_nt):
+	vmovntdq %ymm0, (%rcx)		// %rcx is 128-byte aligned, so 32-byte NT stores are legal.
+	vmovntdq %ymm0, 32(%rcx)
+	vmovntdq %ymm0, 64(%rcx)
+	vmovntdq %ymm0, 96(%rcx)
+	leaq	128(%rcx), %rcx
+	cmpq	%rcx, %rdx
+	jne	L(256bytesmore_nt)
+	sfence				// Order the non-temporal stores before returning.
+	vzeroupper
+	ret
+
+END(memset_avx2)
diff --git a/libc/arch-x86_64/string/avx2-wmemset-kbl.S b/libc/arch-x86_64/string/avx2-wmemset-kbl.S
deleted file mode 100644
index 7c485cf..0000000
--- a/libc/arch-x86_64/string/avx2-wmemset-kbl.S
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
-Copyright (C) 2019 The Android Open Source Project
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
- * Redistributions of source code must retain the above copyright
-   notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
-   notice, this list of conditions and the following disclaimer in
-   the documentation and/or other materials provided with the
-   distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-*/
-
-#include <private/bionic_asm.h>
-
-#ifndef WMEMSET
- #define WMEMSET wmemset_avx2
-#endif
-
-        .section .text.avx2,"ax",@progbits
-
-ENTRY (WMEMSET)
-# BB#0:
-	testq	%rdx, %rdx
-	je	.LBB0_14
-# BB#1:
-	cmpq	$32, %rdx
-	jae	.LBB0_3
-# BB#2:
-	xorl	%r8d, %r8d
-	movq	%rdi, %rax
-	jmp	.LBB0_12
-.LBB0_3:
-	movq	%rdx, %r8
-	andq	$-32, %r8
-	vmovd	%esi, %xmm0
-	vpbroadcastd	%xmm0, %ymm0
-	leaq	-32(%r8), %rcx
-	movq	%rcx, %rax
-	shrq	$5, %rax
-	leal	1(%rax), %r9d
-	andl	$7, %r9d
-	cmpq	$224, %rcx
-	jae	.LBB0_5
-# BB#4:
-	xorl	%eax, %eax
-	testq	%r9, %r9
-	jne	.LBB0_8
-	jmp	.LBB0_10
-.LBB0_5:
-	leaq	992(%rdi), %rcx
-	leaq	-1(%r9), %r10
-	subq	%rax, %r10
-	xorl	%eax, %eax
-	.p2align	4, 0x90
-.LBB0_6:                                # =>This Inner Loop Header: Depth=1
-	vmovdqu	%ymm0, -992(%rcx,%rax,4)
-	vmovdqu	%ymm0, -960(%rcx,%rax,4)
-	vmovdqu	%ymm0, -928(%rcx,%rax,4)
-	vmovdqu	%ymm0, -896(%rcx,%rax,4)
-	vmovdqu	%ymm0, -864(%rcx,%rax,4)
-	vmovdqu	%ymm0, -832(%rcx,%rax,4)
-	vmovdqu	%ymm0, -800(%rcx,%rax,4)
-	vmovdqu	%ymm0, -768(%rcx,%rax,4)
-	vmovdqu	%ymm0, -736(%rcx,%rax,4)
-	vmovdqu	%ymm0, -704(%rcx,%rax,4)
-	vmovdqu	%ymm0, -672(%rcx,%rax,4)
-	vmovdqu	%ymm0, -640(%rcx,%rax,4)
-	vmovdqu	%ymm0, -608(%rcx,%rax,4)
-	vmovdqu	%ymm0, -576(%rcx,%rax,4)
-	vmovdqu	%ymm0, -544(%rcx,%rax,4)
-	vmovdqu	%ymm0, -512(%rcx,%rax,4)
-	vmovdqu	%ymm0, -480(%rcx,%rax,4)
-	vmovdqu	%ymm0, -448(%rcx,%rax,4)
-	vmovdqu	%ymm0, -416(%rcx,%rax,4)
-	vmovdqu	%ymm0, -384(%rcx,%rax,4)
-	vmovdqu	%ymm0, -352(%rcx,%rax,4)
-	vmovdqu	%ymm0, -320(%rcx,%rax,4)
-	vmovdqu	%ymm0, -288(%rcx,%rax,4)
-	vmovdqu	%ymm0, -256(%rcx,%rax,4)
-	vmovdqu	%ymm0, -224(%rcx,%rax,4)
-	vmovdqu	%ymm0, -192(%rcx,%rax,4)
-	vmovdqu	%ymm0, -160(%rcx,%rax,4)
-	vmovdqu	%ymm0, -128(%rcx,%rax,4)
-	vmovdqu	%ymm0, -96(%rcx,%rax,4)
-	vmovdqu	%ymm0, -64(%rcx,%rax,4)
-	vmovdqu	%ymm0, -32(%rcx,%rax,4)
-	vmovdqu	%ymm0, (%rcx,%rax,4)
-	addq	$256, %rax              # imm = 0x100
-	addq	$8, %r10
-	jne	.LBB0_6
-# BB#7:
-	testq	%r9, %r9
-	je	.LBB0_10
-.LBB0_8:
-	leaq	(%rdi,%rax,4), %rax
-	addq	$96, %rax
-	negq	%r9
-	.p2align	4, 0x90
-.LBB0_9:                                # =>This Inner Loop Header: Depth=1
-	vmovdqu	%ymm0, -96(%rax)
-	vmovdqu	%ymm0, -64(%rax)
-	vmovdqu	%ymm0, -32(%rax)
-	vmovdqu	%ymm0, (%rax)
-	subq	$-128, %rax
-	addq	$1, %r9
-	jne	.LBB0_9
-.LBB0_10:
-	cmpq	%rdx, %r8
-	je	.LBB0_14
-# BB#11:
-	leaq	(%rdi,%r8,4), %rax
-.LBB0_12:
-	subq	%r8, %rdx
-	.p2align	4, 0x90
-.LBB0_13:                               # =>This Inner Loop Header: Depth=1
-	movl	%esi, (%rax)
-	addq	$4, %rax
-	addq	$-1, %rdx
-	jne	.LBB0_13
-.LBB0_14:
-	movq	%rdi, %rax
-	vzeroupper
-	retq
-END(WMEMSET)
diff --git a/libc/arch-x86_64/string/sse2-memset-slm.S b/libc/arch-x86_64/string/sse2-memset-slm.S
index fc502c0..cceadd2 100644
--- a/libc/arch-x86_64/string/sse2-memset-slm.S
+++ b/libc/arch-x86_64/string/sse2-memset-slm.S
@@ -41,16 +41,16 @@
 #endif
 
 
-ENTRY(__memset_chk)
+ENTRY(__memset_chk_generic)
   # %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
   cmp %rcx, %rdx
   ja __memset_chk_fail
   // Fall through to memset...
-END(__memset_chk)
+END(__memset_chk_generic)
 
 
 	.section .text.sse2,"ax",@progbits
-ENTRY(memset)
+ENTRY(memset_generic)
 	movq	%rdi, %rax
 	and	$0xff, %rsi
 	mov	$0x0101010101010101, %rcx
@@ -146,4 +146,4 @@
 	sfence
 	ret
 
-END(memset)
+END(memset_generic)