Merge "avx2 implementation for memset."
diff --git a/libc/Android.bp b/libc/Android.bp
index 3a8948b..53db888 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -376,7 +376,6 @@
"upstream-freebsd/lib/libc/string/wcscat.c",
"upstream-freebsd/lib/libc/string/wcscpy.c",
"upstream-freebsd/lib/libc/string/wmemcmp.c",
- "upstream-freebsd/lib/libc/string/wmemset.c",
],
},
},
@@ -927,7 +926,6 @@
"arch-x86/generic/string/wcscat.c",
"arch-x86/generic/string/wcscpy.c",
"arch-x86/generic/string/wmemcmp.c",
- "arch-x86/generic/string/wmemset.c",
"arch-x86/atom/string/sse2-memchr-atom.S",
"arch-x86/atom/string/sse2-memrchr-atom.S",
@@ -977,9 +975,6 @@
"arch-x86/atom/string/ssse3-strcpy-atom.S",
"arch-x86/atom/string/ssse3-strncpy-atom.S",
"arch-x86/atom/string/ssse3-wmemcmp-atom.S",
-
- // avx2 functions
- "arch-x86/kabylake/string/avx2-wmemset-kbl.S",
],
exclude_srcs: [
@@ -990,6 +985,7 @@
},
x86_64: {
srcs: [
+ "arch-x86_64/string/avx2-memset-kbl.S",
"arch-x86_64/string/sse2-memmove-slm.S",
"arch-x86_64/string/sse2-memset-slm.S",
"arch-x86_64/string/sse2-stpcpy-slm.S",
@@ -1002,7 +998,6 @@
"arch-x86_64/string/sse4-memcmp-slm.S",
"arch-x86_64/string/ssse3-strcmp-slm.S",
"arch-x86_64/string/ssse3-strncmp-slm.S",
- "arch-x86_64/string/avx2-wmemset-kbl.S",
"arch-x86_64/bionic/__bionic_clone.S",
"arch-x86_64/bionic/_exit_with_stack_teardown.S",
@@ -1515,6 +1510,9 @@
name: "libc_static_dispatch",
arch: {
+ x86_64: {
+ srcs: ["arch-x86_64/static_function_dispatch.S"],
+ },
x86: {
srcs: ["arch-x86/static_function_dispatch.S"],
},
@@ -1540,6 +1538,9 @@
"-fno-jump-tables",
],
arch: {
+ x86_64: {
+ srcs: ["arch-x86_64/dynamic_function_dispatch.cpp"],
+ },
x86: {
srcs: ["arch-x86/dynamic_function_dispatch.cpp"],
},
diff --git a/libc/NOTICE b/libc/NOTICE
index 9cbbde2..fa3dd2c 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -783,22 +783,6 @@
-------------------------------------------------------------------
Copyright (C) 2019 The Android Open Source Project
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
--------------------------------------------------------------------
-
-Copyright (C) 2019 The Android Open Source Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -826,34 +810,6 @@
-------------------------------------------------------------------
-Copyright (C) 2019 The Android Open Source Project
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-
--------------------------------------------------------------------
-
Copyright (C) 2020 The Android Open Source Project
All rights reserved.
diff --git a/libc/arch-x86/dynamic_function_dispatch.cpp b/libc/arch-x86/dynamic_function_dispatch.cpp
index e94fa1f..38d8a0a 100644
--- a/libc/arch-x86/dynamic_function_dispatch.cpp
+++ b/libc/arch-x86/dynamic_function_dispatch.cpp
@@ -95,13 +95,6 @@
RETURN_FUNC(wmemcmp_func, wmemcmp_freebsd);
}
-typedef int wmemset_func(const wchar_t* __lhs, const wchar_t* __rhs, size_t __n);
-DEFINE_IFUNC_FOR(wmemset) {
- __builtin_cpu_init();
- if (__builtin_cpu_supports("avx2")) RETURN_FUNC(wmemset_func, wmemset_avx2);
- RETURN_FUNC(wmemset_func, wmemset_freebsd);
-}
-
typedef int strcmp_func(const char* __lhs, const char* __rhs);
DEFINE_IFUNC_FOR(strcmp) {
__builtin_cpu_init();
diff --git a/libc/arch-x86/generic/string/wmemset.c b/libc/arch-x86/generic/string/wmemset.c
deleted file mode 100644
index 35d489f..0000000
--- a/libc/arch-x86/generic/string/wmemset.c
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (C) 2019 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-#define wmemset wmemset_freebsd
-
-#include <upstream-freebsd/lib/libc/string/wmemset.c>
diff --git a/libc/arch-x86/kabylake/string/avx2-wmemset-kbl.S b/libc/arch-x86/kabylake/string/avx2-wmemset-kbl.S
deleted file mode 100644
index 69b66c7..0000000
--- a/libc/arch-x86/kabylake/string/avx2-wmemset-kbl.S
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
-Copyright (C) 2019 The Android Open Source Project
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-*/
-
-#include <private/bionic_asm.h>
-
-#ifndef WMEMSET
- #define WMEMSET wmemset_avx2
-#endif
-
-ENTRY(WMEMSET)
-# BB#0:
- pushl %ebp
- pushl %ebx
- pushl %edi
- pushl %esi
- pushl %eax
- movl 32(%esp), %ecx
- movl 24(%esp), %eax
- testl %ecx, %ecx
- je .LBB0_12
-# BB#1:
- movl 28(%esp), %edx
- xorl %edi, %edi
- movl %eax, %esi
- cmpl $32, %ecx
- jb .LBB0_10
-# BB#2:
- movl %ecx, %eax
- andl $-32, %eax
- vmovd %edx, %xmm0
- vpbroadcastd %xmm0, %ymm0
- movl %eax, (%esp) # 4-byte Spill
- leal -32(%eax), %esi
- movl %esi, %eax
- shrl $5, %eax
- leal 1(%eax), %edi
- andl $7, %edi
- xorl %ebx, %ebx
- cmpl $224, %esi
- jb .LBB0_5
-# BB#3:
- movl 24(%esp), %esi
- leal 992(%esi), %ebp
- leal -1(%edi), %esi
- subl %eax, %esi
- xorl %ebx, %ebx
- .p2align 4, 0x90
-.LBB0_4: # =>This Inner Loop Header: Depth=1
- vmovdqu %ymm0, -992(%ebp,%ebx,4)
- vmovdqu %ymm0, -960(%ebp,%ebx,4)
- vmovdqu %ymm0, -928(%ebp,%ebx,4)
- vmovdqu %ymm0, -896(%ebp,%ebx,4)
- vmovdqu %ymm0, -864(%ebp,%ebx,4)
- vmovdqu %ymm0, -832(%ebp,%ebx,4)
- vmovdqu %ymm0, -800(%ebp,%ebx,4)
- vmovdqu %ymm0, -768(%ebp,%ebx,4)
- vmovdqu %ymm0, -736(%ebp,%ebx,4)
- vmovdqu %ymm0, -704(%ebp,%ebx,4)
- vmovdqu %ymm0, -672(%ebp,%ebx,4)
- vmovdqu %ymm0, -640(%ebp,%ebx,4)
- vmovdqu %ymm0, -608(%ebp,%ebx,4)
- vmovdqu %ymm0, -576(%ebp,%ebx,4)
- vmovdqu %ymm0, -544(%ebp,%ebx,4)
- vmovdqu %ymm0, -512(%ebp,%ebx,4)
- vmovdqu %ymm0, -480(%ebp,%ebx,4)
- vmovdqu %ymm0, -448(%ebp,%ebx,4)
- vmovdqu %ymm0, -416(%ebp,%ebx,4)
- vmovdqu %ymm0, -384(%ebp,%ebx,4)
- vmovdqu %ymm0, -352(%ebp,%ebx,4)
- vmovdqu %ymm0, -320(%ebp,%ebx,4)
- vmovdqu %ymm0, -288(%ebp,%ebx,4)
- vmovdqu %ymm0, -256(%ebp,%ebx,4)
- vmovdqu %ymm0, -224(%ebp,%ebx,4)
- vmovdqu %ymm0, -192(%ebp,%ebx,4)
- vmovdqu %ymm0, -160(%ebp,%ebx,4)
- vmovdqu %ymm0, -128(%ebp,%ebx,4)
- vmovdqu %ymm0, -96(%ebp,%ebx,4)
- vmovdqu %ymm0, -64(%ebp,%ebx,4)
- vmovdqu %ymm0, -32(%ebp,%ebx,4)
- vmovdqu %ymm0, (%ebp,%ebx,4)
- addl $256, %ebx # imm = 0x100
- addl $8, %esi
- jne .LBB0_4
-.LBB0_5:
- testl %edi, %edi
- movl 24(%esp), %eax
- je .LBB0_8
-# BB#6:
- leal (%eax,%ebx,4), %esi
- addl $96, %esi
- negl %edi
- .p2align 4, 0x90
-.LBB0_7: # =>This Inner Loop Header: Depth=1
- vmovdqu %ymm0, -96(%esi)
- vmovdqu %ymm0, -64(%esi)
- vmovdqu %ymm0, -32(%esi)
- vmovdqu %ymm0, (%esi)
- subl $-128, %esi
- addl $1, %edi
- jne .LBB0_7
-.LBB0_8:
- movl (%esp), %edi # 4-byte Reload
- cmpl %ecx, %edi
- je .LBB0_12
-# BB#9:
- leal (%eax,%edi,4), %esi
-.LBB0_10:
- subl %edi, %ecx
- .p2align 4, 0x90
-.LBB0_11: # =>This Inner Loop Header: Depth=1
- movl %edx, (%esi)
- addl $4, %esi
- addl $-1, %ecx
- jne .LBB0_11
-.LBB0_12:
- addl $4, %esp
- popl %esi
- popl %edi
- popl %ebx
- popl %ebp
- vzeroupper
- retl
-END(WMEMSET)
diff --git a/libc/arch-x86/static_function_dispatch.S b/libc/arch-x86/static_function_dispatch.S
index 1560c04..7e8e63d 100644
--- a/libc/arch-x86/static_function_dispatch.S
+++ b/libc/arch-x86/static_function_dispatch.S
@@ -45,7 +45,6 @@
FUNCTION_DELEGATE(strncmp, strncmp_generic)
FUNCTION_DELEGATE(strcat, strcat_generic)
FUNCTION_DELEGATE(wmemcmp, wmemcmp_freebsd)
-FUNCTION_DELEGATE(wmemset, wmemset_freebsd)
FUNCTION_DELEGATE(wcscat, wcscat_freebsd)
FUNCTION_DELEGATE(strncat, strncat_openbsd)
FUNCTION_DELEGATE(strlcat, strlcat_openbsd)
diff --git a/libc/arch-x86_64/dynamic_function_dispatch.cpp b/libc/arch-x86_64/dynamic_function_dispatch.cpp
new file mode 100644
index 0000000..c846ded
--- /dev/null
+++ b/libc/arch-x86_64/dynamic_function_dispatch.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+
+#include <private/bionic_ifuncs.h>
+
+extern "C" {
+
+// Signature of the memset implementations selected below. The return type is
+// void* to match memset itself and the sibling __memset_chk_func typedef; it
+// was previously declared as int.
+typedef void* memset_func(void* __dst, int __ch, size_t __n);
+// Resolver body for memset: initializes the compiler's CPU feature data, then
+// selects memset_avx2 when AVX2 is reported and memset_generic otherwise.
+// DEFINE_IFUNC_FOR and RETURN_FUNC come from <private/bionic_ifuncs.h>, which
+// is not shown in this change.
+DEFINE_IFUNC_FOR(memset) {
+ __builtin_cpu_init();
+ if (__builtin_cpu_supports("avx2")) RETURN_FUNC(memset_func, memset_avx2);
+ RETURN_FUNC(memset_func, memset_generic);
+}
+
+// Signature of the __memset_chk implementations selected below.
+// NOTE(review): the avx2 implementation documents its arguments as
+// dst, byte, n, dst_len, which is presumably what s, c, n, n2 stand for.
+typedef void* __memset_chk_func(void* s, int c, size_t n, size_t n2);
+// Resolver body for __memset_chk: initializes the compiler's CPU feature
+// data, then selects __memset_chk_avx2 when AVX2 is reported and
+// __memset_chk_generic otherwise. DEFINE_IFUNC_FOR and RETURN_FUNC come from
+// <private/bionic_ifuncs.h>, which is not shown in this change.
+DEFINE_IFUNC_FOR(__memset_chk) {
+ __builtin_cpu_init();
+ if (__builtin_cpu_supports("avx2")) RETURN_FUNC(__memset_chk_func, __memset_chk_avx2);
+ RETURN_FUNC(__memset_chk_func, __memset_chk_generic);
+}
+
+} // extern "C"
diff --git a/libc/arch-x86_64/static_function_dispatch.S b/libc/arch-x86_64/static_function_dispatch.S
new file mode 100644
index 0000000..93ff5f2
--- /dev/null
+++ b/libc/arch-x86_64/static_function_dispatch.S
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+
+// FUNCTION_DELEGATE(symbol, target) defines the function `symbol` (via the
+// ENTRY/END macros from bionic_asm.h) whose only instruction is a jump to
+// `target`.
+#define FUNCTION_DELEGATE(symbol, target) \
+ENTRY(symbol); \
+ jmp target; \
+END(symbol)
+
+// No CPU feature detection in this file: both entry points are hard-wired to
+// the *_generic implementations.
+FUNCTION_DELEGATE(memset, memset_generic)
+FUNCTION_DELEGATE(__memset_chk, __memset_chk_generic)
diff --git a/libc/arch-x86_64/string/avx2-memset-kbl.S b/libc/arch-x86_64/string/avx2-memset-kbl.S
new file mode 100644
index 0000000..09dd07d
--- /dev/null
+++ b/libc/arch-x86_64/string/avx2-memset-kbl.S
@@ -0,0 +1,160 @@
+/*
+Copyright (c) 2014, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+
+ * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <private/bionic_asm.h>
+
+#include "cache.h"
+
+// L(name) expands to the assembler-local label .Lname. The #ifndef guard
+// keeps any definition already provided by an included header.
+#ifndef L
+# define L(label) .L##label
+#endif
+
+// ALIGN(n) aligns the next instruction to a 2^n-byte boundary (.p2align).
+#ifndef ALIGN
+# define ALIGN(n) .p2align n
+#endif
+
+// Select the .text.avx2 section ("ax": allocatable and executable).
+// NOTE(review): ENTRY from bionic_asm.h is not shown here and may switch
+// sections again; confirm where the functions below actually land.
+ .section .text.avx2,"ax",@progbits
+
+// __memset_chk_avx2: bounds-checked entry point in front of memset_avx2.
+//   %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
+// Jumps to __memset_chk_fail when n > dst_len (unsigned compare); otherwise
+// execution runs off the end of this function into memset_avx2, which must
+// therefore stay directly below it.
+ENTRY(__memset_chk_avx2)
+ cmpq %rcx, %rdx
+ ja __memset_chk_fail
+ // n <= dst_len: continue into memset_avx2.
+END(__memset_chk_avx2)
+
+// void* memset_avx2(void* dst, int c, size_t n)
+//
+// In:       %rdi = dst, %rsi = fill byte (only the low 8 bits are used),
+//           %rdx = n
+// Out:      %rax = dst
+// Clobbers: %rcx, %rdx, %rsi, %r8, %xmm0/%ymm0, flags
+//
+// Strategy: sizes below 16 use overlapping scalar stores; 16..256 bytes use
+// overlapping unaligned vector stores anchored at both ends of the buffer;
+// larger sizes additionally fill the 128-byte-aligned middle in a loop,
+// switching to non-temporal stores once n exceeds the shared cache size.
+//
+// Every path that has written %ymm0 executes vzeroupper before returning, so
+// SSE code in the caller does not run with dirty upper YMM state. Paths that
+// only used %xmm0 return without it.
+ENTRY(memset_avx2)
+ // %rax = return value (dst).
+ movq %rdi, %rax
+ // %rcx = fill byte replicated into all 8 bytes.
+ and $0xff, %rsi
+ mov $0x0101010101010101, %rcx
+ imul %rsi, %rcx
+ cmpq $16, %rdx
+ jae L(16bytesormore)
+ // n < 16: the highest set bit of n picks the store width; two possibly
+ // overlapping stores, one from each end, then cover the whole buffer.
+ testb $8, %dl
+ jnz L(8_15bytes)
+ testb $4, %dl
+ jnz L(4_7bytes)
+ testb $2, %dl
+ jnz L(2_3bytes)
+ testb $1, %dl
+ jz L(return)
+ movb %cl, (%rdi)
+L(return):
+ ret
+
+L(8_15bytes):
+ movq %rcx, (%rdi)
+ movq %rcx, -8(%rdi, %rdx)
+ ret
+
+L(4_7bytes):
+ movl %ecx, (%rdi)
+ movl %ecx, -4(%rdi, %rdx)
+ ret
+
+L(2_3bytes):
+ movw %cx, (%rdi)
+ movw %cx, -2(%rdi, %rdx)
+ ret
+
+ ALIGN (4)
+L(16bytesormore):
+ // %xmm0 = 16 copies of the fill byte.
+ movd %rcx, %xmm0
+ pshufd $0, %xmm0, %xmm0
+ movdqu %xmm0, (%rdi)
+ movdqu %xmm0, -16(%rdi, %rdx)
+ cmpq $32, %rdx
+ jbe L(32bytesless)
+ movdqu %xmm0, 16(%rdi)
+ movdqu %xmm0, -32(%rdi, %rdx)
+ cmpq $64, %rdx
+ jbe L(64bytesless)
+ movdqu %xmm0, 32(%rdi)
+ movdqu %xmm0, 48(%rdi)
+ movdqu %xmm0, -64(%rdi, %rdx)
+ movdqu %xmm0, -48(%rdi, %rdx)
+ cmpq $128, %rdx
+ jbe L(128bytesless)
+ // n > 128: widen to 32-byte stores. The upper half of %ymm0 is dirty from
+ // here on, so every exit below goes through vzeroupper.
+ vpbroadcastb %xmm0, %ymm0
+ vmovdqu %ymm0, 64(%rdi)
+ vmovdqu %ymm0, 96(%rdi)
+ vmovdqu %ymm0, -128(%rdi, %rdx)
+ vmovdqu %ymm0, -96(%rdi, %rdx)
+ cmpq $256, %rdx
+ ja L(256bytesmore)
+ // 128 < n <= 256: done. Clean the upper YMM state, then fall into the
+ // return shared with the SSE-only paths (which jump past the vzeroupper).
+ vzeroupper
+L(32bytesless):
+L(64bytesless):
+L(128bytesless):
+ ret
+
+ ALIGN (4)
+L(256bytesmore):
+ // The first and last 128 bytes are already filled. Fill the 128-byte
+ // aligned region in between:
+ //   %rcx = first 128-byte-aligned address above dst
+ //   %rdx = dst + n rounded down to a 128-byte boundary
+ //   %r8  = n
+ leaq 128(%rdi), %rcx
+ andq $-128, %rcx
+ movq %rdx, %r8
+ addq %rdi, %rdx
+ andq $-128, %rdx
+ cmpq %rcx, %rdx
+ je L(avx_return)
+
+#ifdef SHARED_CACHE_SIZE
+ cmp $SHARED_CACHE_SIZE, %r8
+#else
+ cmp __x86_64_shared_cache_size(%rip), %r8
+#endif
+ ja L(256bytesmore_nt)
+
+ ALIGN (4)
+L(256bytesmore_normal):
+ vmovdqa %ymm0, (%rcx)
+ vmovdqa %ymm0, 32(%rcx)
+ vmovdqa %ymm0, 64(%rcx)
+ vmovdqa %ymm0, 96(%rcx)
+ addq $128, %rcx
+ cmpq %rcx, %rdx
+ jne L(256bytesmore_normal)
+L(avx_return):
+ vzeroupper
+ ret
+
+ // n exceeds the shared cache size: use non-temporal stores. They are
+ // VEX-encoded (vmovntdq) so that no legacy SSE instruction runs while the
+ // upper half of %ymm0 is dirty; the bytes written are the same as with
+ // movntdq.
+ ALIGN (4)
+L(256bytesmore_nt):
+ vmovntdq %xmm0, (%rcx)
+ vmovntdq %xmm0, 16(%rcx)
+ vmovntdq %xmm0, 32(%rcx)
+ vmovntdq %xmm0, 48(%rcx)
+ vmovntdq %xmm0, 64(%rcx)
+ vmovntdq %xmm0, 80(%rcx)
+ vmovntdq %xmm0, 96(%rcx)
+ vmovntdq %xmm0, 112(%rcx)
+ leaq 128(%rcx), %rcx
+ cmpq %rcx, %rdx
+ jne L(256bytesmore_nt)
+ // Order the non-temporal stores before returning to the caller.
+ sfence
+ vzeroupper
+ ret
+
+END(memset_avx2)
diff --git a/libc/arch-x86_64/string/avx2-wmemset-kbl.S b/libc/arch-x86_64/string/avx2-wmemset-kbl.S
deleted file mode 100644
index 7c485cf..0000000
--- a/libc/arch-x86_64/string/avx2-wmemset-kbl.S
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
-Copyright (C) 2019 The Android Open Source Project
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in
- the documentation and/or other materials provided with the
- distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
-OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
-AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
-OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGE.
-*/
-
-#include <private/bionic_asm.h>
-
-#ifndef WMEMSET
- #define WMEMSET wmemset_avx2
-#endif
-
- .section .text.avx2,"ax",@progbits
-
-ENTRY (WMEMSET)
-# BB#0:
- testq %rdx, %rdx
- je .LBB0_14
-# BB#1:
- cmpq $32, %rdx
- jae .LBB0_3
-# BB#2:
- xorl %r8d, %r8d
- movq %rdi, %rax
- jmp .LBB0_12
-.LBB0_3:
- movq %rdx, %r8
- andq $-32, %r8
- vmovd %esi, %xmm0
- vpbroadcastd %xmm0, %ymm0
- leaq -32(%r8), %rcx
- movq %rcx, %rax
- shrq $5, %rax
- leal 1(%rax), %r9d
- andl $7, %r9d
- cmpq $224, %rcx
- jae .LBB0_5
-# BB#4:
- xorl %eax, %eax
- testq %r9, %r9
- jne .LBB0_8
- jmp .LBB0_10
-.LBB0_5:
- leaq 992(%rdi), %rcx
- leaq -1(%r9), %r10
- subq %rax, %r10
- xorl %eax, %eax
- .p2align 4, 0x90
-.LBB0_6: # =>This Inner Loop Header: Depth=1
- vmovdqu %ymm0, -992(%rcx,%rax,4)
- vmovdqu %ymm0, -960(%rcx,%rax,4)
- vmovdqu %ymm0, -928(%rcx,%rax,4)
- vmovdqu %ymm0, -896(%rcx,%rax,4)
- vmovdqu %ymm0, -864(%rcx,%rax,4)
- vmovdqu %ymm0, -832(%rcx,%rax,4)
- vmovdqu %ymm0, -800(%rcx,%rax,4)
- vmovdqu %ymm0, -768(%rcx,%rax,4)
- vmovdqu %ymm0, -736(%rcx,%rax,4)
- vmovdqu %ymm0, -704(%rcx,%rax,4)
- vmovdqu %ymm0, -672(%rcx,%rax,4)
- vmovdqu %ymm0, -640(%rcx,%rax,4)
- vmovdqu %ymm0, -608(%rcx,%rax,4)
- vmovdqu %ymm0, -576(%rcx,%rax,4)
- vmovdqu %ymm0, -544(%rcx,%rax,4)
- vmovdqu %ymm0, -512(%rcx,%rax,4)
- vmovdqu %ymm0, -480(%rcx,%rax,4)
- vmovdqu %ymm0, -448(%rcx,%rax,4)
- vmovdqu %ymm0, -416(%rcx,%rax,4)
- vmovdqu %ymm0, -384(%rcx,%rax,4)
- vmovdqu %ymm0, -352(%rcx,%rax,4)
- vmovdqu %ymm0, -320(%rcx,%rax,4)
- vmovdqu %ymm0, -288(%rcx,%rax,4)
- vmovdqu %ymm0, -256(%rcx,%rax,4)
- vmovdqu %ymm0, -224(%rcx,%rax,4)
- vmovdqu %ymm0, -192(%rcx,%rax,4)
- vmovdqu %ymm0, -160(%rcx,%rax,4)
- vmovdqu %ymm0, -128(%rcx,%rax,4)
- vmovdqu %ymm0, -96(%rcx,%rax,4)
- vmovdqu %ymm0, -64(%rcx,%rax,4)
- vmovdqu %ymm0, -32(%rcx,%rax,4)
- vmovdqu %ymm0, (%rcx,%rax,4)
- addq $256, %rax # imm = 0x100
- addq $8, %r10
- jne .LBB0_6
-# BB#7:
- testq %r9, %r9
- je .LBB0_10
-.LBB0_8:
- leaq (%rdi,%rax,4), %rax
- addq $96, %rax
- negq %r9
- .p2align 4, 0x90
-.LBB0_9: # =>This Inner Loop Header: Depth=1
- vmovdqu %ymm0, -96(%rax)
- vmovdqu %ymm0, -64(%rax)
- vmovdqu %ymm0, -32(%rax)
- vmovdqu %ymm0, (%rax)
- subq $-128, %rax
- addq $1, %r9
- jne .LBB0_9
-.LBB0_10:
- cmpq %rdx, %r8
- je .LBB0_14
-# BB#11:
- leaq (%rdi,%r8,4), %rax
-.LBB0_12:
- subq %r8, %rdx
- .p2align 4, 0x90
-.LBB0_13: # =>This Inner Loop Header: Depth=1
- movl %esi, (%rax)
- addq $4, %rax
- addq $-1, %rdx
- jne .LBB0_13
-.LBB0_14:
- movq %rdi, %rax
- vzeroupper
- retq
-END(WMEMSET)
diff --git a/libc/arch-x86_64/string/sse2-memset-slm.S b/libc/arch-x86_64/string/sse2-memset-slm.S
index fc502c0..cceadd2 100644
--- a/libc/arch-x86_64/string/sse2-memset-slm.S
+++ b/libc/arch-x86_64/string/sse2-memset-slm.S
@@ -41,16 +41,16 @@
#endif
-ENTRY(__memset_chk)
+ENTRY(__memset_chk_generic)
# %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
cmp %rcx, %rdx
ja __memset_chk_fail
// Fall through to memset...
-END(__memset_chk)
+END(__memset_chk_generic)
.section .text.sse2,"ax",@progbits
-ENTRY(memset)
+ENTRY(memset_generic)
movq %rdi, %rax
and $0xff, %rsi
mov $0x0101010101010101, %rcx
@@ -146,4 +146,4 @@
sfence
ret
-END(memset)
+END(memset_generic)