Merge "Revert "Update to v6.11 kernel headers."" into main
diff --git a/libc/Android.bp b/libc/Android.bp
index a4f2c69..eeea728 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -1181,12 +1181,9 @@
"arch-x86/bionic/vfork.S",
"arch-x86/bionic/__x86.get_pc_thunk.S",
- "arch-x86/generic/string/memcmp.S",
-
"arch-x86/string/sse2-memchr-atom.S",
"arch-x86/string/sse2-memmove-slm.S",
"arch-x86/string/sse2-memrchr-atom.S",
- "arch-x86/string/sse2-memset-atom.S",
"arch-x86/string/sse2-memset-slm.S",
"arch-x86/string/sse2-stpcpy-slm.S",
"arch-x86/string/sse2-stpncpy-slm.S",
@@ -1200,18 +1197,14 @@
"arch-x86/string/sse2-wcsrchr-atom.S",
"arch-x86/string/sse2-wcslen-atom.S",
"arch-x86/string/sse2-wcscmp-atom.S",
- "arch-x86/string/sse2-strlen-atom.S",
"arch-x86/string/ssse3-memcmp-atom.S",
- "arch-x86/string/ssse3-memmove-atom.S",
"arch-x86/string/ssse3-strcat-atom.S",
"arch-x86/string/ssse3-strcmp-atom.S",
- "arch-x86/string/ssse3-strcpy-atom.S",
"arch-x86/string/ssse3-strlcat-atom.S",
"arch-x86/string/ssse3-strlcpy-atom.S",
"arch-x86/string/ssse3-strncat-atom.S",
"arch-x86/string/ssse3-strncmp-atom.S",
- "arch-x86/string/ssse3-strncpy-atom.S",
"arch-x86/string/ssse3-wcscat-atom.S",
"arch-x86/string/ssse3-wcscpy-atom.S",
"arch-x86/string/ssse3-wmemcmp-atom.S",
diff --git a/libc/arch-x86/dynamic_function_dispatch.cpp b/libc/arch-x86/dynamic_function_dispatch.cpp
index e6cc5fb..98d7ec2 100644
--- a/libc/arch-x86/dynamic_function_dispatch.cpp
+++ b/libc/arch-x86/dynamic_function_dispatch.cpp
@@ -33,57 +33,11 @@
DEFINE_IFUNC_FOR(memcmp) {
__builtin_cpu_init();
- if (__builtin_cpu_is("atom")) RETURN_FUNC(memcmp_func_t, memcmp_atom);
if (__builtin_cpu_supports("sse4.1")) RETURN_FUNC(memcmp_func_t, memcmp_sse4);
- RETURN_FUNC(memcmp_func_t, memcmp_generic);
+ RETURN_FUNC(memcmp_func_t, memcmp_atom);
}
MEMCMP_SHIM()
-DEFINE_IFUNC_FOR(memset) {
- __builtin_cpu_init();
- if (__builtin_cpu_is("atom")) RETURN_FUNC(memset_func_t, memset_atom);
- RETURN_FUNC(memset_func_t, memset_generic);
-}
-MEMSET_SHIM()
-
-DEFINE_IFUNC_FOR(__memset_chk) {
- __builtin_cpu_init();
- if (__builtin_cpu_is("atom")) RETURN_FUNC(__memset_chk_func_t, __memset_chk_atom);
- RETURN_FUNC(__memset_chk_func_t, __memset_chk_generic);
-}
-__MEMSET_CHK_SHIM()
-
-DEFINE_IFUNC_FOR(memmove) {
- __builtin_cpu_init();
- if (__builtin_cpu_is("atom")) RETURN_FUNC(memmove_func_t, memmove_atom);
- RETURN_FUNC(memmove_func_t, memmove_generic);
-}
-MEMMOVE_SHIM()
-
-DEFINE_IFUNC_FOR(memcpy) { return memmove_resolver(); }
-MEMCPY_SHIM()
-
-DEFINE_IFUNC_FOR(strcpy) {
- __builtin_cpu_init();
- if (__builtin_cpu_is("atom")) RETURN_FUNC(strcpy_func_t, strcpy_atom);
- RETURN_FUNC(strcpy_func_t, strcpy_generic);
-}
-STRCPY_SHIM()
-
-DEFINE_IFUNC_FOR(strncpy) {
- __builtin_cpu_init();
- if (__builtin_cpu_is("atom")) RETURN_FUNC(strncpy_func_t, strncpy_atom);
- RETURN_FUNC(strncpy_func_t, strncpy_generic);
-}
-STRNCPY_SHIM()
-
-DEFINE_IFUNC_FOR(strlen) {
- __builtin_cpu_init();
- if (__builtin_cpu_is("atom")) RETURN_FUNC(strlen_func_t, strlen_atom);
- RETURN_FUNC(strlen_func_t, strlen_generic);
-}
-STRLEN_SHIM()
-
typedef int wmemcmp_func_t(const wchar_t*, const wchar_t*, size_t);
DEFINE_IFUNC_FOR(wmemcmp) {
__builtin_cpu_init();
diff --git a/libc/arch-x86/generic/string/memcmp.S b/libc/arch-x86/generic/string/memcmp.S
deleted file mode 100644
index 1d327c7..0000000
--- a/libc/arch-x86/generic/string/memcmp.S
+++ /dev/null
@@ -1,44 +0,0 @@
-/* $OpenBSD: memcmp.S,v 1.4 2005/08/07 11:30:38 espie Exp $ */
-/*
- * Written by J.T. Conklin <jtc@netbsd.org>.
- * Public domain.
- */
-
-#include <private/bionic_asm.h>
-
-ENTRY(memcmp_generic)
- pushl %edi
- pushl %esi
- movl 12(%esp),%edi
- movl 16(%esp),%esi
- cld /* set compare direction forward */
-
- movl 20(%esp),%ecx /* compare by words */
- shrl $2,%ecx
- repe
- cmpsl
- jne L5 /* do we match so far? */
-
- movl 20(%esp),%ecx /* compare remainder by bytes */
- andl $3,%ecx
- repe
- cmpsb
- jne L6 /* do we match? */
-
- xorl %eax,%eax /* we match, return zero */
- popl %esi
- popl %edi
- ret
-
-L5: movl $4,%ecx /* We know that one of the next */
- subl %ecx,%edi /* four pairs of bytes do not */
- subl %ecx,%esi /* match. */
- repe
- cmpsb
-L6: movzbl -1(%edi),%eax /* Perform unsigned comparison */
- movzbl -1(%esi),%edx
- subl %edx,%eax
- popl %esi
- popl %edi
- ret
-END(memcmp_generic)
diff --git a/libc/arch-x86/string/sse2-memmove-slm.S b/libc/arch-x86/string/sse2-memmove-slm.S
index 7f42374..2ed4e7b 100644
--- a/libc/arch-x86/string/sse2-memmove-slm.S
+++ b/libc/arch-x86/string/sse2-memmove-slm.S
@@ -31,7 +31,7 @@
#define FOR_SILVERMONT
#ifndef MEMMOVE
-# define MEMMOVE memmove_generic
+# define MEMMOVE memmove
#endif
#ifndef L
@@ -551,3 +551,9 @@
jmp L(mm_recalc_len)
END (MEMMOVE)
+
+// N.B., `private/bionic_asm.h` provides ALIAS_SYMBOL, but that file provides
+// conflicting definitions for some macros in this file. Since ALIAS_SYMBOL is
+// small, inline it here.
+.globl memcpy;
+.equ memcpy, MEMMOVE
diff --git a/libc/arch-x86/string/sse2-memset-atom.S b/libc/arch-x86/string/sse2-memset-atom.S
deleted file mode 100644
index e43ead0..0000000
--- a/libc/arch-x86/string/sse2-memset-atom.S
+++ /dev/null
@@ -1,841 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
-
- * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include <private/bionic_asm.h>
-
-#define FOR_ATOM
-
-#ifndef L
-# define L(label) .L##label
-#endif
-
-#ifndef ALIGN
-# define ALIGN(n) .p2align n
-#endif
-
-#define CFI_PUSH(REG) \
- .cfi_adjust_cfa_offset 4; \
- .cfi_rel_offset REG, 0
-
-#define CFI_POP(REG) \
- .cfi_adjust_cfa_offset -4; \
- .cfi_restore REG
-
-#define PUSH(REG) pushl REG; CFI_PUSH(REG)
-#define POP(REG) popl REG; CFI_POP(REG)
-
-#define PARMS 8 /* Preserve EBX. */
-#define DST PARMS
-#define CHR (DST+4)
-#define LEN (CHR+4)
-#define CHK_DST_LEN (LEN+4)
-#define SETRTNVAL movl DST(%esp), %eax
-
-#define ENTRANCE PUSH(%ebx);
-#define RETURN_END POP(%ebx); ret
-#define RETURN RETURN_END; CFI_PUSH(%ebx)
-#define JMPTBL(I, B) I - B
-
-#define SETUP_PIC_REG(x) call __x86.get_pc_thunk.x
-
-/* Load an entry in a jump table into EBX and branch to it. TABLE is a
- jump table with relative offsets. */
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE) \
- /* We first load PC into EBX. */ \
- call __x86.get_pc_thunk.bx; \
- /* Get the address of the jump table. */ \
- add $(TABLE - .), %ebx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- add (%ebx,%ecx,4), %ebx; \
- add %ecx, %edx; \
- /* We loaded the jump table and adjusted EDX. Go. */ \
- jmp *%ebx
-
-ENTRY(__memset_chk_atom)
- ENTRANCE
-
- movl LEN(%esp), %ecx
- cmpl CHK_DST_LEN(%esp), %ecx
- jna L(memset_length_loaded)
-
- POP(%ebx) // Undo ENTRANCE without returning.
- jmp __memset_chk_fail
-END(__memset_chk_atom)
-
- .section .text.sse2,"ax",@progbits
- ALIGN(4)
-ENTRY(memset_atom)
- ENTRANCE
-
- movl LEN(%esp), %ecx
-L(memset_length_loaded):
- movzbl CHR(%esp), %eax
- movb %al, %ah
- /* Fill the whole EAX with pattern. */
- movl %eax, %edx
- shl $16, %eax
- or %edx, %eax
- movl DST(%esp), %edx
- cmp $32, %ecx
- jae L(32bytesormore)
-
-L(write_less32bytes):
- BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes))
-
-
- .pushsection .rodata.sse2,"a",@progbits
- ALIGN(2)
-L(table_less_32bytes):
- .int JMPTBL(L(write_0bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_1bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_2bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_3bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_4bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_5bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_6bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_7bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_8bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_9bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_10bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_11bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_12bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_13bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_14bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_15bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_16bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_17bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_18bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_19bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_20bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_21bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_22bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_23bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_24bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_25bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_26bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_27bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_28bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_29bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_30bytes), L(table_less_32bytes))
- .int JMPTBL(L(write_31bytes), L(table_less_32bytes))
- .popsection
-
- ALIGN(4)
-L(write_28bytes):
- movl %eax, -28(%edx)
-L(write_24bytes):
- movl %eax, -24(%edx)
-L(write_20bytes):
- movl %eax, -20(%edx)
-L(write_16bytes):
- movl %eax, -16(%edx)
-L(write_12bytes):
- movl %eax, -12(%edx)
-L(write_8bytes):
- movl %eax, -8(%edx)
-L(write_4bytes):
- movl %eax, -4(%edx)
-L(write_0bytes):
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(write_29bytes):
- movl %eax, -29(%edx)
-L(write_25bytes):
- movl %eax, -25(%edx)
-L(write_21bytes):
- movl %eax, -21(%edx)
-L(write_17bytes):
- movl %eax, -17(%edx)
-L(write_13bytes):
- movl %eax, -13(%edx)
-L(write_9bytes):
- movl %eax, -9(%edx)
-L(write_5bytes):
- movl %eax, -5(%edx)
-L(write_1bytes):
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(write_30bytes):
- movl %eax, -30(%edx)
-L(write_26bytes):
- movl %eax, -26(%edx)
-L(write_22bytes):
- movl %eax, -22(%edx)
-L(write_18bytes):
- movl %eax, -18(%edx)
-L(write_14bytes):
- movl %eax, -14(%edx)
-L(write_10bytes):
- movl %eax, -10(%edx)
-L(write_6bytes):
- movl %eax, -6(%edx)
-L(write_2bytes):
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(write_31bytes):
- movl %eax, -31(%edx)
-L(write_27bytes):
- movl %eax, -27(%edx)
-L(write_23bytes):
- movl %eax, -23(%edx)
-L(write_19bytes):
- movl %eax, -19(%edx)
-L(write_15bytes):
- movl %eax, -15(%edx)
-L(write_11bytes):
- movl %eax, -11(%edx)
-L(write_7bytes):
- movl %eax, -7(%edx)
-L(write_3bytes):
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-/* ECX > 32 and EDX is 4 byte aligned. */
-L(32bytesormore):
- /* Fill xmm0 with the pattern. */
- movd %eax, %xmm0
- pshufd $0, %xmm0, %xmm0
- testl $0xf, %edx
- jz L(aligned_16)
-/* ECX > 32 and EDX is not 16 byte aligned. */
-L(not_aligned_16):
- movdqu %xmm0, (%edx)
- movl %edx, %eax
- and $-16, %edx
- add $16, %edx
- sub %edx, %eax
- add %eax, %ecx
- movd %xmm0, %eax
-
- ALIGN(4)
-L(aligned_16):
- cmp $128, %ecx
- jae L(128bytesormore)
-
-L(aligned_16_less128bytes):
- BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
-
- ALIGN(4)
-L(128bytesormore):
- PUSH(%ebx)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- mov __x86_shared_cache_size@GOTOFF(%ebx), %ebx
- cmp %ebx, %ecx
- jae L(128bytesormore_nt_start)
-
-
- POP(%ebx)
-# define RESTORE_EBX_STATE CFI_PUSH(%ebx)
- PUSH(%ebx)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size@GOTOFF(%ebx), %ecx
- POP(%ebx)
-
- jae L(128bytes_L2_normal)
- subl $128, %ecx
-L(128bytesormore_normal):
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movdqa %xmm0, 0x10(%edx)
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm0, 0x30(%edx)
- movdqa %xmm0, 0x40(%edx)
- movdqa %xmm0, 0x50(%edx)
- movdqa %xmm0, 0x60(%edx)
- movdqa %xmm0, 0x70(%edx)
- lea 128(%edx), %edx
- jb L(128bytesless_normal)
-
-
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movdqa %xmm0, 0x10(%edx)
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm0, 0x30(%edx)
- movdqa %xmm0, 0x40(%edx)
- movdqa %xmm0, 0x50(%edx)
- movdqa %xmm0, 0x60(%edx)
- movdqa %xmm0, 0x70(%edx)
- lea 128(%edx), %edx
- jae L(128bytesormore_normal)
-
-L(128bytesless_normal):
- add $128, %ecx
- BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
-
- ALIGN(4)
-L(128bytes_L2_normal):
- prefetcht0 0x380(%edx)
- prefetcht0 0x3c0(%edx)
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movaps %xmm0, 0x10(%edx)
- movaps %xmm0, 0x20(%edx)
- movaps %xmm0, 0x30(%edx)
- movaps %xmm0, 0x40(%edx)
- movaps %xmm0, 0x50(%edx)
- movaps %xmm0, 0x60(%edx)
- movaps %xmm0, 0x70(%edx)
- add $128, %edx
- cmp $128, %ecx
- jae L(128bytes_L2_normal)
-
-L(128bytesless_L2_normal):
- BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
-
- RESTORE_EBX_STATE
-L(128bytesormore_nt_start):
- sub %ebx, %ecx
- mov %ebx, %eax
- and $0x7f, %eax
- add %eax, %ecx
- movd %xmm0, %eax
- ALIGN(4)
-L(128bytesormore_shared_cache_loop):
- prefetcht0 0x3c0(%edx)
- prefetcht0 0x380(%edx)
- sub $0x80, %ebx
- movdqa %xmm0, (%edx)
- movdqa %xmm0, 0x10(%edx)
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm0, 0x30(%edx)
- movdqa %xmm0, 0x40(%edx)
- movdqa %xmm0, 0x50(%edx)
- movdqa %xmm0, 0x60(%edx)
- movdqa %xmm0, 0x70(%edx)
- add $0x80, %edx
- cmp $0x80, %ebx
- jae L(128bytesormore_shared_cache_loop)
- cmp $0x80, %ecx
- jb L(shared_cache_loop_end)
- ALIGN(4)
-L(128bytesormore_nt):
- sub $0x80, %ecx
- movntdq %xmm0, (%edx)
- movntdq %xmm0, 0x10(%edx)
- movntdq %xmm0, 0x20(%edx)
- movntdq %xmm0, 0x30(%edx)
- movntdq %xmm0, 0x40(%edx)
- movntdq %xmm0, 0x50(%edx)
- movntdq %xmm0, 0x60(%edx)
- movntdq %xmm0, 0x70(%edx)
- add $0x80, %edx
- cmp $0x80, %ecx
- jae L(128bytesormore_nt)
- sfence
-L(shared_cache_loop_end):
- POP(%ebx)
- BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
-
-
- .pushsection .rodata.sse2,"a",@progbits
- ALIGN(2)
-L(table_16_128bytes):
- .int JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
- .int JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
- .popsection
-
- ALIGN(4)
-L(aligned_16_112bytes):
- movdqa %xmm0, -112(%edx)
-L(aligned_16_96bytes):
- movdqa %xmm0, -96(%edx)
-L(aligned_16_80bytes):
- movdqa %xmm0, -80(%edx)
-L(aligned_16_64bytes):
- movdqa %xmm0, -64(%edx)
-L(aligned_16_48bytes):
- movdqa %xmm0, -48(%edx)
-L(aligned_16_32bytes):
- movdqa %xmm0, -32(%edx)
-L(aligned_16_16bytes):
- movdqa %xmm0, -16(%edx)
-L(aligned_16_0bytes):
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_113bytes):
- movdqa %xmm0, -113(%edx)
-L(aligned_16_97bytes):
- movdqa %xmm0, -97(%edx)
-L(aligned_16_81bytes):
- movdqa %xmm0, -81(%edx)
-L(aligned_16_65bytes):
- movdqa %xmm0, -65(%edx)
-L(aligned_16_49bytes):
- movdqa %xmm0, -49(%edx)
-L(aligned_16_33bytes):
- movdqa %xmm0, -33(%edx)
-L(aligned_16_17bytes):
- movdqa %xmm0, -17(%edx)
-L(aligned_16_1bytes):
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_114bytes):
- movdqa %xmm0, -114(%edx)
-L(aligned_16_98bytes):
- movdqa %xmm0, -98(%edx)
-L(aligned_16_82bytes):
- movdqa %xmm0, -82(%edx)
-L(aligned_16_66bytes):
- movdqa %xmm0, -66(%edx)
-L(aligned_16_50bytes):
- movdqa %xmm0, -50(%edx)
-L(aligned_16_34bytes):
- movdqa %xmm0, -34(%edx)
-L(aligned_16_18bytes):
- movdqa %xmm0, -18(%edx)
-L(aligned_16_2bytes):
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_115bytes):
- movdqa %xmm0, -115(%edx)
-L(aligned_16_99bytes):
- movdqa %xmm0, -99(%edx)
-L(aligned_16_83bytes):
- movdqa %xmm0, -83(%edx)
-L(aligned_16_67bytes):
- movdqa %xmm0, -67(%edx)
-L(aligned_16_51bytes):
- movdqa %xmm0, -51(%edx)
-L(aligned_16_35bytes):
- movdqa %xmm0, -35(%edx)
-L(aligned_16_19bytes):
- movdqa %xmm0, -19(%edx)
-L(aligned_16_3bytes):
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_116bytes):
- movdqa %xmm0, -116(%edx)
-L(aligned_16_100bytes):
- movdqa %xmm0, -100(%edx)
-L(aligned_16_84bytes):
- movdqa %xmm0, -84(%edx)
-L(aligned_16_68bytes):
- movdqa %xmm0, -68(%edx)
-L(aligned_16_52bytes):
- movdqa %xmm0, -52(%edx)
-L(aligned_16_36bytes):
- movdqa %xmm0, -36(%edx)
-L(aligned_16_20bytes):
- movdqa %xmm0, -20(%edx)
-L(aligned_16_4bytes):
- movl %eax, -4(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_117bytes):
- movdqa %xmm0, -117(%edx)
-L(aligned_16_101bytes):
- movdqa %xmm0, -101(%edx)
-L(aligned_16_85bytes):
- movdqa %xmm0, -85(%edx)
-L(aligned_16_69bytes):
- movdqa %xmm0, -69(%edx)
-L(aligned_16_53bytes):
- movdqa %xmm0, -53(%edx)
-L(aligned_16_37bytes):
- movdqa %xmm0, -37(%edx)
-L(aligned_16_21bytes):
- movdqa %xmm0, -21(%edx)
-L(aligned_16_5bytes):
- movl %eax, -5(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_118bytes):
- movdqa %xmm0, -118(%edx)
-L(aligned_16_102bytes):
- movdqa %xmm0, -102(%edx)
-L(aligned_16_86bytes):
- movdqa %xmm0, -86(%edx)
-L(aligned_16_70bytes):
- movdqa %xmm0, -70(%edx)
-L(aligned_16_54bytes):
- movdqa %xmm0, -54(%edx)
-L(aligned_16_38bytes):
- movdqa %xmm0, -38(%edx)
-L(aligned_16_22bytes):
- movdqa %xmm0, -22(%edx)
-L(aligned_16_6bytes):
- movl %eax, -6(%edx)
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_119bytes):
- movdqa %xmm0, -119(%edx)
-L(aligned_16_103bytes):
- movdqa %xmm0, -103(%edx)
-L(aligned_16_87bytes):
- movdqa %xmm0, -87(%edx)
-L(aligned_16_71bytes):
- movdqa %xmm0, -71(%edx)
-L(aligned_16_55bytes):
- movdqa %xmm0, -55(%edx)
-L(aligned_16_39bytes):
- movdqa %xmm0, -39(%edx)
-L(aligned_16_23bytes):
- movdqa %xmm0, -23(%edx)
-L(aligned_16_7bytes):
- movl %eax, -7(%edx)
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_120bytes):
- movdqa %xmm0, -120(%edx)
-L(aligned_16_104bytes):
- movdqa %xmm0, -104(%edx)
-L(aligned_16_88bytes):
- movdqa %xmm0, -88(%edx)
-L(aligned_16_72bytes):
- movdqa %xmm0, -72(%edx)
-L(aligned_16_56bytes):
- movdqa %xmm0, -56(%edx)
-L(aligned_16_40bytes):
- movdqa %xmm0, -40(%edx)
-L(aligned_16_24bytes):
- movdqa %xmm0, -24(%edx)
-L(aligned_16_8bytes):
- movq %xmm0, -8(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_121bytes):
- movdqa %xmm0, -121(%edx)
-L(aligned_16_105bytes):
- movdqa %xmm0, -105(%edx)
-L(aligned_16_89bytes):
- movdqa %xmm0, -89(%edx)
-L(aligned_16_73bytes):
- movdqa %xmm0, -73(%edx)
-L(aligned_16_57bytes):
- movdqa %xmm0, -57(%edx)
-L(aligned_16_41bytes):
- movdqa %xmm0, -41(%edx)
-L(aligned_16_25bytes):
- movdqa %xmm0, -25(%edx)
-L(aligned_16_9bytes):
- movq %xmm0, -9(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_122bytes):
- movdqa %xmm0, -122(%edx)
-L(aligned_16_106bytes):
- movdqa %xmm0, -106(%edx)
-L(aligned_16_90bytes):
- movdqa %xmm0, -90(%edx)
-L(aligned_16_74bytes):
- movdqa %xmm0, -74(%edx)
-L(aligned_16_58bytes):
- movdqa %xmm0, -58(%edx)
-L(aligned_16_42bytes):
- movdqa %xmm0, -42(%edx)
-L(aligned_16_26bytes):
- movdqa %xmm0, -26(%edx)
-L(aligned_16_10bytes):
- movq %xmm0, -10(%edx)
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_123bytes):
- movdqa %xmm0, -123(%edx)
-L(aligned_16_107bytes):
- movdqa %xmm0, -107(%edx)
-L(aligned_16_91bytes):
- movdqa %xmm0, -91(%edx)
-L(aligned_16_75bytes):
- movdqa %xmm0, -75(%edx)
-L(aligned_16_59bytes):
- movdqa %xmm0, -59(%edx)
-L(aligned_16_43bytes):
- movdqa %xmm0, -43(%edx)
-L(aligned_16_27bytes):
- movdqa %xmm0, -27(%edx)
-L(aligned_16_11bytes):
- movq %xmm0, -11(%edx)
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_124bytes):
- movdqa %xmm0, -124(%edx)
-L(aligned_16_108bytes):
- movdqa %xmm0, -108(%edx)
-L(aligned_16_92bytes):
- movdqa %xmm0, -92(%edx)
-L(aligned_16_76bytes):
- movdqa %xmm0, -76(%edx)
-L(aligned_16_60bytes):
- movdqa %xmm0, -60(%edx)
-L(aligned_16_44bytes):
- movdqa %xmm0, -44(%edx)
-L(aligned_16_28bytes):
- movdqa %xmm0, -28(%edx)
-L(aligned_16_12bytes):
- movq %xmm0, -12(%edx)
- movl %eax, -4(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_125bytes):
- movdqa %xmm0, -125(%edx)
-L(aligned_16_109bytes):
- movdqa %xmm0, -109(%edx)
-L(aligned_16_93bytes):
- movdqa %xmm0, -93(%edx)
-L(aligned_16_77bytes):
- movdqa %xmm0, -77(%edx)
-L(aligned_16_61bytes):
- movdqa %xmm0, -61(%edx)
-L(aligned_16_45bytes):
- movdqa %xmm0, -45(%edx)
-L(aligned_16_29bytes):
- movdqa %xmm0, -29(%edx)
-L(aligned_16_13bytes):
- movq %xmm0, -13(%edx)
- movl %eax, -5(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_126bytes):
- movdqa %xmm0, -126(%edx)
-L(aligned_16_110bytes):
- movdqa %xmm0, -110(%edx)
-L(aligned_16_94bytes):
- movdqa %xmm0, -94(%edx)
-L(aligned_16_78bytes):
- movdqa %xmm0, -78(%edx)
-L(aligned_16_62bytes):
- movdqa %xmm0, -62(%edx)
-L(aligned_16_46bytes):
- movdqa %xmm0, -46(%edx)
-L(aligned_16_30bytes):
- movdqa %xmm0, -30(%edx)
-L(aligned_16_14bytes):
- movq %xmm0, -14(%edx)
- movl %eax, -6(%edx)
- movw %ax, -2(%edx)
- SETRTNVAL
- RETURN
-
- ALIGN(4)
-L(aligned_16_127bytes):
- movdqa %xmm0, -127(%edx)
-L(aligned_16_111bytes):
- movdqa %xmm0, -111(%edx)
-L(aligned_16_95bytes):
- movdqa %xmm0, -95(%edx)
-L(aligned_16_79bytes):
- movdqa %xmm0, -79(%edx)
-L(aligned_16_63bytes):
- movdqa %xmm0, -63(%edx)
-L(aligned_16_47bytes):
- movdqa %xmm0, -47(%edx)
-L(aligned_16_31bytes):
- movdqa %xmm0, -31(%edx)
-L(aligned_16_15bytes):
- movq %xmm0, -15(%edx)
- movl %eax, -7(%edx)
- movw %ax, -3(%edx)
- movb %al, -1(%edx)
- SETRTNVAL
- RETURN_END
-
-END(memset_atom)
diff --git a/libc/arch-x86/string/sse2-memset-slm.S b/libc/arch-x86/string/sse2-memset-slm.S
index e4c8fa1..ec2ee52 100644
--- a/libc/arch-x86/string/sse2-memset-slm.S
+++ b/libc/arch-x86/string/sse2-memset-slm.S
@@ -79,7 +79,7 @@
/* We loaded the jump table and adjusted EDX. Go. */ \
jmp *%ebx
-ENTRY(__memset_chk_generic)
+ENTRY(__memset_chk)
ENTRANCE
movl LEN(%esp), %ecx
@@ -88,11 +88,11 @@
POP(%ebx) // Undo ENTRANCE without returning.
jmp __memset_chk_fail
-END(__memset_chk_generic)
+END(__memset_chk)
.section .text.sse2,"ax",@progbits
ALIGN(4)
-ENTRY(memset_generic)
+ENTRY(memset)
ENTRANCE
movl LEN(%esp), %ecx
@@ -755,4 +755,4 @@
SETRTNVAL
RETURN_END
-END(memset_generic)
+END(memset)
diff --git a/libc/arch-x86/string/sse2-strcpy-slm.S b/libc/arch-x86/string/sse2-strcpy-slm.S
index 22ceeab..b5d84b5 100644
--- a/libc/arch-x86/string/sse2-strcpy-slm.S
+++ b/libc/arch-x86/string/sse2-strcpy-slm.S
@@ -79,7 +79,7 @@
#define POP(REG) popl REG; CFI_POP (REG)
#ifndef STRCPY
-# define STRCPY strcpy_generic
+# define STRCPY strcpy
#endif
#ifdef USE_AS_STPNCPY
diff --git a/libc/arch-x86/string/sse2-strlen-slm.S b/libc/arch-x86/string/sse2-strlen-slm.S
index b805ad6..27cc025 100644
--- a/libc/arch-x86/string/sse2-strlen-slm.S
+++ b/libc/arch-x86/string/sse2-strlen-slm.S
@@ -29,7 +29,7 @@
*/
#ifndef STRLEN
-# define STRLEN strlen_generic
+# define STRLEN strlen
#endif
#ifndef L
diff --git a/libc/arch-x86/string/sse2-strncpy-slm.S b/libc/arch-x86/string/sse2-strncpy-slm.S
index aff7fb9..591419f 100644
--- a/libc/arch-x86/string/sse2-strncpy-slm.S
+++ b/libc/arch-x86/string/sse2-strncpy-slm.S
@@ -29,5 +29,5 @@
*/
#define USE_AS_STRNCPY
-#define STRCPY strncpy_generic
+#define STRCPY strncpy
#include "sse2-strcpy-slm.S"
diff --git a/libc/arch-x86/string/ssse3-memcpy-atom.S b/libc/arch-x86/string/ssse3-memcpy-atom.S
deleted file mode 100644
index 83e1985..0000000
--- a/libc/arch-x86/string/ssse3-memcpy-atom.S
+++ /dev/null
@@ -1,3124 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
-
- * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define FOR_ATOM
-
-#ifndef MEMCPY
-# define MEMCPY memcpy_atom
-#endif
-
-#ifndef L
-# define L(label) .L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc .cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc .cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off) .cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg) .cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off) .cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name) \
- .type name, @function; \
- .globl name; \
- .p2align 4; \
-name: \
- cfi_startproc
-#endif
-
-#ifndef END
-# define END(name) \
- cfi_endproc; \
- .size name, .-name
-#endif
-
-#define DEST PARMS
-#define SRC DEST+4
-#define LEN SRC+4
-
-#define CFI_PUSH(REG) \
- cfi_adjust_cfa_offset (4); \
- cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG) \
- cfi_adjust_cfa_offset (-4); \
- cfi_restore (REG)
-
-#define PUSH(REG) pushl REG; CFI_PUSH (REG)
-#define POP(REG) popl REG; CFI_POP (REG)
-
-#if (defined SHARED || defined __PIC__)
-# define PARMS 8 /* Preserve EBX. */
-# define ENTRANCE PUSH (%ebx);
-# define RETURN_END POP (%ebx); ret
-# define RETURN RETURN_END; CFI_PUSH (%ebx)
-# define JMPTBL(I, B) I - B
-
-# define SETUP_PIC_REG(x) call __x86.get_pc_thunk.x
-
-/* Load an entry in a jump table into EBX and branch to it. TABLE is a
- jump table with relative offsets. INDEX is a register contains the
- index into the jump table. SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- /* We first load PC into EBX. */ \
- SETUP_PIC_REG(bx); \
- /* Get the address of the jump table. */ \
- addl $(TABLE - .), %ebx; \
- /* Get the entry and convert the relative offset to the \
- absolute address. */ \
- addl (%ebx, INDEX, SCALE), %ebx; \
- /* We loaded the jump table. Go. */ \
- jmp *%ebx
-#else
-
-# define PARMS 4
-# define ENTRANCE
-# define RETURN_END ret
-# define RETURN RETURN_END
-# define JMPTBL(I, B) I
-
-/* Branch to an entry in a jump table. TABLE is a jump table with
- absolute offsets. INDEX is a register contains the index into the
- jump table. SCALE is the scale of INDEX. */
-
-# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- jmp *TABLE(, INDEX, SCALE)
-#endif
-
- .section .text.ssse3,"ax",@progbits
-ENTRY (MEMCPY)
- ENTRANCE
- movl LEN(%esp), %ecx
- movl SRC(%esp), %eax
- movl DEST(%esp), %edx
-
-#ifdef USE_AS_MEMMOVE
- cmp %eax, %edx
- jb L(copy_forward)
- je L(fwd_write_0bytes)
- cmp $32, %ecx
- jae L(memmove_bwd)
- jmp L(bk_write_less32bytes_2)
-
- .p2align 4
-L(memmove_bwd):
- add %ecx, %eax
- cmp %eax, %edx
- movl SRC(%esp), %eax
- jb L(copy_backward)
-
-L(copy_forward):
-#endif
- cmp $48, %ecx
- jae L(48bytesormore)
-
-L(fwd_write_less32bytes):
-#ifndef USE_AS_MEMMOVE
- cmp %dl, %al
- jb L(bk_write)
-#endif
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-#ifndef USE_AS_MEMMOVE
- .p2align 4
-L(bk_write):
- BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-#endif
-
- .p2align 4
-L(48bytesormore):
-#ifndef USE_AS_MEMMOVE
- movlpd (%eax), %xmm0
- movlpd 8(%eax), %xmm1
- movlpd %xmm0, (%edx)
- movlpd %xmm1, 8(%edx)
-#else
- movdqu (%eax), %xmm0
-#endif
- PUSH (%edi)
- movl %edx, %edi
- and $-16, %edx
- add $16, %edx
- sub %edx, %edi
- add %edi, %ecx
- sub %edi, %eax
-
-#ifdef SHARED_CACHE_SIZE_HALF
- cmp $SHARED_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_shared_cache_size_half, %ecx
-# endif
-#endif
-
- mov %eax, %edi
- jae L(large_page)
- and $0xf, %edi
- jz L(shl_0)
- BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
-
- .p2align 4
-L(shl_0):
-#ifdef USE_AS_MEMMOVE
- movl DEST+4(%esp), %edi
- movdqu %xmm0, (%edi)
-#endif
- xor %edi, %edi
- cmp $127, %ecx
- ja L(shl_0_gobble)
- lea -32(%ecx), %ecx
-
- .p2align 4
-L(shl_0_loop):
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
- jb L(shl_0_end)
-
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
- jb L(shl_0_end)
-
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
- jb L(shl_0_end)
-
- movdqa (%eax, %edi), %xmm0
- movdqa 16(%eax, %edi), %xmm1
- sub $32, %ecx
- movdqa %xmm0, (%edx, %edi)
- movdqa %xmm1, 16(%edx, %edi)
- lea 32(%edi), %edi
-
-L(shl_0_end):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- add %edi, %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_0_gobble):
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- POP (%edi)
- lea -128(%ecx), %ecx
- jae L(shl_0_gobble_mem_loop)
-
- .p2align 4
-L(shl_0_gobble_cache_loop):
- movdqa (%eax), %xmm0
- movdqa 0x10(%eax), %xmm1
- movdqa 0x20(%eax), %xmm2
- movdqa 0x30(%eax), %xmm3
- movdqa 0x40(%eax), %xmm4
- movdqa 0x50(%eax), %xmm5
- movdqa 0x60(%eax), %xmm6
- movdqa 0x70(%eax), %xmm7
- lea 0x80(%eax), %eax
- sub $128, %ecx
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- movdqa %xmm2, 0x20(%edx)
- movdqa %xmm3, 0x30(%edx)
- movdqa %xmm4, 0x40(%edx)
- movdqa %xmm5, 0x50(%edx)
- movdqa %xmm6, 0x60(%edx)
- movdqa %xmm7, 0x70(%edx)
- lea 0x80(%edx), %edx
-
- jae L(shl_0_gobble_cache_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(shl_0_cache_less_64bytes)
-
- movdqa (%eax), %xmm0
- sub $0x40, %ecx
- movdqa 0x10(%eax), %xmm1
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- movdqa 0x20(%eax), %xmm0
- movdqa 0x30(%eax), %xmm1
- add $0x40, %eax
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm1, 0x30(%edx)
- add $0x40, %edx
-
-L(shl_0_cache_less_64bytes):
- cmp $0x20, %ecx
- jb L(shl_0_cache_less_32bytes)
- movdqa (%eax), %xmm0
- sub $0x20, %ecx
- movdqa 0x10(%eax), %xmm1
- add $0x20, %eax
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- add $0x20, %edx
-
-L(shl_0_cache_less_32bytes):
- cmp $0x10, %ecx
- jb L(shl_0_cache_less_16bytes)
- sub $0x10, %ecx
- movdqa (%eax), %xmm0
- add $0x10, %eax
- movdqa %xmm0, (%edx)
- add $0x10, %edx
-
-L(shl_0_cache_less_16bytes):
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
- .p2align 4
-L(shl_0_gobble_mem_loop):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x280(%eax)
- prefetcht0 0x1c0(%edx)
-
- movdqa (%eax), %xmm0
- movdqa 0x10(%eax), %xmm1
- movdqa 0x20(%eax), %xmm2
- movdqa 0x30(%eax), %xmm3
- movdqa 0x40(%eax), %xmm4
- movdqa 0x50(%eax), %xmm5
- movdqa 0x60(%eax), %xmm6
- movdqa 0x70(%eax), %xmm7
- lea 0x80(%eax), %eax
- sub $0x80, %ecx
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- movdqa %xmm2, 0x20(%edx)
- movdqa %xmm3, 0x30(%edx)
- movdqa %xmm4, 0x40(%edx)
- movdqa %xmm5, 0x50(%edx)
- movdqa %xmm6, 0x60(%edx)
- movdqa %xmm7, 0x70(%edx)
- lea 0x80(%edx), %edx
-
- jae L(shl_0_gobble_mem_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(shl_0_mem_less_64bytes)
-
- movdqa (%eax), %xmm0
- sub $0x40, %ecx
- movdqa 0x10(%eax), %xmm1
-
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
-
- movdqa 0x20(%eax), %xmm0
- movdqa 0x30(%eax), %xmm1
- add $0x40, %eax
-
- movdqa %xmm0, 0x20(%edx)
- movdqa %xmm1, 0x30(%edx)
- add $0x40, %edx
-
-L(shl_0_mem_less_64bytes):
- cmp $0x20, %ecx
- jb L(shl_0_mem_less_32bytes)
- movdqa (%eax), %xmm0
- sub $0x20, %ecx
- movdqa 0x10(%eax), %xmm1
- add $0x20, %eax
- movdqa %xmm0, (%edx)
- movdqa %xmm1, 0x10(%edx)
- add $0x20, %edx
-
-L(shl_0_mem_less_32bytes):
- cmp $0x10, %ecx
- jb L(shl_0_mem_less_16bytes)
- sub $0x10, %ecx
- movdqa (%eax), %xmm0
- add $0x10, %eax
- movdqa %xmm0, (%edx)
- add $0x10, %edx
-
-L(shl_0_mem_less_16bytes):
- add %ecx, %edx
- add %ecx, %eax
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd_align), %ecx, 4)
-
- .p2align 4
-L(shl_1):
-#ifndef USE_AS_MEMMOVE
- movaps -1(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -1(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_1_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl1LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 15(%eax), %xmm2
- movaps 31(%eax), %xmm3
- movaps 47(%eax), %xmm4
- movaps 63(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $1, %xmm4, %xmm5
- palignr $1, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $1, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $1, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl1LoopStart)
-
-L(Shl1LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 15(%eax), %xmm2
- movaps 31(%eax), %xmm3
- palignr $1, %xmm2, %xmm3
- palignr $1, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_1_no_prefetch):
- lea -32(%ecx), %ecx
- lea -1(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_1_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $1, %xmm2, %xmm3
- palignr $1, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_1_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $1, %xmm2, %xmm3
- palignr $1, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_1_no_prefetch_loop)
-
-L(sh_1_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 1(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_2):
-#ifndef USE_AS_MEMMOVE
- movaps -2(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -2(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_2_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl2LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 14(%eax), %xmm2
- movaps 30(%eax), %xmm3
- movaps 46(%eax), %xmm4
- movaps 62(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $2, %xmm4, %xmm5
- palignr $2, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $2, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $2, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl2LoopStart)
-
-L(Shl2LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 14(%eax), %xmm2
- movaps 30(%eax), %xmm3
- palignr $2, %xmm2, %xmm3
- palignr $2, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_2_no_prefetch):
- lea -32(%ecx), %ecx
- lea -2(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_2_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $2, %xmm2, %xmm3
- palignr $2, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_2_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $2, %xmm2, %xmm3
- palignr $2, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_2_no_prefetch_loop)
-
-L(sh_2_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 2(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_3):
-#ifndef USE_AS_MEMMOVE
- movaps -3(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -3(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_3_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl3LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 13(%eax), %xmm2
- movaps 29(%eax), %xmm3
- movaps 45(%eax), %xmm4
- movaps 61(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $3, %xmm4, %xmm5
- palignr $3, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $3, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $3, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl3LoopStart)
-
-L(Shl3LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 13(%eax), %xmm2
- movaps 29(%eax), %xmm3
- palignr $3, %xmm2, %xmm3
- palignr $3, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_3_no_prefetch):
- lea -32(%ecx), %ecx
- lea -3(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_3_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $3, %xmm2, %xmm3
- palignr $3, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(sh_3_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $3, %xmm2, %xmm3
- palignr $3, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(sh_3_no_prefetch_loop)
-
-L(sh_3_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 3(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_4):
-#ifndef USE_AS_MEMMOVE
- movaps -4(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -4(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_4_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl4LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 12(%eax), %xmm2
- movaps 28(%eax), %xmm3
- movaps 44(%eax), %xmm4
- movaps 60(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $4, %xmm4, %xmm5
- palignr $4, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $4, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $4, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl4LoopStart)
-
-L(Shl4LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 12(%eax), %xmm2
- movaps 28(%eax), %xmm3
- palignr $4, %xmm2, %xmm3
- palignr $4, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_4_no_prefetch):
- lea -32(%ecx), %ecx
- lea -4(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_4_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $4, %xmm2, %xmm3
- palignr $4, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(sh_4_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $4, %xmm2, %xmm3
- palignr $4, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(sh_4_no_prefetch_loop)
-
-L(sh_4_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 4(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_5):
-#ifndef USE_AS_MEMMOVE
- movaps -5(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -5(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_5_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl5LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 11(%eax), %xmm2
- movaps 27(%eax), %xmm3
- movaps 43(%eax), %xmm4
- movaps 59(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $5, %xmm4, %xmm5
- palignr $5, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $5, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $5, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl5LoopStart)
-
-L(Shl5LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 11(%eax), %xmm2
- movaps 27(%eax), %xmm3
- palignr $5, %xmm2, %xmm3
- palignr $5, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_5_no_prefetch):
- lea -32(%ecx), %ecx
- lea -5(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_5_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $5, %xmm2, %xmm3
- palignr $5, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(sh_5_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $5, %xmm2, %xmm3
- palignr $5, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(sh_5_no_prefetch_loop)
-
-L(sh_5_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 5(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_6):
-#ifndef USE_AS_MEMMOVE
- movaps -6(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -6(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_6_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl6LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 10(%eax), %xmm2
- movaps 26(%eax), %xmm3
- movaps 42(%eax), %xmm4
- movaps 58(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $6, %xmm4, %xmm5
- palignr $6, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $6, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $6, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl6LoopStart)
-
-L(Shl6LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 10(%eax), %xmm2
- movaps 26(%eax), %xmm3
- palignr $6, %xmm2, %xmm3
- palignr $6, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_6_no_prefetch):
- lea -32(%ecx), %ecx
- lea -6(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_6_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $6, %xmm2, %xmm3
- palignr $6, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jb L(sh_6_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $6, %xmm2, %xmm3
- palignr $6, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
-
- jae L(sh_6_no_prefetch_loop)
-
-L(sh_6_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 6(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_7):
-#ifndef USE_AS_MEMMOVE
- movaps -7(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -7(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_7_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl7LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 9(%eax), %xmm2
- movaps 25(%eax), %xmm3
- movaps 41(%eax), %xmm4
- movaps 57(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $7, %xmm4, %xmm5
- palignr $7, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $7, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $7, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl7LoopStart)
-
-L(Shl7LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 9(%eax), %xmm2
- movaps 25(%eax), %xmm3
- palignr $7, %xmm2, %xmm3
- palignr $7, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_7_no_prefetch):
- lea -32(%ecx), %ecx
- lea -7(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_7_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $7, %xmm2, %xmm3
- palignr $7, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_7_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $7, %xmm2, %xmm3
- palignr $7, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_7_no_prefetch_loop)
-
-L(sh_7_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 7(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_8):
-#ifndef USE_AS_MEMMOVE
- movaps -8(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -8(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_8_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl8LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 8(%eax), %xmm2
- movaps 24(%eax), %xmm3
- movaps 40(%eax), %xmm4
- movaps 56(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $8, %xmm4, %xmm5
- palignr $8, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $8, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $8, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl8LoopStart)
-
-L(LoopLeave8):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 8(%eax), %xmm2
- movaps 24(%eax), %xmm3
- palignr $8, %xmm2, %xmm3
- palignr $8, %xmm1, %xmm2
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_8_no_prefetch):
- lea -32(%ecx), %ecx
- lea -8(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_8_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $8, %xmm2, %xmm3
- palignr $8, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_8_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $8, %xmm2, %xmm3
- palignr $8, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_8_no_prefetch_loop)
-
-L(sh_8_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 8(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_9):
-#ifndef USE_AS_MEMMOVE
- movaps -9(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -9(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_9_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl9LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 7(%eax), %xmm2
- movaps 23(%eax), %xmm3
- movaps 39(%eax), %xmm4
- movaps 55(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $9, %xmm4, %xmm5
- palignr $9, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $9, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $9, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl9LoopStart)
-
-L(Shl9LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 7(%eax), %xmm2
- movaps 23(%eax), %xmm3
- palignr $9, %xmm2, %xmm3
- palignr $9, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_9_no_prefetch):
- lea -32(%ecx), %ecx
- lea -9(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_9_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $9, %xmm2, %xmm3
- palignr $9, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_9_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $9, %xmm2, %xmm3
- palignr $9, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_9_no_prefetch_loop)
-
-L(sh_9_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 9(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_10):
-#ifndef USE_AS_MEMMOVE
- movaps -10(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -10(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_10_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl10LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 6(%eax), %xmm2
- movaps 22(%eax), %xmm3
- movaps 38(%eax), %xmm4
- movaps 54(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $10, %xmm4, %xmm5
- palignr $10, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $10, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $10, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl10LoopStart)
-
-L(Shl10LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 6(%eax), %xmm2
- movaps 22(%eax), %xmm3
- palignr $10, %xmm2, %xmm3
- palignr $10, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_10_no_prefetch):
- lea -32(%ecx), %ecx
- lea -10(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_10_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $10, %xmm2, %xmm3
- palignr $10, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_10_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $10, %xmm2, %xmm3
- palignr $10, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_10_no_prefetch_loop)
-
-L(sh_10_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 10(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_11):
-#ifndef USE_AS_MEMMOVE
- movaps -11(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -11(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_11_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl11LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 5(%eax), %xmm2
- movaps 21(%eax), %xmm3
- movaps 37(%eax), %xmm4
- movaps 53(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $11, %xmm4, %xmm5
- palignr $11, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $11, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $11, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl11LoopStart)
-
-L(Shl11LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 5(%eax), %xmm2
- movaps 21(%eax), %xmm3
- palignr $11, %xmm2, %xmm3
- palignr $11, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_11_no_prefetch):
- lea -32(%ecx), %ecx
- lea -11(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_11_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $11, %xmm2, %xmm3
- palignr $11, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_11_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $11, %xmm2, %xmm3
- palignr $11, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_11_no_prefetch_loop)
-
-L(sh_11_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 11(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_12):
-#ifndef USE_AS_MEMMOVE
- movaps -12(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -12(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_12_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl12LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 4(%eax), %xmm2
- movaps 20(%eax), %xmm3
- movaps 36(%eax), %xmm4
- movaps 52(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $12, %xmm4, %xmm5
- palignr $12, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $12, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $12, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl12LoopStart)
-
-L(Shl12LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 4(%eax), %xmm2
- movaps 20(%eax), %xmm3
- palignr $12, %xmm2, %xmm3
- palignr $12, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_12_no_prefetch):
- lea -32(%ecx), %ecx
- lea -12(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_12_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $12, %xmm2, %xmm3
- palignr $12, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_12_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $12, %xmm2, %xmm3
- palignr $12, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_12_no_prefetch_loop)
-
-L(sh_12_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 12(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_13):
-#ifndef USE_AS_MEMMOVE
- movaps -13(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -13(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_13_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl13LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 3(%eax), %xmm2
- movaps 19(%eax), %xmm3
- movaps 35(%eax), %xmm4
- movaps 51(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $13, %xmm4, %xmm5
- palignr $13, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $13, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $13, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl13LoopStart)
-
-L(Shl13LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 3(%eax), %xmm2
- movaps 19(%eax), %xmm3
- palignr $13, %xmm2, %xmm3
- palignr $13, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_13_no_prefetch):
- lea -32(%ecx), %ecx
- lea -13(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_13_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $13, %xmm2, %xmm3
- palignr $13, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_13_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $13, %xmm2, %xmm3
- palignr $13, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_13_no_prefetch_loop)
-
-L(sh_13_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 13(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_14):
-#ifndef USE_AS_MEMMOVE
- movaps -14(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -14(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_14_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl14LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 2(%eax), %xmm2
- movaps 18(%eax), %xmm3
- movaps 34(%eax), %xmm4
- movaps 50(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $14, %xmm4, %xmm5
- palignr $14, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $14, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $14, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl14LoopStart)
-
-L(Shl14LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 2(%eax), %xmm2
- movaps 18(%eax), %xmm3
- palignr $14, %xmm2, %xmm3
- palignr $14, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_14_no_prefetch):
- lea -32(%ecx), %ecx
- lea -14(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_14_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $14, %xmm2, %xmm3
- palignr $14, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_14_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $14, %xmm2, %xmm3
- palignr $14, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_14_no_prefetch_loop)
-
-L(sh_14_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 14(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_15):
-#ifndef USE_AS_MEMMOVE
- movaps -15(%eax), %xmm1
-#else
- movl DEST+4(%esp), %edi
- movaps -15(%eax), %xmm1
- movdqu %xmm0, (%edi)
-#endif
-#ifdef DATA_CACHE_SIZE_HALF
- cmp $DATA_CACHE_SIZE_HALF, %ecx
-#else
-# if (defined SHARED || defined __PIC__)
- SETUP_PIC_REG(bx)
- add $_GLOBAL_OFFSET_TABLE_, %ebx
- cmp __x86_data_cache_size_half@GOTOFF(%ebx), %ecx
-# else
- cmp __x86_data_cache_size_half, %ecx
-# endif
-#endif
- jb L(sh_15_no_prefetch)
-
- lea -64(%ecx), %ecx
-
- .p2align 4
-L(Shl15LoopStart):
- prefetcht0 0x1c0(%eax)
- prefetcht0 0x1c0(%edx)
- movaps 1(%eax), %xmm2
- movaps 17(%eax), %xmm3
- movaps 33(%eax), %xmm4
- movaps 49(%eax), %xmm5
- movaps %xmm5, %xmm7
- palignr $15, %xmm4, %xmm5
- palignr $15, %xmm3, %xmm4
- movaps %xmm5, 48(%edx)
- palignr $15, %xmm2, %xmm3
- lea 64(%eax), %eax
- palignr $15, %xmm1, %xmm2
- movaps %xmm4, 32(%edx)
- movaps %xmm3, 16(%edx)
- movaps %xmm7, %xmm1
- movaps %xmm2, (%edx)
- lea 64(%edx), %edx
- sub $64, %ecx
- ja L(Shl15LoopStart)
-
-L(Shl15LoopLeave):
- add $32, %ecx
- jle L(shl_end_0)
-
- movaps 1(%eax), %xmm2
- movaps 17(%eax), %xmm3
- palignr $15, %xmm2, %xmm3
- palignr $15, %xmm1, %xmm2
-
- movaps %xmm2, (%edx)
- movaps %xmm3, 16(%edx)
- lea 32(%edx, %ecx), %edx
- lea 32(%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(sh_15_no_prefetch):
- lea -32(%ecx), %ecx
- lea -15(%eax), %eax
- xor %edi, %edi
-
- .p2align 4
-L(sh_15_no_prefetch_loop):
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm4
- palignr $15, %xmm2, %xmm3
- palignr $15, %xmm1, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jb L(sh_15_end_no_prefetch_loop)
-
- movdqa 16(%eax, %edi), %xmm2
- sub $32, %ecx
- movdqa 32(%eax, %edi), %xmm3
- movdqa %xmm3, %xmm1
- palignr $15, %xmm2, %xmm3
- palignr $15, %xmm4, %xmm2
- lea 32(%edi), %edi
- movdqa %xmm2, -32(%edx, %edi)
- movdqa %xmm3, -16(%edx, %edi)
- jae L(sh_15_no_prefetch_loop)
-
-L(sh_15_end_no_prefetch_loop):
- lea 32(%ecx), %ecx
- add %ecx, %edi
- add %edi, %edx
- lea 15(%edi, %eax), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(shl_end_0):
- lea 32(%ecx), %ecx
- lea (%edx, %ecx), %edx
- lea (%eax, %ecx), %eax
- POP (%edi)
- BRANCH_TO_JMPTBL_ENTRY(L(table_48bytes_fwd), %ecx, 4)
-
- .p2align 4
-L(fwd_write_44bytes):
- movq -44(%eax), %xmm0
- movq %xmm0, -44(%edx)
-L(fwd_write_36bytes):
- movq -36(%eax), %xmm0
- movq %xmm0, -36(%edx)
-L(fwd_write_28bytes):
- movq -28(%eax), %xmm0
- movq %xmm0, -28(%edx)
-L(fwd_write_20bytes):
- movq -20(%eax), %xmm0
- movq %xmm0, -20(%edx)
-L(fwd_write_12bytes):
- movq -12(%eax), %xmm0
- movq %xmm0, -12(%edx)
-L(fwd_write_4bytes):
- movl -4(%eax), %ecx
- movl %ecx, -4(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_40bytes):
- movq -40(%eax), %xmm0
- movq %xmm0, -40(%edx)
-L(fwd_write_32bytes):
- movq -32(%eax), %xmm0
- movq %xmm0, -32(%edx)
-L(fwd_write_24bytes):
- movq -24(%eax), %xmm0
- movq %xmm0, -24(%edx)
-L(fwd_write_16bytes):
- movq -16(%eax), %xmm0
- movq %xmm0, -16(%edx)
-L(fwd_write_8bytes):
- movq -8(%eax), %xmm0
- movq %xmm0, -8(%edx)
-L(fwd_write_0bytes):
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_5bytes):
- movl -5(%eax), %ecx
- movl -4(%eax), %eax
- movl %ecx, -5(%edx)
- movl %eax, -4(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_45bytes):
- movq -45(%eax), %xmm0
- movq %xmm0, -45(%edx)
-L(fwd_write_37bytes):
- movq -37(%eax), %xmm0
- movq %xmm0, -37(%edx)
-L(fwd_write_29bytes):
- movq -29(%eax), %xmm0
- movq %xmm0, -29(%edx)
-L(fwd_write_21bytes):
- movq -21(%eax), %xmm0
- movq %xmm0, -21(%edx)
-L(fwd_write_13bytes):
- movq -13(%eax), %xmm0
- movq %xmm0, -13(%edx)
- movl -5(%eax), %ecx
- movl %ecx, -5(%edx)
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_41bytes):
- movq -41(%eax), %xmm0
- movq %xmm0, -41(%edx)
-L(fwd_write_33bytes):
- movq -33(%eax), %xmm0
- movq %xmm0, -33(%edx)
-L(fwd_write_25bytes):
- movq -25(%eax), %xmm0
- movq %xmm0, -25(%edx)
-L(fwd_write_17bytes):
- movq -17(%eax), %xmm0
- movq %xmm0, -17(%edx)
-L(fwd_write_9bytes):
- movq -9(%eax), %xmm0
- movq %xmm0, -9(%edx)
-L(fwd_write_1bytes):
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_46bytes):
- movq -46(%eax), %xmm0
- movq %xmm0, -46(%edx)
-L(fwd_write_38bytes):
- movq -38(%eax), %xmm0
- movq %xmm0, -38(%edx)
-L(fwd_write_30bytes):
- movq -30(%eax), %xmm0
- movq %xmm0, -30(%edx)
-L(fwd_write_22bytes):
- movq -22(%eax), %xmm0
- movq %xmm0, -22(%edx)
-L(fwd_write_14bytes):
- movq -14(%eax), %xmm0
- movq %xmm0, -14(%edx)
-L(fwd_write_6bytes):
- movl -6(%eax), %ecx
- movl %ecx, -6(%edx)
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_42bytes):
- movq -42(%eax), %xmm0
- movq %xmm0, -42(%edx)
-L(fwd_write_34bytes):
- movq -34(%eax), %xmm0
- movq %xmm0, -34(%edx)
-L(fwd_write_26bytes):
- movq -26(%eax), %xmm0
- movq %xmm0, -26(%edx)
-L(fwd_write_18bytes):
- movq -18(%eax), %xmm0
- movq %xmm0, -18(%edx)
-L(fwd_write_10bytes):
- movq -10(%eax), %xmm0
- movq %xmm0, -10(%edx)
-L(fwd_write_2bytes):
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_47bytes):
- movq -47(%eax), %xmm0
- movq %xmm0, -47(%edx)
-L(fwd_write_39bytes):
- movq -39(%eax), %xmm0
- movq %xmm0, -39(%edx)
-L(fwd_write_31bytes):
- movq -31(%eax), %xmm0
- movq %xmm0, -31(%edx)
-L(fwd_write_23bytes):
- movq -23(%eax), %xmm0
- movq %xmm0, -23(%edx)
-L(fwd_write_15bytes):
- movq -15(%eax), %xmm0
- movq %xmm0, -15(%edx)
-L(fwd_write_7bytes):
- movl -7(%eax), %ecx
- movl %ecx, -7(%edx)
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_43bytes):
- movq -43(%eax), %xmm0
- movq %xmm0, -43(%edx)
-L(fwd_write_35bytes):
- movq -35(%eax), %xmm0
- movq %xmm0, -35(%edx)
-L(fwd_write_27bytes):
- movq -27(%eax), %xmm0
- movq %xmm0, -27(%edx)
-L(fwd_write_19bytes):
- movq -19(%eax), %xmm0
- movq %xmm0, -19(%edx)
-L(fwd_write_11bytes):
- movq -11(%eax), %xmm0
- movq %xmm0, -11(%edx)
-L(fwd_write_3bytes):
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_40bytes_align):
- movdqa -40(%eax), %xmm0
- movdqa %xmm0, -40(%edx)
-L(fwd_write_24bytes_align):
- movdqa -24(%eax), %xmm0
- movdqa %xmm0, -24(%edx)
-L(fwd_write_8bytes_align):
- movq -8(%eax), %xmm0
- movq %xmm0, -8(%edx)
-L(fwd_write_0bytes_align):
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_32bytes_align):
- movdqa -32(%eax), %xmm0
- movdqa %xmm0, -32(%edx)
-L(fwd_write_16bytes_align):
- movdqa -16(%eax), %xmm0
- movdqa %xmm0, -16(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_5bytes_align):
- movl -5(%eax), %ecx
- movl -4(%eax), %eax
- movl %ecx, -5(%edx)
- movl %eax, -4(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_45bytes_align):
- movdqa -45(%eax), %xmm0
- movdqa %xmm0, -45(%edx)
-L(fwd_write_29bytes_align):
- movdqa -29(%eax), %xmm0
- movdqa %xmm0, -29(%edx)
-L(fwd_write_13bytes_align):
- movq -13(%eax), %xmm0
- movq %xmm0, -13(%edx)
- movl -5(%eax), %ecx
- movl %ecx, -5(%edx)
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_37bytes_align):
- movdqa -37(%eax), %xmm0
- movdqa %xmm0, -37(%edx)
-L(fwd_write_21bytes_align):
- movdqa -21(%eax), %xmm0
- movdqa %xmm0, -21(%edx)
- movl -5(%eax), %ecx
- movl %ecx, -5(%edx)
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_41bytes_align):
- movdqa -41(%eax), %xmm0
- movdqa %xmm0, -41(%edx)
-L(fwd_write_25bytes_align):
- movdqa -25(%eax), %xmm0
- movdqa %xmm0, -25(%edx)
-L(fwd_write_9bytes_align):
- movq -9(%eax), %xmm0
- movq %xmm0, -9(%edx)
-L(fwd_write_1bytes_align):
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_33bytes_align):
- movdqa -33(%eax), %xmm0
- movdqa %xmm0, -33(%edx)
-L(fwd_write_17bytes_align):
- movdqa -17(%eax), %xmm0
- movdqa %xmm0, -17(%edx)
- movzbl -1(%eax), %ecx
- movb %cl, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_46bytes_align):
- movdqa -46(%eax), %xmm0
- movdqa %xmm0, -46(%edx)
-L(fwd_write_30bytes_align):
- movdqa -30(%eax), %xmm0
- movdqa %xmm0, -30(%edx)
-L(fwd_write_14bytes_align):
- movq -14(%eax), %xmm0
- movq %xmm0, -14(%edx)
-L(fwd_write_6bytes_align):
- movl -6(%eax), %ecx
- movl %ecx, -6(%edx)
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_38bytes_align):
- movdqa -38(%eax), %xmm0
- movdqa %xmm0, -38(%edx)
-L(fwd_write_22bytes_align):
- movdqa -22(%eax), %xmm0
- movdqa %xmm0, -22(%edx)
- movl -6(%eax), %ecx
- movl %ecx, -6(%edx)
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_42bytes_align):
- movdqa -42(%eax), %xmm0
- movdqa %xmm0, -42(%edx)
-L(fwd_write_26bytes_align):
- movdqa -26(%eax), %xmm0
- movdqa %xmm0, -26(%edx)
-L(fwd_write_10bytes_align):
- movq -10(%eax), %xmm0
- movq %xmm0, -10(%edx)
-L(fwd_write_2bytes_align):
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_34bytes_align):
- movdqa -34(%eax), %xmm0
- movdqa %xmm0, -34(%edx)
-L(fwd_write_18bytes_align):
- movdqa -18(%eax), %xmm0
- movdqa %xmm0, -18(%edx)
- movzwl -2(%eax), %ecx
- movw %cx, -2(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_47bytes_align):
- movdqa -47(%eax), %xmm0
- movdqa %xmm0, -47(%edx)
-L(fwd_write_31bytes_align):
- movdqa -31(%eax), %xmm0
- movdqa %xmm0, -31(%edx)
-L(fwd_write_15bytes_align):
- movq -15(%eax), %xmm0
- movq %xmm0, -15(%edx)
-L(fwd_write_7bytes_align):
- movl -7(%eax), %ecx
- movl %ecx, -7(%edx)
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_39bytes_align):
- movdqa -39(%eax), %xmm0
- movdqa %xmm0, -39(%edx)
-L(fwd_write_23bytes_align):
- movdqa -23(%eax), %xmm0
- movdqa %xmm0, -23(%edx)
- movl -7(%eax), %ecx
- movl %ecx, -7(%edx)
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_43bytes_align):
- movdqa -43(%eax), %xmm0
- movdqa %xmm0, -43(%edx)
-L(fwd_write_27bytes_align):
- movdqa -27(%eax), %xmm0
- movdqa %xmm0, -27(%edx)
-L(fwd_write_11bytes_align):
- movq -11(%eax), %xmm0
- movq %xmm0, -11(%edx)
-L(fwd_write_3bytes_align):
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_35bytes_align):
- movdqa -35(%eax), %xmm0
- movdqa %xmm0, -35(%edx)
-L(fwd_write_19bytes_align):
- movdqa -19(%eax), %xmm0
- movdqa %xmm0, -19(%edx)
- movzwl -3(%eax), %ecx
- movzbl -1(%eax), %eax
- movw %cx, -3(%edx)
- movb %al, -1(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_44bytes_align):
- movdqa -44(%eax), %xmm0
- movdqa %xmm0, -44(%edx)
-L(fwd_write_28bytes_align):
- movdqa -28(%eax), %xmm0
- movdqa %xmm0, -28(%edx)
-L(fwd_write_12bytes_align):
- movq -12(%eax), %xmm0
- movq %xmm0, -12(%edx)
-L(fwd_write_4bytes_align):
- movl -4(%eax), %ecx
- movl %ecx, -4(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN
-
- .p2align 4
-L(fwd_write_36bytes_align):
- movdqa -36(%eax), %xmm0
- movdqa %xmm0, -36(%edx)
-L(fwd_write_20bytes_align):
- movdqa -20(%eax), %xmm0
- movdqa %xmm0, -20(%edx)
- movl -4(%eax), %ecx
- movl %ecx, -4(%edx)
-#ifdef USE_AS_MEMPCPY
- movl %edx, %eax
-#else
- movl DEST(%esp), %eax
-#endif
- RETURN_END
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(large_page):
- movdqu (%eax), %xmm1
-#ifdef USE_AS_MEMMOVE
- movl DEST+4(%esp), %edi
- movdqu %xmm0, (%edi)
-#endif
- lea 16(%eax), %eax
- movntdq %xmm1, (%edx)
- lea 16(%edx), %edx
- lea -0x90(%ecx), %ecx
- POP (%edi)
-
- .p2align 4
-L(large_page_loop):
- movdqu (%eax), %xmm0
- movdqu 0x10(%eax), %xmm1
- movdqu 0x20(%eax), %xmm2
- movdqu 0x30(%eax), %xmm3
- movdqu 0x40(%eax), %xmm4
- movdqu 0x50(%eax), %xmm5
- movdqu 0x60(%eax), %xmm6
- movdqu 0x70(%eax), %xmm7
- lea 0x80(%eax), %eax
-
- sub $0x80, %ecx
- movntdq %xmm0, (%edx)
- movntdq %xmm1, 0x10(%edx)
- movntdq %xmm2, 0x20(%edx)
- movntdq %xmm3, 0x30(%edx)
- movntdq %xmm4, 0x40(%edx)
- movntdq %xmm5, 0x50(%edx)
- movntdq %xmm6, 0x60(%edx)
- movntdq %xmm7, 0x70(%edx)
- lea 0x80(%edx), %edx
- jae L(large_page_loop)
- cmp $-0x40, %ecx
- lea 0x80(%ecx), %ecx
- jl L(large_page_less_64bytes)
-
- movdqu (%eax), %xmm0
- movdqu 0x10(%eax), %xmm1
- movdqu 0x20(%eax), %xmm2
- movdqu 0x30(%eax), %xmm3
- lea 0x40(%eax), %eax
-
- movntdq %xmm0, (%edx)
- movntdq %xmm1, 0x10(%edx)
- movntdq %xmm2, 0x20(%edx)
- movntdq %xmm3, 0x30(%edx)
- lea 0x40(%edx), %edx
- sub $0x40, %ecx
-L(large_page_less_64bytes):
- cmp $32, %ecx
- jb L(large_page_less_32bytes)
- movdqu (%eax), %xmm0
- movdqu 0x10(%eax), %xmm1
- lea 0x20(%eax), %eax
- movntdq %xmm0, (%edx)
- movntdq %xmm1, 0x10(%edx)
- lea 0x20(%edx), %edx
- sub $0x20, %ecx
-L(large_page_less_32bytes):
- add %ecx, %edx
- add %ecx, %eax
- sfence
- BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
-
- .p2align 4
-L(bk_write_44bytes):
- movq 36(%eax), %xmm0
- movq %xmm0, 36(%edx)
-L(bk_write_36bytes):
- movq 28(%eax), %xmm0
- movq %xmm0, 28(%edx)
-L(bk_write_28bytes):
- movq 20(%eax), %xmm0
- movq %xmm0, 20(%edx)
-L(bk_write_20bytes):
- movq 12(%eax), %xmm0
- movq %xmm0, 12(%edx)
-L(bk_write_12bytes):
- movq 4(%eax), %xmm0
- movq %xmm0, 4(%edx)
-L(bk_write_4bytes):
- movl (%eax), %ecx
- movl %ecx, (%edx)
-L(bk_write_0bytes):
- movl DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-#endif
- RETURN
-
- .p2align 4
-L(bk_write_40bytes):
- movq 32(%eax), %xmm0
- movq %xmm0, 32(%edx)
-L(bk_write_32bytes):
- movq 24(%eax), %xmm0
- movq %xmm0, 24(%edx)
-L(bk_write_24bytes):
- movq 16(%eax), %xmm0
- movq %xmm0, 16(%edx)
-L(bk_write_16bytes):
- movq 8(%eax), %xmm0
- movq %xmm0, 8(%edx)
-L(bk_write_8bytes):
- movq (%eax), %xmm0
- movq %xmm0, (%edx)
- movl DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-#endif
- RETURN
-
- .p2align 4
-L(bk_write_45bytes):
- movq 37(%eax), %xmm0
- movq %xmm0, 37(%edx)
-L(bk_write_37bytes):
- movq 29(%eax), %xmm0
- movq %xmm0, 29(%edx)
-L(bk_write_29bytes):
- movq 21(%eax), %xmm0
- movq %xmm0, 21(%edx)
-L(bk_write_21bytes):
- movq 13(%eax), %xmm0
- movq %xmm0, 13(%edx)
-L(bk_write_13bytes):
- movq 5(%eax), %xmm0
- movq %xmm0, 5(%edx)
-L(bk_write_5bytes):
- movl 1(%eax), %ecx
- movl %ecx, 1(%edx)
-L(bk_write_1bytes):
- movzbl (%eax), %ecx
- movb %cl, (%edx)
- movl DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-#endif
- RETURN
-
- .p2align 4
-L(bk_write_41bytes):
- movq 33(%eax), %xmm0
- movq %xmm0, 33(%edx)
-L(bk_write_33bytes):
- movq 25(%eax), %xmm0
- movq %xmm0, 25(%edx)
-L(bk_write_25bytes):
- movq 17(%eax), %xmm0
- movq %xmm0, 17(%edx)
-L(bk_write_17bytes):
- movq 9(%eax), %xmm0
- movq %xmm0, 9(%edx)
-L(bk_write_9bytes):
- movq 1(%eax), %xmm0
- movq %xmm0, 1(%edx)
- movzbl (%eax), %ecx
- movb %cl, (%edx)
- movl DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-#endif
- RETURN
-
- .p2align 4
-L(bk_write_46bytes):
- movq 38(%eax), %xmm0
- movq %xmm0, 38(%edx)
-L(bk_write_38bytes):
- movq 30(%eax), %xmm0
- movq %xmm0, 30(%edx)
-L(bk_write_30bytes):
- movq 22(%eax), %xmm0
- movq %xmm0, 22(%edx)
-L(bk_write_22bytes):
- movq 14(%eax), %xmm0
- movq %xmm0, 14(%edx)
-L(bk_write_14bytes):
- movq 6(%eax), %xmm0
- movq %xmm0, 6(%edx)
-L(bk_write_6bytes):
- movl 2(%eax), %ecx
- movl %ecx, 2(%edx)
- movzwl (%eax), %ecx
- movw %cx, (%edx)
- movl DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-#endif
- RETURN
-
- .p2align 4
-L(bk_write_42bytes):
- movq 34(%eax), %xmm0
- movq %xmm0, 34(%edx)
-L(bk_write_34bytes):
- movq 26(%eax), %xmm0
- movq %xmm0, 26(%edx)
-L(bk_write_26bytes):
- movq 18(%eax), %xmm0
- movq %xmm0, 18(%edx)
-L(bk_write_18bytes):
- movq 10(%eax), %xmm0
- movq %xmm0, 10(%edx)
-L(bk_write_10bytes):
- movq 2(%eax), %xmm0
- movq %xmm0, 2(%edx)
-L(bk_write_2bytes):
- movzwl (%eax), %ecx
- movw %cx, (%edx)
- movl DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-#endif
- RETURN
-
- .p2align 4
-L(bk_write_47bytes):
- movq 39(%eax), %xmm0
- movq %xmm0, 39(%edx)
-L(bk_write_39bytes):
- movq 31(%eax), %xmm0
- movq %xmm0, 31(%edx)
-L(bk_write_31bytes):
- movq 23(%eax), %xmm0
- movq %xmm0, 23(%edx)
-L(bk_write_23bytes):
- movq 15(%eax), %xmm0
- movq %xmm0, 15(%edx)
-L(bk_write_15bytes):
- movq 7(%eax), %xmm0
- movq %xmm0, 7(%edx)
-L(bk_write_7bytes):
- movl 3(%eax), %ecx
- movl %ecx, 3(%edx)
- movzwl 1(%eax), %ecx
- movw %cx, 1(%edx)
- movzbl (%eax), %eax
- movb %al, (%edx)
- movl DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-#endif
- RETURN
-
- .p2align 4
-L(bk_write_43bytes):
- movq 35(%eax), %xmm0
- movq %xmm0, 35(%edx)
-L(bk_write_35bytes):
- movq 27(%eax), %xmm0
- movq %xmm0, 27(%edx)
-L(bk_write_27bytes):
- movq 19(%eax), %xmm0
- movq %xmm0, 19(%edx)
-L(bk_write_19bytes):
- movq 11(%eax), %xmm0
- movq %xmm0, 11(%edx)
-L(bk_write_11bytes):
- movq 3(%eax), %xmm0
- movq %xmm0, 3(%edx)
-L(bk_write_3bytes):
- movzwl 1(%eax), %ecx
- movw %cx, 1(%edx)
- movzbl (%eax), %eax
- movb %al, (%edx)
- movl DEST(%esp), %eax
-#ifdef USE_AS_MEMPCPY
- movl LEN(%esp), %ecx
- add %ecx, %eax
-#endif
- RETURN_END
-
-
- .pushsection .rodata.ssse3,"a",@progbits
- .p2align 2
-L(table_48bytes_fwd):
- .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
- .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
-
- .p2align 2
-L(table_48bytes_fwd_align):
- .int JMPTBL (L(fwd_write_0bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_1bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_2bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_3bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_4bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_5bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_6bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_7bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_8bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_9bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_10bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_11bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_12bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_13bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_14bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_15bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_16bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_17bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_18bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_19bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_20bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_21bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_22bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_23bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_24bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_25bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_26bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_27bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_28bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_29bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_30bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_31bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_32bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_33bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_34bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_35bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_36bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_37bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_38bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_39bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_40bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_41bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_42bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_43bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_44bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_45bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_46bytes_align), L(table_48bytes_fwd_align))
- .int JMPTBL (L(fwd_write_47bytes_align), L(table_48bytes_fwd_align))
-
- .p2align 2
-L(shl_table):
- .int JMPTBL (L(shl_0), L(shl_table))
- .int JMPTBL (L(shl_1), L(shl_table))
- .int JMPTBL (L(shl_2), L(shl_table))
- .int JMPTBL (L(shl_3), L(shl_table))
- .int JMPTBL (L(shl_4), L(shl_table))
- .int JMPTBL (L(shl_5), L(shl_table))
- .int JMPTBL (L(shl_6), L(shl_table))
- .int JMPTBL (L(shl_7), L(shl_table))
- .int JMPTBL (L(shl_8), L(shl_table))
- .int JMPTBL (L(shl_9), L(shl_table))
- .int JMPTBL (L(shl_10), L(shl_table))
- .int JMPTBL (L(shl_11), L(shl_table))
- .int JMPTBL (L(shl_12), L(shl_table))
- .int JMPTBL (L(shl_13), L(shl_table))
- .int JMPTBL (L(shl_14), L(shl_table))
- .int JMPTBL (L(shl_15), L(shl_table))
-
- .p2align 2
-L(table_48_bytes_bwd):
- .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
- .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
-
- .popsection
-
-#ifdef USE_AS_MEMMOVE
- .p2align 4
-L(copy_backward):
- PUSH (%edi)
- movl %eax, %edi
- lea (%ecx,%edx,1),%edx
- lea (%ecx,%edi,1),%edi
- testl $0x3, %edx
- jnz L(bk_align)
-
-L(bk_aligned_4):
- cmp $64, %ecx
- jae L(bk_write_more64bytes)
-
-L(bk_write_64bytesless):
- cmp $32, %ecx
- jb L(bk_write_less32bytes)
-
-L(bk_write_more32bytes):
- /* Copy 32 bytes at a time. */
- sub $32, %ecx
- movq -8(%edi), %xmm0
- movq %xmm0, -8(%edx)
- movq -16(%edi), %xmm0
- movq %xmm0, -16(%edx)
- movq -24(%edi), %xmm0
- movq %xmm0, -24(%edx)
- movq -32(%edi), %xmm0
- movq %xmm0, -32(%edx)
- sub $32, %edx
- sub $32, %edi
-
-L(bk_write_less32bytes):
- movl %edi, %eax
- sub %ecx, %edx
- sub %ecx, %eax
- POP (%edi)
-L(bk_write_less32bytes_2):
- BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
-
- CFI_PUSH (%edi)
-
- .p2align 4
-L(bk_align):
- cmp $8, %ecx
- jbe L(bk_write_less32bytes)
- testl $1, %edx
- /* We get here only if (EDX & 3 ) != 0 so if (EDX & 1) ==0,
- then (EDX & 2) must be != 0. */
- jz L(bk_got2)
- sub $1, %edi
- sub $1, %ecx
- sub $1, %edx
- movzbl (%edi), %eax
- movb %al, (%edx)
-
- testl $2, %edx
- jz L(bk_aligned_4)
-
-L(bk_got2):
- sub $2, %edi
- sub $2, %ecx
- sub $2, %edx
- movzwl (%edi), %eax
- movw %ax, (%edx)
- jmp L(bk_aligned_4)
-
- .p2align 4
-L(bk_write_more64bytes):
- /* Check alignment of last byte. */
- testl $15, %edx
- jz L(bk_ssse3_cpy_pre)
-
-/* EDX is aligned 4 bytes, but not 16 bytes. */
-L(bk_ssse3_align):
- sub $4, %edi
- sub $4, %ecx
- sub $4, %edx
- movl (%edi), %eax
- movl %eax, (%edx)
-
- testl $15, %edx
- jz L(bk_ssse3_cpy_pre)
-
- sub $4, %edi
- sub $4, %ecx
- sub $4, %edx
- movl (%edi), %eax
- movl %eax, (%edx)
-
- testl $15, %edx
- jz L(bk_ssse3_cpy_pre)
-
- sub $4, %edi
- sub $4, %ecx
- sub $4, %edx
- movl (%edi), %eax
- movl %eax, (%edx)
-
-L(bk_ssse3_cpy_pre):
- cmp $64, %ecx
- jb L(bk_write_more32bytes)
-
- .p2align 4
-L(bk_ssse3_cpy):
- sub $64, %edi
- sub $64, %ecx
- sub $64, %edx
- movdqu 0x30(%edi), %xmm3
- movdqa %xmm3, 0x30(%edx)
- movdqu 0x20(%edi), %xmm2
- movdqa %xmm2, 0x20(%edx)
- movdqu 0x10(%edi), %xmm1
- movdqa %xmm1, 0x10(%edx)
- movdqu (%edi), %xmm0
- movdqa %xmm0, (%edx)
- cmp $64, %ecx
- jae L(bk_ssse3_cpy)
- jmp L(bk_write_64bytesless)
-
-#endif
-
-END (MEMCPY)
diff --git a/libc/arch-x86/string/ssse3-memmove-atom.S b/libc/arch-x86/string/ssse3-memmove-atom.S
deleted file mode 100644
index 3572eac..0000000
--- a/libc/arch-x86/string/ssse3-memmove-atom.S
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
-Copyright (c) 2010, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
-
- * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-
-#define MEMCPY memmove_atom
-#define USE_AS_MEMMOVE
-#include "ssse3-memcpy-atom.S"
diff --git a/libc/arch-x86/string/ssse3-strncpy-atom.S b/libc/arch-x86/string/ssse3-strncpy-atom.S
deleted file mode 100644
index 0c27ffe..0000000
--- a/libc/arch-x86/string/ssse3-strncpy-atom.S
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
-Copyright (c) 2011, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
-
- * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define USE_AS_STRNCPY
-#define STRCPY strncpy_atom
-#include "ssse3-strcpy-atom.S"
diff --git a/libc/include/bits/fortify/string.h b/libc/include/bits/fortify/string.h
index 4d32b04..041967b 100644
--- a/libc/include/bits/fortify/string.h
+++ b/libc/include/bits/fortify/string.h
@@ -220,8 +220,13 @@
}
#if __BIONIC_FORTIFY_RUNTIME_CHECKS_ENABLED
-__BIONIC_FORTIFY_INLINE
-size_t strlen(const char* _Nonnull const s __pass_object_size0) __overloadable {
+/*
+ * Clang, when parsing C, can fold strlen to a constant without LLVM's help.
+ * This doesn't apply to overloads of strlen, so write this differently. We
+ * can't use `__pass_object_size0` here, but that's fine: it doesn't help much
+ * on __always_inline functions.
+ */
+extern __always_inline __inline__ __attribute__((gnu_inline)) size_t strlen(const char* _Nonnull s) {
return __strlen_chk(s, __bos0(s));
}
#endif
diff --git a/libc/include/unistd.h b/libc/include/unistd.h
index e1c268f..9bc01f0 100644
--- a/libc/include/unistd.h
+++ b/libc/include/unistd.h
@@ -257,8 +257,29 @@
int linkat(int __old_dir_fd, const char* _Nonnull __old_path, int __new_dir_fd, const char* _Nonnull __new_path, int __flags);
int unlink(const char* _Nonnull __path);
int unlinkat(int __dirfd, const char* _Nonnull __path, int __flags);
+
+/**
+ * [chdir(2)](https://man7.org/linux/man-pages/man2/chdir.2.html) changes
+ * the current working directory to the given path.
+ *
+ * This function affects all threads in the process, so is generally a bad idea
+ * on Android where most code will be running in a multi-threaded context.
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int chdir(const char* _Nonnull __path);
+
+/**
+ * [fchdir(2)](https://man7.org/linux/man-pages/man2/chdir.2.html) changes
+ * the current working directory to the directory referred to by the given fd.
+ *
+ * This function affects all threads in the process, so is generally a bad idea
+ * on Android where most code will be running in a multi-threaded context.
+ *
+ * Returns 0 on success, and returns -1 and sets `errno` on failure.
+ */
int fchdir(int __fd);
+
int rmdir(const char* _Nonnull __path);
int pipe(int __fds[_Nonnull 2]);
#if defined(__USE_GNU)
diff --git a/libc/kernel/tools/update_all.py b/libc/kernel/tools/update_all.py
index ae89a80..9e5ed42 100755
--- a/libc/kernel/tools/update_all.py
+++ b/libc/kernel/tools/update_all.py
@@ -92,7 +92,8 @@
'kernel/uapi/asm-arm/asm/unistd.h',
'kernel/uapi/asm-arm/asm/unistd-eabi.h',
'kernel/uapi/asm-arm/asm/unistd-oabi.h',
- 'kernel/uapi/asm-riscv/asm/unistd.h',
+ 'kernel/uapi/asm-riscv/asm/unistd_32.h',
+ 'kernel/uapi/asm-riscv/asm/unistd_64.h',
'kernel/uapi/asm-x86/asm/unistd_32.h',
'kernel/uapi/asm-x86/asm/unistd_64.h',
'kernel/uapi/asm-x86/asm/unistd_x32.h']:
diff --git a/linker/linker_phdr.cpp b/linker/linker_phdr.cpp
index 48206be..b7db4cd 100644
--- a/linker/linker_phdr.cpp
+++ b/linker/linker_phdr.cpp
@@ -976,7 +976,6 @@
ElfW(Addr) seg_start = phdr->p_vaddr + load_bias_;
ElfW(Addr) seg_end = seg_start + p_memsz;
- ElfW(Addr) seg_page_start = page_start(seg_start);
ElfW(Addr) seg_page_end = page_end(seg_end);
ElfW(Addr) seg_file_end = seg_start + p_filesz;
diff --git a/tests/Android.bp b/tests/Android.bp
index 6aecb40..0bd4a32 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -746,6 +746,40 @@
},
}
+cc_defaults {
+ name: "bionic_fortify_c_tests_defaults",
+ defaults: [
+ "bionic_clang_fortify_tests_w_flags",
+ "bionic_tests_defaults",
+ ],
+ cflags: [
+ "-U_FORTIFY_SOURCE",
+ // -fbuiltin is required here to counteract -fno-builtin from
+ // `bionic_tests_defaults`. With `-fno-builtin`, Clang won't
+ // const-evaluate calls to `strlen`, which is tested for here.
+ "-fbuiltin",
+ ],
+ srcs: [
+ "clang_fortify_c_only_tests.c",
+ ],
+ tidy: false,
+ shared: {
+ enabled: false,
+ },
+}
+
+cc_test_library {
+ name: "libfortify1-c-tests-clang",
+ defaults: ["bionic_fortify_c_tests_defaults"],
+ cflags: ["-D_FORTIFY_SOURCE=1"],
+}
+
+cc_test_library {
+ name: "libfortify2-c-tests-clang",
+ defaults: ["bionic_fortify_c_tests_defaults"],
+ cflags: ["-D_FORTIFY_SOURCE=2"],
+}
+
// -----------------------------------------------------------------------------
// Library of all tests (excluding the dynamic linker tests).
// -----------------------------------------------------------------------------
@@ -757,8 +791,10 @@
"libBionicStandardTests",
"libBionicElfTlsTests",
"libBionicFramePointerTests",
+ "libfortify1-c-tests-clang",
"libfortify1-tests-clang",
"libfortify1-new-tests-clang",
+ "libfortify2-c-tests-clang",
"libfortify2-tests-clang",
"libfortify2-new-tests-clang",
],
diff --git a/tests/clang_fortify_c_only_tests.c b/tests/clang_fortify_c_only_tests.c
new file mode 100644
index 0000000..3bec848
--- /dev/null
+++ b/tests/clang_fortify_c_only_tests.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+// This is a test specifically of bionic's FORTIFY machinery. Other stdlibs need not apply.
+#ifdef __BIONIC__
+
+// Ensure that strlen can be evaluated at compile-time. Clang doesn't support
+// this in C++, but does in C.
+_Static_assert(strlen("foo") == 3, "");
+
+#endif // __BIONIC__
diff --git a/tests/clang_fortify_tests.cpp b/tests/clang_fortify_tests.cpp
index f08fd1f..105c261 100644
--- a/tests/clang_fortify_tests.cpp
+++ b/tests/clang_fortify_tests.cpp
@@ -89,6 +89,8 @@
#include <unistd.h>
#include <wchar.h>
+#include <array>
+
#ifndef COMPILATION_TESTS
#include <android-base/silent_death_test.h>
#include <gtest/gtest.h>
@@ -133,6 +135,25 @@
const static int kBogusFD = -1;
+FORTIFY_TEST(strlen) {
+ auto run_strlen_with_contents = [&](std::array<char, 3> contents) {
+ // A lot of cruft is necessary to make this test do the right thing. LLVM and Clang love to
+ // fold/optimize strlen calls, and that's the opposite of what we want to happen.
+
+ // Loop to convince LLVM that `contents` can never be known (since `xor volatile_value` can flip
+ // any bit in each elem of `contents`).
+ volatile char always_zero = 0;
+ for (char& c : contents) {
+ c ^= always_zero;
+ }
+ // Store in a volatile, so the strlen itself cannot be optimized out.
+ volatile size_t _strlen_result = strlen(&contents.front());
+ };
+
+ EXPECT_NO_DEATH(run_strlen_with_contents({'f', 'o', '\0'}));
+ EXPECT_FORTIFY_DEATH(run_strlen_with_contents({'f', 'o', 'o'}));
+}
+
FORTIFY_TEST(string) {
char small_buffer[8] = {};
diff --git a/tests/cpu_target_features_test.cpp b/tests/cpu_target_features_test.cpp
index 3458bca..d773772 100644
--- a/tests/cpu_target_features_test.cpp
+++ b/tests/cpu_target_features_test.cpp
@@ -54,15 +54,3 @@
GTEST_SKIP() << "Not targeting an aarch64 architecture.";
#endif
}
-
-TEST(cpu_target_features, has_expected_arm_compiler_values) {
-#if defined(__arm__)
- ExecTestHelper eth;
- char* const argv[] = {nullptr};
- const auto invocation = [&] { execvp("cpu-target-features", argv); };
- eth.Run(invocation, 0, "(^|\n)__ARM_FEATURE_AES=1($|\n)");
- eth.Run(invocation, 0, "(^|\n)__ARM_FEATURE_CRC32=1($|\n)");
-#else
- GTEST_SKIP() << "Not targeting an arm architecture.";
-#endif
-}