Merge "Revert "Add 64-bit slm optimized strlcpy and strlcat.""
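
For context only (not part of the original change): the deleted sse2-strlcat-slm.S and sse2-strlcpy-slm.S implement the standard strlcpy/strlcat contract in hand-written SSE2 assembly. A minimal C sketch of that contract is shown below; the helper names strlcpy_ref and strlcat_ref are illustrative and are not bionic symbols.

    #include <stddef.h>
    #include <string.h>

    /* strlcpy contract: copy at most size-1 bytes of src into dst, always
     * NUL-terminate when size > 0, and return strlen(src) so callers can
     * detect truncation (result >= size means truncated). */
    size_t strlcpy_ref(char *dst, const char *src, size_t size) {
        size_t srclen = strlen(src);
        if (size > 0) {
            size_t n = srclen < size - 1 ? srclen : size - 1;
            memcpy(dst, src, n);
            dst[n] = '\0';
        }
        return srclen;
    }

    /* strlcat contract: append src to dst, keeping the total length
     * (including the NUL) within size; return the length the combined
     * string would have had without truncation. */
    size_t strlcat_ref(char *dst, const char *src, size_t size) {
        size_t dstlen = strnlen(dst, size);
        if (dstlen == size)          /* no NUL within size: nothing can be appended */
            return size + strlen(src);
        return dstlen + strlcpy_ref(dst + dstlen, src, size - dstlen);
    }
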
diff --git a/libc/Android.bp b/libc/Android.bp
index 360aea3..102a095 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -624,8 +624,6 @@
"upstream-openbsd/lib/libc/string/stpncpy.c",
"upstream-openbsd/lib/libc/string/strcat.c",
"upstream-openbsd/lib/libc/string/strcpy.c",
- "upstream-openbsd/lib/libc/string/strlcat.c",
- "upstream-openbsd/lib/libc/string/strlcpy.c",
"upstream-openbsd/lib/libc/string/strncat.c",
"upstream-openbsd/lib/libc/string/strncmp.c",
"upstream-openbsd/lib/libc/string/strncpy.c",
@@ -1176,8 +1174,6 @@
"arch-x86_64/string/sse2-stpncpy-slm.S",
"arch-x86_64/string/sse2-strcat-slm.S",
"arch-x86_64/string/sse2-strcpy-slm.S",
- "arch-x86_64/string/sse2-strlcat-slm.S",
- "arch-x86_64/string/sse2-strlcpy-slm.S",
"arch-x86_64/string/sse2-strlen-slm.S",
"arch-x86_64/string/sse2-strncat-slm.S",
"arch-x86_64/string/sse2-strncpy-slm.S",
diff --git a/libc/arch-x86_64/string/sse2-strlcat-slm.S b/libc/arch-x86_64/string/sse2-strlcat-slm.S
deleted file mode 100644
index d79e8c1..0000000
--- a/libc/arch-x86_64/string/sse2-strlcat-slm.S
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
-Copyright (c) 2014, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
-
- * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#define USE_AS_STRLCAT
-
-#ifndef STRLCPY
-# define STRLCPY strlcat
-#endif
-
-#include "sse2-strlcpy-slm.S"
diff --git a/libc/arch-x86_64/string/sse2-strlcpy-slm.S b/libc/arch-x86_64/string/sse2-strlcpy-slm.S
deleted file mode 100755
index 9d4b52f..0000000
--- a/libc/arch-x86_64/string/sse2-strlcpy-slm.S
+++ /dev/null
@@ -1,1062 +0,0 @@
-/*
-Copyright (c) 2014, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
- * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
-
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials provided with the distribution.
-
- * Neither the name of Intel Corporation nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#ifndef L
-# define L(label) .L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc .cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc .cfi_endproc
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name) \
- .type name, @function; \
- .globl name; \
- .p2align 4; \
-name: \
- cfi_startproc
-#endif
-
-#ifndef END
-# define END(name) \
- cfi_endproc; \
- .size name, .-name
-#endif
-
-
-#ifndef STRLCPY
-# define STRLCPY strlcpy
-#endif
-
-#define JMPTBL(I, B) I - B
-#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
- lea TABLE(%rip), %r11; \
- movslq (%r11, INDEX, SCALE), %rcx; \
- lea (%r11, %rcx), %rcx; \
- jmp *%rcx
-
-#define RETURN \
- add %r9, %rax; \
- ret
-
-.text
-ENTRY (STRLCPY)
- xor %rax, %rax
- xor %r9, %r9
- mov %rdx, %r8
- cmp $0, %r8
- jz L(CalculateSrcLen)
-
-#ifdef USE_AS_STRLCAT
- xor %rcx, %rcx
- pxor %xmm0, %xmm0
-
- movdqu (%rdi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %rdx
-
- cmp $17, %r8
- jb L(SizeEndCase1)
- test %rdx, %rdx
- jnz L(StringEndCase1)
-
- add $16, %rax
- movdqu 16(%rdi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %rdx
-
- cmp $33, %r8
- jb L(SizeEndCase1)
- test %rdx, %rdx
- jnz L(StringEndCase1)
-
- mov %rdi, %rcx
- and $15, %rcx
- and $-16, %rdi
-
- add %rcx, %r8
- sub $16, %r8
-
-L(DstLenLoop):
- movdqa (%rdi, %rax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %rdx
- sub $16, %r8
- jbe L(SizeEndCase2)
- test %rdx, %rdx
- jnz L(StringEndCase2)
- add $16, %rax
- jmp L(DstLenLoop)
-
-L(StringEndCase2):
- add $16, %r8
- bsf %rdx, %rdx
- sub %rdx, %r8
- add %rdx, %rax
- sub %rcx, %r9
- add %rax, %rdi
- jmp L(CopySrcString)
-
-L(SizeEndCase1):
- test %rdx, %rdx
- jz L(SizeEnd)
- bsf %rdx, %rdx
- add %rdx, %rax
- cmp %r8, %rax
- jb L(StringEnd)
-L(SizeEnd):
- mov %r8, %r9
- jmp L(CalculateSrcLenCase1)
-
-L(SizeEndCase2):
- add $16, %r8
- test %rdx, %rdx
- jz L(StringEndCase4)
- bsf %rdx, %rdx
- cmp %r8, %rdx
- jb L(StringEndCase3)
-L(StringEndCase4):
- add %r8, %rax
- sub %rcx, %rax
- mov %rax, %r9
- jmp L(CalculateSrcLenCase1)
-
-L(StringEndCase3):
- add %rdx, %rax
- sub %rcx, %r9
- add %rax, %rdi
- sub %rdx, %r8
- jmp L(CopySrcString)
-
-L(StringEndCase1):
- bsf %rdx, %rdx
- add %rdx, %rax
- sub %rcx, %rax
-L(StringEnd):
- add %rax, %rdi
- sub %rax, %r8
-#endif
-
- mov %rsi, %rcx
- and $63, %rcx
- cmp $32, %rcx
- jbe L(CopySrcString)
-
- and $-16, %rsi
- and $15, %rcx
- pxor %xmm0, %xmm0
- pxor %xmm1, %xmm1
-
- pcmpeqb (%rsi), %xmm1
- pmovmskb %xmm1, %rdx
- shr %cl, %rdx
- mov $16, %r10
- sub %rcx, %r10
- cmp %r10, %r8
- jbe L(CopyFrom1To16BytesTailCase2OrCase3)
- test %rdx, %rdx
- jnz L(CopyFrom1To16BytesTail)
-
- pcmpeqb 16(%rsi), %xmm0
- pmovmskb %xmm0, %rdx
- add $16, %r10
- cmp %r10, %r8
- jbe L(CopyFrom1To32BytesCase2OrCase3)
- test %rdx, %rdx
- jnz L(CopyFrom1To32Bytes)
-
- movdqu (%rsi, %rcx), %xmm1
- movdqu %xmm1, (%rdi)
-#ifdef USE_AS_STRLCAT
- add %rax, %r9
-#endif
- jmp L(LoopStart)
-
- .p2align 4
-L(CopySrcString):
-#ifdef USE_AS_STRLCAT
- add %rax, %r9
- xor %rax, %rax
-#endif
- pxor %xmm0, %xmm0
- movdqu (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %rdx
-
- cmp $17, %r8
- jb L(CopyFrom1To16BytesTail1Case2OrCase3)
- test %rdx, %rdx
- jnz L(CopyFrom1To16BytesTail1)
-
- movdqu 16(%rsi), %xmm2
- pcmpeqb %xmm2, %xmm0
- movdqu %xmm1, (%rdi)
- pmovmskb %xmm0, %rdx
- add $16, %rax
-
- cmp $33, %r8
- jb L(CopyFrom1To32Bytes1Case2OrCase3)
- test %rdx, %rdx
- jnz L(CopyFrom1To32Bytes1)
-
- mov %rsi, %rcx
- and $15, %rcx
- and $-16, %rsi
-
-L(LoopStart):
- sub %rcx, %rdi
- add %rcx, %r8
- sub $16, %r8
- mov $16, %rax
-
-L(16Loop):
- movdqa (%rsi, %rax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %rdx
- sub $16, %r8
- jbe L(CopyFrom1To16BytesCase2OrCase3)
- test %rdx, %rdx
- jnz L(CopyFrom1To16BytesXmmExit)
- movdqu %xmm1, (%rdi, %rax)
- add $16, %rax
- jmp L(16Loop)
-
-/*------End of main part with loops---------------------*/
-
-/* Case1 */
- .p2align 4
-L(CopyFrom1To16Bytes):
- add %rcx, %rdi
- add %rcx, %rsi
- bsf %rdx, %rdx
- add %rdx, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesTail):
- add %rcx, %rsi
- bsf %rdx, %rdx
- add %rdx, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes1):
- add $16, %rsi
- add $16, %rdi
- sub $16, %r8
-L(CopyFrom1To16BytesTail1):
- bsf %rdx, %rdx
- add %rdx, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes):
- bsf %rdx, %rdx
- add %rcx, %rsi
- add $16, %rdx
- sub %rcx, %rdx
- add %rdx, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesExit):
- add %rdx, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)
-
-/* Case2 */
-
- .p2align 4
-L(CopyFrom1To16BytesCase2):
- add $16, %r8
- add %rax, %rdi
- add %rax, %rsi
- bsf %rdx, %rdx
- sub %rcx, %rax
- cmp %r8, %rdx
- jb L(CopyFrom1To16BytesExit)
- add %r8, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)
-
- .p2align 4
-L(CopyFrom1To32BytesCase2):
- add %rcx, %rsi
- bsf %rdx, %rdx
- add $16, %rdx
- sub %rcx, %rdx
- cmp %r8, %rdx
- jb L(CopyFrom1To16BytesExit)
- add %r8, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)
-
-L(CopyFrom1To16BytesTailCase2):
- add %rcx, %rsi
- bsf %rdx, %rdx
- cmp %r8, %rdx
- jb L(CopyFrom1To16BytesExit)
- add %r8, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesTail1Case2):
- bsf %rdx, %rdx
- cmp %r8, %rdx
- jb L(CopyFrom1To16BytesExit)
- add %r8, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)
-
-/* Case2 or Case3, Case3 */
-
- .p2align 4
-L(CopyFrom1To16BytesCase2OrCase3):
- test %rdx, %rdx
- jnz L(CopyFrom1To16BytesCase2)
- add $16, %r8
- add %rax, %rdi
- add %rax, %rsi
- add %r8, %rax
- sub %rcx, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)
-
- .p2align 4
-L(CopyFrom1To32BytesCase2OrCase3):
- test %rdx, %rdx
- jnz L(CopyFrom1To32BytesCase2)
- add %rcx, %rsi
- add %r8, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesTailCase2OrCase3):
- test %rdx, %rdx
- jnz L(CopyFrom1To16BytesTailCase2)
- add %rcx, %rsi
- add %r8, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)
-
- .p2align 4
-L(CopyFrom1To32Bytes1Case2OrCase3):
- add $16, %rdi
- add $16, %rsi
- sub $16, %r8
-L(CopyFrom1To16BytesTail1Case2OrCase3):
- test %rdx, %rdx
- jnz L(CopyFrom1To16BytesTail1Case2)
- add %r8, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %r8, 4)
-
- .p2align 4
-L(CopyFrom1To16BytesXmmExit):
- bsf %rdx, %rdx
- add %rax, %rdi
- add %rax, %rsi
- add %rdx, %rax
- sub %rcx, %rax
- BRANCH_TO_JMPTBL_ENTRY (L(ExitStringTailTable), %rdx, 4)
-
-/*------------End labels regarding with copying 1-16 bytes--and 1-32 bytes----*/
-
-
- .p2align 4
-L(Exit0):
- RETURN
-
- .p2align 4
-L(Exit1):
- movb $0, (%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit2):
- movb (%rsi), %dh
- movb %dh, (%rdi)
- movb $0, 1(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit3):
- movw (%rsi), %dx
- movw %dx, (%rdi)
- movb $0, 2(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit4):
- movw (%rsi), %cx
- movb 2(%rsi), %dh
- movw %cx, (%rdi)
- movb %dh, 2(%rdi)
- movb $0, 3(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit5):
- movl (%rsi), %edx
- movl %edx, (%rdi)
- movb $0, 4(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit6):
- movl (%rsi), %ecx
- movb 4(%rsi), %dh
- movl %ecx, (%rdi)
- movb %dh, 4(%rdi)
- movb $0, 5(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit7):
- movl (%rsi), %ecx
- movw 4(%rsi), %dx
- movl %ecx, (%rdi)
- movw %dx, 4(%rdi)
- movb $0, 6(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit8):
- movl (%rsi), %ecx
- movl 3(%rsi), %edx
- movl %ecx, (%rdi)
- movl %edx, 3(%rdi)
- movb $0, 7(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit9):
- movq (%rsi), %rdx
- movq %rdx, (%rdi)
- movb $0, 8(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit10):
- movq (%rsi), %rcx
- movb 8(%rsi), %dh
- movq %rcx, (%rdi)
- movb %dh, 8(%rdi)
- movb $0, 9(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit11):
- movq (%rsi), %rcx
- movw 8(%rsi), %dx
- movq %rcx, (%rdi)
- movw %dx, 8(%rdi)
- movb $0, 10(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit12):
- movq (%rsi), %rcx
- movl 7(%rsi), %edx
- movq %rcx, (%rdi)
- movl %edx, 7(%rdi)
- movb $0, 11(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit13):
- movq (%rsi), %rcx
- movl 8(%rsi), %edx
- movq %rcx, (%rdi)
- movl %edx, 8(%rdi)
- movb $0, 12(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit14):
- movq (%rsi), %rcx
- movq 5(%rsi), %rdx
- movq %rcx, (%rdi)
- movq %rdx, 5(%rdi)
- movb $0, 13(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit15):
- movq (%rsi), %rcx
- movq 6(%rsi), %rdx
- movq %rcx, (%rdi)
- movq %rdx, 6(%rdi)
- movb $0, 14(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit16):
- movq (%rsi), %rcx
- movq 7(%rsi), %rdx
- movq %rcx, (%rdi)
- movq %rdx, 7(%rdi)
- movb $0, 15(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit17):
- movdqu (%rsi), %xmm0
- movdqu %xmm0, (%rdi)
- movb $0, 16(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit18):
- movdqu (%rsi), %xmm0
- movb 16(%rsi), %dh
- movdqu %xmm0, (%rdi)
- movb %dh, 16(%rdi)
- movb $0, 17(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit19):
- movdqu (%rsi), %xmm0
- movw 16(%rsi), %cx
- movdqu %xmm0, (%rdi)
- movw %cx, 16(%rdi)
- movb $0, 18(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit20):
- movdqu (%rsi), %xmm0
- movl 15(%rsi), %ecx
- movdqu %xmm0, (%rdi)
- movl %ecx, 15(%rdi)
- movb $0, 19(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit21):
- movdqu (%rsi), %xmm0
- movl 16(%rsi), %ecx
- movdqu %xmm0, (%rdi)
- movl %ecx, 16(%rdi)
- movb $0, 20(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit22):
- movdqu (%rsi), %xmm0
- movl 16(%rsi), %ecx
- movb 20(%rsi), %dh
- movdqu %xmm0, (%rdi)
- movl %ecx, 16(%rdi)
- movb %dh, 20(%rdi)
- movb $0, 21(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit23):
- movdqu (%rsi), %xmm0
- movq 14(%rsi), %rcx
- movdqu %xmm0, (%rdi)
- movq %rcx, 14(%rdi)
- movb $0, 22(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit24):
- movdqu (%rsi), %xmm0
- movq 15(%rsi), %rcx
- movdqu %xmm0, (%rdi)
- movq %rcx, 15(%rdi)
- movb $0, 23(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit25):
- movdqu (%rsi), %xmm0
- movq 16(%rsi), %rcx
- movdqu %xmm0, (%rdi)
- movq %rcx, 16(%rdi)
- movb $0, 24(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit26):
- movdqu (%rsi), %xmm0
- movq 16(%rsi), %rcx
- movb 24(%rsi), %dh
- movdqu %xmm0, (%rdi)
- movq %rcx, 16(%rdi)
- mov %dh, 24(%rdi)
- movb $0, 25(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit27):
- movdqu (%rsi), %xmm0
- movq 16(%rsi), %rdx
- movw 24(%rsi), %cx
- movdqu %xmm0, (%rdi)
- movq %rdx, 16(%rdi)
- movw %cx, 24(%rdi)
- movb $0, 26(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit28):
- movdqu (%rsi), %xmm0
- movq 16(%rsi), %rdx
- movl 23(%rsi), %ecx
- movdqu %xmm0, (%rdi)
- movq %rdx, 16(%rdi)
- movl %ecx, 23(%rdi)
- movb $0, 27(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit29):
- movdqu (%rsi), %xmm0
- movq 16(%rsi), %rdx
- movl 24(%rsi), %ecx
- movdqu %xmm0, (%rdi)
- movq %rdx, 16(%rdi)
- movl %ecx, 24(%rdi)
- movb $0, 28(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit30):
- movdqu (%rsi), %xmm0
- movdqu 13(%rsi), %xmm2
- movdqu %xmm0, (%rdi)
- movdqu %xmm2, 13(%rdi)
- movb $0, 29(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit31):
- movdqu (%rsi), %xmm0
- movdqu 14(%rsi), %xmm2
- movdqu %xmm0, (%rdi)
- movdqu %xmm2, 14(%rdi)
- movb $0, 30(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(Exit32):
- movdqu (%rsi), %xmm0
- movdqu 15(%rsi), %xmm2
- movdqu %xmm0, (%rdi)
- movdqu %xmm2, 15(%rdi)
- movb $0, 31(%rdi)
- jmp L(CalculateSrcLen)
-
- .p2align 4
-L(StringTail0):
- mov (%rsi), %dl
- mov %dl, (%rdi)
- RETURN
-
- .p2align 4
-L(StringTail1):
- mov (%rsi), %dx
- mov %dx, (%rdi)
- RETURN
-
- .p2align 4
-L(StringTail2):
- mov (%rsi), %cx
- mov 2(%rsi), %dl
- mov %cx, (%rdi)
- mov %dl, 2(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail3):
- mov (%rsi), %edx
- mov %edx, (%rdi)
- RETURN
-
- .p2align 4
-L(StringTail4):
- mov (%rsi), %ecx
- mov 4(%rsi), %dl
- mov %ecx, (%rdi)
- mov %dl, 4(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail5):
- mov (%rsi), %ecx
- mov 4(%rsi), %dx
- mov %ecx, (%rdi)
- mov %dx, 4(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail6):
- mov (%rsi), %ecx
- mov 3(%rsi), %edx
- mov %ecx, (%rdi)
- mov %edx, 3(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail7):
- mov (%rsi), %rdx
- mov %rdx, (%rdi)
- RETURN
-
- .p2align 4
-L(StringTail8):
- mov (%rsi), %rcx
- mov 8(%rsi), %dl
- mov %rcx, (%rdi)
- mov %dl, 8(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail9):
- mov (%rsi), %rcx
- mov 8(%rsi), %dx
- mov %rcx, (%rdi)
- mov %dx, 8(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail10):
- mov (%rsi), %rcx
- mov 7(%rsi), %edx
- mov %rcx, (%rdi)
- mov %edx, 7(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail11):
- mov (%rsi), %rcx
- mov 8(%rsi), %edx
- mov %rcx, (%rdi)
- mov %edx, 8(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail12):
- mov (%rsi), %rcx
- mov 5(%rsi), %rdx
- mov %rcx, (%rdi)
- mov %rdx, 5(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail13):
- mov (%rsi), %rcx
- mov 6(%rsi), %rdx
- mov %rcx, (%rdi)
- mov %rdx, 6(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail14):
- mov (%rsi), %rcx
- mov 7(%rsi), %rdx
- mov %rcx, (%rdi)
- mov %rdx, 7(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail15):
- movdqu (%rsi), %xmm0
- movdqu %xmm0, (%rdi)
- RETURN
-
- .p2align 4
-L(StringTail16):
- movdqu (%rsi), %xmm0
- mov 16(%rsi), %cl
- movdqu %xmm0, (%rdi)
- mov %cl, 16(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail17):
- movdqu (%rsi), %xmm0
- mov 16(%rsi), %cx
- movdqu %xmm0, (%rdi)
- mov %cx, 16(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail18):
- movdqu (%rsi), %xmm0
- mov 15(%rsi), %ecx
- movdqu %xmm0, (%rdi)
- mov %ecx, 15(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail19):
- movdqu (%rsi), %xmm0
- mov 16(%rsi), %ecx
- movdqu %xmm0, (%rdi)
- mov %ecx, 16(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail20):
- movdqu (%rsi), %xmm0
- mov 16(%rsi), %ecx
- mov 20(%rsi), %dl
- movdqu %xmm0, (%rdi)
- mov %ecx, 16(%rdi)
- mov %dl, 20(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail21):
- movdqu (%rsi), %xmm0
- mov 14(%rsi), %rcx
- movdqu %xmm0, (%rdi)
- mov %rcx, 14(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail22):
- movdqu (%rsi), %xmm0
- mov 15(%rsi), %rcx
- movdqu %xmm0, (%rdi)
- mov %rcx, 15(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail23):
- movdqu (%rsi), %xmm0
- mov 16(%rsi), %rcx
- movdqu %xmm0, (%rdi)
- mov %rcx, 16(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail24):
- movdqu (%rsi), %xmm0
- mov 16(%rsi), %rdx
- mov 24(%rsi), %cl
- movdqu %xmm0, (%rdi)
- mov %rdx, 16(%rdi)
- mov %cl, 24(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail25):
- movdqu (%rsi), %xmm0
- mov 16(%rsi), %rdx
- mov 24(%rsi), %cx
- movdqu %xmm0, (%rdi)
- mov %rdx, 16(%rdi)
- mov %cx, 24(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail26):
- movdqu (%rsi), %xmm0
- mov 16(%rsi), %rdx
- mov 23(%rsi), %ecx
- movdqu %xmm0, (%rdi)
- mov %rdx, 16(%rdi)
- mov %ecx, 23(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail27):
- movdqu (%rsi), %xmm0
- mov 16(%rsi), %rdx
- mov 24(%rsi), %ecx
- movdqu %xmm0, (%rdi)
- mov %rdx, 16(%rdi)
- mov %ecx, 24(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail28):
- movdqu (%rsi), %xmm0
- movdqu 13(%rsi), %xmm2
- movdqu %xmm0, (%rdi)
- movdqu %xmm2, 13(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail29):
- movdqu (%rsi), %xmm0
- movdqu 14(%rsi), %xmm2
- movdqu %xmm0, (%rdi)
- movdqu %xmm2, 14(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail30):
- movdqu (%rsi), %xmm0
- movdqu 15(%rsi), %xmm2
- movdqu %xmm0, (%rdi)
- movdqu %xmm2, 15(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail31):
- movdqu (%rsi), %xmm0
- movdqu 16(%rsi), %xmm2
- movdqu %xmm0, (%rdi)
- movdqu %xmm2, 16(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail32):
- movdqu (%rsi), %xmm0
- movdqu 16(%rsi), %xmm2
- mov 32(%rsi), %cl
- movdqu %xmm0, (%rdi)
- movdqu %xmm2, 16(%rdi)
- mov %cl, 32(%rdi)
- RETURN
-
- .p2align 4
-L(StringTail33):
- movdqu (%rsi), %xmm0
- movdqu 16(%rsi), %xmm2
- mov 32(%rsi), %cl
- movdqu %xmm0, (%rdi)
- movdqu %xmm2, 16(%rdi)
- mov %cl, 32(%rdi)
- RETURN
-
- .p2align 4
-L(CalculateSrcLenCase1):
- xor %r8, %r8
- xor %rax, %rax
-L(CalculateSrcLen):
- pxor %xmm0, %xmm0
- xor %rcx, %rcx
- add %r8, %rsi
- movdqu (%rsi), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %rdx
- test %rdx, %rdx
- jnz L(SrcLenLoopEnd)
-
- add %rax, %r9
- mov $16, %rax
- mov %rsi, %rcx
- and $15, %rcx
- and $-16, %rsi
-L(SrcLenLoop):
- movdqa (%rsi, %rax), %xmm1
- pcmpeqb %xmm1, %xmm0
- pmovmskb %xmm0, %rdx
- test %rdx, %rdx
- jnz L(SrcLenLoopEnd)
- add $16, %rax
- jmp L(SrcLenLoop)
-
- .p2align 4
-L(SrcLenLoopEnd):
- bsf %rdx, %rdx
- add %rdx, %rax
- sub %rcx, %rax
- RETURN
-
-END (STRLCPY)
-
- .p2align 4
- .section .rodata
-L(ExitTable):
- .int JMPTBL(L(Exit0), L(ExitTable))
- .int JMPTBL(L(Exit1), L(ExitTable))
- .int JMPTBL(L(Exit2), L(ExitTable))
- .int JMPTBL(L(Exit3), L(ExitTable))
- .int JMPTBL(L(Exit4), L(ExitTable))
- .int JMPTBL(L(Exit5), L(ExitTable))
- .int JMPTBL(L(Exit6), L(ExitTable))
- .int JMPTBL(L(Exit7), L(ExitTable))
- .int JMPTBL(L(Exit8), L(ExitTable))
- .int JMPTBL(L(Exit9), L(ExitTable))
- .int JMPTBL(L(Exit10), L(ExitTable))
- .int JMPTBL(L(Exit11), L(ExitTable))
- .int JMPTBL(L(Exit12), L(ExitTable))
- .int JMPTBL(L(Exit13), L(ExitTable))
- .int JMPTBL(L(Exit14), L(ExitTable))
- .int JMPTBL(L(Exit15), L(ExitTable))
- .int JMPTBL(L(Exit16), L(ExitTable))
- .int JMPTBL(L(Exit17), L(ExitTable))
- .int JMPTBL(L(Exit18), L(ExitTable))
- .int JMPTBL(L(Exit19), L(ExitTable))
- .int JMPTBL(L(Exit20), L(ExitTable))
- .int JMPTBL(L(Exit21), L(ExitTable))
- .int JMPTBL(L(Exit22), L(ExitTable))
- .int JMPTBL(L(Exit23), L(ExitTable))
- .int JMPTBL(L(Exit24), L(ExitTable))
- .int JMPTBL(L(Exit25), L(ExitTable))
- .int JMPTBL(L(Exit26), L(ExitTable))
- .int JMPTBL(L(Exit27), L(ExitTable))
- .int JMPTBL(L(Exit28), L(ExitTable))
- .int JMPTBL(L(Exit29), L(ExitTable))
- .int JMPTBL(L(Exit30), L(ExitTable))
- .int JMPTBL(L(Exit31), L(ExitTable))
- .int JMPTBL(L(Exit32), L(ExitTable))
-L(ExitStringTailTable):
- .int JMPTBL(L(StringTail0), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail1), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail2), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail3), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail4), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail5), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail6), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail7), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail8), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail9), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail10), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail11), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail12), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail13), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail14), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail15), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail16), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail17), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail18), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail19), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail20), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail21), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail22), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail23), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail24), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail25), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail26), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail27), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail28), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail29), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail30), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail31), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail32), L(ExitStringTailTable))
- .int JMPTBL(L(StringTail33), L(ExitStringTailTable))