Add 64-bit Silvermont-optimized string/memory functions.
Add the following functions:
bcopy, bzero, memcpy, memmove, memset, stpcpy, stpncpy, strcat, strcpy,
strlen, strncat, strncpy, memcmp, strcmp, strncmp.
Make all of these functions the defaults.
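
Because these become the libc defaults, existing callers pick them up with no
source changes. A minimal, illustrative sanity check (the harness and sizes
below are not part of this patch; the sizes are chosen to cross the small,
medium, and large dispatch points in the new memset):

    #include <assert.h>
    #include <stdlib.h>
    #include <string.h>

    int main(void) {
        /* Illustrative only: plain libc calls resolve to the SLM routines. */
        static const size_t sizes[] = { 1, 15, 16, 64, 128, 129, 4096 };
        for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
            size_t n = sizes[i];
            char *src = malloc(n), *dst = malloc(n);
            memset(src, 0xAB, n);             /* sse2-memset-slm.S */
            memcpy(dst, src, n);              /* SLM memcpy */
            assert(memcmp(dst, src, n) == 0); /* SLM memcmp */
            free(src);
            free(dst);
        }
        return 0;
    }
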
Change-Id: Ic66b250ad8c349a43d25e2d4dea075604f6df6ac
Signed-off-by: Varvara Rainchik <varvara.rainchik@intel.com>
diff --git a/libc/arch-x86_64/string/sse2-memset-slm.S b/libc/arch-x86_64/string/sse2-memset-slm.S
new file mode 100644
index 0000000..bfcafae
--- /dev/null
+++ b/libc/arch-x86_64/string/sse2-memset-slm.S
@@ -0,0 +1,173 @@
+/*
+Copyright (c) 2014, Intel Corporation
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+
+ * Neither the name of Intel Corporation nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include "cache.h"
+
+#ifndef MEMSET
+# define MEMSET memset
+#endif
+
+#ifndef L
+# define L(label) .L##label
+#endif
+
+#ifndef ALIGN
+# define ALIGN(n) .p2align n
+#endif
+
+#ifndef cfi_startproc
+# define cfi_startproc .cfi_startproc
+#endif
+
+#ifndef cfi_endproc
+# define cfi_endproc .cfi_endproc
+#endif
+
+#ifndef ENTRY
+# define ENTRY(name) \
+ .type name, @function; \
+ .globl name; \
+name: \
+ cfi_startproc
+#endif
+
+#ifndef END
+# define END(name) \
+ cfi_endproc; \
+ .size name, .-name
+#endif
+
+ .section .text.sse2,"ax",@progbits
+ENTRY (MEMSET)
+ movq %rdi, %rax  /* return value: the destination pointer */
+#ifdef USE_AS_BZERO_P
+ mov %rsi, %rdx  /* bzero(dst, n): the count arrives in %rsi */
+ xor %rcx, %rcx  /* fill pattern is all zero bytes */
+#else
+ and $0xff, %rsi
+ mov $0x0101010101010101, %rcx
+ imul %rsi, %rcx  /* replicate the fill byte across all 8 bytes of %rcx */
+#endif
+#endif
+ cmpq $16, %rdx  /* sizes below 16 fall through to overlapping scalar stores */
+ jae L(16bytesormore)
+ testb $8, %dl
+ jnz L(8_15bytes)
+ testb $4, %dl
+ jnz L(4_7bytes)
+ testb $2, %dl
+ jnz L(2_3bytes)
+ testb $1, %dl
+ jz L(return)
+ movb %cl, (%rdi)
+L(return):
+ ret
+
+L(8_15bytes):
+ movq %rcx, (%rdi)
+ movq %rcx, -8(%rdi, %rdx)
+ ret
+
+L(4_7bytes):
+ movl %ecx, (%rdi)
+ movl %ecx, -4(%rdi, %rdx)
+ ret
+
+L(2_3bytes):
+ movw %cx, (%rdi)
+ movw %cx, -2(%rdi, %rdx)
+ ret
+
+ ALIGN (4)
+L(16bytesormore):
+#ifdef USE_AS_BZERO_P
+ pxor %xmm0, %xmm0
+#else
+ movd %rcx, %xmm0
+ pshufd $0, %xmm0, %xmm0
+#endif
+ movdqu %xmm0, (%rdi)  /* fill head and tail with unaligned 16-byte stores; */
+ movdqu %xmm0, -16(%rdi, %rdx)  /* for n <= 32 these two cover the whole buffer */
+ cmpq $32, %rdx
+ jbe L(32bytesless)
+ movdqu %xmm0, 16(%rdi)
+ movdqu %xmm0, -32(%rdi, %rdx)
+ cmpq $64, %rdx
+ jbe L(64bytesless)
+ movdqu %xmm0, 32(%rdi)
+ movdqu %xmm0, 48(%rdi)
+ movdqu %xmm0, -64(%rdi, %rdx)
+ movdqu %xmm0, -48(%rdi, %rdx)
+ cmpq $128, %rdx
+ ja L(128bytesmore)
+L(32bytesless):
+L(64bytesless):
+ ret
+
+ ALIGN (4)
+L(128bytesmore):
+ leaq 64(%rdi), %rcx
+ andq $-64, %rcx  /* %rcx: first 64-byte boundary above dst */
+ movq %rdx, %r8  /* keep the original count for the cache-size check */
+ addq %rdi, %rdx
+ andq $-64, %rdx  /* %rdx: last 64-byte boundary inside the buffer */
+ cmpq %rcx, %rdx
+ je L(return)  /* no aligned interior left; the edges are already filled */
+
+#ifdef SHARED_CACHE_SIZE
+ cmp $SHARED_CACHE_SIZE, %r8
+#else
+ cmp __x86_64_shared_cache_size(%rip), %r8
+#endif
+ ja L(128bytesmore_nt)  /* fills larger than the shared cache use non-temporal stores */
+
+ ALIGN (4)
+L(128bytesmore_normal):
+ movdqa %xmm0, (%rcx)  /* 64 bytes per iteration with aligned stores */
+ movaps %xmm0, 0x10(%rcx)
+ movaps %xmm0, 0x20(%rcx)
+ movaps %xmm0, 0x30(%rcx)
+ addq $64, %rcx
+ cmpq %rcx, %rdx
+ jne L(128bytesmore_normal)
+ ret
+
+ ALIGN (4)
+L(128bytesmore_nt):
+ movntdq %xmm0, (%rcx)  /* streaming stores avoid polluting the caches */
+ movntdq %xmm0, 0x10(%rcx)
+ movntdq %xmm0, 0x20(%rcx)
+ movntdq %xmm0, 0x30(%rcx)
+ leaq 64(%rcx), %rcx
+ cmpq %rcx, %rdx
+ jne L(128bytesmore_nt)
+ sfence  /* order the weakly-ordered non-temporal stores before returning */
+ ret
+
+END (MEMSET)
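
For readers following the assembly, a rough C sketch of the strategy the new
memset implements: scalar stores below 16 bytes, overlapping unaligned 16-byte
stores up to 128 bytes, then a 64-byte-aligned loop that switches to
non-temporal stores once the fill exceeds the shared cache size. This is
exposition only, not the shipped code; shared_cache_size below is a placeholder
for __x86_64_shared_cache_size.

/* Exposition-only C analogue of sse2-memset-slm.S (assumes x86-64/SSE2). */
#include <emmintrin.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static const size_t shared_cache_size = 1024 * 1024;  /* placeholder threshold */

void *memset_sketch(void *dst, int c, size_t n) {
    unsigned char *d = dst;
    uint64_t pat = 0x0101010101010101ULL * (unsigned char)c;

    if (n < 16) {  /* small sizes: overlapping scalar stores from both ends */
        if (n & 8)      { memcpy(d, &pat, 8); memcpy(d + n - 8, &pat, 8); }
        else if (n & 4) { memcpy(d, &pat, 4); memcpy(d + n - 4, &pat, 4); }
        else if (n & 2) { memcpy(d, &pat, 2); memcpy(d + n - 2, &pat, 2); }
        else if (n & 1) { *d = (unsigned char)c; }
        return dst;
    }

    __m128i v = _mm_set1_epi8((char)c);
    /* Head and tail are filled with unaligned 16-byte stores; up to 128
     * bytes these (possibly overlapping) stores cover the whole buffer. */
    _mm_storeu_si128((__m128i *)d, v);
    _mm_storeu_si128((__m128i *)(d + n - 16), v);
    if (n <= 32) return dst;
    _mm_storeu_si128((__m128i *)(d + 16), v);
    _mm_storeu_si128((__m128i *)(d + n - 32), v);
    if (n <= 64) return dst;
    _mm_storeu_si128((__m128i *)(d + 32), v);
    _mm_storeu_si128((__m128i *)(d + 48), v);
    _mm_storeu_si128((__m128i *)(d + n - 64), v);
    _mm_storeu_si128((__m128i *)(d + n - 48), v);
    if (n <= 128) return dst;

    /* Larger fills: walk the 64-byte-aligned interior; use non-temporal
     * stores (and a trailing sfence) once the fill exceeds the cache. */
    unsigned char *p   = (unsigned char *)(((uintptr_t)d + 64) & ~(uintptr_t)63);
    unsigned char *end = (unsigned char *)(((uintptr_t)d + n) & ~(uintptr_t)63);
    int nt = n > shared_cache_size;
    for (; p != end; p += 64) {
        for (int off = 0; off < 64; off += 16) {
            if (nt) _mm_stream_si128((__m128i *)(p + off), v);
            else    _mm_store_si128((__m128i *)(p + off), v);
        }
    }
    if (nt) _mm_sfence();
    return dst;
}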