Add 32-bit Silvermont-optimized string/memory functions.
Add following functions:
bcopy, memcpy, memmove, memset, bzero, memcmp, wmemcmp, strlen,
strcpy, strncpy, stpcpy, stpncpy.
Create new directories inside arch-x86 to specify architecture: atom,
silvermont and generic (non atom or silvermont architectures are treated like generic).
Due to introducing optimized versions of stpcpy and stpncpy,
c-implementations of these functions are moved from
common for architectures makefile to arm and mips specific makefiles.
Change-Id: I990f8061c3e9bca1f154119303da9e781c5d086e
Signed-off-by: Varvara Rainchik <varvara.rainchik@intel.com>
diff --git a/libc/arch-x86/generic/generic.mk b/libc/arch-x86/generic/generic.mk
new file mode 100644
index 0000000..c8b40ee
--- /dev/null
+++ b/libc/arch-x86/generic/generic.mk
@@ -0,0 +1,55 @@
+libc_bionic_src_files_x86 += \
+ arch-x86/atom/string/sse2-index-atom.S \
+ arch-x86/atom/string/sse2-memchr-atom.S \
+ arch-x86/atom/string/sse2-memrchr-atom.S \
+ arch-x86/atom/string/sse2-strchr-atom.S \
+ arch-x86/atom/string/sse2-strnlen-atom.S \
+ arch-x86/atom/string/sse2-strrchr-atom.S \
+ arch-x86/atom/string/sse2-wcschr-atom.S \
+ arch-x86/atom/string/sse2-wcsrchr-atom.S \
+ arch-x86/atom/string/sse2-wcslen-atom.S \
+ arch-x86/atom/string/sse2-wcscmp-atom.S \
+ arch-x86/silvermont/string/sse2-bcopy-slm.S \
+ arch-x86/silvermont/string/sse2-bzero-slm.S \
+ arch-x86/silvermont/string/sse2-memcpy-slm.S \
+ arch-x86/silvermont/string/sse2-memmove-slm.S \
+ arch-x86/silvermont/string/sse2-memset-slm.S \
+ arch-x86/silvermont/string/sse2-stpcpy-slm.S \
+ arch-x86/silvermont/string/sse2-stpncpy-slm.S \
+ arch-x86/silvermont/string/sse2-strcpy-slm.S \
+ arch-x86/silvermont/string/sse2-strlen-slm.S \
+ arch-x86/silvermont/string/sse2-strncpy-slm.S
+
+ifeq ($(ARCH_X86_HAVE_SSSE3),true)
+libc_bionic_src_files_x86 += \
+ arch-x86/atom/string/ssse3-strncat-atom.S \
+ arch-x86/atom/string/ssse3-strlcat-atom.S \
+ arch-x86/atom/string/ssse3-strlcpy-atom.S \
+ arch-x86/atom/string/ssse3-strcmp-atom.S \
+ arch-x86/atom/string/ssse3-strncmp-atom.S \
+ arch-x86/atom/string/ssse3-strcat-atom.S \
+ arch-x86/atom/string/ssse3-memcmp16-atom.S \
+ arch-x86/atom/string/ssse3-wcscat-atom.S \
+ arch-x86/atom/string/ssse3-wcscpy-atom.S
+else
+libc_bionic_src_files_x86 += \
+ arch-x86/generic/string/strcmp.S \
+ arch-x86/generic/string/strncmp.S \
+ arch-x86/generic/string/strcat.S \
+ bionic/__memcmp16.cpp \
+ upstream-freebsd/lib/libc/string/wcscpy.c \
+ upstream-freebsd/lib/libc/string/wcscat.c \
+ upstream-openbsd/lib/libc/string/strlcat.c \
+ upstream-openbsd/lib/libc/string/strlcpy.c \
+ upstream-openbsd/lib/libc/string/strncat.c
+endif
+
+ifeq ($(ARCH_X86_HAVE_SSE4),true)
+ libc_bionic_src_files_x86 += \
+ arch-x86/silvermont/string/sse4-memcmp-slm.S \
+ arch-x86/silvermont/string/sse4-wmemcmp-slm.S
+else
+libc_bionic_src_files_x86 += \
+ arch-x86/generic/string/memcmp.S \
+ upstream-freebsd/lib/libc/string/wmemcmp.c
+endif
diff --git a/libc/arch-x86/generic/string/bcopy.S b/libc/arch-x86/generic/string/bcopy.S
new file mode 100644
index 0000000..f425c58
--- /dev/null
+++ b/libc/arch-x86/generic/string/bcopy.S
@@ -0,0 +1,98 @@
+/* $OpenBSD: bcopy.S,v 1.5 2005/08/07 11:30:38 espie Exp $ */
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from locore.s.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+
+ /*
+ * (ov)bcopy (src,dst,cnt)
+ * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+ */
+
+#if defined(MEMCOPY)
+ENTRY(memcpy)
+#elif defined(MEMMOVE)
+ENTRY(memmove)
+#else
+ENTRY(bcopy)
+#endif
+ pushl %esi
+ pushl %edi
+#if defined(MEMCOPY) || defined(MEMMOVE)
+ movl 12(%esp),%edi
+ movl 16(%esp),%esi
+ movl %edi, %eax
+#else
+ movl 12(%esp),%esi
+ movl 16(%esp),%edi
+#endif
+ movl 20(%esp),%ecx
+ movl %ecx,%edx
+ cmpl %esi,%edi /* potentially overlapping? */
+ jnb 1f
+ cld /* nope, copy forwards. */
+ shrl $2,%ecx /* copy by words */
+ rep
+ movsl
+ movl %edx,%ecx
+ andl $3,%ecx /* any bytes left? */
+ rep
+ movsb
+ popl %edi
+ popl %esi
+ ret
+1:
+ addl %ecx,%edi /* copy backwards. */
+ addl %ecx,%esi
+ std
+ andl $3,%ecx /* any fractional bytes? */
+ decl %edi
+ decl %esi
+ rep
+ movsb
+ movl %edx,%ecx
+ shrl $2,%ecx
+ subl $3,%esi
+ subl $3,%edi
+ rep
+ movsl
+ popl %edi
+ popl %esi
+ cld
+ ret
+#if defined(MEMCOPY)
+END(memcpy)
+#elif defined(MEMMOVE)
+END(memmove)
+#else
+END(bcopy)
+#endif
diff --git a/libc/arch-x86/generic/string/memcmp.S b/libc/arch-x86/generic/string/memcmp.S
new file mode 100644
index 0000000..ef36b4f
--- /dev/null
+++ b/libc/arch-x86/generic/string/memcmp.S
@@ -0,0 +1,44 @@
+/* $OpenBSD: memcmp.S,v 1.4 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <private/bionic_asm.h>
+
+ENTRY(memcmp)
+ pushl %edi
+ pushl %esi
+ movl 12(%esp),%edi
+ movl 16(%esp),%esi
+ cld /* set compare direction forward */
+
+ movl 20(%esp),%ecx /* compare by words */
+ shrl $2,%ecx
+ repe
+ cmpsl
+ jne L5 /* do we match so far? */
+
+ movl 20(%esp),%ecx /* compare remainder by bytes */
+ andl $3,%ecx
+ repe
+ cmpsb
+ jne L6 /* do we match? */
+
+ xorl %eax,%eax /* we match, return zero */
+ popl %esi
+ popl %edi
+ ret
+
+L5: movl $4,%ecx /* We know that one of the next */
+ subl %ecx,%edi /* four pairs of bytes do not */
+ subl %ecx,%esi /* match. */
+ repe
+ cmpsb
+L6: movzbl -1(%edi),%eax /* Perform unsigned comparison */
+ movzbl -1(%esi),%edx
+ subl %edx,%eax
+ popl %esi
+ popl %edi
+ ret
+END(memcmp)
diff --git a/libc/arch-x86/generic/string/memcpy.S b/libc/arch-x86/generic/string/memcpy.S
new file mode 100644
index 0000000..95c8a83
--- /dev/null
+++ b/libc/arch-x86/generic/string/memcpy.S
@@ -0,0 +1,3 @@
+/* $OpenBSD: memcpy.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+#define MEMCOPY
+#include "bcopy.S"
diff --git a/libc/arch-x86/generic/string/memmove.S b/libc/arch-x86/generic/string/memmove.S
new file mode 100644
index 0000000..c5bfd19
--- /dev/null
+++ b/libc/arch-x86/generic/string/memmove.S
@@ -0,0 +1,3 @@
+/* $OpenBSD: memmove.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+#define MEMMOVE
+#include "bcopy.S"
diff --git a/libc/arch-x86/generic/string/strcat.S b/libc/arch-x86/generic/string/strcat.S
new file mode 100644
index 0000000..49e8eee
--- /dev/null
+++ b/libc/arch-x86/generic/string/strcat.S
@@ -0,0 +1,74 @@
+/* $OpenBSD: strcat.S,v 1.8 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <private/bionic_asm.h>
+
+#if defined(APIWARN)
+#APP
+ .section .gnu.warning.strcat
+ .ascii "warning: strcat() is almost always misused, please use strlcat()"
+#NO_APP
+#endif
+
+/*
+ * NOTE: I've unrolled the loop eight times: large enough to make a
+ * significant difference, and small enough not to totally trash the
+ * cache.
+ */
+
+ENTRY(strcat)
+ pushl %edi /* save edi */
+ movl 8(%esp),%edi /* dst address */
+ movl 12(%esp),%edx /* src address */
+ pushl %edi /* push destination address */
+
+ cld /* set search forward */
+ xorl %eax,%eax /* set search for null terminator */
+ movl $-1,%ecx /* set search for lots of characters */
+ repne /* search! */
+ scasb
+
+ leal -1(%edi),%ecx /* correct dst address */
+
+ .align 2,0x90
+L1: movb (%edx),%al /* unroll loop, but not too much */
+ movb %al,(%ecx)
+ testb %al,%al
+ jz L2
+ movb 1(%edx),%al
+ movb %al,1(%ecx)
+ testb %al,%al
+ jz L2
+ movb 2(%edx),%al
+ movb %al,2(%ecx)
+ testb %al,%al
+ jz L2
+ movb 3(%edx),%al
+ movb %al,3(%ecx)
+ testb %al,%al
+ jz L2
+ movb 4(%edx),%al
+ movb %al,4(%ecx)
+ testb %al,%al
+ jz L2
+ movb 5(%edx),%al
+ movb %al,5(%ecx)
+ testb %al,%al
+ jz L2
+ movb 6(%edx),%al
+ movb %al,6(%ecx)
+ testb %al,%al
+ jz L2
+ movb 7(%edx),%al
+ movb %al,7(%ecx)
+ addl $8,%edx
+ addl $8,%ecx
+ testb %al,%al
+ jnz L1
+L2: popl %eax /* pop destination address */
+ popl %edi /* restore edi */
+ ret
+END(strcat)
diff --git a/libc/arch-x86/generic/string/strcmp.S b/libc/arch-x86/generic/string/strcmp.S
new file mode 100644
index 0000000..580f4d5
--- /dev/null
+++ b/libc/arch-x86/generic/string/strcmp.S
@@ -0,0 +1,82 @@
+/* $OpenBSD: strcmp.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <private/bionic_asm.h>
+
+/*
+ * NOTE: I've unrolled the loop eight times: large enough to make a
+ * significant difference, and small enough not to totally trash the
+ * cache.
+ */
+
+ENTRY(strcmp)
+ movl 0x04(%esp),%eax
+ movl 0x08(%esp),%edx
+ jmp L2 /* Jump into the loop! */
+
+ .align 2,0x90
+L1: incl %eax
+ incl %edx
+L2: movb (%eax),%cl
+ testb %cl,%cl /* null terminator??? */
+ jz L3
+ cmpb %cl,(%edx) /* chars match??? */
+ jne L3
+ incl %eax
+ incl %edx
+ movb (%eax),%cl
+ testb %cl,%cl
+ jz L3
+ cmpb %cl,(%edx)
+ jne L3
+ incl %eax
+ incl %edx
+ movb (%eax),%cl
+ testb %cl,%cl
+ jz L3
+ cmpb %cl,(%edx)
+ jne L3
+ incl %eax
+ incl %edx
+ movb (%eax),%cl
+ testb %cl,%cl
+ jz L3
+ cmpb %cl,(%edx)
+ jne L3
+ incl %eax
+ incl %edx
+ movb (%eax),%cl
+ testb %cl,%cl
+ jz L3
+ cmpb %cl,(%edx)
+ jne L3
+ incl %eax
+ incl %edx
+ movb (%eax),%cl
+ testb %cl,%cl
+ jz L3
+ cmpb %cl,(%edx)
+ jne L3
+ incl %eax
+ incl %edx
+ movb (%eax),%cl
+ testb %cl,%cl
+ jz L3
+ cmpb %cl,(%edx)
+ jne L3
+ incl %eax
+ incl %edx
+ movb (%eax),%cl
+ testb %cl,%cl
+ jz L3
+ cmpb %cl,(%edx)
+ je L1
+ .align 2, 0x90
+L3: movzbl (%eax),%eax /* unsigned comparison */
+ movzbl (%edx),%edx
+ subl %edx,%eax
+ ret
+END(strcmp)
diff --git a/libc/arch-x86/generic/string/strncmp.S b/libc/arch-x86/generic/string/strncmp.S
new file mode 100644
index 0000000..9ba83a1
--- /dev/null
+++ b/libc/arch-x86/generic/string/strncmp.S
@@ -0,0 +1,114 @@
+/* $OpenBSD: strncmp.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <private/bionic_asm.h>
+
+/*
+ * NOTE: I've unrolled the loop eight times: large enough to make a
+ * significant difference, and small enough not to totally trash the
+ * cache.
+ */
+
+ENTRY(strncmp)
+ pushl %ebx
+ movl 8(%esp),%eax
+ movl 12(%esp),%ecx
+ movl 16(%esp),%edx
+ testl %edx,%edx
+ jmp L2 /* Jump into the loop! */
+
+ .align 2,0x90
+L1: incl %eax
+ incl %ecx
+ decl %edx
+L2: jz L4 /* strings are equal */
+ movb (%eax),%bl
+ testb %bl,%bl
+ jz L3
+ cmpb %bl,(%ecx)
+ jne L3
+
+ incl %eax
+ incl %ecx
+ decl %edx
+ jz L4
+ movb (%eax),%bl
+ testb %bl,%bl
+ jz L3
+ cmpb %bl,(%ecx)
+ jne L3
+
+ incl %eax
+ incl %ecx
+ decl %edx
+ jz L4
+ movb (%eax),%bl
+ testb %bl,%bl
+ jz L3
+ cmpb %bl,(%ecx)
+ jne L3
+
+ incl %eax
+ incl %ecx
+ decl %edx
+ jz L4
+ movb (%eax),%bl
+ testb %bl,%bl
+ jz L3
+ cmpb %bl,(%ecx)
+ jne L3
+
+ incl %eax
+ incl %ecx
+ decl %edx
+ jz L4
+ movb (%eax),%bl
+ testb %bl,%bl
+ jz L3
+ cmpb %bl,(%ecx)
+ jne L3
+
+ incl %eax
+ incl %ecx
+ decl %edx
+ jz L4
+ movb (%eax),%bl
+ testb %bl,%bl
+ jz L3
+ cmpb %bl,(%ecx)
+ jne L3
+
+ incl %eax
+ incl %ecx
+ decl %edx
+ jz L4
+ movb (%eax),%bl
+ testb %bl,%bl
+ jz L3
+ cmpb %bl,(%ecx)
+ jne L3
+
+ incl %eax
+ incl %ecx
+ decl %edx
+ jz L4
+ movb (%eax),%bl
+ testb %bl,%bl
+ jz L3
+ cmpb %bl,(%ecx)
+ je L1
+
+ .align 2,0x90
+L3: movzbl (%eax),%eax /* unsigned comparision */
+ movzbl (%ecx),%ecx
+ subl %ecx,%eax
+ popl %ebx
+ ret
+ .align 2,0x90
+L4: xorl %eax,%eax
+ popl %ebx
+ ret
+END(strncmp)
diff --git a/libc/arch-x86/generic/string/swab.S b/libc/arch-x86/generic/string/swab.S
new file mode 100644
index 0000000..b44d134
--- /dev/null
+++ b/libc/arch-x86/generic/string/swab.S
@@ -0,0 +1,68 @@
+/* $OpenBSD: swab.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <private/bionic_asm.h>
+
+/*
+ * On the i486, this code is negligibly faster than the code generated
+ * by gcc at about half the size. If my i386 databook is correct, it
+ * should be considerably faster than the gcc code on a i386.
+ */
+
+ENTRY(swab)
+ pushl %esi
+ pushl %edi
+ movl 12(%esp),%esi
+ movl 16(%esp),%edi
+ movl 20(%esp),%ecx
+
+ cld # set direction forward
+
+ shrl $1,%ecx
+ testl $7,%ecx # copy first group of 1 to 7 words
+ jz L2 # while swaping alternate bytes.
+ .align 2,0x90
+L1: lodsw
+ rorw $8,%ax
+ stosw
+ decl %ecx
+ testl $7,%ecx
+ jnz L1
+
+L2: shrl $3,%ecx # copy remainder 8 words at a time
+ jz L4 # while swapping alternate bytes.
+ .align 2,0x90
+L3: lodsw
+ rorw $8,%ax
+ stosw
+ lodsw
+ rorw $8,%ax
+ stosw
+ lodsw
+ rorw $8,%ax
+ stosw
+ lodsw
+ rorw $8,%ax
+ stosw
+ lodsw
+ rorw $8,%ax
+ stosw
+ lodsw
+ rorw $8,%ax
+ stosw
+ lodsw
+ rorw $8,%ax
+ stosw
+ lodsw
+ rorw $8,%ax
+ stosw
+ decl %ecx
+ jnz L3
+
+L4: popl %edi
+ popl %esi
+ ret
+END(swab)