Add 32-bit Silvermont-optimized string/memory functions.

Add the following functions:
bcopy, memcpy, memmove, memset, bzero, memcmp, wmemcmp, strlen,
strcpy, strncpy, stpcpy, stpncpy.
Create new directories inside arch-x86, one per target architecture: atom,
silvermont and generic (any architecture other than atom or silvermont is
treated as generic).
Because optimized versions of stpcpy and stpncpy are introduced, the C
implementations of these functions are moved from the makefile common to
all architectures into the arm- and mips-specific makefiles.

Change-Id: I990f8061c3e9bca1f154119303da9e781c5d086e
Signed-off-by: Varvara Rainchik <varvara.rainchik@intel.com>
diff --git a/libc/arch-x86/generic/generic.mk b/libc/arch-x86/generic/generic.mk
new file mode 100644
index 0000000..c8b40ee
--- /dev/null
+++ b/libc/arch-x86/generic/generic.mk
@@ -0,0 +1,55 @@
+libc_bionic_src_files_x86 += \
+    arch-x86/atom/string/sse2-index-atom.S \
+    arch-x86/atom/string/sse2-memchr-atom.S \
+    arch-x86/atom/string/sse2-memrchr-atom.S \
+    arch-x86/atom/string/sse2-strchr-atom.S \
+    arch-x86/atom/string/sse2-strnlen-atom.S \
+    arch-x86/atom/string/sse2-strrchr-atom.S \
+    arch-x86/atom/string/sse2-wcschr-atom.S \
+    arch-x86/atom/string/sse2-wcsrchr-atom.S \
+    arch-x86/atom/string/sse2-wcslen-atom.S \
+    arch-x86/atom/string/sse2-wcscmp-atom.S \
+    arch-x86/silvermont/string/sse2-bcopy-slm.S \
+    arch-x86/silvermont/string/sse2-bzero-slm.S \
+    arch-x86/silvermont/string/sse2-memcpy-slm.S \
+    arch-x86/silvermont/string/sse2-memmove-slm.S \
+    arch-x86/silvermont/string/sse2-memset-slm.S \
+    arch-x86/silvermont/string/sse2-stpcpy-slm.S \
+    arch-x86/silvermont/string/sse2-stpncpy-slm.S \
+    arch-x86/silvermont/string/sse2-strcpy-slm.S \
+    arch-x86/silvermont/string/sse2-strlen-slm.S \
+    arch-x86/silvermont/string/sse2-strncpy-slm.S
+
+ifeq ($(ARCH_X86_HAVE_SSSE3),true)  # SSSE3 enabled: take the ssse3-* sources from arch-x86/atom
+libc_bionic_src_files_x86 += \
+    arch-x86/atom/string/ssse3-strncat-atom.S \
+    arch-x86/atom/string/ssse3-strlcat-atom.S \
+    arch-x86/atom/string/ssse3-strlcpy-atom.S \
+    arch-x86/atom/string/ssse3-strcmp-atom.S \
+    arch-x86/atom/string/ssse3-strncmp-atom.S \
+    arch-x86/atom/string/ssse3-strcat-atom.S \
+    arch-x86/atom/string/ssse3-memcmp16-atom.S \
+    arch-x86/atom/string/ssse3-wcscat-atom.S \
+    arch-x86/atom/string/ssse3-wcscpy-atom.S
+else  # SSSE3 not enabled: arch-x86/generic assembly plus C/C++ sources for the same functions
+libc_bionic_src_files_x86 += \
+    arch-x86/generic/string/strcmp.S \
+    arch-x86/generic/string/strncmp.S \
+    arch-x86/generic/string/strcat.S \
+    bionic/__memcmp16.cpp \
+    upstream-freebsd/lib/libc/string/wcscpy.c \
+    upstream-freebsd/lib/libc/string/wcscat.c \
+    upstream-openbsd/lib/libc/string/strlcat.c \
+    upstream-openbsd/lib/libc/string/strlcpy.c \
+    upstream-openbsd/lib/libc/string/strncat.c
+endif  # ARCH_X86_HAVE_SSSE3
+
+ifeq ($(ARCH_X86_HAVE_SSE4),true)  # SSE4 enabled: take memcmp/wmemcmp from arch-x86/silvermont
+libc_bionic_src_files_x86 += \
+    arch-x86/silvermont/string/sse4-memcmp-slm.S \
+    arch-x86/silvermont/string/sse4-wmemcmp-slm.S
+else  # SSE4 not enabled: generic memcmp.S and the FreeBSD C wmemcmp
+libc_bionic_src_files_x86 += \
+    arch-x86/generic/string/memcmp.S \
+    upstream-freebsd/lib/libc/string/wmemcmp.c
+endif  # ARCH_X86_HAVE_SSE4
diff --git a/libc/arch-x86/generic/string/bcopy.S b/libc/arch-x86/generic/string/bcopy.S
new file mode 100644
index 0000000..f425c58
--- /dev/null
+++ b/libc/arch-x86/generic/string/bcopy.S
@@ -0,0 +1,98 @@
+/*	$OpenBSD: bcopy.S,v 1.5 2005/08/07 11:30:38 espie Exp $	*/
+
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from locore.s.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+
+	/*
+	 * (ov)bcopy (src,dst,cnt)
+	 *  ws@tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
+	 */
+
+#if defined(MEMCOPY)
+ENTRY(memcpy)
+#elif defined(MEMMOVE)
+ENTRY(memmove)
+#else
+ENTRY(bcopy)
+#endif
+	pushl	%esi			/* esi/edi are the movs pointers; restored before each ret */
+	pushl	%edi
+#if defined(MEMCOPY) || defined(MEMMOVE)
+	movl	12(%esp),%edi		/* memcpy/memmove order: 1st argument is dst */
+	movl	16(%esp),%esi		/* 2nd argument is src */
+	movl	%edi, %eax		/* eax = dst; not written again before ret */
+#else
+	movl	12(%esp),%esi		/* bcopy order: 1st argument is src */
+	movl	16(%esp),%edi		/* 2nd argument is dst */
+#endif
+	movl	20(%esp),%ecx		/* ecx = cnt (3rd argument) */
+	movl	%ecx,%edx		/* edx keeps the full byte count for later */
+	cmpl	%esi,%edi	/* potentially overlapping? */
+	jnb	1f		/* dst >= src (unsigned): take the backwards copy */
+	cld			/* nope, copy forwards. */
+	shrl	$2,%ecx		/* copy by words */
+	rep
+	movsl
+	movl	%edx,%ecx
+	andl	$3,%ecx		/* any bytes left? */
+	rep
+	movsb
+	popl	%edi
+	popl	%esi
+	ret
+1:
+	addl	%ecx,%edi	/* copy backwards. */
+	addl	%ecx,%esi		/* both pointers are now one past the end */
+	std				/* DF=1: movs decrements esi/edi */
+	andl	$3,%ecx		/* any fractional bytes? */
+	decl	%edi			/* point at the last byte of each buffer */
+	decl	%esi
+	rep
+	movsb				/* copy the cnt%4 trailing bytes, highest address first */
+	movl	%edx,%ecx
+	shrl	$2,%ecx			/* ecx = number of whole 4-byte words */
+	subl	$3,%esi			/* step back to the first byte of the last word */
+	subl	$3,%edi
+	rep
+	movsl
+	popl	%edi
+	popl	%esi
+	cld				/* return with DF clear again */
+	ret
+#if defined(MEMCOPY)
+END(memcpy)
+#elif defined(MEMMOVE)
+END(memmove)
+#else
+END(bcopy)
+#endif
diff --git a/libc/arch-x86/generic/string/memcmp.S b/libc/arch-x86/generic/string/memcmp.S
new file mode 100644
index 0000000..ef36b4f
--- /dev/null
+++ b/libc/arch-x86/generic/string/memcmp.S
@@ -0,0 +1,44 @@
+/*	$OpenBSD: memcmp.S,v 1.4 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <private/bionic_asm.h>
+
+ENTRY(memcmp)
+	pushl	%edi			/* edi/esi are the cmps pointers; restored before each ret */
+	pushl	%esi
+	movl	12(%esp),%edi		/* edi = 1st buffer argument */
+	movl	16(%esp),%esi		/* esi = 2nd buffer argument */
+	cld				/* set compare direction forward */
+
+	movl	20(%esp),%ecx		/* compare by words */
+	shrl	$2,%ecx			/* ecx = length / 4; ZF=1 when that is zero */
+	repe
+	cmpsl				/* skipped entirely when ecx == 0, leaving ZF from shrl */
+	jne	L5			/* do we match so far? */
+
+	movl	20(%esp),%ecx		/* compare remainder by bytes */
+	andl	$3,%ecx			/* ecx = length % 4; ZF=1 when that is zero */
+	repe
+	cmpsb
+	jne	L6			/* do we match? */
+
+	xorl	%eax,%eax		/* we match, return zero	*/
+	popl	%esi
+	popl	%edi
+	ret
+
+L5:	movl	$4,%ecx			/* We know that one of the next	*/
+	subl	%ecx,%edi		/* four pairs of bytes do not	*/
+	subl	%ecx,%esi		/* match.			*/
+	repe
+	cmpsb				/* rescan the word bytewise; stops one past the mismatch */
+L6:	movzbl  -1(%edi),%eax		/* Perform unsigned comparison	*/
+	movzbl	-1(%esi),%edx		/* -1: cmpsb already advanced past the differing byte */
+	subl	%edx,%eax		/* eax = byte from 1st buffer - byte from 2nd buffer */
+	popl	%esi
+	popl	%edi
+	ret
+END(memcmp)
diff --git a/libc/arch-x86/generic/string/memcpy.S b/libc/arch-x86/generic/string/memcpy.S
new file mode 100644
index 0000000..95c8a83
--- /dev/null
+++ b/libc/arch-x86/generic/string/memcpy.S
@@ -0,0 +1,3 @@
+/*	$OpenBSD: memcpy.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+#define MEMCOPY		/* makes bcopy.S emit its routine as memcpy: (dst,src,cnt) argument order, dst left in eax */
+#include "bcopy.S"	/* shared body; it copies backwards when dst >= src */
diff --git a/libc/arch-x86/generic/string/memmove.S b/libc/arch-x86/generic/string/memmove.S
new file mode 100644
index 0000000..c5bfd19
--- /dev/null
+++ b/libc/arch-x86/generic/string/memmove.S
@@ -0,0 +1,3 @@
+/*	$OpenBSD: memmove.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+#define MEMMOVE		/* makes bcopy.S emit its routine as memmove: (dst,src,cnt) argument order, dst left in eax */
+#include "bcopy.S"	/* shared body; it copies backwards when dst >= src */
diff --git a/libc/arch-x86/generic/string/strcat.S b/libc/arch-x86/generic/string/strcat.S
new file mode 100644
index 0000000..49e8eee
--- /dev/null
+++ b/libc/arch-x86/generic/string/strcat.S
@@ -0,0 +1,74 @@
+/*	$OpenBSD: strcat.S,v 1.8 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <private/bionic_asm.h>
+
+#if defined(APIWARN)
+#APP
+	.section .gnu.warning.strcat
+	.ascii "warning: strcat() is almost always misused, please use strlcat()"
+#NO_APP
+#endif
+
+/*
+ * NOTE: I've unrolled the loop eight times: large enough to make a
+ * significant difference, and small enough not to totally trash the
+ * cache.
+ */
+
+ENTRY(strcat)
+	pushl	%edi			/* save edi */
+	movl	8(%esp),%edi		/* dst address */
+	movl	12(%esp),%edx		/* src address */
+	pushl	%edi			/* push destination address */
+
+	cld				/* set search forward */
+	xorl	%eax,%eax		/* set search for null terminator */
+	movl	$-1,%ecx		/* set search for lots of characters */
+	repne				/* search! */
+	scasb				/* leaves edi one past the terminator of dst */
+
+	leal	-1(%edi),%ecx		/* correct dst address */
+
+	.align 2,0x90
+L1:	movb	(%edx),%al		/* unroll loop, but not too much */
+	movb	%al,(%ecx)		/* store first, so the terminator is copied too */
+	testb	%al,%al
+	jz	L2			/* copied the terminator: done */
+	movb	1(%edx),%al
+	movb	%al,1(%ecx)
+	testb	%al,%al
+	jz	L2
+	movb	2(%edx),%al
+	movb	%al,2(%ecx)
+	testb	%al,%al
+	jz	L2
+	movb	3(%edx),%al
+	movb	%al,3(%ecx)
+	testb	%al,%al
+	jz	L2
+	movb	4(%edx),%al
+	movb	%al,4(%ecx)
+	testb	%al,%al
+	jz	L2
+	movb	5(%edx),%al
+	movb	%al,5(%ecx)
+	testb	%al,%al
+	jz	L2
+	movb	6(%edx),%al
+	movb	%al,6(%ecx)
+	testb	%al,%al
+	jz	L2
+	movb	7(%edx),%al
+	movb	%al,7(%ecx)
+	addl	$8,%edx			/* advance both pointers past the 8 bytes just copied */
+	addl	$8,%ecx
+	testb	%al,%al			/* tested after the adds because addl overwrites the flags */
+	jnz	L1
+L2:	popl	%eax			/* pop destination address */
+	popl	%edi			/* restore edi */
+	ret
+END(strcat)
diff --git a/libc/arch-x86/generic/string/strcmp.S b/libc/arch-x86/generic/string/strcmp.S
new file mode 100644
index 0000000..580f4d5
--- /dev/null
+++ b/libc/arch-x86/generic/string/strcmp.S
@@ -0,0 +1,82 @@
+/*	$OpenBSD: strcmp.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <private/bionic_asm.h>
+
+/*
+ * NOTE: I've unrolled the loop eight times: large enough to make a
+ * significant difference, and small enough not to totally trash the
+ * cache.
+ */
+
+ENTRY(strcmp)
+	movl	0x04(%esp),%eax		/* eax = 1st string argument */
+	movl	0x08(%esp),%edx		/* edx = 2nd string argument */
+	jmp	L2			/* Jump into the loop! */
+
+	.align	2,0x90
+L1:	incl	%eax			/* advance both strings, then fall into stage 1 */
+	incl	%edx
+L2:	movb	(%eax),%cl
+	testb	%cl,%cl			/* null terminator??? */
+	jz	L3
+	cmpb	%cl,(%edx)		/* chars match??? */
+	jne	L3
+	incl	%eax			/* stage 2 of 8: same test on the next byte */
+	incl	%edx
+	movb	(%eax),%cl
+	testb	%cl,%cl
+	jz	L3
+	cmpb	%cl,(%edx)
+	jne	L3
+	incl	%eax			/* stage 3 */
+	incl	%edx
+	movb	(%eax),%cl
+	testb	%cl,%cl
+	jz	L3
+	cmpb	%cl,(%edx)
+	jne	L3
+	incl	%eax			/* stage 4 */
+	incl	%edx
+	movb	(%eax),%cl
+	testb	%cl,%cl
+	jz	L3
+	cmpb	%cl,(%edx)
+	jne	L3
+	incl	%eax			/* stage 5 */
+	incl	%edx
+	movb	(%eax),%cl
+	testb	%cl,%cl
+	jz	L3
+	cmpb	%cl,(%edx)
+	jne	L3
+	incl	%eax			/* stage 6 */
+	incl	%edx
+	movb	(%eax),%cl
+	testb	%cl,%cl
+	jz	L3
+	cmpb	%cl,(%edx)
+	jne	L3
+	incl	%eax			/* stage 7 */
+	incl	%edx
+	movb	(%eax),%cl
+	testb	%cl,%cl
+	jz	L3
+	cmpb	%cl,(%edx)
+	jne	L3
+	incl	%eax			/* stage 8 */
+	incl	%edx
+	movb	(%eax),%cl
+	testb	%cl,%cl
+	jz	L3
+	cmpb	%cl,(%edx)
+	je	L1			/* still equal: go round again */
+	.align 2, 0x90
+L3:	movzbl	(%eax),%eax		/* unsigned comparison */
+	movzbl	(%edx),%edx		/* eax/edx still point at the deciding byte pair */
+	subl	%edx,%eax		/* eax = byte of 1st string - byte of 2nd string */
+	ret
+END(strcmp)
diff --git a/libc/arch-x86/generic/string/strncmp.S b/libc/arch-x86/generic/string/strncmp.S
new file mode 100644
index 0000000..9ba83a1
--- /dev/null
+++ b/libc/arch-x86/generic/string/strncmp.S
@@ -0,0 +1,114 @@
+/*	$OpenBSD: strncmp.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <private/bionic_asm.h>
+
+/*
+ * NOTE: I've unrolled the loop eight times: large enough to make a
+ * significant difference, and small enough not to totally trash the
+ * cache.
+ */
+
+ENTRY(strncmp)
+	pushl	%ebx			/* bl is the scratch byte; ebx is restored before each ret */
+	movl	8(%esp),%eax		/* eax = 1st string argument */
+	movl	12(%esp),%ecx		/* ecx = 2nd string argument */
+	movl	16(%esp),%edx		/* edx = maximum number of bytes to compare */
+	testl	%edx,%edx		/* ZF=1 if the limit is 0; jmp below leaves flags alone */
+	jmp	L2			/* Jump into the loop! */
+
+	.align 2,0x90
+L1:	incl	%eax
+	incl	%ecx
+	decl	%edx			/* last flag writer before L2: ZF=1 when the limit is used up */
+L2:	jz	L4			/* strings are equal */
+	movb	(%eax),%bl
+	testb	%bl,%bl			/* end of 1st string? */
+	jz	L3
+	cmpb	%bl,(%ecx)		/* bytes differ? */
+	jne	L3
+
+	incl	%eax			/* stage 2 of 8: same steps on the next byte */
+	incl	%ecx
+	decl	%edx
+	jz	L4
+	movb	(%eax),%bl
+	testb	%bl,%bl
+	jz	L3
+	cmpb	%bl,(%ecx)
+	jne	L3
+
+	incl	%eax			/* stage 3 */
+	incl	%ecx
+	decl	%edx
+	jz	L4
+	movb	(%eax),%bl
+	testb	%bl,%bl
+	jz	L3
+	cmpb	%bl,(%ecx)
+	jne	L3
+
+	incl	%eax			/* stage 4 */
+	incl	%ecx
+	decl	%edx
+	jz	L4
+	movb	(%eax),%bl
+	testb	%bl,%bl
+	jz	L3
+	cmpb	%bl,(%ecx)
+	jne	L3
+
+	incl	%eax			/* stage 5 */
+	incl	%ecx
+	decl	%edx
+	jz	L4
+	movb	(%eax),%bl
+	testb	%bl,%bl
+	jz	L3
+	cmpb	%bl,(%ecx)
+	jne	L3
+
+	incl	%eax			/* stage 6 */
+	incl	%ecx
+	decl	%edx
+	jz	L4
+	movb	(%eax),%bl
+	testb	%bl,%bl
+	jz	L3
+	cmpb	%bl,(%ecx)
+	jne	L3
+
+	incl	%eax			/* stage 7 */
+	incl	%ecx
+	decl	%edx
+	jz	L4
+	movb	(%eax),%bl
+	testb	%bl,%bl
+	jz	L3
+	cmpb	%bl,(%ecx)
+	jne	L3
+
+	incl	%eax			/* stage 8 */
+	incl	%ecx
+	decl	%edx
+	jz	L4
+	movb	(%eax),%bl
+	testb	%bl,%bl
+	jz	L3
+	cmpb	%bl,(%ecx)
+	je	L1			/* still equal: go round again */
+
+	.align 2,0x90
+L3:	movzbl	(%eax),%eax		/* unsigned comparison */
+	movzbl	(%ecx),%ecx
+	subl	%ecx,%eax		/* eax = byte of 1st string - byte of 2nd string */
+	popl	%ebx
+	ret
+	.align 2,0x90
+L4:	xorl	%eax,%eax		/* limit reached with no difference: return 0 */
+	popl	%ebx
+	ret
+END(strncmp)
diff --git a/libc/arch-x86/generic/string/swab.S b/libc/arch-x86/generic/string/swab.S
new file mode 100644
index 0000000..b44d134
--- /dev/null
+++ b/libc/arch-x86/generic/string/swab.S
@@ -0,0 +1,68 @@
+/*	$OpenBSD: swab.S,v 1.3 2005/08/07 11:30:38 espie Exp $ */
+/*
+ * Written by J.T. Conklin <jtc@netbsd.org>.
+ * Public domain.
+ */
+
+#include <private/bionic_asm.h>
+
+/*
+ * On the i486, this code is negligibly faster than the code generated
+ * by gcc at about half the size.  If my i386 databook is correct, it
+ * should be considerably faster than the gcc code on a i386.
+ */
+
+ENTRY(swab)
+	pushl	%esi			# esi/edi are the lodsw/stosw pointers; restored at L4
+	pushl	%edi
+	movl	12(%esp),%esi		# esi = source buffer (1st argument)
+	movl	16(%esp),%edi		# edi = destination buffer (2nd argument)
+	movl	20(%esp),%ecx		# ecx = length in bytes (3rd argument, signed)
+	testl	%ecx,%ecx		# a negative length must copy nothing (POSIX);
+	js	L4			# the logical shift below would make it a huge count
+	cld				# set direction forward
+	shrl	$1,%ecx			# ecx = number of 16-bit words; an odd last byte is not copied
+	testl	$7,%ecx			# copy first group of 1 to 7 words
+	jz	L2			# while swapping alternate bytes.
+	.align	2,0x90
+L1:	lodsw				# ax = next source word
+	rorw	$8,%ax			# exchange its two bytes
+	stosw				# store it to the destination
+	decl	%ecx
+	testl	$7,%ecx			# repeat until the remaining count is a multiple of 8
+	jnz	L1
+
+L2:	shrl	$3,%ecx			# copy remainder 8 words at a time
+	jz	L4			# while swapping alternate bytes.
+	.align	2,0x90
+L3:	lodsw				# eight identical load / rotate / store steps
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	lodsw
+	rorw	$8,%ax
+	stosw
+	decl	%ecx			# one group of 8 words done
+	jnz	L3
+
+L4:	popl	%edi
+	popl	%esi
+	ret
+END(swab)