Make memcpy an alias for memmove

memmove already handles overlapping buffers correctly and performs
comparably, so the separate SSE2 memcpy implementation is removed and
memcpy is exported as an alias of the memmove entry point.

Bug: http://b/63992911
Test: Change BoardConfig.mk and compile for each variant
Change-Id: Ia0cc68d8e90e3316ddb2e9ff1555a009b6a0c5be
diff --git a/libc/arch-x86_64/string/sse2-memcpy-slm.S b/libc/arch-x86_64/string/sse2-memcpy-slm.S
deleted file mode 100644
index 4c30fb6..0000000
--- a/libc/arch-x86_64/string/sse2-memcpy-slm.S
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
-Copyright (c) 2014, Intel Corporation
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
-
-    * Redistributions of source code must retain the above copyright notice,
-    * this list of conditions and the following disclaimer.
-
-    * Redistributions in binary form must reproduce the above copyright notice,
-    * this list of conditions and the following disclaimer in the documentation
-    * and/or other materials provided with the distribution.
-
-    * Neither the name of Intel Corporation nor the names of its contributors
-    * may be used to endorse or promote products derived from this software
-    * without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
-ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-#include "cache.h"
-
-#ifndef MEMCPY
-# define MEMCPY		memcpy
-#endif
-
-#ifndef L
-# define L(label)	.L##label
-#endif
-
-#ifndef cfi_startproc
-# define cfi_startproc	.cfi_startproc
-#endif
-
-#ifndef cfi_endproc
-# define cfi_endproc	.cfi_endproc
-#endif
-
-#ifndef cfi_rel_offset
-# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
-#endif
-
-#ifndef cfi_restore
-# define cfi_restore(reg)	.cfi_restore reg
-#endif
-
-#ifndef cfi_adjust_cfa_offset
-# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
-#endif
-
-#ifndef ENTRY
-# define ENTRY(name)		\
-	.type name,  @function;		\
-	.globl name;		\
-	.p2align 4;		\
-name:		\
-	cfi_startproc
-#endif
-
-#ifndef END
-# define END(name)		\
-	cfi_endproc;		\
-	.size name, .-name
-#endif
-
-#define CFI_PUSH(REG)		\
-	cfi_adjust_cfa_offset (4);		\
-	cfi_rel_offset (REG, 0)
-
-#define CFI_POP(REG)		\
-	cfi_adjust_cfa_offset (-4);		\
-	cfi_restore (REG)
-
-#define PUSH(REG)	push REG;
-#define POP(REG)	pop REG;
-
-#define ENTRANCE	PUSH (%rbx);
-#define RETURN_END	POP (%rbx); ret
-#define RETURN		RETURN_END;
-
-	.section .text.sse2,"ax",@progbits
-ENTRY (MEMCPY)
-	ENTRANCE
-	cmp	%rsi, %rdi
-	je	L(return)
-
-	cmp	$16, %rdx
-	jbe	L(len_0_16_bytes)
-
-	cmp	$SHARED_CACHE_SIZE_HALF, %rdx
-	jae	L(large_page)
-
-	movdqu	(%rsi), %xmm0
-	movdqu	-16(%rsi, %rdx), %xmm1
-	cmp	$32, %rdx
-	movdqu	%xmm0, (%rdi)
-	movdqu	%xmm1, -16(%rdi, %rdx)
-	jbe	L(return)
-
-	movdqu	16(%rsi), %xmm0
-	movdqu	-32(%rsi, %rdx), %xmm1
-	cmp	$64, %rdx
-	movdqu	%xmm0, 16(%rdi)
-	movdqu	%xmm1, -32(%rdi, %rdx)
-	jbe	L(return)
-
-	movdqu	32(%rsi), %xmm0
-	movdqu	48(%rsi), %xmm1
-	movdqu	-48(%rsi, %rdx), %xmm2
-	movdqu	-64(%rsi, %rdx), %xmm3
-	cmp	$128, %rdx
-	movdqu	%xmm0, 32(%rdi)
-	movdqu	%xmm1, 48(%rdi)
-	movdqu	%xmm2, -48(%rdi, %rdx)
-	movdqu	%xmm3, -64(%rdi, %rdx)
-	jbe	L(return)
-
-/* Now the main loop: we align the address of the destination.  */
-	lea	64(%rdi), %r8
-	and	$-64, %r8
-
-	add	%rdi, %rdx
-	and	$-64, %rdx
-
-	sub	%rdi, %rsi
-
-/* We should stop two iterations before the termination
-	(in order not to misprefetch).  */
-	sub	$64, %rdx
-	cmp	%r8, %rdx
-	je	L(main_loop_just_one_iteration)
-
-	sub	$64, %rdx
-	cmp	%r8, %rdx
-	je	L(main_loop_last_two_iterations)
-
-
-	.p2align 4
-L(main_loop_cache):
-
-	prefetcht0 128(%r8, %rsi)
-
-	movdqu	(%r8, %rsi), %xmm0
-	movdqu	16(%r8, %rsi), %xmm1
-	movdqu	32(%r8, %rsi), %xmm2
-	movdqu	48(%r8, %rsi), %xmm3
-	movdqa	%xmm0, (%r8)
-	movdqa	%xmm1, 16(%r8)
-	movdqa	%xmm2, 32(%r8)
-	movdqa	%xmm3, 48(%r8)
-	lea	64(%r8), %r8
-	cmp	%r8, %rdx
-	jne	L(main_loop_cache)
-
-L(main_loop_last_two_iterations):
-	movdqu	(%r8, %rsi), %xmm0
-	movdqu	16(%r8, %rsi), %xmm1
-	movdqu	32(%r8, %rsi), %xmm2
-	movdqu	48(%r8, %rsi), %xmm3
-	movdqu	64(%r8, %rsi), %xmm4
-	movdqu	80(%r8, %rsi), %xmm5
-	movdqu	96(%r8, %rsi), %xmm6
-	movdqu	112(%r8, %rsi), %xmm7
-	movdqa	%xmm0, (%r8)
-	movdqa	%xmm1, 16(%r8)
-	movdqa	%xmm2, 32(%r8)
-	movdqa	%xmm3, 48(%r8)
-	movdqa	%xmm4, 64(%r8)
-	movdqa	%xmm5, 80(%r8)
-	movdqa	%xmm6, 96(%r8)
-	movdqa	%xmm7, 112(%r8)
-	jmp	L(return)
-
-L(main_loop_just_one_iteration):
-	movdqu	(%r8, %rsi), %xmm0
-	movdqu	16(%r8, %rsi), %xmm1
-	movdqu	32(%r8, %rsi), %xmm2
-	movdqu	48(%r8, %rsi), %xmm3
-	movdqa	%xmm0, (%r8)
-	movdqa	%xmm1, 16(%r8)
-	movdqa	%xmm2, 32(%r8)
-	movdqa	%xmm3, 48(%r8)
-	jmp	L(return)
-
-L(large_page):
-	movdqu	(%rsi), %xmm0
-	movdqu	16(%rsi), %xmm1
-	movdqu	32(%rsi), %xmm2
-	movdqu	48(%rsi), %xmm3
-	movdqu	-64(%rsi, %rdx), %xmm4
-	movdqu	-48(%rsi, %rdx), %xmm5
-	movdqu	-32(%rsi, %rdx), %xmm6
-	movdqu	-16(%rsi, %rdx), %xmm7
-	movdqu	%xmm0, (%rdi)
-	movdqu	%xmm1, 16(%rdi)
-	movdqu	%xmm2, 32(%rdi)
-	movdqu	%xmm3, 48(%rdi)
-	movdqu	%xmm4, -64(%rdi, %rdx)
-	movdqu	%xmm5, -48(%rdi, %rdx)
-	movdqu	%xmm6, -32(%rdi, %rdx)
-	movdqu	%xmm7, -16(%rdi, %rdx)
-
-	movdqu	64(%rsi), %xmm0
-	movdqu	80(%rsi), %xmm1
-	movdqu	96(%rsi), %xmm2
-	movdqu	112(%rsi), %xmm3
-	movdqu	-128(%rsi, %rdx), %xmm4
-	movdqu	-112(%rsi, %rdx), %xmm5
-	movdqu	-96(%rsi, %rdx), %xmm6
-	movdqu	-80(%rsi, %rdx), %xmm7
-	movdqu	%xmm0, 64(%rdi)
-	movdqu	%xmm1, 80(%rdi)
-	movdqu	%xmm2, 96(%rdi)
-	movdqu	%xmm3, 112(%rdi)
-	movdqu	%xmm4, -128(%rdi, %rdx)
-	movdqu	%xmm5, -112(%rdi, %rdx)
-	movdqu	%xmm6, -96(%rdi, %rdx)
-	movdqu	%xmm7, -80(%rdi, %rdx)
-
-/* Now the main loop with non temporal stores. We align
-	the address of the destination.  */
-	lea	128(%rdi), %r8
-	and	$-128, %r8
-
-	add	%rdi, %rdx
-	and	$-128, %rdx
-
-	sub	%rdi, %rsi
-
-	.p2align 4
-L(main_loop_large_page):
-	movdqu	(%r8, %rsi), %xmm0
-	movdqu	16(%r8, %rsi), %xmm1
-	movdqu	32(%r8, %rsi), %xmm2
-	movdqu	48(%r8, %rsi), %xmm3
-	movdqu	64(%r8, %rsi), %xmm4
-	movdqu	80(%r8, %rsi), %xmm5
-	movdqu	96(%r8, %rsi), %xmm6
-	movdqu	112(%r8, %rsi), %xmm7
-	movntdq	%xmm0, (%r8)
-	movntdq	%xmm1, 16(%r8)
-	movntdq	%xmm2, 32(%r8)
-	movntdq	%xmm3, 48(%r8)
-	movntdq	%xmm4, 64(%r8)
-	movntdq	%xmm5, 80(%r8)
-	movntdq	%xmm6, 96(%r8)
-	movntdq	%xmm7, 112(%r8)
-	lea	128(%r8), %r8
-	cmp	%r8, %rdx
-	jne	L(main_loop_large_page)
-	sfence
-	jmp	L(return)
-
-L(len_0_16_bytes):
-	testb	$24, %dl
-	jne	L(len_9_16_bytes)
-	testb	$4, %dl
-	.p2align 4,,5
-	jne	L(len_5_8_bytes)
-	test	%rdx, %rdx
-	.p2align 4,,2
-	je	L(return)
-	movzbl	(%rsi), %ebx
-	testb	$2, %dl
-	movb	%bl, (%rdi)
-	je	L(return)
-	movzwl	-2(%rsi,%rdx), %ebx
-	movw	%bx, -2(%rdi,%rdx)
-	jmp	L(return)
-
-L(len_9_16_bytes):
-	movq	(%rsi), %xmm0
-	movq	-8(%rsi, %rdx), %xmm1
-	movq	%xmm0, (%rdi)
-	movq	%xmm1, -8(%rdi, %rdx)
-	jmp	L(return)
-
-L(len_5_8_bytes):
-	movl	(%rsi), %ebx
-	movl	%ebx, (%rdi)
-	movl	-4(%rsi,%rdx), %ebx
-	movl	%ebx, -4(%rdi,%rdx)
-	jmp	L(return)
-
-L(return):
-	mov 	%rdi, %rax
-	RETURN
-
-END (MEMCPY)
diff --git a/libc/arch-x86_64/string/sse2-memmove-slm.S b/libc/arch-x86_64/string/sse2-memmove-slm.S
index 6a5afd6..7395028 100644
--- a/libc/arch-x86_64/string/sse2-memmove-slm.S
+++ b/libc/arch-x86_64/string/sse2-memmove-slm.S
@@ -67,6 +67,12 @@
 	cfi_startproc
 #endif
 
+#ifndef ALIAS_SYMBOL
+# define ALIAS_SYMBOL(alias, original) \
+	.globl alias; \
+	.equ alias, original
+#endif
+
 #ifndef END
 # define END(name)		\
 	cfi_endproc;		\
@@ -508,3 +514,5 @@
 	jmp	L(mm_recalc_len)
 
 END (MEMMOVE)
+
+ALIAS_SYMBOL(memcpy, MEMMOVE)