x86, x86_64: Add __memcpy_chk assembly versions.

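Implement __memcpy_chk() in assembly for x86 and x86_64. Each version is
placed directly before the memmove entry point, so a call that passes
the check falls through into the copy. Remove the conditionally-compiled
C fallback from fortify.cpp: arm, arm64 and riscv already had assembly
implementations, so every supported architecture now has an external
one.

Note that the assembly versions only compare count against dst_len; the
C fallback's separate __check_count() call has no assembly counterpart.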

Bug: 389669171
Test: mma
Test: ./tests/run-on-host.sh 32 --gtest_filter='*memcpy*'
Test: ./tests/run-on-host.sh 64 --gtest_filter='*memcpy*'
Change-Id: I755fdd348f2f380e7c40bc57dc1f4d36d791b1c0
diff --git a/libc/arch-x86/string/sse2-memmove-slm.S b/libc/arch-x86/string/sse2-memmove-slm.S
index 2ed4e7b..a25b4c7 100644
--- a/libc/arch-x86/string/sse2-memmove-slm.S
+++ b/libc/arch-x86/string/sse2-memmove-slm.S
@@ -96,6 +96,13 @@
 #define SETUP_PIC_REG(x)	call	__x86.get_pc_thunk.x
 
 	.section .text.sse2,"ax",@progbits
+ENTRY (__memcpy_chk)
+/* NOTE: We can't use LEN here because ebx has not been pushed yet, so the raw stack offsets are used instead. */
+	movl	12(%esp), %ecx	/* ecx = count, the third stack argument */
+	cmpl	16(%esp), %ecx	/* compare count against dst_len, the fourth stack argument */
+	ja	__memcpy_chk_fail	/* taken when count > dst_len, compared as unsigned */
+/* Check passed: fall through to memcpy/memmove, whose ENTRY follows this block. */
+END (__memcpy_chk)
 ENTRY (MEMMOVE)
 	ENTRANCE
 	movl	LEN(%esp), %ecx
diff --git a/libc/arch-x86_64/string/sse2-memmove-slm.S b/libc/arch-x86_64/string/sse2-memmove-slm.S
index b787385..9f5fb12 100644
--- a/libc/arch-x86_64/string/sse2-memmove-slm.S
+++ b/libc/arch-x86_64/string/sse2-memmove-slm.S
@@ -99,6 +99,11 @@
 #define RETURN		RETURN_END;
 
 	.section .text.sse2,"ax",@progbits
+ENTRY (__memcpy_chk)
+	cmp	%rcx, %rdx	/* AT&T operand order: flags reflect rdx - rcx, i.e. count vs. dst_len (3rd and 4th arguments) */
+	ja	__memcpy_chk_fail	/* taken when count > dst_len, compared as unsigned */
+/* Check passed: fall through to memcpy/memmove, whose ENTRY follows this block. */
+END (__memcpy_chk)
 ENTRY (MEMMOVE)
 	ENTRANCE
 	mov	%rdi, %rax
diff --git a/libc/bionic/fortify.cpp b/libc/bionic/fortify.cpp
index 80f7c20..15053d3 100644
--- a/libc/bionic/fortify.cpp
+++ b/libc/bionic/fortify.cpp
@@ -489,16 +489,6 @@
   return strcpy(dst, src);
 }
 
-#if !defined(__arm__) && !defined(__aarch64__) && !defined(__riscv)
-// Runtime implementation of __memcpy_chk (used directly by compiler, not in headers).
-// arm32,arm64,riscv have assembler implementations, and don't need this C fallback.
-extern "C" void* __memcpy_chk(void* dst, const void* src, size_t count, size_t dst_len) {
-  __check_count("memcpy", "count", count);
-  __check_buffer_access("memcpy", "write into", count, dst_len);
-  return memcpy(dst, src, count);
-}
-#endif
-
 // Runtime implementation of __mempcpy_chk (used directly by compiler, not in headers).
 extern "C" void* __mempcpy_chk(void* dst, const void* src, size_t count, size_t dst_len) {
   __check_count("mempcpy", "count", count);