Mandate optimized __memset_chk for arm and arm64.

This involves actually implementing an assembler __memset_chk for arm64,
but that's easily done.
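
For reference, the check the new stub performs is equivalent to this C
sketch (assuming the usual fortify signature; the real failure path is
__memset_chk_fail, which logs and aborts, so __builtin_trap below is
just a stand-in):

  #include <stddef.h>
  #include <string.h>

  void* __memset_chk(void* dst, int byte, size_t count, size_t dst_len) {
    // count > dst_len means the write would run past the end of the
    // object the compiler could see, so fail hard instead of writing.
    if (count > dst_len) __builtin_trap();
    return memset(dst, byte, count);
  }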

Obviously I'd like this for all architectures (and all the string functions),
but this is low-hanging fruit...
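
For context, callers only reach this stub when _FORTIFY_SOURCE is on
and the compiler can see the destination's size; roughly (buf and n
here are just illustrative):

  char buf[16];
  // The string.h fortify wrappers pass __builtin_object_size(buf, 0),
  // here 16, as the extra argument:
  memset(buf, 0, n);  // effectively __memset_chk(buf, 0, n, 16)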

Change-Id: I70ec48c91aafd1f0feb974a2555c51611de9ef82
diff --git a/libc/arch-arm64/denver64/bionic/memset.S b/libc/arch-arm64/denver64/bionic/memset.S
index 9127d89..bea5b26 100644
--- a/libc/arch-arm64/denver64/bionic/memset.S
+++ b/libc/arch-arm64/denver64/bionic/memset.S
@@ -48,6 +48,7 @@
 #define dstin		x0
 #define val		w1
 #define count		x2
+#define dst_count	x3	/* for __memset_chk */
 #define tmp1		x3
 #define tmp1w		w3
 #define tmp2		x4
@@ -63,6 +64,19 @@
 
 #define QA_l		q0
 
+ENTRY(__memset_chk)
+  cmp count, dst_count
+  bls memset
+
+  // Preserve for accurate backtrace.
+  stp x29, x30, [sp, -16]!
+  .cfi_def_cfa_offset 16
+  .cfi_rel_offset x29, 0
+  .cfi_rel_offset x30, 8
+
+  bl __memset_chk_fail
+END(__memset_chk)
+
 ENTRY(memset)
 
 	mov	dst, dstin		/* Preserve return value.  */