Mandate optimized __memset_chk for arm and arm64.

This involves actually implementing assembler __memset_chk for arm64,
but that's easily done.

Obviously I'd like this for all architectures (and all the string functions),
but this is low-hanging fruit...

Change-Id: I70ec48c91aafd1f0feb974a2555c51611de9ef82
diff --git a/libc/bionic/fortify.cpp b/libc/bionic/fortify.cpp
index ad7aa04..a1db2a4 100644
--- a/libc/bionic/fortify.cpp
+++ b/libc/bionic/fortify.cpp
@@ -153,6 +153,15 @@
   return memrchr(s, c, n);
 }
 
+#if !defined(__aarch64__) && !defined(__arm__) // TODO: add optimized assembler for the others too.
+// Portable fallback for __builtin___memset_chk (used directly by compiler, not in headers).
+extern "C" void* __memset_chk(void* dst, int byte, size_t count, size_t dst_len) {
+  __check_count("memset", "count", count);  // NOTE(review): presumably rejects bogus counts; confirm.
+  __check_buffer_access("memset", "write into", count, dst_len);  // NOTE(review): presumably fails if count > dst_len; confirm.
+  return memset(dst, byte, count);  // Reached only after both checks above return.
+}
+#endif  // !defined(__aarch64__) && !defined(__arm__)
+
 // memset is performance-critical enough that we have assembler __memset_chk implementations.
 // This function is used to give better diagnostics than we can easily do from assembler.
 extern "C" void* __memset_chk_fail(void* /*dst*/, int /*byte*/, size_t count, size_t dst_len) {