Merge "Skip the MemtagNoteTests when the overriding sysprop is set."
diff --git a/benchmarks/malloc_benchmark.cpp b/benchmarks/malloc_benchmark.cpp
index 18ba523..e733cd0 100644
--- a/benchmarks/malloc_benchmark.cpp
+++ b/benchmarks/malloc_benchmark.cpp
@@ -36,11 +36,11 @@
 
 #if defined(__BIONIC__)
 
-static void BM_mallopt_purge(benchmark::State& state) {
+static void RunMalloptPurge(benchmark::State& state, int purge_value) {
   static size_t sizes[] = {8, 16, 32, 64, 128, 1024, 4096, 16384, 65536, 131072, 1048576};
   static int pagesize = getpagesize();
   mallopt(M_DECAY_TIME, 1);
-  mallopt(M_PURGE, 0);
+  mallopt(M_PURGE_ALL, 0);
   for (auto _ : state) {
     state.PauseTiming();
     std::vector<void*> ptrs;
@@ -63,10 +63,19 @@
     ptrs.clear();
     state.ResumeTiming();
 
-    mallopt(M_PURGE, 0);
+    mallopt(purge_value, 0);
   }
   mallopt(M_DECAY_TIME, 0);
 }
+
+static void BM_mallopt_purge(benchmark::State& state) {
+  RunMalloptPurge(state, M_PURGE);
+}
 BIONIC_BENCHMARK(BM_mallopt_purge);
 
+static void BM_mallopt_purge_all(benchmark::State& state) {
+  RunMalloptPurge(state, M_PURGE_ALL);
+}
+BIONIC_BENCHMARK(BM_mallopt_purge_all);
+
 #endif
diff --git a/benchmarks/malloc_map_benchmark.cpp b/benchmarks/malloc_map_benchmark.cpp
index ba4d62c..5757325 100644
--- a/benchmarks/malloc_map_benchmark.cpp
+++ b/benchmarks/malloc_map_benchmark.cpp
@@ -69,7 +69,7 @@
   for (auto _ : state) {
 #if defined(__BIONIC__)
     state.PauseTiming();
-    mallopt(M_PURGE, 0);
+    mallopt(M_PURGE_ALL, 0);
     uint64_t rss_bytes_before = 0;
     Gather(&rss_bytes_before);
     state.ResumeTiming();
@@ -80,7 +80,7 @@
     }
 #if defined(__BIONIC__)
     state.PauseTiming();
-    mallopt(M_PURGE, 0);
+    mallopt(M_PURGE_ALL, 0);
     Gather(&rss_bytes);
     // Try and record only the memory used in the map.
     rss_bytes -= rss_bytes_before;
diff --git a/benchmarks/malloc_rss_benchmark.cpp b/benchmarks/malloc_rss_benchmark.cpp
index 58f61d9..4b34e72 100644
--- a/benchmarks/malloc_rss_benchmark.cpp
+++ b/benchmarks/malloc_rss_benchmark.cpp
@@ -112,7 +112,7 @@
 
   // Do an explicit purge to ensure we will be more likely to get the actual
   // in-use memory.
-  mallopt(M_PURGE, 0);
+  mallopt(M_PURGE_ALL, 0);
 
   android::meminfo::ProcMemInfo proc_mem(getpid());
   const std::vector<android::meminfo::Vma>& maps = proc_mem.MapsWithoutUsageStats();
diff --git a/libc/Android.bp b/libc/Android.bp
index c101f45..99261f9 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -827,6 +827,7 @@
         riscv64: {
             srcs: [
                "arch-riscv64/string/__memset_chk.S",
+               "arch-riscv64/string/__memcpy_chk.S",
             ],
         },
     },
@@ -2307,6 +2308,11 @@
                 "arch-arm64/string/__memcpy_chk.S",
             ],
         },
+        riscv64: {
+            srcs: [
+               "arch-riscv64/string/__memcpy_chk.S",
+            ],
+        },
     },
     whole_static_libs: [
         "libarm-optimized-routines-mem",
diff --git a/libc/arch-arm64/bionic/setjmp.S b/libc/arch-arm64/bionic/setjmp.S
index 8e00b56..d787a56 100644
--- a/libc/arch-arm64/bionic/setjmp.S
+++ b/libc/arch-arm64/bionic/setjmp.S
@@ -46,8 +46,6 @@
 // 0      sigflag/cookie  setjmp cookie in top 31 bits, signal mask flag in low bit
 // 1      sigmask         signal mask (not used with _setjmp / _longjmp)
 // 2      core_base       base of core registers (x18-x30, sp)
-//                        (We only store the low bits of x18 to avoid leaking the
-//                        shadow call stack address into memory.)
 // 16     float_base      base of float registers (d8-d15)
 // 24     checksum        checksum of core registers
 // 25     reserved        reserved entries (room to grow)
@@ -68,8 +66,6 @@
 #define _JB_D8_D9       (_JB_D10_D11 + 2)
 #define _JB_CHECKSUM    (_JB_D8_D9 + 2)
 
-#define SCS_MASK (SCS_SIZE - 1)
-
 .macro m_mangle_registers reg, sp_reg
   eor x3, x3, \reg
   eor x19, x19, \reg
@@ -155,6 +151,9 @@
   bic x1, x1, #1
 
   // Mask off the high bits of the shadow call stack pointer.
+  // We only store the low bits of x18 to avoid leaking the
+  // shadow call stack address into memory.
+  // See the SCS commentary in pthread_internal.h for more detail.
   and x3, x18, #SCS_MASK
 
   // Save core registers.
diff --git a/libc/arch-riscv64/bionic/setjmp.S b/libc/arch-riscv64/bionic/setjmp.S
index eea2978..ba3cacf 100644
--- a/libc/arch-riscv64/bionic/setjmp.S
+++ b/libc/arch-riscv64/bionic/setjmp.S
@@ -79,7 +79,7 @@
 .macro m_mangle_registers reg, sp_reg
   xor s0, s0, \reg
   xor s1, s1, \reg
-  xor s2, s2, \reg
+  xor a4, a4, \reg  // a4 is the masked s2 (x18) for SCS.
   xor s3, s3, \reg
   xor s4, s4, \reg
   xor s5, s5, \reg
@@ -151,13 +151,20 @@
   ld a1, _JB_SIGFLAG(a0)
   andi a1, a1, -2
 
+  // Mask off the high bits of the shadow call stack pointer.
+  // We only store the low bits of x18 to avoid leaking the
+  // shadow call stack address into memory.
+  // See the SCS commentary in pthread_internal.h for more detail.
+  li a4, SCS_MASK
+  and a4, a4, x18
+
   // Save core registers.
   mv a2, sp
   m_mangle_registers a1, sp_reg=a2
   sd ra,  _JB_RA(a0)
   sd s0,  _JB_S0(a0)
   sd s1,  _JB_S1(a0)
-  sd s2,  _JB_S2(a0)
+  sd a4,  _JB_S2(a0)  // a4 is the masked s2 (x18) for SCS.
   sd s3,  _JB_S3(a0)
   sd s4,  _JB_S4(a0)
   sd s5,  _JB_S5(a0)
@@ -231,7 +238,7 @@
   ld ra,  _JB_RA(a0)
   ld s0,  _JB_S0(a0)
   ld s1,  _JB_S1(a0)
-  ld s2,  _JB_S2(a0)
+  ld a4,  _JB_S2(a0)  // Don't clobber s2 (x18) used for SCS yet.
   ld s3,  _JB_S3(a0)
   ld s4,  _JB_S4(a0)
   ld s5,  _JB_S5(a0)
@@ -245,6 +252,11 @@
   m_unmangle_registers a2, sp_reg=a3
   mv sp, a3
 
+  // Restore the low bits of the shadow call stack pointer.
+  li a5, ~SCS_MASK
+  and x18, x18, a5
+  or x18, a4, x18
+
   addi sp, sp, -24
   sd   ra, 0(sp)
   sd   a0, 8(sp)
diff --git a/libc/arch-riscv64/string/__memcpy_chk.S b/libc/arch-riscv64/string/__memcpy_chk.S
new file mode 100644
index 0000000..4a2d13d
--- /dev/null
+++ b/libc/arch-riscv64/string/__memcpy_chk.S
@@ -0,0 +1,9 @@
+#include <private/bionic_asm.h>
+
+ENTRY(__memcpy_chk)
+  bleu a2, a3, 1f
+  call __memcpy_chk_fail
+
+1:
+   tail memcpy
+END(__memcpy_chk)
diff --git a/libc/async_safe/async_safe_log.cpp b/libc/async_safe/async_safe_log.cpp
index 2380e68..420560f 100644
--- a/libc/async_safe/async_safe_log.cpp
+++ b/libc/async_safe/async_safe_log.cpp
@@ -254,7 +254,7 @@
     bool alternate = false;
     size_t bytelen = sizeof(int);
     int slen;
-    char buffer[32]; /* temporary buffer used to format numbers */
+    char buffer[64];  // temporary buffer used to format numbers/format errno string
 
     char c;
 
@@ -359,8 +359,7 @@
       buffer[1] = 'x';
       format_integer(buffer + 2, sizeof(buffer) - 2, value, 'x');
     } else if (c == 'm') {
-      char buf[256];
-      str = strerror_r(errno, buf, sizeof(buf));
+      strerror_r(errno, buffer, sizeof(buffer));
     } else if (c == 'd' || c == 'i' || c == 'o' || c == 'u' || c == 'x' || c == 'X') {
       /* integers - first read value from stack */
       uint64_t value;
diff --git a/libc/bionic/fortify.cpp b/libc/bionic/fortify.cpp
index 73cf973..7dee5e3 100644
--- a/libc/bionic/fortify.cpp
+++ b/libc/bionic/fortify.cpp
@@ -489,9 +489,9 @@
   return strcpy(dst, src);
 }
 
-#if !defined(__arm__) && !defined(__aarch64__)
+#if !defined(__arm__) && !defined(__aarch64__) && !defined(__riscv)
 // Runtime implementation of __memcpy_chk (used directly by compiler, not in headers).
-// arm32 and arm64 have assembler implementations, and don't need this C fallback.
+// arm32,arm64,riscv have assembler implementations, and don't need this C fallback.
 extern "C" void* __memcpy_chk(void* dst, const void* src, size_t count, size_t dst_len) {
   __check_count("memcpy", "count", count);
   __check_buffer_access("memcpy", "write into", count, dst_len);
diff --git a/libc/bionic/gwp_asan_wrappers.cpp b/libc/bionic/gwp_asan_wrappers.cpp
index 251633d..fab29ef 100644
--- a/libc/bionic/gwp_asan_wrappers.cpp
+++ b/libc/bionic/gwp_asan_wrappers.cpp
@@ -307,8 +307,10 @@
     sysprop_names[3] = persist_default_sysprop;
   }
 
+  // TODO(mitchp): Log overrides using this.
+  const char* source;
   return get_config_from_env_or_sysprops(env_var, sysprop_names, arraysize(sysprop_names),
-                                         value_out, PROP_VALUE_MAX);
+                                         value_out, PROP_VALUE_MAX, &source);
 }
 
 bool GetGwpAsanIntegerOption(unsigned long long* result,
diff --git a/libc/bionic/jemalloc_wrapper.cpp b/libc/bionic/jemalloc_wrapper.cpp
index ef488ee..ce3f314 100644
--- a/libc/bionic/jemalloc_wrapper.cpp
+++ b/libc/bionic/jemalloc_wrapper.cpp
@@ -102,7 +102,7 @@
       }
     }
     return 1;
-  } else if (param == M_PURGE) {
+  } else if (param == M_PURGE || param == M_PURGE_ALL) {
     // Only clear the current thread cache since there is no easy way to
     // clear the caches of other threads.
     // This must be done first so that cleared allocations get purged
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index d64d402..0935cd6 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -217,16 +217,13 @@
 // Returns true if there's an environment setting (either sysprop or env var)
 // that should overwrite the ELF note, and places the equivalent heap tagging
 // level into *level.
-static bool get_environment_memtag_setting(HeapTaggingLevel* level) {
+static bool get_environment_memtag_setting(const char* basename, HeapTaggingLevel* level) {
   static const char kMemtagPrognameSyspropPrefix[] = "arm64.memtag.process.";
   static const char kMemtagGlobalSysprop[] = "persist.arm64.memtag.default";
   static const char kMemtagOverrideSyspropPrefix[] =
       "persist.device_config.memory_safety_native.mode_override.process.";
 
-  const char* progname = __libc_shared_globals()->init_progname;
-  if (progname == nullptr) return false;
-
-  const char* basename = __gnu_basename(progname);
+  if (basename == nullptr) return false;
 
   char options_str[PROP_VALUE_MAX];
   char sysprop_name[512];
@@ -237,8 +234,9 @@
                            kMemtagOverrideSyspropPrefix, basename);
   const char* sys_prop_names[] = {sysprop_name, remote_sysprop_name, kMemtagGlobalSysprop};
 
+  const char* source = nullptr;
   if (!get_config_from_env_or_sysprops("MEMTAG_OPTIONS", sys_prop_names, arraysize(sys_prop_names),
-                                       options_str, sizeof(options_str))) {
+                                       options_str, sizeof(options_str), &source)) {
     return false;
   }
 
@@ -249,27 +247,35 @@
   } else if (strcmp("off", options_str) == 0) {
     *level = M_HEAP_TAGGING_LEVEL_TBI;
   } else {
-    async_safe_format_log(
-        ANDROID_LOG_ERROR, "libc",
-        "unrecognized memtag level: \"%s\" (options are \"sync\", \"async\", or \"off\").",
-        options_str);
+    async_safe_format_log(ANDROID_LOG_ERROR, "libc",
+                          "%s: unrecognized memtag level in %s: \"%s\" (options are \"sync\", "
+                          "\"async\", or \"off\").",
+                          basename, source, options_str);
     return false;
   }
-
+  async_safe_format_log(ANDROID_LOG_DEBUG, "libc", "%s: chose memtag level \"%s\" from %s.",
+                        basename, options_str, source);
   return true;
 }
 
+static void log_elf_memtag_level(const char* basename, const char* level) {
+  async_safe_format_log(ANDROID_LOG_DEBUG, "libc", "%s: chose memtag level \"%s\" from ELF note",
+                        basename ?: "<unknown>", level);
+}
+
 // Returns the initial heap tagging level. Note: This function will never return
 // M_HEAP_TAGGING_LEVEL_NONE, if MTE isn't enabled for this process we enable
 // M_HEAP_TAGGING_LEVEL_TBI.
 static HeapTaggingLevel __get_heap_tagging_level(const void* phdr_start, size_t phdr_ct,
                                                  uintptr_t load_bias, bool* stack) {
+  const char* progname = __libc_shared_globals()->init_progname;
+  const char* basename = progname ? __gnu_basename(progname) : nullptr;
   unsigned note_val =
       __get_memtag_note(reinterpret_cast<const ElfW(Phdr)*>(phdr_start), phdr_ct, load_bias);
   *stack = note_val & NT_MEMTAG_STACK;
 
   HeapTaggingLevel level;
-  if (get_environment_memtag_setting(&level)) return level;
+  if (get_environment_memtag_setting(basename, &level)) return level;
 
   // Note, previously (in Android 12), any value outside of bits [0..3] resulted
   // in a check-fail. In order to be permissive of further extensions, we
@@ -284,14 +290,17 @@
       // by anyone, but we note it (heh) here for posterity, in case the zero
       // level becomes meaningful, and binaries with this note can be executed
       // on Android 12 devices.
+      log_elf_memtag_level(basename, "off");
       return M_HEAP_TAGGING_LEVEL_TBI;
     case NT_MEMTAG_LEVEL_ASYNC:
+      log_elf_memtag_level(basename, "async");
       return M_HEAP_TAGGING_LEVEL_ASYNC;
     case NT_MEMTAG_LEVEL_SYNC:
     default:
       // We allow future extensions to specify mode 3 (currently unused), with
       // the idea that it might be used for ASYMM mode or something else. On
       // this version of Android, it falls back to SYNC mode.
+      log_elf_memtag_level(basename, "sync");
       return M_HEAP_TAGGING_LEVEL_SYNC;
   }
 }
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 417ce76..15d6d6d 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -116,14 +116,14 @@
 }
 
 static void __init_shadow_call_stack(pthread_internal_t* thread __unused) {
-#ifdef __aarch64__
+#if defined(__aarch64__) || defined(__riscv)
   // Allocate the stack and the guard region.
   char* scs_guard_region = reinterpret_cast<char*>(
       mmap(nullptr, SCS_GUARD_REGION_SIZE, 0, MAP_PRIVATE | MAP_ANON, -1, 0));
   thread->shadow_call_stack_guard_region = scs_guard_region;
 
   // The address is aligned to SCS_SIZE so that we only need to store the lower log2(SCS_SIZE) bits
-  // in jmp_buf.
+  // in jmp_buf. See the SCS commentary in pthread_internal.h for more detail.
   char* scs_aligned_guard_region =
       reinterpret_cast<char*>(align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
 
@@ -133,11 +133,15 @@
   size_t scs_offset =
       (getpid() == 1) ? 0 : (arc4random_uniform(SCS_GUARD_REGION_SIZE / SCS_SIZE - 1) * SCS_SIZE);
 
-  // Make the stack readable and writable and store its address in register x18. This is
-  // deliberately the only place where the address is stored.
-  char *scs = scs_aligned_guard_region + scs_offset;
+  // Make the stack readable and writable and store its address in x18.
+  // This is deliberately the only place where the address is stored.
+  char* scs = scs_aligned_guard_region + scs_offset;
   mprotect(scs, SCS_SIZE, PROT_READ | PROT_WRITE);
+#if defined(__aarch64__)
   __asm__ __volatile__("mov x18, %0" ::"r"(scs));
+#elif defined(__riscv)
+  __asm__ __volatile__("mv x18, %0" ::"r"(scs));
+#endif
 #endif
 }
 
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index bde95ec..f584b27 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -117,7 +117,7 @@
     __rt_sigprocmask(SIG_BLOCK, &set, nullptr, sizeof(sigset64_t));
   }
 
-#ifdef __aarch64__
+#if defined(__aarch64__) || defined(__riscv)
   // Free the shadow call stack and guard pages.
   munmap(thread->shadow_call_stack_guard_region, SCS_GUARD_REGION_SIZE);
 #endif
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 7222b62..083c2ed 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -105,9 +105,13 @@
 
   void* alternate_signal_stack;
 
-  // The start address of the shadow call stack's guard region (arm64 only).
+  // The start address of the shadow call stack's guard region (arm64/riscv64).
+  // This region is SCS_GUARD_REGION_SIZE bytes large, but only SCS_SIZE bytes
+  // are actually used.
+  //
   // This address is only used to deallocate the shadow call stack on thread
   // exit; the address of the stack itself is stored only in the x18 register.
+  //
   // Because the protection offered by SCS relies on the secrecy of the stack
   // address, storing the address here weakens the protection, but only
   // slightly, because it is relatively easy for an attacker to discover the
@@ -115,13 +119,22 @@
   // to other allocations), but not the stack itself, which is <0.1% of the size
   // of the guard region.
   //
+  // longjmp()/setjmp() don't store all the bits of x18, only the bottom bits
+  // covered by SCS_MASK. Since longjmp()/setjmp() between different threads is
+  // undefined behavior (and unsupported on Android), we can retrieve the high
+  // bits of x18 from the current value in x18 --- all the jmp_buf needs to store
+  // is where exactly the shadow stack pointer is in the thread's shadow stack:
+  // the bottom bits of x18.
+  //
   // There are at least two other options for discovering the start address of
   // the guard region on thread exit, but they are not as simple as storing in
   // TLS.
+  //
   // 1) Derive it from the value of the x18 register. This is only possible in
   //    processes that do not contain legacy code that might clobber x18,
   //    therefore each process must declare early during process startup whether
   //    it might load legacy code.
+  //    TODO: riscv64 has no legacy code, so we can actually go this route there!
   // 2) Mark the guard region as such using prctl(PR_SET_VMA_ANON_NAME) and
   //    discover its address by reading /proc/self/maps. One issue with this is
   //    that reading /proc/self/maps can race with allocations, so we may need
diff --git a/libc/bionic/sysprop_helpers.cpp b/libc/bionic/sysprop_helpers.cpp
index 5627034..2051330 100644
--- a/libc/bionic/sysprop_helpers.cpp
+++ b/libc/bionic/sysprop_helpers.cpp
@@ -57,18 +57,22 @@
 }
 
 bool get_config_from_env_or_sysprops(const char* env_var_name, const char* const* sys_prop_names,
-                                     size_t sys_prop_names_size, char* options,
-                                     size_t options_size) {
+                                     size_t sys_prop_names_size, char* options, size_t options_size,
+                                     const char** chosen_source) {
   const char* env = getenv(env_var_name);
   if (env && *env != '\0') {
     strncpy(options, env, options_size);
     options[options_size - 1] = '\0';  // Ensure null-termination.
+    *chosen_source = env_var_name;
     return true;
   }
 
   for (size_t i = 0; i < sys_prop_names_size; ++i) {
     if (sys_prop_names[i] == nullptr) continue;
-    if (get_property_value(sys_prop_names[i], options, options_size)) return true;
+    if (get_property_value(sys_prop_names[i], options, options_size)) {
+      *chosen_source = sys_prop_names[i];
+      return true;
+    }
   }
   return false;
 }
diff --git a/libc/bionic/sysprop_helpers.h b/libc/bionic/sysprop_helpers.h
index a02c2dc..84e7af1 100644
--- a/libc/bionic/sysprop_helpers.h
+++ b/libc/bionic/sysprop_helpers.h
@@ -36,10 +36,13 @@
 //   2. System properties, in the order they're specified in sys_prop_names.
 // If neither of these options are specified (or they're both an empty string),
 // this function returns false. Otherwise, it returns true, and the presiding
-// options string is written to the `options` buffer of size `size`. If this
-// function returns true, `options` is guaranteed to be null-terminated.
+// options string is written to the `options` buffer of size `size`. It will
+// store a pointer to either env_var_name, or into the relevant entry of
+// sys_prop_names into choicen_source, indiciating which value was used. If
+// this function returns true, `options` is guaranteed to be null-terminated.
 // `options_size` should be at least PROP_VALUE_MAX.
 __LIBC_HIDDEN__ bool get_config_from_env_or_sysprops(const char* env_var_name,
                                                      const char* const* sys_prop_names,
                                                      size_t sys_prop_names_size, char* options,
-                                                     size_t options_size);
+                                                     size_t options_size,
+                                                     const char** chosen_source);
diff --git a/libc/include/android/api-level.h b/libc/include/android/api-level.h
index 79085d1..fe77d8f 100644
--- a/libc/include/android/api-level.h
+++ b/libc/include/android/api-level.h
@@ -165,6 +165,9 @@
 /** Names the "U" API level (34), for comparison against `__ANDROID_API__`. */
 #define __ANDROID_API_U__ 34
 
+/** Names the "V" API level (35), for comparison against `__ANDROID_API__`. */
+#define __ANDROID_API_V__ 35
+
 /* This file is included in <features.h>, and might be used from .S files. */
 #if !defined(__ASSEMBLY__)
 
diff --git a/libc/include/bits/elf_common.h b/libc/include/bits/elf_common.h
index ea833f4..b3c57a2 100644
--- a/libc/include/bits/elf_common.h
+++ b/libc/include/bits/elf_common.h
@@ -418,12 +418,12 @@
 #define	SHT_HIOS		0x6fffffff	/* Last of OS specific semantics */
 #define	SHT_LOPROC		0x70000000	/* reserved range for processor */
 #define	SHT_X86_64_UNWIND	0x70000001	/* unwind information */
-#define	SHT_AMD64_UNWIND	SHT_X86_64_UNWIND 
+#define	SHT_AMD64_UNWIND	SHT_X86_64_UNWIND
 
 #define	SHT_ARM_EXIDX		0x70000001	/* Exception index table. */
-#define	SHT_ARM_PREEMPTMAP	0x70000002	/* BPABI DLL dynamic linking 
+#define	SHT_ARM_PREEMPTMAP	0x70000002	/* BPABI DLL dynamic linking
 						   pre-emption map. */
-#define	SHT_ARM_ATTRIBUTES	0x70000003	/* Object file compatibility 
+#define	SHT_ARM_ATTRIBUTES	0x70000003	/* Object file compatibility
 						   attributes. */
 #define	SHT_ARM_DEBUGOVERLAY	0x70000004	/* See DBGOVL for details. */
 #define	SHT_ARM_OVERLAYSECTION	0x70000005	/* See DBGOVL for details. */
@@ -648,6 +648,11 @@
 #define	DT_AARCH64_BTI_PLT		0x70000001
 #define	DT_AARCH64_PAC_PLT		0x70000003
 #define	DT_AARCH64_VARIANT_PCS		0x70000005
+#define DT_AARCH64_MEMTAG_MODE		0x70000009
+#define DT_AARCH64_MEMTAG_HEAP		0x7000000b
+#define DT_AARCH64_MEMTAG_STACK		0x7000000c
+#define DT_AARCH64_MEMTAG_GLOBALS	0x7000000d
+#define DT_AARCH64_MEMTAG_GLOBALSSZ	0x7000000f
 
 #define	DT_ARM_SYMTABSZ			0x70000001
 #define	DT_ARM_PREEMPTMAP		0x70000002
diff --git a/libc/include/getopt.h b/libc/include/getopt.h
index 014226a..c1c0442 100644
--- a/libc/include/getopt.h
+++ b/libc/include/getopt.h
@@ -49,8 +49,11 @@
 #define optional_argument 2
 
 struct option {
-  /** Name of long option. */
-  const char *name;
+  /**
+   * Name of long option. Options must have a non-NULL name.
+   * A NULL name signals the end of the options array.
+   */
+  const char * _Nullable name;
 
   /**
    * One of `no_argument`, `required_argument`, or `optional_argument`.
@@ -58,7 +61,7 @@
   int has_arg;
 
   /** If not NULL, set `*flag` to val when option found. */
-  int* flag;
+  int* _Nullable flag;
 
   /** If `flag` not NULL, the value to assign to `*flag`; otherwise the return value. */
   int val;
@@ -69,12 +72,12 @@
 /**
  * [getopt_long(3)](http://man7.org/linux/man-pages/man3/getopt.3.html) parses command-line options.
  */
-int getopt_long(int __argc, char* const* __argv, const char* __options, const struct option* __long_options, int* __long_index);
+int getopt_long(int __argc, char* _Nonnull const* _Nonnull __argv, const char* _Nonnull __options, const struct option* _Nonnull __long_options, int* _Nullable __long_index);
 
 /**
  * [getopt_long_only(3)](http://man7.org/linux/man-pages/man3/getopt.3.html) parses command-line options.
  */
-int getopt_long_only(int __argc, char* const* __argv, const char* __options, const struct option* __long_options, int* __long_index);
+int getopt_long_only(int __argc, char* _Nonnull const* _Nonnull __argv, const char* _Nonnull __options, const struct option* _Nonnull __long_options, int* _Nullable __long_index);
 
 #ifndef _OPTRESET_DECLARED
 #define _OPTRESET_DECLARED
diff --git a/libc/include/link.h b/libc/include/link.h
index bd430f5..a0a3d60 100644
--- a/libc/include/link.h
+++ b/libc/include/link.h
@@ -44,41 +44,41 @@
 
 struct dl_phdr_info {
   ElfW(Addr) dlpi_addr;
-  const char* dlpi_name;
-  const ElfW(Phdr)* dlpi_phdr;
+  const char* _Nullable dlpi_name;
+  const ElfW(Phdr)* _Nullable dlpi_phdr;
   ElfW(Half) dlpi_phnum;
 
   // These fields were added in Android R.
   unsigned long long dlpi_adds;
   unsigned long long dlpi_subs;
   size_t dlpi_tls_modid;
-  void* dlpi_tls_data;
+  void* _Nullable dlpi_tls_data;
 };
 
 #if defined(__arm__)
-int dl_iterate_phdr(int (*__callback)(struct dl_phdr_info*, size_t, void*), void* __data) __INTRODUCED_IN(21);
+int dl_iterate_phdr(int (* _Nonnull __callback)(struct dl_phdr_info* _Nonnull, size_t, void* _Nullable), void* _Nullable __data) __INTRODUCED_IN(21);
 #else
-int dl_iterate_phdr(int (*__callback)(struct dl_phdr_info*, size_t, void*), void* __data);
+int dl_iterate_phdr(int (* _Nonnull __callback)(struct dl_phdr_info* _Nonnull, size_t, void*_Nullable ), void* _Nullable __data);
 #endif
 
 #ifdef __arm__
 typedef uintptr_t _Unwind_Ptr;
-_Unwind_Ptr dl_unwind_find_exidx(_Unwind_Ptr, int*);
+_Unwind_Ptr dl_unwind_find_exidx(_Unwind_Ptr, int* _Nonnull);
 #endif
 
 /* Used by the dynamic linker to communicate with the debugger. */
 struct link_map {
   ElfW(Addr) l_addr;
-  char* l_name;
-  ElfW(Dyn)* l_ld;
-  struct link_map* l_next;
-  struct link_map* l_prev;
+  char* _Nullable l_name;
+  ElfW(Dyn)* _Nullable l_ld;
+  struct link_map* _Nullable l_next;
+  struct link_map* _Nullable l_prev;
 };
 
 /* Used by the dynamic linker to communicate with the debugger. */
 struct r_debug {
   int32_t r_version;
-  struct link_map* r_map;
+  struct link_map* _Nullable r_map;
   ElfW(Addr) r_brk;
   enum {
     RT_CONSISTENT,
diff --git a/libc/include/locale.h b/libc/include/locale.h
index 8785b24..27f2a3f 100644
--- a/libc/include/locale.h
+++ b/libc/include/locale.h
@@ -70,16 +70,16 @@
                      LC_IDENTIFICATION_MASK)
 
 struct lconv {
-  char* decimal_point;
-  char* thousands_sep;
-  char* grouping;
-  char* int_curr_symbol;
-  char* currency_symbol;
-  char* mon_decimal_point;
-  char* mon_thousands_sep;
-  char* mon_grouping;
-  char* positive_sign;
-  char* negative_sign;
+  char* _Nonnull decimal_point;
+  char* _Nonnull thousands_sep;
+  char* _Nonnull grouping;
+  char* _Nonnull int_curr_symbol;
+  char* _Nonnull currency_symbol;
+  char* _Nonnull mon_decimal_point;
+  char* _Nonnull mon_thousands_sep;
+  char* _Nonnull mon_grouping;
+  char* _Nonnull positive_sign;
+  char* _Nonnull negative_sign;
   char int_frac_digits;
   char frac_digits;
   char p_cs_precedes;
@@ -96,13 +96,13 @@
   char int_n_sign_posn;
 };
 
-struct lconv* localeconv(void) __INTRODUCED_IN_NO_GUARD_FOR_NDK(21);
+struct lconv* _Nonnull localeconv(void) __INTRODUCED_IN_NO_GUARD_FOR_NDK(21);
 
-locale_t duplocale(locale_t __l) __INTRODUCED_IN(21);
-void freelocale(locale_t __l) __INTRODUCED_IN(21);
-locale_t newlocale(int __category_mask, const char* __locale_name, locale_t __base) __INTRODUCED_IN(21);
-char* setlocale(int __category, const char* __locale_name);
-locale_t uselocale(locale_t __l) __INTRODUCED_IN(21);
+locale_t _Nullable duplocale(locale_t _Nonnull __l) __INTRODUCED_IN(21);
+void freelocale(locale_t _Nonnull __l) __INTRODUCED_IN(21);
+locale_t _Nullable newlocale(int __category_mask, const char* _Nonnull __locale_name, locale_t _Nullable __base) __INTRODUCED_IN(21);
+char* _Nullable setlocale(int __category, const char* _Nullable __locale_name);
+locale_t _Nullable uselocale(locale_t _Nullable __l) __INTRODUCED_IN(21);
 
 #define LC_GLOBAL_LOCALE __BIONIC_CAST(reinterpret_cast, locale_t, -1L)
 
diff --git a/libc/include/malloc.h b/libc/include/malloc.h
index 02bda60..6a2d380 100644
--- a/libc/include/malloc.h
+++ b/libc/include/malloc.h
@@ -183,7 +183,15 @@
  * Available since API level 28.
  */
 #define M_PURGE (-101)
-
+/**
+ * mallopt() option to immediately purge all possible memory back to
+ * the kernel. This call can take longer than a normal purge since it
+ * examines everything. In some cases, it can take more than twice the
+ * time of a M_PURGE call. The value is ignored.
+ *
+ * Available since API level 34.
+ */
+#define M_PURGE_ALL (-104)
 
 /**
  * mallopt() option to tune the allocator's choice of memory tags to
diff --git a/libc/include/mntent.h b/libc/include/mntent.h
index d5987dc..43cab1f 100644
--- a/libc/include/mntent.h
+++ b/libc/include/mntent.h
@@ -47,21 +47,21 @@
 #define MNTOPT_SUID "suid"
 
 struct mntent {
-  char* mnt_fsname;
-  char* mnt_dir;
-  char* mnt_type;
-  char* mnt_opts;
+  char* _Nullable mnt_fsname;
+  char* _Nullable mnt_dir;
+  char* _Nullable mnt_type;
+  char* _Nullable mnt_opts;
   int mnt_freq;
   int mnt_passno;
 };
 
 __BEGIN_DECLS
 
-int endmntent(FILE* __fp) __INTRODUCED_IN(21);
-struct mntent* getmntent(FILE* __fp);
-struct mntent* getmntent_r(FILE* __fp, struct mntent* __entry, char* __buf, int __size) __INTRODUCED_IN(21);
-FILE* setmntent(const char* __filename, const char* __type) __INTRODUCED_IN(21);
-char* hasmntopt(const struct mntent* __entry, const char* __option) __INTRODUCED_IN(26);
+int endmntent(FILE* _Nullable __fp) __INTRODUCED_IN(21);
+struct mntent* _Nullable getmntent(FILE* _Nonnull __fp);
+struct mntent* _Nullable getmntent_r(FILE* _Nonnull __fp, struct mntent* _Nonnull __entry, char* _Nonnull __buf, int __size) __INTRODUCED_IN(21);
+FILE* _Nullable setmntent(const char* _Nonnull __filename, const char* _Nonnull __type) __INTRODUCED_IN(21);
+char* _Nullable hasmntopt(const struct mntent* _Nonnull __entry, const char* _Nonnull __option) __INTRODUCED_IN(26);
 
 __END_DECLS
 
diff --git a/libc/include/pwd.h b/libc/include/pwd.h
index d481aac..2b17fbf 100644
--- a/libc/include/pwd.h
+++ b/libc/include/pwd.h
@@ -66,31 +66,31 @@
 __BEGIN_DECLS
 
 struct passwd {
-  char* pw_name;
-  char* pw_passwd;
+  char* _Nullable pw_name;
+  char* _Nullable pw_passwd;
   uid_t pw_uid;
   gid_t pw_gid;
 #ifdef __LP64__
-  char* pw_gecos;
+  char* _Nullable pw_gecos;
 #else
   /* Note: On LP32, we define pw_gecos to pw_passwd since they're both NULL. */
 # define pw_gecos pw_passwd
 #endif
-  char* pw_dir;
-  char* pw_shell;
+  char* _Nullable pw_dir;
+  char* _Nullable pw_shell;
 };
 
-struct passwd* getpwnam(const char* __name);
-struct passwd* getpwuid(uid_t __uid);
+struct passwd* _Nullable getpwnam(const char* _Nonnull __name);
+struct passwd* _Nullable getpwuid(uid_t __uid);
 
 /* Note: Android has thousands and thousands of ids to iterate through */
-struct passwd* getpwent(void) __INTRODUCED_IN(26);
+struct passwd* _Nullable getpwent(void) __INTRODUCED_IN(26);
 
 void setpwent(void) __INTRODUCED_IN(26);
 void endpwent(void) __INTRODUCED_IN(26);
 
-int getpwnam_r(const char* __name, struct passwd* __pwd, char* __buf, size_t __n, struct passwd** __result);
-int getpwuid_r(uid_t __uid, struct passwd* __pwd, char* __buf, size_t __n, struct passwd** __result);
+int getpwnam_r(const char* _Nonnull __name, struct passwd* _Nonnull __pwd, char* _Nonnull __buf, size_t __n, struct passwd* _Nullable * _Nonnull __result);
+int getpwuid_r(uid_t __uid, struct passwd* _Nonnull __pwd, char* _Nonnull __buf, size_t __n, struct passwd* _Nullable * _Nonnull __result);
 
 __END_DECLS
 
diff --git a/libc/include/resolv.h b/libc/include/resolv.h
index 6318d00..f25484a 100644
--- a/libc/include/resolv.h
+++ b/libc/include/resolv.h
@@ -40,24 +40,24 @@
 __BEGIN_DECLS
 
 #define b64_ntop __b64_ntop
-int b64_ntop(u_char const* __src, size_t __src_size, char* __dst, size_t __dst_size);
+int b64_ntop(u_char const* _Nonnull __src, size_t __src_size, char* _Nonnull __dst, size_t __dst_size);
 #define b64_pton __b64_pton
-int b64_pton(char const* __src, u_char* __dst, size_t __dst_size);
+int b64_pton(char const* _Nonnull __src, u_char* _Nonnull __dst, size_t __dst_size);
 
 #define dn_comp __dn_comp
-int dn_comp(const char* __src, u_char* __dst, int __dst_size, u_char** __dn_ptrs , u_char** __last_dn_ptr);
+int dn_comp(const char* _Nonnull __src, u_char* _Nonnull __dst, int __dst_size, u_char* _Nullable * _Nullable __dn_ptrs , u_char* _Nullable * _Nullable __last_dn_ptr);
 
-int dn_expand(const u_char* __msg, const u_char* __eom, const u_char* __src, char* __dst, int __dst_size);
+int dn_expand(const u_char* _Nonnull __msg, const u_char* _Nonnull __eom, const u_char* _Nonnull __src, char* _Nonnull __dst, int __dst_size);
 
 #define p_class __p_class
-const char* p_class(int __class);
+const char* _Nonnull p_class(int __class);
 #define p_type __p_type
-const char* p_type(int __type);
+const char* _Nonnull p_type(int __type);
 
 int res_init(void);
-int res_mkquery(int __opcode, const char* __domain_name, int __class, int __type, const u_char* __data, int __data_size, const u_char* __new_rr_in, u_char* __buf, int __buf_size);
-int res_query(const char* __name, int __class, int __type, u_char* __answer, int __answer_size);
-int res_search(const char* __name, int __class, int __type, u_char* __answer, int __answer_size);
+int res_mkquery(int __opcode, const char* _Nonnull __domain_name, int __class, int __type, const u_char* _Nullable __data, int __data_size, const u_char* _Nullable __new_rr_in, u_char* _Nonnull __buf, int __buf_size);
+int res_query(const char* _Nonnull __name, int __class, int __type, u_char* _Nonnull __answer, int __answer_size);
+int res_search(const char* _Nonnull __name, int __class, int __type, u_char* _Nonnull __answer, int __answer_size);
 
 #define res_randomid __res_randomid
 u_int __res_randomid(void) __INTRODUCED_IN(29);
diff --git a/libc/include/sys/signalfd.h b/libc/include/sys/signalfd.h
index bd911f7..f669cc8 100644
--- a/libc/include/sys/signalfd.h
+++ b/libc/include/sys/signalfd.h
@@ -48,11 +48,11 @@
  *
  * Available since API level 18.
  */
-int signalfd(int __fd, const sigset_t* __mask, int __flags) __INTRODUCED_IN(18);
+int signalfd(int __fd, const sigset_t* _Nonnull __mask, int __flags) __INTRODUCED_IN(18);
 
 /**
  * Like signalfd() but allows setting a signal mask with RT signals even from a 32-bit process.
  */
-int signalfd64(int __fd, const sigset64_t* __mask, int __flags) __INTRODUCED_IN(28);
+int signalfd64(int __fd, const sigset64_t* _Nonnull __mask, int __flags) __INTRODUCED_IN(28);
 
 __END_DECLS
diff --git a/libc/include/threads.h b/libc/include/threads.h
index 1b00b8f..b1008de 100644
--- a/libc/include/threads.h
+++ b/libc/include/threads.h
@@ -51,9 +51,9 @@
 typedef pthread_mutex_t mtx_t;
 
 /** The type for a thread-specific storage destructor. */
-typedef void (*tss_dtor_t)(void*);
+typedef void (*tss_dtor_t)(void* _Nullable);
 /** The type of the function passed to thrd_create() to create a new thread. */
-typedef int (*thrd_start_t)(void*);
+typedef int (*thrd_start_t)(void* _Nullable);
 
 /** The type used by call_once(). */
 typedef pthread_once_t once_flag;
@@ -82,72 +82,72 @@
 // This file is implemented as static inlines before API level 30.
 
 /** Uses `__flag` to ensure that `__function` is called exactly once. */
-void call_once(once_flag* __flag, void (*__function)(void)) __INTRODUCED_IN(30);
+void call_once(once_flag* _Nonnull __flag, void (* _Nonnull __function)(void)) __INTRODUCED_IN(30);
 
 
 
 /**
  * Unblocks all threads blocked on `__cond`.
  */
-int cnd_broadcast(cnd_t* __cond) __INTRODUCED_IN(30);
+int cnd_broadcast(cnd_t* _Nonnull __cond) __INTRODUCED_IN(30);
 
 /**
  * Destroys a condition variable.
  */
-void cnd_destroy(cnd_t* __cond) __INTRODUCED_IN(30);
+void cnd_destroy(cnd_t* _Nonnull __cond) __INTRODUCED_IN(30);
 
 /**
  * Creates a condition variable.
  */
-int cnd_init(cnd_t* __cond) __INTRODUCED_IN(30);
+int cnd_init(cnd_t* _Nonnull __cond) __INTRODUCED_IN(30);
 
 /**
  * Unblocks one thread blocked on `__cond`.
  */
-int cnd_signal(cnd_t* __cond) __INTRODUCED_IN(30);
+int cnd_signal(cnd_t* _Nonnull __cond) __INTRODUCED_IN(30);
 
 /**
  * Unlocks `__mutex` and blocks until `__cond` is signaled or `__timeout` occurs.
  */
-int cnd_timedwait(cnd_t* __cond, mtx_t* __mutex, const struct timespec* __timeout)
+int cnd_timedwait(cnd_t* _Nonnull __cond, mtx_t* _Nonnull __mutex, const struct timespec* _Nonnull __timeout)
     __INTRODUCED_IN(30);
 
 /**
  * Unlocks `__mutex` and blocks until `__cond` is signaled.
  */
-int cnd_wait(cnd_t* __cond, mtx_t* __mutex) __INTRODUCED_IN(30);
+int cnd_wait(cnd_t* _Nonnull __cond, mtx_t* _Nonnull __mutex) __INTRODUCED_IN(30);
 
 
 
 /**
  * Destroys a mutex.
  */
-void mtx_destroy(mtx_t* __mutex) __INTRODUCED_IN(30);
+void mtx_destroy(mtx_t* _Nonnull __mutex) __INTRODUCED_IN(30);
 
 /**
  * Creates a mutex.
  */
-int mtx_init(mtx_t* __mutex, int __type) __INTRODUCED_IN(30);
+int mtx_init(mtx_t* _Nonnull __mutex, int __type) __INTRODUCED_IN(30);
 
 /**
  * Blocks until `__mutex` is acquired.
  */
-int mtx_lock(mtx_t* __mutex) __INTRODUCED_IN(30);
+int mtx_lock(mtx_t* _Nonnull __mutex) __INTRODUCED_IN(30);
 
 /**
  * Blocks until `__mutex` is acquired or `__timeout` expires.
  */
-int mtx_timedlock(mtx_t* __mutex, const struct timespec* __timeout) __INTRODUCED_IN(30);
+int mtx_timedlock(mtx_t* _Nonnull __mutex, const struct timespec* _Nonnull __timeout) __INTRODUCED_IN(30);
 
 /**
  * Acquires `__mutex` or returns `thrd_busy`.
  */
-int mtx_trylock(mtx_t* __mutex) __INTRODUCED_IN(30);
+int mtx_trylock(mtx_t* _Nonnull __mutex) __INTRODUCED_IN(30);
 
 /**
  * Unlocks `__mutex`.
  */
-int mtx_unlock(mtx_t* __mutex) __INTRODUCED_IN(30);
+int mtx_unlock(mtx_t* _Nonnull __mutex) __INTRODUCED_IN(30);
 
 
 
@@ -155,7 +155,7 @@
  * Creates a new thread running `__function(__arg)`, and sets `*__thrd` to
  * the new thread.
  */
-int thrd_create(thrd_t* __thrd, thrd_start_t __function, void* __arg) __INTRODUCED_IN(30);
+int thrd_create(thrd_t* _Nonnull __thrd, thrd_start_t _Nonnull __function, void* _Nullable __arg) __INTRODUCED_IN(30);
 
 /**
  * Returns the `thrd_t` corresponding to the caller.
@@ -181,7 +181,7 @@
  * Blocks until `__thrd` terminates. If `__result` is not null, `*__result`
  * is set to the exiting thread's result.
  */
-int thrd_join(thrd_t __thrd, int* __result) __INTRODUCED_IN(30);
+int thrd_join(thrd_t __thrd, int* _Nullable __result) __INTRODUCED_IN(30);
 
 /**
  * Blocks the caller for at least `__duration` unless a signal is delivered.
@@ -190,7 +190,7 @@
  *
  * Returns 0 on success, or -1 if a signal was delivered.
  */
-int thrd_sleep(const struct timespec* __duration, struct timespec* __remaining) __INTRODUCED_IN(30);
+int thrd_sleep(const struct timespec* _Nonnull __duration, struct timespec* _Nullable __remaining) __INTRODUCED_IN(30);
 
 /**
  * Request that other threads should be scheduled.
@@ -203,7 +203,7 @@
  * Creates a thread-specific storage key with the associated destructor (which
  * may be null).
  */
-int tss_create(tss_t* __key, tss_dtor_t __dtor) __INTRODUCED_IN(30);
+int tss_create(tss_t* _Nonnull __key, tss_dtor_t _Nullable __dtor) __INTRODUCED_IN(30);
 
 /**
  * Destroys a thread-specific storage key.
@@ -214,13 +214,13 @@
  * Returns the value for the current thread held in the thread-specific storage
  * identified by `__key`.
  */
-void* tss_get(tss_t __key) __INTRODUCED_IN(30);
+void* _Nullable tss_get(tss_t __key) __INTRODUCED_IN(30);
 
 /**
  * Sets the current thread's value for the thread-specific storage identified
  * by `__key` to `__value`.
  */
-int tss_set(tss_t __key, void* __value) __INTRODUCED_IN(30);
+int tss_set(tss_t __key, void* _Nonnull __value) __INTRODUCED_IN(30);
 
 #endif
 
diff --git a/libc/kernel/tools/defaults.py b/libc/kernel/tools/defaults.py
index d0fe157..c9c0a22 100644
--- a/libc/kernel/tools/defaults.py
+++ b/libc/kernel/tools/defaults.py
@@ -48,6 +48,7 @@
     "in_addr": False,
     "ip_mreq_source": False,
     "ip_msfilter": False,
+    "timespec": False,
     }
 
 # define to true if you want to remove all defined(CONFIG_FOO) tests
diff --git a/libc/kernel/uapi/linux/time.h b/libc/kernel/uapi/linux/time.h
index df52295..55d0e6e 100644
--- a/libc/kernel/uapi/linux/time.h
+++ b/libc/kernel/uapi/linux/time.h
@@ -18,14 +18,11 @@
  ****************************************************************************/
 #ifndef _UAPI_LINUX_TIME_H
 #define _UAPI_LINUX_TIME_H
+#include <bits/timespec.h>
 #include <linux/types.h>
 #include <linux/time_types.h>
 #ifndef _STRUCT_TIMESPEC
 #define _STRUCT_TIMESPEC
-struct timespec {
-  __kernel_old_time_t tv_sec;
-  long tv_nsec;
-};
 #endif
 struct timeval {
   __kernel_old_time_t tv_sec;
diff --git a/libc/private/bionic_constants.h b/libc/private/bionic_constants.h
index 09294b6..d7f4474 100644
--- a/libc/private/bionic_constants.h
+++ b/libc/private/bionic_constants.h
@@ -14,18 +14,18 @@
  * limitations under the License.
  */
 
-#ifndef _BIONIC_CONSTANTS_H_
-#define _BIONIC_CONSTANTS_H_
+#pragma once
 
 #define NS_PER_S 1000000000
 
-// Size of the shadow call stack. This must be a power of 2.
+// Size of the shadow call stack. This can be small because these stacks only
+// contain return addresses. This must be a power of 2 so the mask trick works.
+// See the SCS commentary in pthread_internal.h for more detail.
 #define SCS_SIZE (8 * 1024)
+#define SCS_MASK (SCS_SIZE - 1)
 
 // The shadow call stack is allocated at an aligned address within a guard region of this size. The
 // guard region must be large enough that we can allocate an SCS_SIZE-aligned SCS while ensuring
 // that there is at least one guard page after the SCS so that a stack overflow results in a SIGSEGV
 // instead of corrupting the allocation that comes after it.
 #define SCS_GUARD_REGION_SIZE (16 * 1024 * 1024)
-
-#endif // _BIONIC_CONSTANTS_H_
diff --git a/libm/Android.bp b/libm/Android.bp
index cc0d666..4c34fb6 100644
--- a/libm/Android.bp
+++ b/libm/Android.bp
@@ -94,8 +94,6 @@
         "upstream-freebsd/lib/msun/src/s_cbrtf.c",
         "upstream-freebsd/lib/msun/src/s_ccosh.c",
         "upstream-freebsd/lib/msun/src/s_ccoshf.c",
-        "upstream-freebsd/lib/msun/src/s_ceil.c",
-        "upstream-freebsd/lib/msun/src/s_ceilf.c",
         "upstream-freebsd/lib/msun/src/s_cexp.c",
         "upstream-freebsd/lib/msun/src/s_cexpf.c",
         "upstream-freebsd/lib/msun/src/s_cimag.c",
@@ -130,8 +128,6 @@
         "upstream-freebsd/lib/msun/src/s_fdim.c",
         "upstream-freebsd/lib/msun/src/s_finite.c",
         "upstream-freebsd/lib/msun/src/s_finitef.c",
-        "upstream-freebsd/lib/msun/src/s_floor.c",
-        "upstream-freebsd/lib/msun/src/s_floorf.c",
         "upstream-freebsd/lib/msun/src/s_fma.c",
         "upstream-freebsd/lib/msun/src/s_fmaf.c",
         "upstream-freebsd/lib/msun/src/s_fmax.c",
@@ -282,10 +278,8 @@
         arm: {
             srcs: [
                 "arm/fenv.c",
-            ],
-            exclude_srcs: [
-                "upstream-freebsd/lib/msun/src/s_floor.c",
-                "upstream-freebsd/lib/msun/src/s_floorf.c",
+                "upstream-freebsd/lib/msun/src/s_ceil.c",
+                "upstream-freebsd/lib/msun/src/s_ceilf.c",
             ],
             instruction_set: "arm",
             pack_relocations: false,
@@ -303,10 +297,6 @@
                 "arm64/fenv.c",
             ],
             exclude_srcs: [
-                "upstream-freebsd/lib/msun/src/s_ceil.c",
-                "upstream-freebsd/lib/msun/src/s_ceilf.c",
-                "upstream-freebsd/lib/msun/src/s_floor.c",
-                "upstream-freebsd/lib/msun/src/s_floorf.c",
                 "upstream-freebsd/lib/msun/src/s_fma.c",
                 "upstream-freebsd/lib/msun/src/s_fmaf.c",
                 "upstream-freebsd/lib/msun/src/s_fmax.c",
@@ -337,11 +327,6 @@
             ],
 
             exclude_srcs: [
-                // TODO: do the rest when our clang has https://reviews.llvm.org/D136508.
-                // TODO: "upstream-freebsd/lib/msun/src/s_ceil.c",
-                // TODO: "upstream-freebsd/lib/msun/src/s_ceilf.c",
-                // TODO: "upstream-freebsd/lib/msun/src/s_floor.c",
-                // TODO: "upstream-freebsd/lib/msun/src/s_floorf.c",
                 "upstream-freebsd/lib/msun/src/s_fma.c",
                 "upstream-freebsd/lib/msun/src/s_fmaf.c",
                 "upstream-freebsd/lib/msun/src/s_fmax.c",
@@ -356,12 +341,12 @@
                 "upstream-freebsd/lib/msun/src/s_lrintf.c",
                 "upstream-freebsd/lib/msun/src/s_lround.c",
                 "upstream-freebsd/lib/msun/src/s_lroundf.c",
-                // TODO: "upstream-freebsd/lib/msun/src/s_rint.c",
-                // TODO: "upstream-freebsd/lib/msun/src/s_rintf.c",
-                // TODO: "upstream-freebsd/lib/msun/src/s_round.c",
-                // TODO: "upstream-freebsd/lib/msun/src/s_roundf.c",
-                // TODO: "upstream-freebsd/lib/msun/src/s_trunc.c",
-                // TODO: "upstream-freebsd/lib/msun/src/s_truncf.c",
+                "upstream-freebsd/lib/msun/src/s_rint.c",
+                "upstream-freebsd/lib/msun/src/s_rintf.c",
+                "upstream-freebsd/lib/msun/src/s_round.c",
+                "upstream-freebsd/lib/msun/src/s_roundf.c",
+                "upstream-freebsd/lib/msun/src/s_trunc.c",
+                "upstream-freebsd/lib/msun/src/s_truncf.c",
             ],
             version_script: ":libm.riscv64.map",
         },
@@ -373,10 +358,6 @@
                 "x86/lrintf.S",
             ],
             exclude_srcs: [
-                "upstream-freebsd/lib/msun/src/s_ceil.c",
-                "upstream-freebsd/lib/msun/src/s_ceilf.c",
-                "upstream-freebsd/lib/msun/src/s_floor.c",
-                "upstream-freebsd/lib/msun/src/s_floorf.c",
                 "upstream-freebsd/lib/msun/src/s_lrint.c",
                 "upstream-freebsd/lib/msun/src/s_lrintf.c",
                 "upstream-freebsd/lib/msun/src/s_rint.c",
@@ -400,10 +381,6 @@
                 "x86_64/lrintf.S",
             ],
             exclude_srcs: [
-                "upstream-freebsd/lib/msun/src/s_ceil.c",
-                "upstream-freebsd/lib/msun/src/s_ceilf.c",
-                "upstream-freebsd/lib/msun/src/s_floor.c",
-                "upstream-freebsd/lib/msun/src/s_floorf.c",
                 "upstream-freebsd/lib/msun/src/s_llrint.c",
                 "upstream-freebsd/lib/msun/src/s_llrintf.c",
                 "upstream-freebsd/lib/msun/src/s_lrint.c",
diff --git a/libm/builtins.cpp b/libm/builtins.cpp
index 96c006d..41e145b 100644
--- a/libm/builtins.cpp
+++ b/libm/builtins.cpp
@@ -22,7 +22,7 @@
 float fabsf(float x) { return __builtin_fabsf(x); }
 long double fabsl(long double x) { return __builtin_fabsl(x); }
 
-#if defined(__aarch64__) || defined(__i386__) || defined(__x86_64__)
+#if defined(__aarch64__) || defined(__riscv) || defined(__i386__) || defined(__x86_64__)
 float ceilf(float x) { return __builtin_ceilf(x); }
 double ceil(double x) { return __builtin_ceil(x); }
 #if defined(__ILP32__)
@@ -46,7 +46,7 @@
 }
 float floorf(float x) { return s_floorf::floorf(x); }
 double floor(double x) { return s_floor::floor(x); }
-#elif defined(__arm__) || defined(__aarch64__) || defined(__i386__) || defined(__x86_64__)
+#else
 float floorf(float x) { return __builtin_floorf(x); }
 double floor(double x) { return __builtin_floor(x); }
 #if defined(__ILP32__)
@@ -79,7 +79,7 @@
 long long llroundf(float x) { return __builtin_llroundf(x); }
 #endif
 
-#if defined(__aarch64__) || defined(__i386__) || defined(__x86_64__)
+#if defined(__aarch64__) || defined(__riscv) || defined(__i386__) || defined(__x86_64__)
 float rintf(float x) { return __builtin_rintf(x); }
 double rint(double x) { return __builtin_rint(x); }
 #if defined(__ILP32__)
@@ -87,7 +87,7 @@
 #endif
 #endif
 
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(__riscv)
 float roundf(float x) { return __builtin_roundf(x); }
 double round(double x) { return __builtin_round(x); }
 #endif
@@ -98,7 +98,7 @@
 __weak_reference(sqrt, sqrtl);
 #endif
 
-#if defined(__aarch64__) || defined(__i386__) || defined(__x86_64__)
+#if defined(__aarch64__) || defined(__riscv) || defined(__i386__) || defined(__x86_64__)
 float truncf(float x) { return __builtin_truncf(x); }
 double trunc(double x) { return __builtin_trunc(x); }
 #if defined(__ILP32__)
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 6246f8c..c5a822a 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -3142,6 +3142,14 @@
       case DT_AARCH64_VARIANT_PCS:
         // Ignored: AArch64 processor-specific dynamic array tags.
         break;
+      // TODO(mitchp): Add support to libc_init_mte to use these dynamic array entries instead of
+      // the Android-specific ELF note.
+      case DT_AARCH64_MEMTAG_MODE:
+      case DT_AARCH64_MEMTAG_HEAP:
+      case DT_AARCH64_MEMTAG_STACK:
+      case DT_AARCH64_MEMTAG_GLOBALS:
+      case DT_AARCH64_MEMTAG_GLOBALSSZ:
+        break;
 #endif
 
       default:
diff --git a/tests/Android.bp b/tests/Android.bp
index 8c6057f..6d2a8d4 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -66,18 +66,20 @@
         "libcutils_headers",
         "gwp_asan_headers"
     ],
-    // Ensure that the tests exercise shadow call stack support and
-    // the hint space PAC/BTI instructions.
+    stl: "libc++",
+
+    // Ensure that the tests exercise shadow call stack support.
+    // We don't use `scs: true` here because that would give us a second
+    // variant of this library where we actually just want to say "this
+    // library should always be built this way".
     arch: {
         arm64: {
-            cflags: [
-                "-fsanitize=shadow-call-stack",
-                // Disable this option for now: see b/151372823
-                //"-mbranch-protection=standard",
-            ],
+            cflags: ["-fsanitize=shadow-call-stack"],
+        },
+        riscv64: {
+            cflags: ["-fsanitize=shadow-call-stack"],
         },
     },
-    stl: "libc++",
     sanitize: {
         address: false,
     },
diff --git a/tests/async_safe_test.cpp b/tests/async_safe_test.cpp
index f52387e..dc4db07 100644
--- a/tests/async_safe_test.cpp
+++ b/tests/async_safe_test.cpp
@@ -16,6 +16,8 @@
 
 #include <gtest/gtest.h>
 
+#include <errno.h>
+
 #if defined(__BIONIC__)
 #include <async_safe/log.h>
 #endif // __BIONIC__
@@ -227,3 +229,19 @@
   GTEST_SKIP() << "bionic-only test";
 #endif // __BIONIC__
 }
+
+// Verify that using %m is never cut off.
+TEST(async_safe_format_buffer, percent_m_fits_in_buffer) {
+#if defined(__BIONIC__)
+  for (int i = 0; i < 256; i++) {
+    errno = i;
+    char async_buf[256];
+    async_safe_format_buffer(async_buf, sizeof(async_buf), "%m");
+    char strerror_buf[1024];
+    strerror_r(errno, strerror_buf, sizeof(strerror_buf));
+    ASSERT_STREQ(strerror_buf, async_buf);
+  }
+#else   // __BIONIC__
+  GTEST_SKIP() << "bionic-only test";
+#endif  // __BIONIC__
+}
diff --git a/tests/locale_test.cpp b/tests/locale_test.cpp
index b4da6de..a220c83 100644
--- a/tests/locale_test.cpp
+++ b/tests/locale_test.cpp
@@ -79,9 +79,12 @@
 }
 
 TEST(locale, newlocale_NULL_locale_name) {
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnonnull"
   errno = 0;
   EXPECT_EQ(nullptr, newlocale(LC_ALL, nullptr, nullptr));
   EXPECT_EQ(EINVAL, errno);
+#pragma clang diagnostic pop
 }
 
 TEST(locale, newlocale_bad_locale_name) {
diff --git a/tests/malloc_test.cpp b/tests/malloc_test.cpp
index 63ad99d..4e7eb7b 100644
--- a/tests/malloc_test.cpp
+++ b/tests/malloc_test.cpp
@@ -36,7 +36,10 @@
 #include <algorithm>
 #include <atomic>
 #include <functional>
+#include <string>
 #include <thread>
+#include <unordered_map>
+#include <utility>
 #include <vector>
 
 #include <tinyxml2.h>
@@ -695,6 +698,44 @@
 #endif
 }
 
+TEST(malloc, mallopt_purge_all) {
+#if defined(__BIONIC__)
+  SKIP_WITH_HWASAN << "hwasan does not implement mallopt";
+  errno = 0;
+  ASSERT_EQ(1, mallopt(M_PURGE_ALL, 0));
+#else
+  GTEST_SKIP() << "bionic-only test";
+#endif
+}
+
+// Verify that all of the mallopt values are unique.
+TEST(malloc, mallopt_unique_params) {
+#if defined(__BIONIC__)
+  std::vector<std::pair<int, std::string>> params{
+      std::make_pair(M_DECAY_TIME, "M_DECAY_TIME"),
+      std::make_pair(M_PURGE, "M_PURGE"),
+      std::make_pair(M_PURGE_ALL, "M_PURGE_ALL"),
+      std::make_pair(M_MEMTAG_TUNING, "M_MEMTAG_TUNING"),
+      std::make_pair(M_THREAD_DISABLE_MEM_INIT, "M_THREAD_DISABLE_MEM_INIT"),
+      std::make_pair(M_CACHE_COUNT_MAX, "M_CACHE_COUNT_MAX"),
+      std::make_pair(M_CACHE_SIZE_MAX, "M_CACHE_SIZE_MAX"),
+      std::make_pair(M_TSDS_COUNT_MAX, "M_TSDS_COUNT_MAX"),
+      std::make_pair(M_BIONIC_ZERO_INIT, "M_BIONIC_ZERO_INIT"),
+      std::make_pair(M_BIONIC_SET_HEAP_TAGGING_LEVEL, "M_BIONIC_SET_HEAP_TAGGING_LEVEL"),
+  };
+
+  std::unordered_map<int, std::string> all_params;
+  for (const auto& param : params) {
+    EXPECT_TRUE(all_params.count(param.first) == 0)
+        << "mallopt params " << all_params[param.first] << " and " << param.second
+        << " have the same value " << param.first;
+    all_params.insert(param);
+  }
+#else
+  GTEST_SKIP() << "bionic-only test";
+#endif
+}
+
 #if defined(__BIONIC__)
 static void GetAllocatorVersion(bool* allocator_scudo) {
   TemporaryFile tf;
diff --git a/tests/scs_test.cpp b/tests/scs_test.cpp
index 24cb347..0776a43 100644
--- a/tests/scs_test.cpp
+++ b/tests/scs_test.cpp
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#if __has_feature(shadow_call_stack)
-
 #include <gtest/gtest.h>
 
 #include "private/bionic_constants.h"
@@ -33,7 +31,9 @@
 }
 
 TEST(scs_test, stack_overflow) {
+#if defined(__aarch64__) || defined(__riscv)
   ASSERT_EXIT(recurse1(SCS_SIZE), testing::KilledBySignal(SIGSEGV), "");
-}
-
+#else
+  GTEST_SKIP() << "no SCS on this architecture";
 #endif
+}
diff --git a/tests/setjmp_test.cpp b/tests/setjmp_test.cpp
index 2f891ec..6ae8bfd 100644
--- a/tests/setjmp_test.cpp
+++ b/tests/setjmp_test.cpp
@@ -278,7 +278,6 @@
 }
 
 TEST(setjmp, bug_152210274) {
-  SKIP_WITH_HWASAN; // b/227390656
   // Ensure that we never have a mangled value in the stack pointer.
 #if defined(__BIONIC__)
   struct sigaction sa = {.sa_flags = SA_SIGINFO, .sa_sigaction = [](int, siginfo_t*, void*) {}};
@@ -299,15 +298,19 @@
         perror("setjmp");
         abort();
       }
-      if (*static_cast<pid_t*>(arg) == 100) longjmp(buf, 1);
+      // This will never be true, but the compiler doesn't know that, so the
+      // setjmp won't be removed by DCE. With HWASan/MTE this also acts as a
+      // kind of enforcement that the threads are done before leaving the test.
+      if (*static_cast<size_t*>(arg) != 123) longjmp(buf, 1);
     }
     return nullptr;
   };
+  pthread_t threads[kNumThreads];
   pid_t tids[kNumThreads] = {};
+  size_t var = 123;
   for (size_t i = 0; i < kNumThreads; ++i) {
-    pthread_t t;
-    ASSERT_EQ(0, pthread_create(&t, nullptr, jumper, &tids[i]));
-    tids[i] = pthread_gettid_np(t);
+    ASSERT_EQ(0, pthread_create(&threads[i], nullptr, jumper, &var));
+    tids[i] = pthread_gettid_np(threads[i]);
   }
 
   // Start the interrupter thread.
@@ -327,6 +330,9 @@
   pthread_t t;
   ASSERT_EQ(0, pthread_create(&t, nullptr, interrupter, tids));
   pthread_join(t, nullptr);
+  for (size_t i = 0; i < kNumThreads; i++) {
+    pthread_join(threads[i], nullptr);
+  }
 #else
   GTEST_SKIP() << "tests uses functions not in glibc";
 #endif
diff --git a/tests/stack_protector_test_helper.cpp b/tests/stack_protector_test_helper.cpp
index eddd940..69b3c5d 100644
--- a/tests/stack_protector_test_helper.cpp
+++ b/tests/stack_protector_test_helper.cpp
@@ -15,12 +15,11 @@
  */
 
 // Deliberately overwrite the stack canary.
-__attribute__((noinline)) void modify_stack_protector_test() {
+__attribute__((noinline, optnone)) void modify_stack_protector_test() {
   // We can't use memset here because it's fortified, and we want to test
   // the line of defense *after* that.
-  // Without volatile, the generic x86/x86-64 targets don't write to the stack.
   // We can't make a constant change, since the existing byte might already have
   // had that value.
-  volatile char* p = reinterpret_cast<volatile char*>(&p + 1);
+  char* p = reinterpret_cast<char*>(&p + 1);
   *p = ~*p;
 }
diff --git a/tests/unistd_test.cpp b/tests/unistd_test.cpp
index 2a36460..4c21627 100644
--- a/tests/unistd_test.cpp
+++ b/tests/unistd_test.cpp
@@ -589,9 +589,10 @@
   TestGetTidCachingWithFork(CloneAndSetTid, exit);
 }
 
+__attribute__((no_sanitize("hwaddress", "memtag")))
 static int CloneStartRoutine(int (*start_routine)(void*)) {
   void* child_stack[1024];
-  return clone(start_routine, untag_address(&child_stack[1024]), SIGCHLD, nullptr);
+  return clone(start_routine, &child_stack[1024], SIGCHLD, nullptr);
 }
 
 static int GetPidCachingCloneStartRoutine(void*) {