diff --git a/libc/SYSCALLS.TXT b/libc/SYSCALLS.TXT
index 45b2a1b..9aeb07c 100644
--- a/libc/SYSCALLS.TXT
+++ b/libc/SYSCALLS.TXT
@@ -26,7 +26,7 @@
 # This file is processed by a python script named gensyscalls.py, run via
 # genrules in Android.bp.
 
-int     execve(const char*, char* const*, char* const*)  all
+int     __execve:execve(const char*, char* const*, char* const*)  all
 
 uid_t   getuid:getuid32()         lp32
 uid_t   getuid:getuid()           lp64
@@ -320,7 +320,7 @@
 
 int __eventfd:eventfd2(unsigned int, int)  all
 
-void _exit|_Exit:exit_group(int)  all
+void __exit_group:exit_group(int)  all
 void __exit:exit(int)  all
 
 int inotify_init1(int)  all
diff --git a/libc/arch-arm64/bionic/setjmp.S b/libc/arch-arm64/bionic/setjmp.S
index d2fafdb..8e00b56 100644
--- a/libc/arch-arm64/bionic/setjmp.S
+++ b/libc/arch-arm64/bionic/setjmp.S
@@ -194,7 +194,7 @@
   cmp x2, x12
   bne __bionic_setjmp_checksum_mismatch
 
-#if __has_feature(hwaddress_sanitizer)
+  // Update stack memory tags (MTE + hwasan).
   stp x0, x30, [sp, #-16]!
   .cfi_adjust_cfa_offset 16
   .cfi_rel_offset x0, 0
@@ -206,7 +206,7 @@
   bic x2, x2, #1
   ldr x0, [x0, #(_JB_X30_SP  * 8 + 8)]
   eor x0, x0, x2
-  bl __hwasan_handle_longjmp
+  bl memtag_handle_longjmp
 
   mov x1, x19 // Restore 'value'.
   // Restore original x0 and lr.
@@ -214,7 +214,6 @@
   .cfi_adjust_cfa_offset -16
   .cfi_restore x0
   .cfi_restore x30
-#endif
 
   // Do we need to restore the signal mask?
   ldr x2, [x0, #(_JB_SIGFLAG * 8)]
diff --git a/libc/arch-arm64/bionic/vfork.S b/libc/arch-arm64/bionic/vfork.S
index df7b063..9878e8d 100644
--- a/libc/arch-arm64/bionic/vfork.S
+++ b/libc/arch-arm64/bionic/vfork.S
@@ -45,6 +45,9 @@
     ldr     w10, [x9, #20]
     str     w0, [x9, #20]
 
+    // Clear vfork_child_stack_bottom (#776 is assumed to be the field's offset from x9).
+    str     xzr, [x9, #776]
+
     mov     x0, #(CLONE_VM | CLONE_VFORK | SIGCHLD)
     mov     x1, xzr
     mov     x2, xzr
@@ -62,9 +65,6 @@
     cneg    x0, x0, hi
     b.hi    __set_errno_internal
 
-#if __has_feature(hwaddress_sanitizer)
-    cbz x0, .L_exit
-
     // Clean up stack shadow in the parent process.
     // https://github.com/google/sanitizers/issues/925
     paciasp
@@ -75,7 +75,7 @@
     .cfi_rel_offset x30, 8
 
     add x0, sp, #16
-    bl __hwasan_handle_vfork
+    bl memtag_handle_vfork
 
     ldp x0, x30, [sp], #16
     .cfi_adjust_cfa_offset -16
@@ -84,8 +84,6 @@
     autiasp
     .cfi_negate_ra_state
 
-#endif
-
 .L_exit:
     ret
 END(vfork)
diff --git a/libc/bionic/exec.cpp b/libc/bionic/exec.cpp
index fd2c401..40612e7 100644
--- a/libc/bionic/exec.cpp
+++ b/libc/bionic/exec.cpp
@@ -39,10 +39,12 @@
 #include <string.h>
 #include <unistd.h>
 
-#include "private/__bionic_get_shell_path.h"
 #include "private/FdPath.h"
+#include "private/__bionic_get_shell_path.h"
+#include "pthread_internal.h"
 
 extern "C" char** environ;
+extern "C" int __execve(const char* pathname, char* const* argv, char* const* envp);
 
 enum { ExecL, ExecLE, ExecLP };
 
@@ -181,3 +183,9 @@
   if (errno == ENOENT) errno = EBADF;
   return -1;
 }
+
+__attribute__((no_sanitize("memtag"))) int execve(const char* pathname, char* const* argv,
+                                                  char* const* envp) {
+  __get_thread()->vfork_child_stack_bottom = __builtin_frame_address(0);  // Read by vfork parent.
+  return __execve(pathname, argv, envp);  // __execve is the raw execve syscall stub.
+}
diff --git a/libc/bionic/exit.cpp b/libc/bionic/exit.cpp
index a5aed78..52fd193 100644
--- a/libc/bionic/exit.cpp
+++ b/libc/bionic/exit.cpp
@@ -30,9 +30,18 @@
 #include <unistd.h>
 
 #include "private/bionic_defs.h"
+#include "pthread_internal.h"
 
 extern "C" void __cxa_finalize(void* dso_handle);
 extern "C" void __cxa_thread_finalize();
+extern "C" __noreturn void __exit_group(int status);
+
+__attribute__((no_sanitize("memtag"))) void _exit(int status) {
+  __get_thread()->vfork_child_stack_bottom = __builtin_frame_address(0);  // Read by vfork parent.
+  __exit_group(status);  // Raw exit_group syscall stub; declared __noreturn above.
+}
+
+__strong_alias(_Exit, _exit);
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
 void exit(int status) {
diff --git a/libc/bionic/heap_tagging.cpp b/libc/bionic/heap_tagging.cpp
index aac2048..78d21b0 100644
--- a/libc/bionic/heap_tagging.cpp
+++ b/libc/bionic/heap_tagging.cpp
@@ -32,6 +32,8 @@
 
 #include <bionic/pthread_internal.h>
 #include <platform/bionic/malloc.h>
+#include <sanitizer/hwasan_interface.h>
+#include <sys/auxv.h>
 
 extern "C" void scudo_malloc_disable_memory_tagging();
 extern "C" void scudo_malloc_set_track_allocation_stacks(int);
@@ -170,3 +172,85 @@
 
   return true;
 }
+
+#ifdef __aarch64__
+// 128 MiB of stack should be enough for anybody.
+static constexpr size_t kUntagLimit = 128 * 1024 * 1024;
+
+// Sets the MTE allocation tag of every 16-byte granule in [from, to) to the address tag of
+// `from` (callers pass frame addresses, which are presumably untagged). stg needs 16-byte
+// aligned addresses. Does nothing if the range is empty or inverted.
+static inline __attribute__((no_sanitize("memtag"))) void untag_memory(void* from, void* to) {
+  // The loop below is bottom-tested: it stores one tag before it ever compares against `to`.
+  // Without this check an empty range would retag the granule at `to`, which lies outside the
+  // range and may belong to a live frame.
+  if (reinterpret_cast<uintptr_t>(from) >= reinterpret_cast<uintptr_t>(to)) {
+    return;
+  }
+  __asm__ __volatile__(
+      ".arch_extension mte\n"
+      "1:\n"
+      "stg %[Ptr], [%[Ptr]], #16\n"
+      "cmp %[Ptr], %[End]\n"
+      "b.lt 1b\n"
+      : [Ptr] "+&r"(from)
+      : [End] "r"(to)
+      : "memory");
+}
+#endif  // __aarch64__
+
+// Called by the arm64 longjmp code with the stack pointer it is about to restore. When stack
+// tagging is enabled, clears the MTE tags of the stack between this frame and `sp_dst`; in
+// HWASan builds, also forwards to HWASan's own longjmp handler.
+extern "C" __LIBC_HIDDEN__ __attribute__((no_sanitize("memtag"))) void memtag_handle_longjmp(
+    void* sp_dst __unused) {
+#ifdef __aarch64__
+  if (__libc_globals->memtag_stack) {
+    void* sp = __builtin_frame_address(0);
+    // Unsigned arithmetic: a destination below the current frame wraps around to a huge value
+    // and is rejected by the limit check too.
+    size_t distance = reinterpret_cast<uintptr_t>(sp_dst) - reinterpret_cast<uintptr_t>(sp);
+    if (distance > kUntagLimit) {
+      async_safe_fatal(
+          "memtag_handle_longjmp: stack adjustment too large! %p -> %p, distance %zx > %zx\n", sp,
+          sp_dst, distance, kUntagLimit);
+    } else {
+      untag_memory(sp, sp_dst);
+    }
+  }
+#endif  // __aarch64__
+
+#if __has_feature(hwaddress_sanitizer)
+  __hwasan_handle_longjmp(sp_dst);
+#endif  // __has_feature(hwaddress_sanitizer)
+}
+
+// Called by the arm64 vfork code in the parent after the vfork syscall returns; `sp` is the
+// parent's stack pointer inside vfork. When stack tagging is enabled, clears the MTE tags the
+// child may have left between its last recorded frame address (see execve() and _exit()) and
+// `sp`; in HWASan builds, also forwards to HWASan's own vfork handler.
+extern "C" __LIBC_HIDDEN__ __attribute__((no_sanitize("memtag"), no_sanitize("hwaddress"))) void
+memtag_handle_vfork(void* sp __unused) {
+#ifdef __aarch64__
+  if (__libc_globals->memtag_stack) {
+    void* child_sp = __get_thread()->vfork_child_stack_bottom;
+    __get_thread()->vfork_child_stack_bottom = nullptr;
+    if (child_sp) {
+      size_t distance = reinterpret_cast<uintptr_t>(sp) - reinterpret_cast<uintptr_t>(child_sp);
+      if (distance > kUntagLimit) {
+        async_safe_fatal(
+            "memtag_handle_vfork: stack adjustment too large! %p -> %p, distance %zx > %zx\n",
+            child_sp, sp, distance, kUntagLimit);
+      } else {
+        untag_memory(child_sp, sp);
+      }
+    } else {
+      async_safe_fatal("memtag_handle_vfork: child SP unknown\n");
+    }
+  }
+#endif  // __aarch64__
+
+#if __has_feature(hwaddress_sanitizer)
+  __hwasan_handle_vfork(sp);
+#endif  // __has_feature(hwaddress_sanitizer)
+}
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 071a5bc..7222b62 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -160,6 +160,13 @@
   bionic_tls* bionic_tls;
 
   int errno_value;
+
+  // The last observed value of SP in a vfork child process.
+  // The part of the stack between this address and the value of SP when the vfork parent process
+  // regains control may have stale MTE tags and needs cleanup. This field is only meaningful while
+  // the parent is waiting for the vfork child to return control by calling either exec*() or
+  // exit().
+  void* vfork_child_stack_bottom;
 };
 
 struct ThreadMapping {
