Snap for 12651823 from 3fb52fc057b9dfacfa80fae3b5100251401f48d4 to 25Q1-release

Change-Id: Ifd067d6b32f21a7bebf90f4615b892ce52c4992a
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index ba20c51..54bfa20 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -351,15 +351,20 @@
 
 extern "C" int __rt_sigprocmask(int, const sigset64_t*, sigset64_t*, size_t);
 
-__attribute__((no_sanitize("hwaddress")))
+__attribute__((no_sanitize("hwaddress", "memtag")))
 #if defined(__aarch64__)
 // This function doesn't return, but it does appear in stack traces. Avoid using return PAC in this
 // function because we may end up resetting IA, which may confuse unwinders due to mismatching keys.
 __attribute__((target("branch-protection=bti")))
 #endif
-static int __pthread_start(void* arg) {
+static int
+__pthread_start(void* arg) {
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);
-
+#if defined(__aarch64__)
+  if (thread->should_allocate_stack_mte_ringbuffer) {
+    thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, thread);
+  }
+#endif
   __hwasan_thread_enter();
 
   // Wait for our creating thread to release us. This lets it have time to
@@ -450,9 +455,9 @@
 // This has to be done under g_thread_creation_lock or g_thread_list_lock to avoid racing with
 // __pthread_internal_remap_stack_with_mte.
 #ifdef __aarch64__
-  if (__libc_memtag_stack_abi) {
-    tcb->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, thread);
-  }
+  thread->should_allocate_stack_mte_ringbuffer = __libc_memtag_stack_abi;
+#else
+  thread->should_allocate_stack_mte_ringbuffer = false;
 #endif
 
   sigset64_t block_all_mask;
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 0181aba..27d05c2 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -33,10 +33,11 @@
 #include <string.h>
 #include <sys/mman.h>
 
-#include "private/bionic_constants.h"
-#include "private/bionic_defs.h"
+#include "platform/bionic/mte.h"
 #include "private/ScopedRWLock.h"
 #include "private/ScopedSignalBlocker.h"
+#include "private/bionic_constants.h"
+#include "private/bionic_defs.h"
 #include "pthread_internal.h"
 
 extern "C" __noreturn void _exit_with_stack_teardown(void*, size_t);
@@ -67,7 +68,7 @@
 }
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
-void pthread_exit(void* return_value) {
+__attribute__((no_sanitize("memtag"))) void pthread_exit(void* return_value) {
   // Call dtors for thread_local objects first.
   __cxa_thread_finalize();
 
@@ -138,6 +139,13 @@
   __notify_thread_exit_callbacks();
   __hwasan_thread_exit();
 
+#if defined(__aarch64__)
+  if (void* stack_mte_tls = thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE)) {
+    stack_mte_free_ringbuffer(reinterpret_cast<uintptr_t>(stack_mte_tls));
+  }
+#endif
+  // Everything below this line needs to be no_sanitize("memtag").
+
   if (old_state == THREAD_DETACHED && thread->mmap_size != 0) {
     // We need to free mapped space for detached threads when they exit.
     // That's not something we can do in C.
diff --git a/libc/bionic/pthread_internal.cpp b/libc/bionic/pthread_internal.cpp
index 3bfb899..c6426ed 100644
--- a/libc/bionic/pthread_internal.cpp
+++ b/libc/bionic/pthread_internal.cpp
@@ -77,11 +77,6 @@
 }
 
 static void __pthread_internal_free(pthread_internal_t* thread) {
-#ifdef __aarch64__
-  if (void* stack_mte_tls = thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE)) {
-    stack_mte_free_ringbuffer(reinterpret_cast<uintptr_t>(stack_mte_tls));
-  }
-#endif
   if (thread->mmap_size != 0) {
     // Free mapped space, including thread stack and pthread_internal_t.
     munmap(thread->mmap_base, thread->mmap_size);
@@ -213,7 +208,10 @@
   __libc_memtag_stack_abi = true;
 
   for (pthread_internal_t* t = g_thread_list; t != nullptr; t = t->next) {
-    if (t->terminating) continue;
+    // should_allocate_stack_mte_ringbuffer indicates the thread is already
+    // aware that this process requires stack MTE, and will allocate the
+    // ring buffer in __pthread_start.
+    if (t->terminating || t->should_allocate_stack_mte_ringbuffer) continue;
     t->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE) =
         __allocate_stack_mte_ringbuffer(0, t->is_main() ? nullptr : t);
   }
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 5db42ab..cbaa9a6 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -181,6 +181,7 @@
 
   bionic_tcb* bionic_tcb;
   char stack_mte_ringbuffer_vma_name_buffer[32];
+  bool should_allocate_stack_mte_ringbuffer;
 
   bool is_main() { return start_routine == nullptr; }
 };
diff --git a/libc/malloc_debug/Android.bp b/libc/malloc_debug/Android.bp
index 3828c28..5d61801 100644
--- a/libc/malloc_debug/Android.bp
+++ b/libc/malloc_debug/Android.bp
@@ -79,6 +79,10 @@
         "libmemunreachable",
     ],
 
+    whole_static_libs: [
+        "libmemory_trace",
+    ],
+
     shared_libs: [
         "libunwindstack",
     ],
diff --git a/libc/malloc_debug/Nanotime.h b/libc/malloc_debug/Nanotime.h
new file mode 100644
index 0000000..d7c3f60
--- /dev/null
+++ b/libc/malloc_debug/Nanotime.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+#include <time.h>
+
+static inline __always_inline uint64_t Nanotime() {  // CLOCK_MONOTONIC time as one nanosecond count.
+  struct timespec t = {};  // Zero-initialized, so the result is 0 if clock_gettime() writes nothing.
+  clock_gettime(CLOCK_MONOTONIC, &t);  // The return value is not checked.
+  return static_cast<uint64_t>(t.tv_sec) * 1000000000LL + t.tv_nsec;  // Seconds scaled to ns + tv_nsec.
+}
diff --git a/libc/malloc_debug/RecordData.cpp b/libc/malloc_debug/RecordData.cpp
index 79e051b..1641732 100644
--- a/libc/malloc_debug/RecordData.cpp
+++ b/libc/malloc_debug/RecordData.cpp
@@ -39,72 +39,19 @@
 #include <mutex>
 
 #include <android-base/stringprintf.h>
+#include <memory_trace/MemoryTrace.h>
 
 #include "Config.h"
 #include "DebugData.h"
+#include "Nanotime.h"
 #include "RecordData.h"
 #include "debug_disable.h"
 #include "debug_log.h"
 
-RecordEntry::RecordEntry() : tid_(gettid()) {
-}
-
-bool ThreadCompleteEntry::Write(int fd) const {
-  return dprintf(fd, "%d: thread_done 0x0\n", tid_) > 0;
-}
-
-AllocEntry::AllocEntry(void* pointer, uint64_t start_ns, uint64_t end_ns)
-    : pointer_(pointer), start_ns_(start_ns), end_ns_(end_ns) {}
-
-MallocEntry::MallocEntry(void* pointer, size_t size, uint64_t start_ns, uint64_t end_ns)
-    : AllocEntry(pointer, start_ns, end_ns), size_(size) {}
-
-bool MallocEntry::Write(int fd) const {
-  return dprintf(fd, "%d: malloc %p %zu %" PRIu64 " %" PRIu64 "\n", tid_, pointer_, size_,
-                 start_ns_, end_ns_) > 0;
-}
-
-FreeEntry::FreeEntry(void* pointer, uint64_t start_ns, uint64_t end_ns)
-    : AllocEntry(pointer, start_ns, end_ns) {}
-
-bool FreeEntry::Write(int fd) const {
-  return dprintf(fd, "%d: free %p %" PRIu64 " %" PRIu64 "\n", tid_, pointer_, start_ns_, end_ns_) >
-         0;
-}
-
-CallocEntry::CallocEntry(void* pointer, size_t nmemb, size_t size, uint64_t start_ns,
-                         uint64_t end_ns)
-    : MallocEntry(pointer, size, start_ns, end_ns), nmemb_(nmemb) {}
-
-bool CallocEntry::Write(int fd) const {
-  return dprintf(fd, "%d: calloc %p %zu %zu %" PRIu64 " %" PRIu64 "\n", tid_, pointer_, nmemb_,
-                 size_, start_ns_, end_ns_) > 0;
-}
-
-ReallocEntry::ReallocEntry(void* pointer, size_t size, void* old_pointer, uint64_t start_ns,
-                           uint64_t end_ns)
-    : MallocEntry(pointer, size, start_ns, end_ns), old_pointer_(old_pointer) {}
-
-bool ReallocEntry::Write(int fd) const {
-  return dprintf(fd, "%d: realloc %p %p %zu %" PRIu64 " %" PRIu64 "\n", tid_, pointer_,
-                 old_pointer_, size_, start_ns_, end_ns_) > 0;
-}
-
-// aligned_alloc, posix_memalign, memalign, pvalloc, valloc all recorded with this class.
-MemalignEntry::MemalignEntry(void* pointer, size_t size, size_t alignment, uint64_t start_ns,
-                             uint64_t end_ns)
-    : MallocEntry(pointer, size, start_ns, end_ns), alignment_(alignment) {}
-
-bool MemalignEntry::Write(int fd) const {
-  return dprintf(fd, "%d: memalign %p %zu %zu %" PRIu64 " %" PRIu64 "\n", tid_, pointer_,
-                 alignment_, size_, start_ns_, end_ns_) > 0;
-}
-
 struct ThreadData {
-  ThreadData(RecordData* record_data, ThreadCompleteEntry* entry)
-      : record_data(record_data), entry(entry) {}
-  RecordData* record_data;
-  ThreadCompleteEntry* entry;
+  ThreadData(RecordData* record_data) : record_data(record_data) {}
+
+  RecordData* record_data = nullptr;
   size_t count = 0;
 };
 
@@ -117,7 +64,8 @@
   if (thread_data->count == 4) {
     ScopedDisableDebugCalls disable;
 
-    thread_data->record_data->AddEntryOnly(thread_data->entry);
+    thread_data->record_data->AddEntryOnly(memory_trace::Entry{
+        .tid = gettid(), .type = memory_trace::THREAD_DONE, .end_ns = Nanotime()});
     delete thread_data;
   } else {
     pthread_setspecific(thread_data->record_data->key(), data);
@@ -159,7 +107,7 @@
   }
 
   for (size_t i = 0; i < cur_index_; i++) {
-    if (!entries_[i]->Write(dump_fd)) {
+    if (!memory_trace::WriteEntryToFd(dump_fd, entries_[i])) {
       error_log("Failed to write record alloc information: %s", strerror(errno));
       break;
     }
@@ -201,23 +149,23 @@
   pthread_key_delete(key_);
 }
 
-void RecordData::AddEntryOnly(const RecordEntry* entry) {
+void RecordData::AddEntryOnly(const memory_trace::Entry& entry) {
   std::lock_guard<std::mutex> entries_lock(entries_lock_);
   if (cur_index_ == entries_.size()) {
     // Maxed out, throw the entry away.
     return;
   }
 
-  entries_[cur_index_++].reset(entry);
+  entries_[cur_index_++] = entry;
   if (cur_index_ == entries_.size()) {
     info_log("Maximum number of records added, all new operations will be dropped.");
   }
 }
 
-void RecordData::AddEntry(const RecordEntry* entry) {
+void RecordData::AddEntry(const memory_trace::Entry& entry) {
   void* data = pthread_getspecific(key_);
   if (data == nullptr) {
-    ThreadData* thread_data = new ThreadData(this, new ThreadCompleteEntry());
+    ThreadData* thread_data = new ThreadData(this);
     pthread_setspecific(key_, thread_data);
   }
 
diff --git a/libc/malloc_debug/RecordData.h b/libc/malloc_debug/RecordData.h
index 7efa1f7..f4b0d82 100644
--- a/libc/malloc_debug/RecordData.h
+++ b/libc/malloc_debug/RecordData.h
@@ -39,117 +39,9 @@
 #include <string>
 #include <vector>
 
+#include <memory_trace/MemoryTrace.h>
 #include <platform/bionic/macros.h>
 
-class RecordEntry {
- public:
-  RecordEntry();
-  virtual ~RecordEntry() = default;
-
-  virtual bool Write(int fd) const = 0;
-
- protected:
-  pid_t tid_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(RecordEntry);
-};
-
-class ThreadCompleteEntry : public RecordEntry {
- public:
-  ThreadCompleteEntry() = default;
-  virtual ~ThreadCompleteEntry() = default;
-
-  bool Write(int fd) const override;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(ThreadCompleteEntry);
-};
-
-class AllocEntry : public RecordEntry {
- public:
-  explicit AllocEntry(void* pointer, uint64_t st, uint64_t et);
-  virtual ~AllocEntry() = default;
-
- protected:
-  void* pointer_;
-
-  // The start/end time of this operation.
-  uint64_t start_ns_;
-  uint64_t end_ns_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(AllocEntry);
-};
-
-class MallocEntry : public AllocEntry {
- public:
-  MallocEntry(void* pointer, size_t size, uint64_t st, uint64_t et);
-  virtual ~MallocEntry() = default;
-
-  bool Write(int fd) const override;
-
- protected:
-  size_t size_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(MallocEntry);
-};
-
-class FreeEntry : public AllocEntry {
- public:
-  explicit FreeEntry(void* pointer, uint64_t st, uint64_t et);
-  virtual ~FreeEntry() = default;
-
-  bool Write(int fd) const override;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(FreeEntry);
-};
-
-class CallocEntry : public MallocEntry {
- public:
-  CallocEntry(void* pointer, size_t nmemb, size_t size, uint64_t st, uint64_t et);
-  virtual ~CallocEntry() = default;
-
-  bool Write(int fd) const override;
-
- protected:
-  size_t nmemb_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(CallocEntry);
-};
-
-class ReallocEntry : public MallocEntry {
- public:
-  ReallocEntry(void* pointer, size_t size, void* old_pointer, uint64_t st, uint64_t et);
-  virtual ~ReallocEntry() = default;
-
-  bool Write(int fd) const override;
-
- protected:
-  void* old_pointer_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(ReallocEntry);
-};
-
-// aligned_alloc, posix_memalign, memalign, pvalloc, valloc all recorded with this class.
-class MemalignEntry : public MallocEntry {
- public:
-  MemalignEntry(void* pointer, size_t size, size_t alignment, uint64_t st, uint64_t et);
-  virtual ~MemalignEntry() = default;
-
-  bool Write(int fd) const override;
-
- protected:
-  size_t alignment_;
-
- private:
-  BIONIC_DISALLOW_COPY_AND_ASSIGN(MemalignEntry);
-};
-
 class Config;
 
 class RecordData {
@@ -159,8 +51,8 @@
 
   bool Initialize(const Config& config);
 
-  void AddEntry(const RecordEntry* entry);
-  void AddEntryOnly(const RecordEntry* entry);
+  void AddEntry(const memory_trace::Entry& entry);
+  void AddEntryOnly(const memory_trace::Entry& entry);
 
   const std::string& file() { return file_; }
   pthread_key_t key() { return key_; }
@@ -176,7 +68,7 @@
 
   std::mutex entries_lock_;
   pthread_key_t key_;
-  std::vector<std::unique_ptr<const RecordEntry>> entries_;
+  std::vector<memory_trace::Entry> entries_;
   size_t cur_index_;
   std::string file_;
 
diff --git a/libc/malloc_debug/malloc_debug.cpp b/libc/malloc_debug/malloc_debug.cpp
index 3743852..c183897 100644
--- a/libc/malloc_debug/malloc_debug.cpp
+++ b/libc/malloc_debug/malloc_debug.cpp
@@ -54,6 +54,7 @@
 #include "Config.h"
 #include "DebugData.h"
 #include "LogAllocatorStats.h"
+#include "Nanotime.h"
 #include "Unreachable.h"
 #include "UnwindBacktrace.h"
 #include "backtrace.h"
@@ -70,12 +71,6 @@
 
 const MallocDispatch* g_dispatch;
 
-static inline __always_inline uint64_t Nanotime() {
-  struct timespec t = {};
-  clock_gettime(CLOCK_MONOTONIC, &t);
-  return static_cast<uint64_t>(t.tv_sec) * 1000000000LL + t.tv_nsec;
-}
-
 namespace {
 // A TimedResult contains the result of from malloc end_ns al. functions and the
 // start/end timestamps.
@@ -598,8 +593,13 @@
   TimedResult result = InternalMalloc(size);
 
   if (g_debug->config().options() & RECORD_ALLOCS) {
-    g_debug->record->AddEntry(new MallocEntry(result.getValue<void*>(), size,
-                                              result.GetStartTimeNS(), result.GetEndTimeNS()));
+    g_debug->record->AddEntry(
+        memory_trace::Entry{.tid = gettid(),
+                            .type = memory_trace::MALLOC,
+                            .ptr = reinterpret_cast<uint64_t>(result.getValue<void*>()),
+                            .size = size,
+                            .start_ns = result.GetStartTimeNS(),
+                            .end_ns = result.GetEndTimeNS()});
   }
 
   return result.getValue<void*>();
@@ -687,8 +687,11 @@
   TimedResult result = InternalFree(pointer);
 
   if (g_debug->config().options() & RECORD_ALLOCS) {
-    g_debug->record->AddEntry(
-        new FreeEntry(pointer, result.GetStartTimeNS(), result.GetEndTimeNS()));
+    g_debug->record->AddEntry(memory_trace::Entry{.tid = gettid(),
+                                                  .type = memory_trace::FREE,
+                                                  .ptr = reinterpret_cast<uint64_t>(pointer),
+                                                  .start_ns = result.GetStartTimeNS(),
+                                                  .end_ns = result.GetEndTimeNS()});
   }
 }
 
@@ -771,8 +774,13 @@
     }
 
     if (g_debug->config().options() & RECORD_ALLOCS) {
-      g_debug->record->AddEntry(new MemalignEntry(pointer, bytes, alignment,
-                                                  result.GetStartTimeNS(), result.GetEndTimeNS()));
+      g_debug->record->AddEntry(memory_trace::Entry{.tid = gettid(),
+                                                    .type = memory_trace::MEMALIGN,
+                                                    .ptr = reinterpret_cast<uint64_t>(pointer),
+                                                    .size = bytes,
+                                                    .u.align = alignment,
+                                                    .start_ns = result.GetStartTimeNS(),
+                                                    .end_ns = result.GetEndTimeNS()});
     }
   }
 
@@ -791,11 +799,16 @@
 
   if (pointer == nullptr) {
     TimedResult result = InternalMalloc(bytes);
-    if (g_debug->config().options() & RECORD_ALLOCS) {
-      g_debug->record->AddEntry(new ReallocEntry(result.getValue<void*>(), bytes, nullptr,
-                                                 result.GetStartTimeNS(), result.GetEndTimeNS()));
-    }
     pointer = result.getValue<void*>();
+    if (g_debug->config().options() & RECORD_ALLOCS) {
+      g_debug->record->AddEntry(memory_trace::Entry{.tid = gettid(),
+                                                    .type = memory_trace::REALLOC,
+                                                    .ptr = reinterpret_cast<uint64_t>(pointer),
+                                                    .size = bytes,
+                                                    .u.old_ptr = 0,
+                                                    .start_ns = result.GetStartTimeNS(),
+                                                    .end_ns = result.GetEndTimeNS()});
+    }
     return pointer;
   }
 
@@ -807,8 +820,14 @@
     TimedResult result = InternalFree(pointer);
 
     if (g_debug->config().options() & RECORD_ALLOCS) {
-      g_debug->record->AddEntry(new ReallocEntry(nullptr, bytes, pointer, result.GetStartTimeNS(),
-                                                 result.GetEndTimeNS()));
+      g_debug->record->AddEntry(
+          memory_trace::Entry{.tid = gettid(),
+                              .type = memory_trace::REALLOC,
+                              .ptr = 0,
+                              .size = 0,
+                              .u.old_ptr = reinterpret_cast<uint64_t>(pointer),
+                              .start_ns = result.GetStartTimeNS(),
+                              .end_ns = result.GetEndTimeNS()});
     }
 
     return nullptr;
@@ -905,8 +924,13 @@
   }
 
   if (g_debug->config().options() & RECORD_ALLOCS) {
-    g_debug->record->AddEntry(new ReallocEntry(new_pointer, bytes, pointer, result.GetStartTimeNS(),
-                                               result.GetEndTimeNS()));
+    g_debug->record->AddEntry(memory_trace::Entry{.tid = gettid(),
+                                                  .type = memory_trace::REALLOC,
+                                                  .ptr = reinterpret_cast<uint64_t>(new_pointer),
+                                                  .size = bytes,
+                                                  .u.old_ptr = reinterpret_cast<uint64_t>(pointer),
+                                                  .start_ns = result.GetStartTimeNS(),
+                                                  .end_ns = result.GetEndTimeNS()});
   }
 
   return new_pointer;
@@ -962,8 +986,13 @@
   }
 
   if (g_debug->config().options() & RECORD_ALLOCS) {
-    g_debug->record->AddEntry(
-        new CallocEntry(pointer, nmemb, bytes, result.GetStartTimeNS(), result.GetEndTimeNS()));
+    g_debug->record->AddEntry(memory_trace::Entry{.tid = gettid(),
+                                                  .type = memory_trace::CALLOC,
+                                                  .ptr = reinterpret_cast<uint64_t>(pointer),
+                                                  .size = bytes,
+                                                  .u.n_elements = nmemb,
+                                                  .start_ns = result.GetStartTimeNS(),
+                                                  .end_ns = result.GetEndTimeNS()});
   }
 
   if (pointer != nullptr && g_debug->TrackPointers()) {
diff --git a/tests/Android.bp b/tests/Android.bp
index 7137a2c..22fa542 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -1205,6 +1205,35 @@
 }
 
 cc_test {
+    name: "memtag_stack_abi_test",
+    enabled: false,
+    // This does not use bionic_tests_defaults because it is not supported on
+    // host.
+    arch: {
+        arm64: {
+            enabled: true,
+        },
+    },
+    // We don't use `sanitize:` so we generate the appropriate ELF note, but
+    // still support non-MTE devices.
+    // TODO(fmayer): also add a test that enables stack MTE for MTE devices,
+    // which would test for more bugs.
+    ldflags: ["-fsanitize=memtag-stack"],
+    // Turn off all other sanitizers from SANITIZE_TARGET.
+    sanitize: {
+        never: true,
+    },
+    shared_libs: [
+        "libbase",
+    ],
+    srcs: [
+        "memtag_stack_abi_test.cpp",
+    ],
+    header_libs: ["bionic_libc_platform_headers"],
+    test_suites: ["device-tests"],
+}
+
+cc_test {
     name: "bionic-stress-tests",
     defaults: [
         "bionic_tests_defaults",
diff --git a/tests/memtag_stack_abi_test.cpp b/tests/memtag_stack_abi_test.cpp
new file mode 100644
index 0000000..4725c8d
--- /dev/null
+++ b/tests/memtag_stack_abi_test.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <filesystem>
+#include <fstream>
+#include <iterator>
+#include <string>
+#include <thread>
+
+#include <dlfcn.h>
+#include <stdlib.h>
+
+#include <android-base/logging.h>
+#include <gtest/gtest.h>
+
+static size_t NumberBuffers() {  // Number of /proc/self/maps lines containing "stack_mte_ring".
+  size_t bufs = 0;
+  std::ifstream file("/proc/self/maps");
+  CHECK(file.is_open());  // Don't report a misleading 0 when the maps file can't be opened.
+  std::string line;
+  while (std::getline(file, line)) {  // One line at a time until getline() fails (EOF or error).
+    if (line.find("stack_mte_ring") != std::string::npos) {  // Substring match anywhere in the line.
+      ++bufs;
+    }
+  }
+  return bufs;
+}
+
+static size_t NumberThreads() {  // Number of directory entries in /proc/self/task.
+  std::filesystem::directory_iterator di("/proc/self/task");
+  return std::distance(begin(di), end(di));  // Walks the iterator to its end to count the entries.
+}
+
+TEST(MemtagStackAbiTest, MainThread) {  // Expects one ring-buffer mapping and as many buffers as tasks.
+#if defined(__BIONIC__) && defined(__aarch64__)
+  ASSERT_EQ(NumberBuffers(), 1U);  // Exactly one "stack_mte_ring" mapping before any thread is spawned.
+  ASSERT_EQ(NumberBuffers(), NumberThreads());  // Mapping count must equal the /proc/self/task count.
+#else
+  GTEST_SKIP() << "requires bionic arm64";
+#endif
+}
+
+TEST(MemtagStackAbiTest, JoinableThread) {  // Buffer count must follow a joined thread's lifetime.
+#if defined(__BIONIC__) && defined(__aarch64__)
+  ASSERT_EQ(NumberBuffers(), 1U);  // Baseline before the extra thread exists.
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+  std::thread th([] {
+    ASSERT_EQ(NumberBuffers(), 2U);  // Checked from inside the new thread, i.e. while it is running.
+    ASSERT_EQ(NumberBuffers(), NumberThreads());
+  });
+  th.join();
+  ASSERT_EQ(NumberBuffers(), 1U);  // After join() the second mapping is expected to be gone.
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+#else
+  GTEST_SKIP() << "requires bionic arm64";
+#endif
+}
+
+TEST(MemtagStackAbiTest, DetachedThread) {  // Same checks as the joinable case, for a detached thread.
+#if defined(__BIONIC__) && defined(__aarch64__)
+  ASSERT_EQ(NumberBuffers(), 1U);  // Baseline before the extra thread exists.
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+  std::thread th([] {
+    ASSERT_EQ(NumberBuffers(), 2U);  // Checked from inside the new thread, i.e. while it is running.
+    ASSERT_EQ(NumberBuffers(), NumberThreads());
+  });
+  th.detach();  // No join() to wait on, so the exit is detected by polling below.
+  // Leave the thread some time to exit.
+  for (int i = 0; NumberBuffers() != 1 && i < 3; ++i) {  // At most 3 polls, one second apart.
+    sleep(1);
+  }
+  ASSERT_EQ(NumberBuffers(), 1U);  // The detached thread's mapping is expected to be gone by now.
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+#else
+  GTEST_SKIP() << "requires bionic arm64";
+#endif
+}
diff --git a/tests/struct_layout_test.cpp b/tests/struct_layout_test.cpp
index 1f04344..b9fd315 100644
--- a/tests/struct_layout_test.cpp
+++ b/tests/struct_layout_test.cpp
@@ -30,7 +30,7 @@
 #define CHECK_OFFSET(name, field, offset) \
     check_offset(#name, #field, offsetof(name, field), offset);
 #ifdef __LP64__
-  CHECK_SIZE(pthread_internal_t, 816);
+  CHECK_SIZE(pthread_internal_t, 824);
   CHECK_OFFSET(pthread_internal_t, next, 0);
   CHECK_OFFSET(pthread_internal_t, prev, 8);
   CHECK_OFFSET(pthread_internal_t, tid, 16);
@@ -57,6 +57,7 @@
   CHECK_OFFSET(pthread_internal_t, errno_value, 768);
   CHECK_OFFSET(pthread_internal_t, bionic_tcb, 776);
   CHECK_OFFSET(pthread_internal_t, stack_mte_ringbuffer_vma_name_buffer, 784);
+  CHECK_OFFSET(pthread_internal_t, should_allocate_stack_mte_ringbuffer, 816);
   CHECK_SIZE(bionic_tls, 12200);
   CHECK_OFFSET(bionic_tls, key_data, 0);
   CHECK_OFFSET(bionic_tls, locale, 2080);
@@ -74,7 +75,7 @@
   CHECK_OFFSET(bionic_tls, bionic_systrace_disabled, 12193);
   CHECK_OFFSET(bionic_tls, padding, 12194);
 #else
-  CHECK_SIZE(pthread_internal_t, 704);
+  CHECK_SIZE(pthread_internal_t, 708);
   CHECK_OFFSET(pthread_internal_t, next, 0);
   CHECK_OFFSET(pthread_internal_t, prev, 4);
   CHECK_OFFSET(pthread_internal_t, tid, 8);
@@ -101,6 +102,7 @@
   CHECK_OFFSET(pthread_internal_t, errno_value, 664);
   CHECK_OFFSET(pthread_internal_t, bionic_tcb, 668);
   CHECK_OFFSET(pthread_internal_t, stack_mte_ringbuffer_vma_name_buffer, 672);
+  CHECK_OFFSET(pthread_internal_t, should_allocate_stack_mte_ringbuffer, 704);
   CHECK_SIZE(bionic_tls, 11080);
   CHECK_OFFSET(bionic_tls, key_data, 0);
   CHECK_OFFSET(bionic_tls, locale, 1040);