[MTE] Clean up stack buffer for detached threads

Bug: 378140560
Bug: 377483468
Change-Id: Ie20b7204894c03d4e3ddb10a3f1f9017c4909e38
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 0181aba..27d05c2 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -33,10 +33,11 @@
 #include <string.h>
 #include <sys/mman.h>
 
-#include "private/bionic_constants.h"
-#include "private/bionic_defs.h"
+#include "platform/bionic/mte.h"
 #include "private/ScopedRWLock.h"
 #include "private/ScopedSignalBlocker.h"
+#include "private/bionic_constants.h"
+#include "private/bionic_defs.h"
 #include "pthread_internal.h"
 
 extern "C" __noreturn void _exit_with_stack_teardown(void*, size_t);
@@ -67,7 +68,7 @@
 }
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
-void pthread_exit(void* return_value) {
+__attribute__((no_sanitize("memtag"))) void pthread_exit(void* return_value) {
   // Call dtors for thread_local objects first.
   __cxa_thread_finalize();
 
@@ -138,6 +139,13 @@
   __notify_thread_exit_callbacks();
   __hwasan_thread_exit();
 
+#if defined(__aarch64__)
+  if (void* stack_mte_tls = thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE)) {
+    stack_mte_free_ringbuffer(reinterpret_cast<uintptr_t>(stack_mte_tls));
+  }
+#endif
+  // Everything below this line needs to be no_sanitize("memtag").
+
   if (old_state == THREAD_DETACHED && thread->mmap_size != 0) {
     // We need to free mapped space for detached threads when they exit.
     // That's not something we can do in C.
diff --git a/libc/bionic/pthread_internal.cpp b/libc/bionic/pthread_internal.cpp
index 3bfb899..bec9703 100644
--- a/libc/bionic/pthread_internal.cpp
+++ b/libc/bionic/pthread_internal.cpp
@@ -77,11 +77,6 @@
 }
 
 static void __pthread_internal_free(pthread_internal_t* thread) {
-#ifdef __aarch64__
-  if (void* stack_mte_tls = thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE)) {
-    stack_mte_free_ringbuffer(reinterpret_cast<uintptr_t>(stack_mte_tls));
-  }
-#endif
   if (thread->mmap_size != 0) {
     // Free mapped space, including thread stack and pthread_internal_t.
     munmap(thread->mmap_base, thread->mmap_size);
diff --git a/tests/Android.bp b/tests/Android.bp
index f5f3e31..d02e803 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -1199,6 +1199,35 @@
 }
 
 cc_test {
+    name: "memtag_stack_abi_test",
+    enabled: false,
+    // This test is not supported on host, so it does not use
+    // bionic_tests_defaults and is only enabled for arm64 below.
+    arch: {
+        arm64: {
+            enabled: true,
+        },
+    },
+    // We pass the flag through `ldflags:` rather than `sanitize:` so that the
+    // appropriate ELF note is generated while non-MTE devices stay supported.
+    // TODO(fmayer): also add a test that enables stack MTE for MTE devices,
+    // which would test for more bugs.
+    ldflags: ["-fsanitize=memtag-stack"],
+    // Turn off all other sanitizers from SANITIZE_TARGET.
+    sanitize: {
+        never: true,
+    },
+    shared_libs: [
+        "libbase",
+    ],
+    srcs: [
+        "memtag_stack_abi_test.cpp",
+    ],
+    header_libs: ["bionic_libc_platform_headers"],
+    test_suites: ["device-tests"],
+}
+
+cc_test {
     name: "bionic-stress-tests",
     defaults: [
         "bionic_tests_defaults",
diff --git a/tests/memtag_stack_abi_test.cpp b/tests/memtag_stack_abi_test.cpp
new file mode 100644
index 0000000..4725c8d
--- /dev/null
+++ b/tests/memtag_stack_abi_test.cpp
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <filesystem>
+#include <fstream>
+#include <iterator>
+#include <string>
+#include <thread>
+
+#include <dlfcn.h>
+#include <stdlib.h>
+
+#include <android-base/logging.h>
+#include <gtest/gtest.h>
+
+// Returns how many lines of /proc/self/maps contain "stack_mte_ring".
+static size_t NumberBuffers() {
+  std::ifstream maps("/proc/self/maps");
+  CHECK(maps.is_open());
+  size_t count = 0;
+  for (std::string entry; std::getline(maps, entry);) {
+    // Skip every maps line that does not mention the ring buffer name.
+    if (entry.find("stack_mte_ring") == std::string::npos) continue;
+    ++count;
+  }
+  return count;
+}
+
+static size_t NumberThreads() {  // Counts the entries of /proc/self/task.
+  std::filesystem::directory_iterator first("/proc/self/task");
+  return std::distance(first, std::filesystem::directory_iterator());
+}
+
+TEST(MemtagStackAbiTest, MainThread) {  // Expects one ring buffer mapping and one thread.
+#if defined(__BIONIC__) && defined(__aarch64__)
+  ASSERT_EQ(NumberBuffers(), 1U);
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+#else
+  GTEST_SKIP() << "requires bionic arm64";  // The checks only run on bionic + aarch64 builds.
+#endif
+}
+
+TEST(MemtagStackAbiTest, JoinableThread) {  // A joined thread must not leave a mapping behind.
+#if defined(__BIONIC__) && defined(__aarch64__)
+  ASSERT_EQ(NumberBuffers(), 1U);
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+  std::thread th([] {
+    ASSERT_EQ(NumberBuffers(), 2U);  // While the second thread runs, two mappings are expected.
+    ASSERT_EQ(NumberBuffers(), NumberThreads());
+  });
+  th.join();
+  ASSERT_EQ(NumberBuffers(), 1U);  // Back to the pre-thread count once the join has returned.
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+#else
+  GTEST_SKIP() << "requires bionic arm64";
+#endif
+}
+
+TEST(MemtagStackAbiTest, DetachedThread) {  // A detached thread must free its mapping on exit.
+#if defined(__BIONIC__) && defined(__aarch64__)
+  ASSERT_EQ(NumberBuffers(), 1U);
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+  std::thread th([] {
+    ASSERT_EQ(NumberBuffers(), 2U);
+    ASSERT_EQ(NumberBuffers(), NumberThreads());
+  });
+  th.detach();
+  // The ring buffer is freed before the exiting task leaves /proc/self/task, so wait for both.
+  for (int i = 0; (NumberBuffers() != 1U || NumberThreads() != 1U) && i < 3; ++i) {
+    sleep(1);
+  }
+  ASSERT_EQ(NumberBuffers(), 1U);
+  ASSERT_EQ(NumberBuffers(), NumberThreads());
+#else
+  GTEST_SKIP() << "requires bionic arm64";
+#endif
+}