Revert "[MTE] allocate ring buffer for stack history"

This reverts commit 10d11ddfcb52122db52ac9f15b4884f663e9915d.

Reason for revert: b/348239278

Change-Id: I14a1a3177ecbf5d8cf44836dc790c501c102960a
diff --git a/libc/bionic/heap_tagging.cpp b/libc/bionic/heap_tagging.cpp
index 066ec32..cadab3c 100644
--- a/libc/bionic/heap_tagging.cpp
+++ b/libc/bionic/heap_tagging.cpp
@@ -53,8 +53,6 @@
   heap_tagging_level = __libc_shared_globals()->initial_heap_tagging_level;
 #endif
 
-  __libc_memtag_stack_abi = __libc_shared_globals()->initial_memtag_stack_abi;
-
   __libc_globals.mutate([](libc_globals* globals) {
     switch (heap_tagging_level) {
       case M_HEAP_TAGGING_LEVEL_TBI:
diff --git a/libc/bionic/libc_init_common.cpp b/libc/bionic/libc_init_common.cpp
index 939e4e1..c82c52e 100644
--- a/libc/bionic/libc_init_common.cpp
+++ b/libc/bionic/libc_init_common.cpp
@@ -58,7 +58,6 @@
 
 __LIBC_HIDDEN__ constinit WriteProtected<libc_globals> __libc_globals;
 __LIBC_HIDDEN__ constinit _Atomic(bool) __libc_memtag_stack;
-__LIBC_HIDDEN__ constinit bool __libc_memtag_stack_abi;
 
 // Not public, but well-known in the BSDs.
 __BIONIC_WEAK_VARIABLE_FOR_NATIVE_BRIDGE
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index ac97376..3da0a92 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -289,7 +289,11 @@
 
   // We can't short-circuit the environment override, as `stack` is still inherited from the
   // binary's settings.
-  get_environment_memtag_setting(&level);
+  if (get_environment_memtag_setting(&level)) {
+    if (level == M_HEAP_TAGGING_LEVEL_NONE || level == M_HEAP_TAGGING_LEVEL_TBI) {
+      *stack = false;
+    }
+  }
   return level;
 }
 
@@ -325,14 +329,13 @@
   bool memtag_stack = false;
   HeapTaggingLevel level =
       __get_tagging_level(memtag_dynamic_entries, phdr_start, phdr_ct, load_bias, &memtag_stack);
-  // initial_memtag_stack is used by the linker (in linker.cpp) to communicate than any library
-  // linked by this executable enables memtag-stack.
-  // memtag_stack is also set for static executables if they request memtag stack via the note,
-  // in which case it will differ from initial_memtag_stack.
-  if (__libc_shared_globals()->initial_memtag_stack || memtag_stack) {
+  // This is used by the linker (in linker.cpp) to communicate that any library linked by this
+  // executable enables memtag-stack.
+  if (__libc_shared_globals()->initial_memtag_stack) {
+    if (!memtag_stack) {
+      async_safe_format_log(ANDROID_LOG_INFO, "libc", "enabling PROT_MTE as requested by linker");
+    }
     memtag_stack = true;
-    __libc_shared_globals()->initial_memtag_stack_abi = true;
-    __get_bionic_tcb()->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, nullptr);
   }
   if (int64_t timed_upgrade = __get_memtag_upgrade_secs()) {
     if (level == M_HEAP_TAGGING_LEVEL_ASYNC) {
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index a8d09eb..5bd4f16 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -65,7 +65,6 @@
 }
 
 void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
-  tcb->thread()->bionic_tcb = tcb;
   tcb->thread()->bionic_tls = tls;
   tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;
 }
@@ -444,14 +443,6 @@
 
   ScopedReadLock locker(&g_thread_creation_lock);
 
-// This has to be done under g_thread_creation_lock or g_thread_list_lock to avoid racing with
-// __pthread_internal_remap_stack_with_mte.
-#ifdef __aarch64__
-  if (__libc_memtag_stack_abi) {
-    tcb->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, thread);
-  }
-#endif
-
   sigset64_t block_all_mask;
   sigfillset64(&block_all_mask);
   __rt_sigprocmask(SIG_SETMASK, &block_all_mask, &thread->start_mask, sizeof(thread->start_mask));
diff --git a/libc/bionic/pthread_internal.cpp b/libc/bionic/pthread_internal.cpp
index e8a8ba2..8b9573f 100644
--- a/libc/bionic/pthread_internal.cpp
+++ b/libc/bionic/pthread_internal.cpp
@@ -33,12 +33,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/mman.h>
-#include <sys/prctl.h>
 
 #include <async_safe/log.h>
 #include <bionic/reserved_signals.h>
 
-#include "bionic/tls_defines.h"
 #include "private/ErrnoRestorer.h"
 #include "private/ScopedRWLock.h"
 #include "private/bionic_futex.h"
@@ -73,21 +71,8 @@
     g_thread_list = thread->next;
   }
 }
-// N.B. that this is NOT the pagesize, but 4096. This is hardcoded in the codegen.
-// See
-// https://github.com/search?q=repo%3Allvm/llvm-project%20AArch64StackTagging%3A%3AinsertBaseTaggedPointer&type=code
-constexpr size_t kStackMteRingbufferSizeMultiplier = 4096;
 
 static void __pthread_internal_free(pthread_internal_t* thread) {
-#ifdef __aarch64__
-  if (void* stack_mte_tls = thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE)) {
-    size_t size =
-        kStackMteRingbufferSizeMultiplier * (reinterpret_cast<uintptr_t>(stack_mte_tls) >> 56ULL);
-    void* ptr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(stack_mte_tls) &
-                                        ((1ULL << 56ULL) - 1ULL));
-    munmap(ptr, size);
-  }
-#endif
   if (thread->mmap_size != 0) {
     // Free mapped space, including thread stack and pthread_internal_t.
     munmap(thread->mmap_base, thread->mmap_size);
@@ -191,70 +176,12 @@
   async_safe_fatal("stack not found in /proc/self/maps");
 }
 
-__LIBC_HIDDEN__ void* __allocate_stack_mte_ringbuffer(size_t n, pthread_internal_t* thread) {
-  if (n > 7) async_safe_fatal("error: invalid mte stack ring buffer size");
-  // Allocation needs to be aligned to 2*size to make the fancy code-gen work.
-  // So we allocate 3*size - pagesz bytes, which will always contain size bytes
-  // aligned to 2*size, and unmap the unneeded part.
-  // See
-  // https://github.com/search?q=repo%3Allvm/llvm-project%20AArch64StackTagging%3A%3AinsertBaseTaggedPointer&type=code
-  //
-  // In the worst case, we get an allocation that is one page past the properly
-  // aligned address, in which case we have to unmap the previous
-  // 2*size - pagesz bytes. In that case, we still have size properly aligned
-  // bytes left.
-  size_t size = (1 << n) * kStackMteRingbufferSizeMultiplier;
-  size_t pgsize = page_size();
-
-  size_t alloc_size = __BIONIC_ALIGN(3 * size - pgsize, pgsize);
-  void* allocation_ptr =
-      mmap(nullptr, alloc_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-  if (allocation_ptr == MAP_FAILED)
-    async_safe_fatal("error: failed to allocate stack mte ring buffer");
-  uintptr_t allocation = reinterpret_cast<uintptr_t>(allocation_ptr);
-
-  size_t alignment = 2 * size;
-  uintptr_t aligned_allocation = __BIONIC_ALIGN(allocation, alignment);
-  if (allocation != aligned_allocation) {
-    munmap(reinterpret_cast<void*>(allocation), aligned_allocation - allocation);
-  }
-  if (aligned_allocation + size != allocation + alloc_size) {
-    munmap(reinterpret_cast<void*>(aligned_allocation + size),
-           (allocation + alloc_size) - (aligned_allocation + size));
-  }
-
-  const char* name;
-  if (thread == nullptr) {
-    name = "stack_mte_ring:main";
-  } else {
-    // The kernel doesn't copy the name string, but this variable will last at least as long as the
-    // mapped area. We unmap the ring buffer before unmapping the rest of the thread storage.
-    auto& name_buffer = thread->stack_mte_ringbuffer_vma_name_buffer;
-    static_assert(arraysize(name_buffer) >= arraysize("stack_mte_ring:") + 11 + 1);
-    async_safe_format_buffer(name_buffer, arraysize(name_buffer), "stack_mte_ring:%d", thread->tid);
-    name = name_buffer;
-  }
-  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, reinterpret_cast<void*>(aligned_allocation), size, name);
-
-  // We store the size in the top byte of the pointer (which is ignored)
-  return reinterpret_cast<void*>(aligned_allocation | ((1ULL << n) << 56ULL));
-}
-
 bool __pthread_internal_remap_stack_with_mte() {
 #if defined(__aarch64__)
-  ScopedWriteLock creation_locker(&g_thread_creation_lock);
-  ScopedReadLock list_locker(&g_thread_list_lock);
-  // If process already uses memtag-stack ABI, we don't need to do anything.
-  if (__libc_memtag_stack_abi) return false;
-  __libc_memtag_stack_abi = true;
-
-  for (pthread_internal_t* t = g_thread_list; t != nullptr; t = t->next) {
-    if (t->terminating) continue;
-    t->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE) =
-        __allocate_stack_mte_ringbuffer(0, t->is_main() ? nullptr : t);
-  }
+  // If the process doesn't have MTE enabled, we don't need to do anything.
   if (!atomic_load(&__libc_globals->memtag)) return false;
-  if (atomic_exchange(&__libc_memtag_stack, true)) return false;
+  bool prev = atomic_exchange(&__libc_memtag_stack, true);
+  if (prev) return false;
   uintptr_t lo, hi;
   __find_main_stack_limits(&lo, &hi);
 
@@ -262,6 +189,8 @@
                PROT_READ | PROT_WRITE | PROT_MTE | PROT_GROWSDOWN)) {
     async_safe_fatal("error: failed to set PROT_MTE on main thread");
   }
+  ScopedWriteLock creation_locker(&g_thread_creation_lock);
+  ScopedReadLock list_locker(&g_thread_list_lock);
   for (pthread_internal_t* t = g_thread_list; t != nullptr; t = t->next) {
     if (t->terminating || t->is_main()) continue;
     if (mprotect(t->mmap_base_unguarded, t->mmap_size_unguarded,
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index b270a06..b0e9461 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -178,10 +178,6 @@
   bionic_tls* bionic_tls;
 
   int errno_value;
-
-  bionic_tcb* bionic_tcb;
-  char stack_mte_ringbuffer_vma_name_buffer[32];
-
   bool is_main() { return start_routine == nullptr; }
 };
 
@@ -213,7 +209,6 @@
 __LIBC_HIDDEN__ void __pthread_internal_remove(pthread_internal_t* thread);
 __LIBC_HIDDEN__ void __pthread_internal_remove_and_free(pthread_internal_t* thread);
 __LIBC_HIDDEN__ void __find_main_stack_limits(uintptr_t* low, uintptr_t* high);
-__LIBC_HIDDEN__ void* __allocate_stack_mte_ringbuffer(size_t n, pthread_internal_t* thread);
 
 static inline __always_inline bionic_tcb* __get_bionic_tcb() {
   return reinterpret_cast<bionic_tcb*>(&__get_tls()[MIN_TLS_SLOT]);
diff --git a/libc/platform/bionic/tls_defines.h b/libc/platform/bionic/tls_defines.h
index 06c6617..8fe8701 100644
--- a/libc/platform/bionic/tls_defines.h
+++ b/libc/platform/bionic/tls_defines.h
@@ -85,8 +85,7 @@
 // [1] "Addenda to, and Errata in, the ABI for the ARM Architecture". Section 3.
 // http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045e/IHI0045E_ABI_addenda.pdf
 
-#define MIN_TLS_SLOT (-3)  // update this value when reserving a slot
-#define TLS_SLOT_STACK_MTE (-3)
+#define MIN_TLS_SLOT (-2)  // update this value when reserving a slot
 #define TLS_SLOT_NATIVE_BRIDGE_GUEST_STATE (-2)
 #define TLS_SLOT_BIONIC_TLS     (-1)
 #define TLS_SLOT_DTV              0
diff --git a/libc/private/bionic_globals.h b/libc/private/bionic_globals.h
index a1bebda..0949056 100644
--- a/libc/private/bionic_globals.h
+++ b/libc/private/bionic_globals.h
@@ -76,23 +76,10 @@
 };
 
 __LIBC_HIDDEN__ extern WriteProtected<libc_globals> __libc_globals;
-// These cannot be in __libc_globals, because we cannot access the
+// This cannot be in __libc_globals, because we cannot access the
 // WriteProtected in a thread-safe way.
 // See b/328256432.
-//
-// __libc_memtag_stack says whether stack MTE is enabled on the process, i.e.
-// whether the stack pages are mapped with PROT_MTE. This is always false if
-// MTE is disabled for the process (i.e. libc_globals.memtag is false).
 __LIBC_HIDDEN__ extern _Atomic(bool) __libc_memtag_stack;
-// __libc_memtag_stack_abi says whether the process contains any code that was
-// compiled with memtag-stack. This is true even if the process does not have
-// MTE enabled (e.g. because it was overridden using MEMTAG_OPTIONS, or because
-// MTE is disabled for the device).
-// Code compiled with memtag-stack needs a stack history buffer in
-// TLS_SLOT_STACK_MTE, because the codegen will emit an unconditional
-// (to keep the code branchless) write to it.
-// Protected by g_heap_creation_lock.
-__LIBC_HIDDEN__ extern bool __libc_memtag_stack_abi;
 
 struct abort_msg_t;
 struct crash_detail_page_t;
@@ -146,9 +133,7 @@
   size_t scudo_stack_depot_size = 0;
 
   HeapTaggingLevel initial_heap_tagging_level = M_HEAP_TAGGING_LEVEL_NONE;
-  // See comments for __libc_memtag_stack / __libc_memtag_stack_abi above.
   bool initial_memtag_stack = false;
-  bool initial_memtag_stack_abi = false;
   int64_t heap_tagging_upgrade_timer_sec = 0;
 
   void (*memtag_stack_dlopen_callback)() = nullptr;
diff --git a/linker/Android.bp b/linker/Android.bp
index 143dbd5..da57f7a 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -360,7 +360,6 @@
 
     sanitize: {
         hwaddress: false,
-        memtag_stack: false,
     },
 
     static_libs: [
diff --git a/tests/libs/testbinary_is_stack_mte.cpp b/tests/libs/testbinary_is_stack_mte.cpp
index 0cdc466..d8074d5 100644
--- a/tests/libs/testbinary_is_stack_mte.cpp
+++ b/tests/libs/testbinary_is_stack_mte.cpp
@@ -36,9 +36,7 @@
 #if defined(__BIONIC__) && defined(__aarch64__)
 
 extern "C" int main(int, char**) {
-  void* mte_tls_ptr = mte_tls();
-  *reinterpret_cast<uintptr_t*>(mte_tls_ptr) = 1;
-  int ret = is_stack_mte_on() && mte_tls_ptr != nullptr ? 0 : 1;
+  int ret = is_stack_mte_on() ? 0 : 1;
   printf("RAN\n");
   return ret;
 }
diff --git a/tests/libs/testbinary_is_stack_mte_after_dlopen.cpp b/tests/libs/testbinary_is_stack_mte_after_dlopen.cpp
index 35af8f4..937ac4c 100644
--- a/tests/libs/testbinary_is_stack_mte_after_dlopen.cpp
+++ b/tests/libs/testbinary_is_stack_mte_after_dlopen.cpp
@@ -96,7 +96,6 @@
   State state = kInit;
 
   bool is_early_thread_mte_on = false;
-  void* early_thread_mte_tls = nullptr;
   std::thread early_th([&] {
     {
       std::lock_guard lk(m);
@@ -108,8 +107,6 @@
       cv.wait(lk, [&] { return state == kStackRemapped; });
     }
     is_early_thread_mte_on = is_stack_mte_on();
-    early_thread_mte_tls = mte_tls();
-    *reinterpret_cast<uintptr_t*>(early_thread_mte_tls) = 1;
   });
   {
     std::unique_lock lk(m);
@@ -123,7 +120,6 @@
   cv.notify_one();
   CHECK(handle != nullptr);
   CHECK(is_stack_mte_on());
-  CHECK(mte_tls() != nullptr);
 
   bool new_stack_page_mte_on = false;
   uintptr_t low;
@@ -133,18 +129,11 @@
   CHECK(new_stack_page_mte_on);
 
   bool is_late_thread_mte_on = false;
-  void* late_thread_mte_tls = nullptr;
-  std::thread late_th([&] {
-    is_late_thread_mte_on = is_stack_mte_on();
-    late_thread_mte_tls = mte_tls();
-    *reinterpret_cast<uintptr_t*>(late_thread_mte_tls) = 1;
-  });
+  std::thread late_th([&] { is_late_thread_mte_on = is_stack_mte_on(); });
   late_th.join();
   early_th.join();
   CHECK(is_late_thread_mte_on);
   CHECK(is_early_thread_mte_on);
-  CHECK(late_thread_mte_tls != nullptr);
-  CHECK(early_thread_mte_tls != nullptr);
   printf("RAN\n");
   return 0;
 }
diff --git a/tests/mte_utils.h b/tests/mte_utils.h
index 020faec..6e8385c 100644
--- a/tests/mte_utils.h
+++ b/tests/mte_utils.h
@@ -40,10 +40,4 @@
   return p == p_cpy;
 }
 
-static void* mte_tls() {
-  void** dst;
-  __asm__("mrs %0, TPIDR_EL0" : "=r"(dst) :);
-  return dst[-3];
-}
-
 #endif
diff --git a/tests/struct_layout_test.cpp b/tests/struct_layout_test.cpp
index 1f04344..0123ed9 100644
--- a/tests/struct_layout_test.cpp
+++ b/tests/struct_layout_test.cpp
@@ -30,7 +30,7 @@
 #define CHECK_OFFSET(name, field, offset) \
     check_offset(#name, #field, offsetof(name, field), offset);
 #ifdef __LP64__
-  CHECK_SIZE(pthread_internal_t, 816);
+  CHECK_SIZE(pthread_internal_t, 776);
   CHECK_OFFSET(pthread_internal_t, next, 0);
   CHECK_OFFSET(pthread_internal_t, prev, 8);
   CHECK_OFFSET(pthread_internal_t, tid, 16);
@@ -55,8 +55,6 @@
   CHECK_OFFSET(pthread_internal_t, dlerror_buffer, 248);
   CHECK_OFFSET(pthread_internal_t, bionic_tls, 760);
   CHECK_OFFSET(pthread_internal_t, errno_value, 768);
-  CHECK_OFFSET(pthread_internal_t, bionic_tcb, 776);
-  CHECK_OFFSET(pthread_internal_t, stack_mte_ringbuffer_vma_name_buffer, 784);
   CHECK_SIZE(bionic_tls, 12200);
   CHECK_OFFSET(bionic_tls, key_data, 0);
   CHECK_OFFSET(bionic_tls, locale, 2080);
@@ -74,7 +72,7 @@
   CHECK_OFFSET(bionic_tls, bionic_systrace_disabled, 12193);
   CHECK_OFFSET(bionic_tls, padding, 12194);
 #else
-  CHECK_SIZE(pthread_internal_t, 704);
+  CHECK_SIZE(pthread_internal_t, 668);
   CHECK_OFFSET(pthread_internal_t, next, 0);
   CHECK_OFFSET(pthread_internal_t, prev, 4);
   CHECK_OFFSET(pthread_internal_t, tid, 8);
@@ -99,8 +97,6 @@
   CHECK_OFFSET(pthread_internal_t, dlerror_buffer, 148);
   CHECK_OFFSET(pthread_internal_t, bionic_tls, 660);
   CHECK_OFFSET(pthread_internal_t, errno_value, 664);
-  CHECK_OFFSET(pthread_internal_t, bionic_tcb, 668);
-  CHECK_OFFSET(pthread_internal_t, stack_mte_ringbuffer_vma_name_buffer, 672);
   CHECK_SIZE(bionic_tls, 11080);
   CHECK_OFFSET(bionic_tls, key_data, 0);
   CHECK_OFFSET(bionic_tls, locale, 1040);