[MTE] allocate ring buffer for stack history

Test: atest memtag_stack_dlopen_test
Bug: 309446520
Change-Id: Ibf477bcfb832c5eba0244e86cdac5517f054eb49
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 5bd4f16..a8d09eb 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -65,6 +65,7 @@
 }
 
 void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
+  tcb->thread()->bionic_tcb = tcb;
   tcb->thread()->bionic_tls = tls;
   tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;
 }
@@ -443,6 +444,14 @@
 
   ScopedReadLock locker(&g_thread_creation_lock);
 
+// This has to be done under g_thread_creation_lock or g_thread_list_lock to avoid racing with
+// __pthread_internal_remap_stack_with_mte.
+#ifdef __aarch64__
+  if (__libc_memtag_stack_abi) {
+    tcb->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, thread);
+  }
+#endif
+
   sigset64_t block_all_mask;
   sigfillset64(&block_all_mask);
   __rt_sigprocmask(SIG_SETMASK, &block_all_mask, &thread->start_mask, sizeof(thread->start_mask));