Reland^2 "[MTE] remap stacks with PROT_MTE when requested by dlopened library"

Also enable stack MTE if the main binary links in a library that needs it.

Otherwise the following is possible:

1. a binary doesn't require stack MTE, but links in libraries that use
   stg on the stack
2. that binary later dlopens a library that requires stack MTE, and our
   logic in dlopen remaps the stacks with MTE
3. the libraries from step 1 now hold tagged pointers whose tags were
   never stored in memory, so tag checks on those pointers fail

This reverts commit f53e91cc810be2a36377f3b7765f50c89f1f0046.

Reason for revert: Fixed problem detected in b/324568991

Test: atest memtag_stack_dlopen_test with MTE enabled
Test: check crash is gone on fullmte build
Change-Id: I4a93f6814a19683c3ea5fe1e6d455df5459d31e1
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 3b9e6a4..091f711 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -178,6 +178,7 @@
   bionic_tls* bionic_tls;
 
   int errno_value;
+  bool is_main() { return start_routine == nullptr; }  // NOTE(review): presumably only the main thread has no start_routine - confirm
 };
 
 struct ThreadMapping {
@@ -207,6 +208,7 @@
 __LIBC_HIDDEN__ pid_t __pthread_internal_gettid(pthread_t pthread_id, const char* caller);
 __LIBC_HIDDEN__ void __pthread_internal_remove(pthread_internal_t* thread);
 __LIBC_HIDDEN__ void __pthread_internal_remove_and_free(pthread_internal_t* thread);
+__LIBC_HIDDEN__ void __find_main_stack_limits(uintptr_t* low, uintptr_t* high);
 
 static inline __always_inline bionic_tcb* __get_bionic_tcb() {
   return reinterpret_cast<bionic_tcb*>(&__get_tls()[MIN_TLS_SLOT]);
@@ -266,6 +268,9 @@
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_child();
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_parent();
 
+// Remap the stacks of all existing threads, and of threads launched subsequently, with PROT_MTE.
+__LIBC_HIDDEN__ void __pthread_internal_remap_stack_with_mte();
+
 extern "C" bool android_run_on_all_threads(bool (*func)(void*), void* arg);
 
 extern pthread_rwlock_t g_thread_creation_lock;