Reland^2 "[MTE] remap stacks with PROT_MTE when requested by dlopened library"

Also enable stack MTE if the main binary links in a library that needs it.

Otherwise the following is possible:

1. a binary doesn't require stack MTE, but links in libraries that use
   the stg (store allocation tag) instruction on the stack
2. that binary later dlopens a library that requires stack MTE, and our
   logic in dlopen remaps the stacks with MTE
3. the libraries from step 1 now hold tagged pointers whose tags were
   never stored in memory, so accesses through them fail the tag check

This reverts commit f53e91cc810be2a36377f3b7765f50c89f1f0046.

Reason for revert: Fixed problem detected in b/324568991

Test: atest memtag_stack_dlopen_test with MTE enabled
Test: check crash is gone on fullmte build
Change-Id: I4a93f6814a19683c3ea5fe1e6d455df5459d31e1
diff --git a/libc/bionic/libc_init_dynamic.cpp b/libc/bionic/libc_init_dynamic.cpp
index c61810e..1180a51 100644
--- a/libc/bionic/libc_init_dynamic.cpp
+++ b/libc/bionic/libc_init_dynamic.cpp
@@ -39,11 +39,12 @@
  *   all dynamic linking has been performed.
  */
 
+#include <elf.h>
 #include <stddef.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <stdint.h>
-#include <elf.h>
+#include "bionic/pthread_internal.h"
 #include "libc_init_common.h"
 
 #include "private/bionic_defs.h"
@@ -59,6 +60,11 @@
   extern int __cxa_atexit(void (*)(void *), void *, void *);
 };
 
+void memtag_stack_dlopen_callback() {
+  async_safe_format_log(ANDROID_LOG_INFO, "libc", "remapping stacks as PROT_MTE");
+  __pthread_internal_remap_stack_with_mte();
+}
+
 // Use an initializer so __libc_sysinfo will have a fallback implementation
 // while .preinit_array constructors run.
 #if defined(__i386__)
@@ -156,6 +162,10 @@
 
   __libc_init_mte_late();
 
+  // Go through the shared globals so the linker calls back into this libc: the static libc linked
+  // into the linker is a separate copy, so calling it directly would not affect the process.
+  __libc_shared_globals()->memtag_stack_dlopen_callback = memtag_stack_dlopen_callback;
+
   exit(slingshot(args.argc - __libc_shared_globals()->initial_linker_arg_count,
                  args.argv + __libc_shared_globals()->initial_linker_arg_count,
                  args.envp));