Merge changes from topic "loader_crt_pad_segment" into main

* changes:
  bionic: loader: Extend GNU_RELRO protection
  bionic: loader: Extend LOAD segment VMAs
diff --git a/benchmarks/atomic_benchmark.cpp b/benchmarks/atomic_benchmark.cpp
index 487b71c..e3a6fb2 100644
--- a/benchmarks/atomic_benchmark.cpp
+++ b/benchmarks/atomic_benchmark.cpp
@@ -37,11 +37,9 @@
 // We assume that the compiler is not smart enough to optimize away fences in a single-threaded
 // program. If that changes, we'll need to add a second thread.
 
-// We're going to use `++` on this volatile in all the tests. This is
-// fine, because we're only using `volatile` in the "don't optimize this out"
-// sense, and don't care whether the increment is atomic or not.
-#pragma clang diagnostic ignored "-Wdeprecated-volatile"
+// We increment the counter this way to avoid -Wdeprecated-volatile warnings.
 static volatile unsigned counter;
+#define INC_COUNTER() counter = counter + 1
 
 std::atomic<int> test_loc(0);
 
@@ -51,7 +49,7 @@
 
 void BM_atomic_empty(benchmark::State& state) {
   while (state.KeepRunning()) {
-    ++counter;
+    INC_COUNTER();
   }
 }
 BIONIC_BENCHMARK(BM_atomic_empty);
@@ -60,7 +58,7 @@
   unsigned result = 0;
   while (state.KeepRunning()) {
     result += test_loc.load(std::memory_order_relaxed);
-    ++counter;
+    INC_COUNTER();
   }
   sink = result;
 }
@@ -70,7 +68,7 @@
   unsigned result = 0;
   while (state.KeepRunning()) {
     result += test_loc.load(std::memory_order_acquire);
-    ++counter;
+    INC_COUNTER();
   }
   sink = result;
 }
@@ -80,7 +78,7 @@
   int i = counter;
   while (state.KeepRunning()) {
     test_loc.store(++i, std::memory_order_release);
-    ++counter;
+    INC_COUNTER();
   }
 }
 BIONIC_BENCHMARK(BM_atomic_store_release);
@@ -89,7 +87,7 @@
   int i = counter;
   while (state.KeepRunning()) {
     test_loc.store(++i, std::memory_order_seq_cst);
-    ++counter;
+    INC_COUNTER();
   }
 }
 BIONIC_BENCHMARK(BM_atomic_store_seq_cst);
@@ -98,7 +96,7 @@
   unsigned result = 0;
   while (state.KeepRunning()) {
     result += test_loc.fetch_add(1, std::memory_order_relaxed);
-    ++counter;
+    INC_COUNTER();
   }
   sink = result;
 }
@@ -108,7 +106,7 @@
   unsigned result = 0;
   while (state.KeepRunning()) {
     result += test_loc.fetch_add(1, std::memory_order_seq_cst);
-    ++counter;
+    INC_COUNTER();
   }
   sink = result;
 }
@@ -122,7 +120,7 @@
   while (state.KeepRunning()) {
     result += test_loc.load(std::memory_order_relaxed);
     std::atomic_thread_fence(std::memory_order_acquire);
-    ++counter;
+    INC_COUNTER();
   }
   sink = result;
 }
@@ -133,7 +131,7 @@
   while (state.KeepRunning()) {
     result += test_loc.load(std::memory_order_relaxed);
     std::atomic_thread_fence(std::memory_order_seq_cst);
-    ++counter;
+    INC_COUNTER();
   }
   sink = result;
 }
@@ -146,7 +144,8 @@
   while (state.KeepRunning()) {
     {
       std::lock_guard<std::mutex> _(mtx);
-      result += ++counter;
+      INC_COUNTER();
+      result += counter;
     }
   }
   sink = result;
diff --git a/libc/Android.bp b/libc/Android.bp
index 807073a..c7d2923 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -833,12 +833,117 @@
 cc_library_static {
     defaults: ["libc_defaults"],
     srcs: [
+        "bionic/NetdClientDispatch.cpp",
+        "bionic/__bionic_get_shell_path.cpp",
+        "bionic/__cmsg_nxthdr.cpp",
+        "bionic/__cxa_thread_atexit_impl.cpp",
+        "bionic/__errno.cpp",
+        "bionic/__gnu_basename.cpp",
+        "bionic/__libc_current_sigrtmax.cpp",
+        "bionic/__libc_current_sigrtmin.cpp",
+        "bionic/abort.cpp",
+        "bionic/accept.cpp",
+        "bionic/access.cpp",
         "bionic/android_set_abort_message.cpp",
         "bionic/android_unsafe_frame_pointer_chase.cpp",
+        "bionic/arpa_inet.cpp",
+        "bionic/assert.cpp",
         "bionic/atexit.cpp",
+        "bionic/atof.cpp",
+        "bionic/bionic_allocator.cpp",
+        "bionic/bionic_arc4random.cpp",
         "bionic/bionic_elf_tls.cpp",
-        "bionic/__cxa_thread_atexit_impl.cpp",
+        "bionic/bionic_futex.cpp",
+        "bionic/bionic_netlink.cpp",
+        "bionic/bionic_systrace.cpp",
+        "bionic/bionic_time_conversions.cpp",
+        "bionic/brk.cpp",
+        "bionic/c16rtomb.cpp",
+        "bionic/c32rtomb.cpp",
+        "bionic/chmod.cpp",
+        "bionic/chown.cpp",
+        "bionic/clearenv.cpp",
+        "bionic/clock.cpp",
+        "bionic/clock_getcpuclockid.cpp",
+        "bionic/clock_nanosleep.cpp",
+        "bionic/clone.cpp",
+        "bionic/ctype.cpp",
+        "bionic/dirent.cpp",
+        "bionic/dup.cpp",
+        "bionic/environ.cpp",
+        "bionic/error.cpp",
+        "bionic/eventfd.cpp",
+        "bionic/exec.cpp",
+        "bionic/execinfo.cpp",
+        "bionic/faccessat.cpp",
+        "bionic/fchmod.cpp",
+        "bionic/fchmodat.cpp",
+        "bionic/fcntl.cpp",
+        "bionic/fdsan.cpp",
+        "bionic/fdtrack.cpp",
+        "bionic/ffs.cpp",
+        "bionic/fgetxattr.cpp",
+        "bionic/flistxattr.cpp",
         "bionic/fork.cpp",
+        "bionic/fpclassify.cpp",
+        "bionic/fsetxattr.cpp",
+        "bionic/ftruncate.cpp",
+        "bionic/ftw.cpp",
+        "bionic/futimens.cpp",
+        "bionic/getcwd.cpp",
+        "bionic/getdomainname.cpp",
+        "bionic/getentropy.cpp",
+        "bionic/gethostname.cpp",
+        "bionic/getloadavg.cpp",
+        "bionic/getpagesize.cpp",
+        "bionic/getpgrp.cpp",
+        "bionic/getpid.cpp",
+        "bionic/getpriority.cpp",
+        "bionic/gettid.cpp",
+        "bionic/get_device_api_level.cpp",
+        "bionic/grp_pwd.cpp",
+        "bionic/grp_pwd_file.cpp",
+        "bionic/heap_zero_init.cpp",
+        "bionic/iconv.cpp",
+        "bionic/icu_wrappers.cpp",
+        "bionic/ifaddrs.cpp",
+        "bionic/inotify_init.cpp",
+        "bionic/ioctl.cpp",
+        "bionic/killpg.cpp",
+        "bionic/langinfo.cpp",
+        "bionic/lchown.cpp",
+        "bionic/lfs64_support.cpp",
+        "bionic/libc_init_common.cpp",
+        "bionic/libgen.cpp",
+        "bionic/link.cpp",
+        "bionic/locale.cpp",
+        "bionic/lockf.cpp",
+        "bionic/lstat.cpp",
+        "bionic/mblen.cpp",
+        "bionic/mbrtoc16.cpp",
+        "bionic/mbrtoc32.cpp",
+        "bionic/mempcpy.cpp",
+        "bionic/memset_explicit.cpp",
+        "bionic/mkdir.cpp",
+        "bionic/mkfifo.cpp",
+        "bionic/mknod.cpp",
+        "bionic/mntent.cpp",
+        "bionic/mremap.cpp",
+        "bionic/net_if.cpp",
+        "bionic/netdb.cpp",
+        "bionic/netinet_in.cpp",
+        "bionic/nl_types.cpp",
+        "bionic/open.cpp",
+        "bionic/pathconf.cpp",
+        "bionic/pause.cpp",
+        "bionic/pidfd.cpp",
+        "bionic/pipe.cpp",
+        "bionic/poll.cpp",
+        "bionic/posix_fadvise.cpp",
+        "bionic/posix_fallocate.cpp",
+        "bionic/posix_madvise.cpp",
+        "bionic/posix_timers.cpp",
+        "bionic/preadv_pwritev.cpp",
         "bionic/pthread_atfork.cpp",
         "bionic/pthread_attr.cpp",
         "bionic/pthread_barrier.cpp",
@@ -862,10 +967,82 @@
         "bionic/pthread_setname_np.cpp",
         "bionic/pthread_setschedparam.cpp",
         "bionic/pthread_spinlock.cpp",
+        "bionic/ptrace.cpp",
+        "bionic/pty.cpp",
+        "bionic/raise.cpp",
+        "bionic/rand.cpp",
+        "bionic/readlink.cpp",
+        "bionic/realpath.cpp",
+        "bionic/reboot.cpp",
+        "bionic/recv.cpp",
+        "bionic/recvmsg.cpp",
+        "bionic/rename.cpp",
+        "bionic/rmdir.cpp",
+        "bionic/scandir.cpp",
+        "bionic/sched_getaffinity.cpp",
+        "bionic/sched_getcpu.cpp",
+        "bionic/semaphore.cpp",
+        "bionic/send.cpp",
+        "bionic/setegid.cpp",
+        "bionic/seteuid.cpp",
         "bionic/setjmp_cookie.cpp",
+        "bionic/setpgrp.cpp",
+        "bionic/sigaction.cpp",
+        "bionic/signal.cpp",
+        "bionic/sigprocmask.cpp",
+        "bionic/sleep.cpp",
+        "bionic/socketpair.cpp",
+        "bionic/spawn.cpp",
+        "bionic/stat.cpp",
+        "bionic/stdlib_l.cpp",
+        "bionic/strerror.cpp",
+        "bionic/string_l.cpp",
+        "bionic/strings_l.cpp",
+        "bionic/strsignal.cpp",
+        "bionic/strtol.cpp",
+        "bionic/strtold.cpp",
+        "bionic/swab.cpp",
+        "bionic/symlink.cpp",
+        "bionic/sync_file_range.cpp",
         "bionic/sysconf.cpp",
+        "bionic/sys_epoll.cpp",
+        "bionic/sys_msg.cpp",
+        "bionic/sys_sem.cpp",
+        "bionic/sys_shm.cpp",
+        "bionic/sys_signalfd.cpp",
+        "bionic/sys_statfs.cpp",
+        "bionic/sys_statvfs.cpp",
         "bionic/sys_thread_properties.cpp",
+        "bionic/sys_time.cpp",
+        "bionic/sysinfo.cpp",
+        "bionic/syslog.cpp",
+        "bionic/system.cpp",
+        "bionic/system_property_api.cpp",
+        "bionic/system_property_set.cpp",
+        "bionic/tdestroy.cpp",
+        "bionic/termios.cpp",
+        "bionic/thread_private.cpp",
+        "bionic/threads.cpp",
+        "bionic/time.cpp",
+        "bionic/time_l.cpp",
+        "bionic/tmpfile.cpp",
+        "bionic/umount.cpp",
+        "bionic/unlink.cpp",
+        "bionic/usleep.cpp",
+        "bionic/utmp.cpp",
         "bionic/vdso.cpp",
+        "bionic/wait.cpp",
+        "bionic/wchar.cpp",
+        "bionic/wchar_l.cpp",
+        "bionic/wcstod.cpp",
+        "bionic/wctype.cpp",
+        "bionic/wcwidth.cpp",
+        "bionic/wmempcpy.cpp",
+
+        // TODO: why isn't this in a static-libc-only module?
+        // This contains a weak stub implementation of __find_icu_symbol for wctype.cpp,
+        // which will be overridden by the actual one in libc.so.
+        "bionic/icu_static.cpp",
     ],
 
     arch: {
@@ -1074,6 +1251,20 @@
         },
     },
 
+    // TODO: move to libc/bionic/legacy_32_bit_support.cpp or #if __LP64__ instead.
+    multilib: {
+        lib32: {
+            srcs: ["bionic/mmap.cpp"],
+        },
+    },
+
+    local_include_dirs: ["stdio"],
+    generated_headers: ["generated_android_ids"],
+
+    whole_static_libs: [
+        "libsystemproperties",
+    ],
+
     cppflags: ["-Wold-style-cast"],
     include_dirs: ["bionic/libstdc++/include"],
     name: "libc_bionic",
@@ -1088,208 +1279,6 @@
 }
 
 // ========================================================
-// libc_bionic_ndk.a- The portions of libc_bionic that can
-// be safely used in libc_ndk.a (no troublesome global data
-// or constructors).
-// ========================================================
-cc_library_static {
-    defaults: ["libc_defaults"],
-    srcs: [
-        "bionic/NetdClientDispatch.cpp",
-        "bionic/__bionic_get_shell_path.cpp",
-        "bionic/__cmsg_nxthdr.cpp",
-        "bionic/__errno.cpp",
-        "bionic/__gnu_basename.cpp",
-        "bionic/__libc_current_sigrtmax.cpp",
-        "bionic/__libc_current_sigrtmin.cpp",
-        "bionic/abort.cpp",
-        "bionic/accept.cpp",
-        "bionic/access.cpp",
-        "bionic/arpa_inet.cpp",
-        "bionic/assert.cpp",
-        "bionic/atof.cpp",
-        "bionic/bionic_allocator.cpp",
-        "bionic/bionic_arc4random.cpp",
-        "bionic/bionic_futex.cpp",
-        "bionic/bionic_netlink.cpp",
-        "bionic/bionic_systrace.cpp",
-        "bionic/bionic_time_conversions.cpp",
-        "bionic/brk.cpp",
-        "bionic/c16rtomb.cpp",
-        "bionic/c32rtomb.cpp",
-        "bionic/chmod.cpp",
-        "bionic/chown.cpp",
-        "bionic/clearenv.cpp",
-        "bionic/clock.cpp",
-        "bionic/clock_getcpuclockid.cpp",
-        "bionic/clock_nanosleep.cpp",
-        "bionic/clone.cpp",
-        "bionic/ctype.cpp",
-        "bionic/dirent.cpp",
-        "bionic/dup.cpp",
-        "bionic/environ.cpp",
-        "bionic/error.cpp",
-        "bionic/eventfd.cpp",
-        "bionic/exec.cpp",
-        "bionic/execinfo.cpp",
-        "bionic/faccessat.cpp",
-        "bionic/fchmod.cpp",
-        "bionic/fchmodat.cpp",
-        "bionic/fcntl.cpp",
-        "bionic/fdsan.cpp",
-        "bionic/fdtrack.cpp",
-        "bionic/ffs.cpp",
-        "bionic/fgetxattr.cpp",
-        "bionic/flistxattr.cpp",
-        "bionic/fpclassify.cpp",
-        "bionic/fsetxattr.cpp",
-        "bionic/ftruncate.cpp",
-        "bionic/ftw.cpp",
-        "bionic/futimens.cpp",
-        "bionic/getcwd.cpp",
-        "bionic/getdomainname.cpp",
-        "bionic/getentropy.cpp",
-        "bionic/gethostname.cpp",
-        "bionic/getloadavg.cpp",
-        "bionic/getpagesize.cpp",
-        "bionic/getpgrp.cpp",
-        "bionic/getpid.cpp",
-        "bionic/getpriority.cpp",
-        "bionic/gettid.cpp",
-        "bionic/get_device_api_level.cpp",
-        "bionic/grp_pwd.cpp",
-        "bionic/grp_pwd_file.cpp",
-        "bionic/heap_zero_init.cpp",
-        "bionic/iconv.cpp",
-        "bionic/icu_wrappers.cpp",
-        "bionic/ifaddrs.cpp",
-        "bionic/inotify_init.cpp",
-        "bionic/ioctl.cpp",
-        "bionic/killpg.cpp",
-        "bionic/langinfo.cpp",
-        "bionic/lchown.cpp",
-        "bionic/lfs64_support.cpp",
-        "bionic/libc_init_common.cpp",
-        "bionic/libgen.cpp",
-        "bionic/link.cpp",
-        "bionic/locale.cpp",
-        "bionic/lockf.cpp",
-        "bionic/lstat.cpp",
-        "bionic/mblen.cpp",
-        "bionic/mbrtoc16.cpp",
-        "bionic/mbrtoc32.cpp",
-        "bionic/mempcpy.cpp",
-        "bionic/memset_explicit.cpp",
-        "bionic/mkdir.cpp",
-        "bionic/mkfifo.cpp",
-        "bionic/mknod.cpp",
-        "bionic/mntent.cpp",
-        "bionic/mremap.cpp",
-        "bionic/net_if.cpp",
-        "bionic/netdb.cpp",
-        "bionic/netinet_in.cpp",
-        "bionic/nl_types.cpp",
-        "bionic/open.cpp",
-        "bionic/pathconf.cpp",
-        "bionic/pause.cpp",
-        "bionic/pidfd.cpp",
-        "bionic/pipe.cpp",
-        "bionic/poll.cpp",
-        "bionic/posix_fadvise.cpp",
-        "bionic/posix_fallocate.cpp",
-        "bionic/posix_madvise.cpp",
-        "bionic/posix_timers.cpp",
-        "bionic/preadv_pwritev.cpp",
-        "bionic/ptrace.cpp",
-        "bionic/pty.cpp",
-        "bionic/raise.cpp",
-        "bionic/rand.cpp",
-        "bionic/readlink.cpp",
-        "bionic/realpath.cpp",
-        "bionic/reboot.cpp",
-        "bionic/recv.cpp",
-        "bionic/recvmsg.cpp",
-        "bionic/rename.cpp",
-        "bionic/rmdir.cpp",
-        "bionic/scandir.cpp",
-        "bionic/sched_getaffinity.cpp",
-        "bionic/sched_getcpu.cpp",
-        "bionic/semaphore.cpp",
-        "bionic/send.cpp",
-        "bionic/setegid.cpp",
-        "bionic/seteuid.cpp",
-        "bionic/setpgrp.cpp",
-        "bionic/sigaction.cpp",
-        "bionic/signal.cpp",
-        "bionic/sigprocmask.cpp",
-        "bionic/sleep.cpp",
-        "bionic/socketpair.cpp",
-        "bionic/spawn.cpp",
-        "bionic/stat.cpp",
-        "bionic/stdlib_l.cpp",
-        "bionic/strerror.cpp",
-        "bionic/string_l.cpp",
-        "bionic/strings_l.cpp",
-        "bionic/strsignal.cpp",
-        "bionic/strtol.cpp",
-        "bionic/strtold.cpp",
-        "bionic/swab.cpp",
-        "bionic/symlink.cpp",
-        "bionic/sync_file_range.cpp",
-        "bionic/sys_epoll.cpp",
-        "bionic/sys_msg.cpp",
-        "bionic/sys_sem.cpp",
-        "bionic/sys_shm.cpp",
-        "bionic/sys_signalfd.cpp",
-        "bionic/sys_statfs.cpp",
-        "bionic/sys_statvfs.cpp",
-        "bionic/sys_time.cpp",
-        "bionic/sysinfo.cpp",
-        "bionic/syslog.cpp",
-        "bionic/system.cpp",
-        "bionic/system_property_api.cpp",
-        "bionic/system_property_set.cpp",
-        "bionic/tdestroy.cpp",
-        "bionic/termios.cpp",
-        "bionic/thread_private.cpp",
-        "bionic/threads.cpp",
-        "bionic/time.cpp",
-        "bionic/time_l.cpp",
-        "bionic/tmpfile.cpp",
-        "bionic/umount.cpp",
-        "bionic/unlink.cpp",
-        "bionic/usleep.cpp",
-        "bionic/utmp.cpp",
-        "bionic/wait.cpp",
-        "bionic/wchar.cpp",
-        "bionic/wchar_l.cpp",
-        "bionic/wcstod.cpp",
-        "bionic/wctype.cpp",
-        "bionic/wcwidth.cpp",
-        "bionic/wmempcpy.cpp",
-
-        // This contains a weak stub implementation of __find_icu_symbol for wctype.cpp,
-        // which will be overridden by the actual one in libc.so.
-        "bionic/icu_static.cpp",
-    ],
-
-    multilib: {
-        lib32: {
-            // LP32 cruft
-            srcs: ["bionic/mmap.cpp"],
-        },
-    },
-    whole_static_libs: [
-        "libsystemproperties",
-    ],
-    cppflags: ["-Wold-style-cast"],
-    local_include_dirs: ["stdio"],
-    include_dirs: ["bionic/libstdc++/include"],
-    name: "libc_bionic_ndk",
-    generated_headers: ["generated_android_ids"],
-}
-
-// ========================================================
 // libc_syscalls.a
 // ========================================================
 
@@ -1393,7 +1382,6 @@
         "libarm-optimized-routines-string",
         "libasync_safe",
         "libc_bionic",
-        "libc_bionic_ndk",
         "libc_bootstrap",
         "libc_dns",
         "libc_fortify",
diff --git a/libc/bionic/heap_tagging.cpp b/libc/bionic/heap_tagging.cpp
index 0c1e506..48ec955 100644
--- a/libc/bionic/heap_tagging.cpp
+++ b/libc/bionic/heap_tagging.cpp
@@ -57,7 +57,6 @@
         break;
       case M_HEAP_TAGGING_LEVEL_SYNC:
       case M_HEAP_TAGGING_LEVEL_ASYNC:
-        atomic_store(&globals->memtag, true);
         atomic_store(&globals->memtag_stack, __libc_shared_globals()->initial_memtag_stack);
         break;
       default:
@@ -114,7 +113,6 @@
           globals->heap_pointer_tag = static_cast<uintptr_t>(0xffull << UNTAG_SHIFT);
         }
         atomic_store(&globals->memtag_stack, false);
-        atomic_store(&globals->memtag, false);
       });
 
       if (heap_tagging_level != M_HEAP_TAGGING_LEVEL_TBI) {
diff --git a/libc/bionic/libc_init_dynamic.cpp b/libc/bionic/libc_init_dynamic.cpp
index 295484b..c61810e 100644
--- a/libc/bionic/libc_init_dynamic.cpp
+++ b/libc/bionic/libc_init_dynamic.cpp
@@ -39,12 +39,11 @@
  *   all dynamic linking has been performed.
  */
 
-#include <elf.h>
 #include <stddef.h>
-#include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include "bionic/pthread_internal.h"
+#include <stdint.h>
+#include <elf.h>
 #include "libc_init_common.h"
 
 #include "private/bionic_defs.h"
@@ -60,10 +59,6 @@
   extern int __cxa_atexit(void (*)(void *), void *, void *);
 };
 
-void memtag_stack_dlopen_callback() {
-  __pthread_internal_remap_stack_with_mte();
-}
-
 // Use an initializer so __libc_sysinfo will have a fallback implementation
 // while .preinit_array constructors run.
 #if defined(__i386__)
@@ -161,10 +156,6 @@
 
   __libc_init_mte_late();
 
-  // This roundabout way is needed so we don't use the static libc linked into the linker, which
-  // will not affect the process.
-  __libc_shared_globals()->memtag_stack_dlopen_callback = memtag_stack_dlopen_callback;
-
   exit(slingshot(args.argc - __libc_shared_globals()->initial_linker_arg_count,
                  args.argv + __libc_shared_globals()->initial_linker_arg_count,
                  args.envp));
diff --git a/libc/bionic/pthread_attr.cpp b/libc/bionic/pthread_attr.cpp
index f6c0401..de4cc9e 100644
--- a/libc/bionic/pthread_attr.cpp
+++ b/libc/bionic/pthread_attr.cpp
@@ -155,6 +155,36 @@
   return 0;
 }
 
+static uintptr_t __get_main_stack_startstack() {
+  FILE* fp = fopen("/proc/self/stat", "re");
+  if (fp == nullptr) {
+    async_safe_fatal("couldn't open /proc/self/stat: %m");
+  }
+
+  char line[BUFSIZ];
+  if (fgets(line, sizeof(line), fp) == nullptr) {
+    async_safe_fatal("couldn't read /proc/self/stat: %m");
+  }
+
+  fclose(fp);
+
+  // See man 5 proc. There's no reason comm can't contain ' ' or ')',
+  // so we search backwards for the end of it. We're looking for this field:
+  //
+  //  startstack %lu (28) The address of the start (i.e., bottom) of the stack.
+  uintptr_t startstack = 0;
+  const char* end_of_comm = strrchr(line, ')');
+  if (sscanf(end_of_comm + 1, " %*c "
+             "%*d %*d %*d %*d %*d "
+             "%*u %*u %*u %*u %*u %*u %*u "
+             "%*d %*d %*d %*d %*d %*d "
+             "%*u %*u %*d %*u %*u %*u %" SCNuPTR, &startstack) != 1) {
+    async_safe_fatal("couldn't parse /proc/self/stat");
+  }
+
+  return startstack;
+}
+
 static int __pthread_attr_getstack_main_thread(void** stack_base, size_t* stack_size) {
   ErrnoRestorer errno_restorer;
 
@@ -168,11 +198,28 @@
   if (stack_limit.rlim_cur == RLIM_INFINITY) {
     stack_limit.rlim_cur = 8 * 1024 * 1024;
   }
-  uintptr_t lo, hi;
-  __find_main_stack_limits(&lo, &hi);
-  *stack_size = stack_limit.rlim_cur;
-  *stack_base = reinterpret_cast<void*>(hi - *stack_size);
-  return 0;
+
+  // Ask the kernel where our main thread's stack started.
+  uintptr_t startstack = __get_main_stack_startstack();
+
+  // Hunt for the region that contains that address.
+  FILE* fp = fopen("/proc/self/maps", "re");
+  if (fp == nullptr) {
+    async_safe_fatal("couldn't open /proc/self/maps: %m");
+  }
+  char line[BUFSIZ];
+  while (fgets(line, sizeof(line), fp) != nullptr) {
+    uintptr_t lo, hi;
+    if (sscanf(line, "%" SCNxPTR "-%" SCNxPTR, &lo, &hi) == 2) {
+      if (lo <= startstack && startstack <= hi) {
+        *stack_size = stack_limit.rlim_cur;
+        *stack_base = reinterpret_cast<void*>(hi - *stack_size);
+        fclose(fp);
+        return 0;
+      }
+    }
+  }
+  async_safe_fatal("stack not found in /proc/self/maps");
 }
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
diff --git a/libc/bionic/pthread_internal.cpp b/libc/bionic/pthread_internal.cpp
index bfe2f98..6a7ee2f 100644
--- a/libc/bionic/pthread_internal.cpp
+++ b/libc/bionic/pthread_internal.cpp
@@ -40,7 +40,6 @@
 #include "private/ErrnoRestorer.h"
 #include "private/ScopedRWLock.h"
 #include "private/bionic_futex.h"
-#include "private/bionic_globals.h"
 #include "private/bionic_tls.h"
 
 static pthread_internal_t* g_thread_list = nullptr;
@@ -120,89 +119,6 @@
   return nullptr;
 }
 
-static uintptr_t __get_main_stack_startstack() {
-  FILE* fp = fopen("/proc/self/stat", "re");
-  if (fp == nullptr) {
-    async_safe_fatal("couldn't open /proc/self/stat: %m");
-  }
-
-  char line[BUFSIZ];
-  if (fgets(line, sizeof(line), fp) == nullptr) {
-    async_safe_fatal("couldn't read /proc/self/stat: %m");
-  }
-
-  fclose(fp);
-
-  // See man 5 proc. There's no reason comm can't contain ' ' or ')',
-  // so we search backwards for the end of it. We're looking for this field:
-  //
-  //  startstack %lu (28) The address of the start (i.e., bottom) of the stack.
-  uintptr_t startstack = 0;
-  const char* end_of_comm = strrchr(line, ')');
-  if (sscanf(end_of_comm + 1,
-             " %*c "
-             "%*d %*d %*d %*d %*d "
-             "%*u %*u %*u %*u %*u %*u %*u "
-             "%*d %*d %*d %*d %*d %*d "
-             "%*u %*u %*d %*u %*u %*u %" SCNuPTR,
-             &startstack) != 1) {
-    async_safe_fatal("couldn't parse /proc/self/stat");
-  }
-
-  return startstack;
-}
-
-void __find_main_stack_limits(uintptr_t* low, uintptr_t* high) {
-  // Ask the kernel where our main thread's stack started.
-  uintptr_t startstack = __get_main_stack_startstack();
-
-  // Hunt for the region that contains that address.
-  FILE* fp = fopen("/proc/self/maps", "re");
-  if (fp == nullptr) {
-    async_safe_fatal("couldn't open /proc/self/maps: %m");
-  }
-  char line[BUFSIZ];
-  while (fgets(line, sizeof(line), fp) != nullptr) {
-    uintptr_t lo, hi;
-    if (sscanf(line, "%" SCNxPTR "-%" SCNxPTR, &lo, &hi) == 2) {
-      if (lo <= startstack && startstack <= hi) {
-        *low = lo;
-        *high = hi;
-        fclose(fp);
-        return;
-      }
-    }
-  }
-  async_safe_fatal("stack not found in /proc/self/maps");
-}
-
-void __pthread_internal_remap_stack_with_mte() {
-#if defined(__aarch64__)
-  // If process doesn't have MTE enabled, we don't need to do anything.
-  if (!__libc_globals->memtag) return;
-  bool prev = true;
-  __libc_globals.mutate(
-      [&prev](libc_globals* globals) { prev = atomic_exchange(&globals->memtag_stack, true); });
-  if (prev) return;
-  uintptr_t lo, hi;
-  __find_main_stack_limits(&lo, &hi);
-
-  if (mprotect(reinterpret_cast<void*>(lo), hi - lo,
-               PROT_READ | PROT_WRITE | PROT_MTE | PROT_GROWSDOWN)) {
-    async_safe_fatal("error: failed to set PROT_MTE on main thread");
-  }
-  ScopedWriteLock creation_locker(&g_thread_creation_lock);
-  ScopedReadLock list_locker(&g_thread_list_lock);
-  for (pthread_internal_t* t = g_thread_list; t != nullptr; t = t->next) {
-    if (t->terminating || t->is_main()) continue;
-    if (mprotect(t->mmap_base_unguarded, t->mmap_size_unguarded,
-                 PROT_READ | PROT_WRITE | PROT_MTE)) {
-      async_safe_fatal("error: failed to set PROT_MTE on thread: %d", t->tid);
-    }
-  }
-#endif
-}
-
 bool android_run_on_all_threads(bool (*func)(void*), void* arg) {
   // Take the locks in this order to avoid inversion (pthread_create ->
   // __pthread_internal_add).
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 091f711..3b9e6a4 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -178,7 +178,6 @@
   bionic_tls* bionic_tls;
 
   int errno_value;
-  bool is_main() { return start_routine == nullptr; }
 };
 
 struct ThreadMapping {
@@ -208,7 +207,6 @@
 __LIBC_HIDDEN__ pid_t __pthread_internal_gettid(pthread_t pthread_id, const char* caller);
 __LIBC_HIDDEN__ void __pthread_internal_remove(pthread_internal_t* thread);
 __LIBC_HIDDEN__ void __pthread_internal_remove_and_free(pthread_internal_t* thread);
-__LIBC_HIDDEN__ void __find_main_stack_limits(uintptr_t* low, uintptr_t* high);
 
 static inline __always_inline bionic_tcb* __get_bionic_tcb() {
   return reinterpret_cast<bionic_tcb*>(&__get_tls()[MIN_TLS_SLOT]);
@@ -268,9 +266,6 @@
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_child();
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_parent();
 
-// Re-map all threads and successively launched threads with PROT_MTE.
-__LIBC_HIDDEN__ void __pthread_internal_remap_stack_with_mte();
-
 extern "C" bool android_run_on_all_threads(bool (*func)(void*), void* arg);
 
 extern pthread_rwlock_t g_thread_creation_lock;
diff --git a/libc/private/bionic_globals.h b/libc/private/bionic_globals.h
index 08a61f0..8ea7d4d 100644
--- a/libc/private/bionic_globals.h
+++ b/libc/private/bionic_globals.h
@@ -50,7 +50,6 @@
   uintptr_t heap_pointer_tag;
   _Atomic(bool) memtag_stack;
   _Atomic(bool) decay_time_enabled;
-  _Atomic(bool) memtag;
 
   // In order to allow a complete switch between dispatch tables without
   // the need for copying each function by function in the structure,
@@ -136,8 +135,6 @@
   HeapTaggingLevel initial_heap_tagging_level = M_HEAP_TAGGING_LEVEL_NONE;
   bool initial_memtag_stack = false;
   int64_t heap_tagging_upgrade_timer_sec = 0;
-
-  void (*memtag_stack_dlopen_callback)() = nullptr;
 };
 
 __LIBC_HIDDEN__ libc_shared_globals* __libc_shared_globals();
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 60c8e31..d680206 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -2213,14 +2213,6 @@
   loading_trace.End();
 
   if (si != nullptr) {
-    if (si->has_min_version(7) && si->memtag_stack()) {
-      LD_LOG(kLogDlopen, "... dlopen enabling MTE for: realpath=\"%s\", soname=\"%s\"",
-             si->get_realpath(), si->get_soname());
-      if (auto* cb = __libc_shared_globals()->memtag_stack_dlopen_callback) {
-        cb();
-      }
-    }
-
     void* handle = si->to_handle();
     LD_LOG(kLogDlopen,
            "... dlopen calling constructors: realpath=\"%s\", soname=\"%s\", handle=%p",
diff --git a/linker/linker_phdr.cpp b/linker/linker_phdr.cpp
index 6bfe80c..af0ef1d 100644
--- a/linker/linker_phdr.cpp
+++ b/linker/linker_phdr.cpp
@@ -721,8 +721,9 @@
     // at most 1 PT_NOTE mapped at anytime during this search.
     MappedFileFragment note_fragment;
     if (!note_fragment.Map(fd_, file_offset_, phdr->p_offset, phdr->p_memsz)) {
-      DL_ERR("\"%s\" note mmap failed: %s", name_.c_str(), strerror(errno));
-      return false;
+      DL_WARN("\"%s\" note mmap failed: %s", name_.c_str(), strerror(errno));
+      // If mmap failed, skip the optimization but don't block ELF loading
+      return true;
     }
 
     const ElfW(Nhdr)* note_hdr = nullptr;
diff --git a/tests/Android.bp b/tests/Android.bp
index 0ba91ea..a62abab 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -1108,31 +1108,6 @@
 }
 
 cc_test {
-    name: "memtag_stack_dlopen_test",
-    enabled: false,
-    // This does not use bionic_tests_defaults because it is not supported on
-    // host.
-    arch: {
-        arm64: {
-            enabled: true,
-        },
-    },
-    sanitize: {
-        memtag_heap: true,
-        memtag_stack: false,
-    },
-    srcs: [
-        "memtag_stack_dlopen_test.cpp",
-    ],
-    shared_libs: [
-        "libbase",
-    ],
-    data_libs: ["libtest_simple_memtag_stack"],
-    header_libs: ["bionic_libc_platform_headers"],
-    test_suites: ["device-tests"],
-}
-
-cc_test {
     name: "bionic-stress-tests",
     defaults: [
         "bionic_tests_defaults",
diff --git a/tests/libs/Android.bp b/tests/libs/Android.bp
index 68efbd9..039d1e1 100644
--- a/tests/libs/Android.bp
+++ b/tests/libs/Android.bp
@@ -234,17 +234,6 @@
 }
 
 // -----------------------------------------------------------------------------
-// Library used by memtag_stack_dlopen_test tests
-// -----------------------------------------------------------------------------
-cc_test_library {
-    name: "libtest_simple_memtag_stack",
-    sanitize: {
-        memtag_stack: true,
-    },
-    srcs: ["dlopen_testlib_simple.cpp"],
-}
-
-// -----------------------------------------------------------------------------
 // Libraries used by hwasan_test
 // -----------------------------------------------------------------------------
 cc_test_library {
diff --git a/tests/memtag_stack_dlopen_test.cpp b/tests/memtag_stack_dlopen_test.cpp
deleted file mode 100644
index 308af1e..0000000
--- a/tests/memtag_stack_dlopen_test.cpp
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- * Copyright (C) 2023 The Android Open Source Project
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
- * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
- * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <thread>
-
-#include <dlfcn.h>
-#include <stdlib.h>
-
-#include <gtest/gtest.h>
-
-#include <android-base/silent_death_test.h>
-#include <android-base/test_utils.h>
-#include "utils.h"
-
-#if defined(__BIONIC__) && defined(__aarch64__)
-__attribute__((target("mte"))) bool is_stack_mte_on() {
-  alignas(16) int x = 0;
-  void* p = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(&x) + (1UL << 57));
-  void* p_cpy = p;
-  __builtin_arm_stg(p);
-  p = __builtin_arm_ldg(p);
-  __builtin_arm_stg(&x);
-  return p == p_cpy;
-}
-
-// We can't use pthread_getattr_np because that uses the rlimit rather than the actual mapping
-// bounds.
-static void find_main_stack_limits(uintptr_t* low, uintptr_t* high) {
-  uintptr_t startstack = reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
-
-  // Hunt for the region that contains that address.
-  FILE* fp = fopen("/proc/self/maps", "re");
-  if (fp == nullptr) {
-    abort();
-  }
-  char line[BUFSIZ];
-  while (fgets(line, sizeof(line), fp) != nullptr) {
-    uintptr_t lo, hi;
-    if (sscanf(line, "%" SCNxPTR "-%" SCNxPTR, &lo, &hi) == 2) {
-      if (lo <= startstack && startstack <= hi) {
-        *low = lo;
-        *high = hi;
-        fclose(fp);
-        return;
-      }
-    }
-  }
-  abort();
-}
-
-template <typename Fn>
-unsigned int fault_new_stack_page(uintptr_t low, Fn f) {
-  uintptr_t new_low;
-  uintptr_t new_high;
-  volatile char buf[4096];
-  buf[4095] = 1;
-  find_main_stack_limits(&new_low, &new_high);
-  if (new_low < low) {
-    f();
-    return new_high;
-  }
-  // Useless, but should defeat TCO.
-  return new_low + fault_new_stack_page(low, f);
-}
-
-#endif
-
-enum State { kInit, kThreadStarted, kStackRemapped };
-
-TEST(MemtagStackDlopenTest, DlopenRemapsStack) {
-#if defined(__BIONIC__) && defined(__aarch64__)
-  if (!running_with_mte()) GTEST_SKIP() << "Test requires MTE.";
-
-  std::string path = android::base::GetExecutableDirectory() + "/libtest_simple_memtag_stack.so";
-  ASSERT_EQ(0, access(path.c_str(), F_OK));  // Verify test setup.
-  EXPECT_FALSE(is_stack_mte_on());
-  std::mutex m;
-  std::condition_variable cv;
-  State state = kInit;
-
-  bool is_early_thread_mte_on = false;
-  std::thread early_th([&] {
-    {
-      std::lock_guard lk(m);
-      state = kThreadStarted;
-    }
-    cv.notify_one();
-    {
-      std::unique_lock lk(m);
-      cv.wait(lk, [&] { return state == kStackRemapped; });
-    }
-    is_early_thread_mte_on = is_stack_mte_on();
-  });
-  {
-    std::unique_lock lk(m);
-    cv.wait(lk, [&] { return state == kThreadStarted; });
-  }
-  void* handle = dlopen(path.c_str(), RTLD_NOW);
-  {
-    std::lock_guard lk(m);
-    state = kStackRemapped;
-  }
-  cv.notify_one();
-  ASSERT_NE(handle, nullptr);
-  EXPECT_TRUE(is_stack_mte_on());
-
-  bool new_stack_page_mte_on = false;
-  uintptr_t low;
-  uintptr_t high;
-  find_main_stack_limits(&low, &high);
-  fault_new_stack_page(low, [&] { new_stack_page_mte_on = is_stack_mte_on(); });
-  EXPECT_TRUE(new_stack_page_mte_on);
-
-  bool is_late_thread_mte_on = false;
-  std::thread late_th([&] { is_late_thread_mte_on = is_stack_mte_on(); });
-  late_th.join();
-  early_th.join();
-  EXPECT_TRUE(is_early_thread_mte_on);
-  EXPECT_TRUE(is_late_thread_mte_on);
-#else
-  GTEST_SKIP() << "requires bionic arm64";
-#endif
-}