Re-land^2 linker support for MTE globals

The original reland was fine, but there was an app compat problem
because it incremented SOINFO_VERSION. That's a problem with the app,
but it turns out we don't actually need to increment the version: it is
unused on aarch64, and memtag-globals are aarch64-only.

The original patch (aosp/1845896) had a major bug: for out-of-bounds
RELR relocations, we'd attempt to materialize the address tag of the
result unconditionally. This caused crashes on non-MTE devices
(particularly the webview) when loading DSOs containing an OOB RELR
reloc.

This patch includes a fix: during RELR and other relocations, we only
materialize the tag if the result points into a binary that uses MTE
globals. Note that this is not the same as the binary *containing* the
relocation using MTE globals (for RELR/RELATIVE relocations the binary
containing the relocation and the result are the same binary, but
that's not the case for GLOB_DAT).
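
Conceptually, the fixed relocation path looks like the following
sketch. The names (`reloc_target`, `found_in`,
`should_tag_memtag_globals`) are illustrative, not necessarily the
actual linker identifiers:

    // Sketch only: materialize the tag iff the binary the *result*
    // points into was loaded with MTE globals enabled.
    ElfW(Addr) result = sym_addr + addend;
    soinfo* found_in =
        find_containing_library(reinterpret_cast<void*>(result));
    if (found_in != nullptr && found_in->should_tag_memtag_globals()) {
      result = reinterpret_cast<ElfW(Addr)>(
          get_tagged_address(reinterpret_cast<void*>(result)));
    }
    *reinterpret_cast<ElfW(Addr)*>(reloc_target) = result;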

Other than that fix, this carries over the original patch: the bionic
code needed for the linker to protect global data under MTE.

The implementation is described in the MemtagABI addendum to the
AArch64 ELF ABI:
https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst

In summary, this patch includes:

1. When MTE globals is requested, the linker maps writable SHF_ALLOC
   sections as anonymous pages with PROT_MTE (copying the file contents
   into the anonymous mapping), rather than using a file-backed private
   mapping. This is required because file-backed mappings are not
   necessarily backed by the kernel with tag-capable memory. For
   sections already mapped by the kernel when the linker is invoked via
   PT_INTERP, we unmap the contents, map an anonymous PROT_MTE mapping
   in its place, and re-load the file contents from disk (see the
   sketch after this list).

2. When MTE globals is requested, the linker tags areas of global memory
   (as defined in SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC) with random tags,
   while ensuring that adjacent globals are never tagged with the same
   memory tag (to provide deterministic overflow detection), as
   sketched below.

3. Changes to RELATIVE, ABS64, and GLOB_DAT relocations to load and
   store tags in the right places. This ensures that the address tags
   are materialized into the GOT entries as well. These changes are a
   functional no-op for existing binaries and for non-MTE-capable
   hardware.
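
A rough sketch of (1) and (2), using the mte.h helpers added by this
patch. Everything else (`fd`, `file_offset`,
`memtag_global_descriptors`) is hypothetical, and the real code walks
the ULEB128-encoded descriptor stream rather than a ready-made list:

    // (1) Swap the file-backed mapping for a tag-capable anonymous
    // one, then re-read the section contents from disk.
    void* page = mmap(addr, size, PROT_READ | PROT_WRITE | PROT_MTE,
                      MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
    pread(fd, page, size, file_offset);

    // (2) Give each global a random tag, excluding the previous
    // global's tag so that adjacent globals never match.
    uint64_t exclude_mask = 0;
    for (const auto& g : memtag_global_descriptors) {
      void* tagged = insert_random_tag(g.addr, exclude_mask);
      for (size_t off = 0; off < g.size; off += 16) {  // MTE granules
        set_memory_tag(static_cast<char*>(tagged) + off);
      }
      exclude_mask =
          1u << ((reinterpret_cast<uintptr_t>(tagged) >> 56) & 0xf);
    }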

Bug: 315182011
Test: On both an MTE-enabled and non-MTE-enabled device:
Test: atest libprocinfo_test bionic-unit-tests bionic-unit-tests-static CtsGwpAsanTestCases gwp_asan_unittest debuggerd_test memtag_stack_dlopen_test
Change-Id: I75f2ca4a5a08550d80b6b0978fc1bcefff284505
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 1b539f2..0d557f1 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -44,7 +44,7 @@
 // Declared in "private/bionic_ssp.h".
 uintptr_t __stack_chk_guard = 0;
 
-static pthread_internal_t main_thread;
+BIONIC_USED_BEFORE_LINKER_RELOCATES static pthread_internal_t main_thread;
 
 // Setup for the main thread. For dynamic executables, this is called by the
 // linker _before_ libc is mapped in memory. This means that all writes to
diff --git a/libc/bionic/bionic_call_ifunc_resolver.cpp b/libc/bionic/bionic_call_ifunc_resolver.cpp
index e44d998..d5a812c 100644
--- a/libc/bionic/bionic_call_ifunc_resolver.cpp
+++ b/libc/bionic/bionic_call_ifunc_resolver.cpp
@@ -31,6 +31,7 @@
 #include <sys/hwprobe.h>
 #include <sys/ifunc.h>
 
+#include "bionic/macros.h"
 #include "private/bionic_auxv.h"
 
 // This code is called in the linker before it has been relocated, so minimize calls into other
@@ -40,8 +41,8 @@
 ElfW(Addr) __bionic_call_ifunc_resolver(ElfW(Addr) resolver_addr) {
 #if defined(__aarch64__)
   typedef ElfW(Addr) (*ifunc_resolver_t)(uint64_t, __ifunc_arg_t*);
-  static __ifunc_arg_t arg;
-  static bool initialized = false;
+  BIONIC_USED_BEFORE_LINKER_RELOCATES static __ifunc_arg_t arg;
+  BIONIC_USED_BEFORE_LINKER_RELOCATES static bool initialized = false;
   if (!initialized) {
     initialized = true;
     arg._size = sizeof(__ifunc_arg_t);
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index b54ef85..fac270a 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -517,7 +517,7 @@
 // compiled with -ffreestanding to avoid implicit string.h function calls. (It shouldn't strictly
 // be necessary, though.)
 __LIBC_HIDDEN__ libc_shared_globals* __libc_shared_globals() {
-  static libc_shared_globals globals;
+  BIONIC_USED_BEFORE_LINKER_RELOCATES static libc_shared_globals globals;
   return &globals;
 }
 
diff --git a/libc/platform/bionic/macros.h b/libc/platform/bionic/macros.h
index 93268c1..b2d6f96 100644
--- a/libc/platform/bionic/macros.h
+++ b/libc/platform/bionic/macros.h
@@ -97,3 +97,29 @@
 static inline T* _Nonnull untag_address(T* _Nonnull p) {
   return reinterpret_cast<T*>(untag_address(reinterpret_cast<uintptr_t>(p)));
 }
+
+// MTE globals protects internal and external global variables. One of the main
+// things that MTE globals does is force all global variable accesses to go
+// through the GOT. In the linker though, some global variables are accessed (or
+// address-taken) prior to relocations being processed. Because relocations
+// haven't run yet, the GOT entry hasn't been populated, and this leads to
+// crashes. Thus, any globals used by the linker prior to relocation should be
+// annotated with this attribute, which suppresses tagging of this global
+// variable, restoring the pc-relative address computation.
+//
+// A way to find global variables that need this attribute is to build the
+// linker/libc with `SANITIZE_TARGET=memtag_globals`, push them onto a device
+// (it doesn't have to be MTE-capable), and run an executable with the linker
+// in interpreter mode via LD_LIBRARY_PATH (e.g.
+// `LD_LIBRARY_PATH=/data/tmp/ /data/tmp/linker64 /data/tmp/my_binary`). A
+// good heuristic is that the global variable lives in a file that should be
+// compiled with `-ffreestanding` (though not every global variable in such a
+// file needs this attribute).
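+//
+// Example (mirroring the __libc_init_main_thread.cpp change in this patch):
+//   BIONIC_USED_BEFORE_LINKER_RELOCATES static pthread_internal_t main_thread;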
+#if __has_feature(memtag_globals)
+#define BIONIC_USED_BEFORE_LINKER_RELOCATES __attribute__((no_sanitize("memtag")))
+#else  // __has_feature(memtag_globals)
+#define BIONIC_USED_BEFORE_LINKER_RELOCATES
+#endif  // __has_feature(memtag_globals)
diff --git a/libc/platform/bionic/mte.h b/libc/platform/bionic/mte.h
index 98b3d27..423ed0f 100644
--- a/libc/platform/bionic/mte.h
+++ b/libc/platform/bionic/mte.h
@@ -28,6 +28,7 @@
 
 #pragma once
 
+#include <stddef.h>
 #include <sys/auxv.h>
 #include <sys/mman.h>
 #include <sys/prctl.h>
@@ -49,6 +50,42 @@
   return supported;
 }
 
+// Loads the memory tag for the granule at `ptr` (via LDG) and returns
+// `ptr` with that tag applied; returns `ptr` unchanged without MTE.
+inline void* get_tagged_address(const void* ptr) {
+#if defined(__aarch64__)
+  if (mte_supported()) {
+    __asm__ __volatile__(".arch_extension mte; ldg %0, [%0]" : "+r"(ptr));
+  }
+#endif  // aarch64
+  return const_cast<void*>(ptr);
+}
+
+// Inserts a random tag into `ptr`; any set bit in the lower 16 bits of
+// `mask` excludes the corresponding tag from being generated. Note: this
+// does not tag memory. It generates a pointer to be used with set_memory_tag.
+inline void* insert_random_tag(const void* ptr, __attribute__((unused)) uint64_t mask = 0) {
+#if defined(__aarch64__)
+  if (mte_supported() && ptr) {
+    __asm__ __volatile__(".arch_extension mte; irg %0, %0, %1" : "+r"(ptr) : "r"(mask));
+  }
+#endif  // aarch64
+  return const_cast<void*>(ptr);
+}
+
+// Stores the address tag of `ptr` into the tag granule at `ptr` (via STG).
+inline void set_memory_tag(__attribute__((unused)) void* ptr) {
+#if defined(__aarch64__)
+  if (mte_supported()) {
+    __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" : "+r"(ptr));
+  }
+#endif  // aarch64
+}
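+
+// Typical usage to tag a granule and get the pointer for later accesses:
+//   void* tagged = insert_random_tag(ptr);
+//   set_memory_tag(tagged);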
+
 #ifdef __aarch64__
 class ScopedDisableMTE {
   size_t prev_tco_;