Re-land linker support for MTE globals

The original patch (aosp/1845896) had a major bug: for out-of-bounds
RELR relocations, we attempted to materialize the address tag of the
result unconditionally. This caused crashes on non-MTE devices
(particularly in the webview) when loading DSOs containing an OOB RELR
reloc.

This patch includes a fix: we only materialize the tag during RELR and
other relocations if the result points into a binary that uses MTE
globals. Note that this is not the same as the binary *containing* the
relocation using MTE globals (for RELR/RELATIVE relocations the binary
containing the relocation and the binary containing the result are the
same, but that's not the case for GLOB_DAT).
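
As a rough illustration, here is a sketch of the conditional tag
materialization (an illustration only, not the patch's actual
relocation code; it assumes an aarch64 toolchain built with
-march=armv8.5-a+memtag and uses the ACLE MTE intrinsics):

  #include <arm_acle.h>  // __arm_mte_get_tag (LDG)
  #include <stdint.h>

  // `target_uses_memtag_globals` stands in for a per-soinfo flag on the
  // binary that contains the *result* of the relocation, which is not
  // necessarily the binary containing the relocation itself.
  static uintptr_t materialize_reloc_result(uintptr_t result,
                                            bool target_uses_memtag_globals) {
    if (!target_uses_memtag_globals) {
      // Non-MTE binaries must see the plain, untagged address; materializing
      // a tag unconditionally is what crashed non-MTE devices.
      return result;
    }
    // Read back the allocation tag assigned to this global and fold it into
    // the value written to the GOT / relocated slot.
    void* tagged = __arm_mte_get_tag(reinterpret_cast<void*>(result));
    return reinterpret_cast<uintptr_t>(tagged);
  }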

Other than that fix, this change matches the original patch: it adds
the necessary bionic code for the linker to protect global data with
MTE.

The implementation is described in the MemtagABI addendum to the
AArch64 ELF ABI:
https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst

In summary, this patch includes:

1. When MTE globals is requested, the linker maps writable SHF_ALLOC
   sections as anonymous pages with PROT_MTE (copying the file contents
   into the anonymous mapping), rather than using a file-backed private
   mapping. This is required as file-based mappings are not necessarily
   backed by the kernel with tag-capable memory. For sections already
   mapped by the kernel when the linker is invoked via PT_INTERP, we
   unmap the contents, remap a PROT_MTE+anonymous mapping in its place,
   and re-load the file contents from disk.

2. When MTE globals is requested, the linker tags areas of global memory
   (as defined in SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC) with random tags,
   while ensuring that adjacent globals are never tagged with the same
   memory tag (to provide deterministic overflow detection); a sketch of
   this tagging scheme follows the list.

3. Changes to RELATIVE, ABS64, and GLOB_DAT relocations to load and
   store tags in the right places. This ensures that the address tags are
   materialized into the GOT entries as well. These changes are a
   functional no-op for existing binaries and on non-MTE-capable hardware.
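
To make point 2 concrete, here is a hedged sketch of tagging one global
while excluding its predecessor's tag (an illustration under assumptions,
not the patch's soinfo::tag_globals() implementation; it assumes an
aarch64 build with -march=armv8.5-a+memtag, a PROT_MTE mapping, and
16-byte-aligned, granule-sized globals):

  #include <arm_acle.h>  // __arm_mte_create_random_tag (IRG), __arm_mte_set_tag (STG)
  #include <stddef.h>
  #include <stdint.h>

  // Returns the tagged address of the global. `prev_tag` is the tag chosen
  // for the immediately preceding global, so a linear overflow off that
  // neighbour is always (deterministically) detected.
  static void* tag_one_global(void* addr, size_t size, uint8_t prev_tag) {
    // Exclude tag 0 (the "untagged" tag) and the previous global's tag from
    // the random choice.
    uint64_t excluded = (1ull << 0) | (1ull << (prev_tag & 0xf));
    void* tagged = __arm_mte_create_random_tag(addr, excluded);
    // Apply the chosen tag to every 16-byte granule of the global.
    for (size_t offset = 0; offset < size; offset += 16) {
      __arm_mte_set_tag(static_cast<char*>(tagged) + offset);
    }
    return tagged;
  }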

Bug: 315182011
Test: On both an MTE-enabled and non-MTE-enabled device:
Test: atest libprocinfo_test bionic-unit-tests bionic-unit-tests-static CtsGwpAsanTestCases gwp_asan_unittest debuggerd_test memtag_stack_dlopen_test
Change-Id: Ibe203918f2e67b133b5ccdd57dbb07fe69a4c2ba
diff --git a/linker/linker_phdr.cpp b/linker/linker_phdr.cpp
index 7691031..2bdd7f8 100644
--- a/linker/linker_phdr.cpp
+++ b/linker/linker_phdr.cpp
@@ -37,9 +37,12 @@
 #include <unistd.h>
 
 #include "linker.h"
+#include "linker_debug.h"
 #include "linker_dlwarning.h"
 #include "linker_globals.h"
-#include "linker_debug.h"
+#include "linker_logger.h"
+#include "linker_main.h"
+#include "linker_soinfo.h"
 #include "linker_utils.h"
 
 #include "private/bionic_asm_note.h"
@@ -1172,6 +1175,126 @@
                                    should_use_16kib_app_compat);
 }
 
+static bool segment_needs_memtag_globals_remapping(const ElfW(Phdr) * phdr) {
+  // For now, MTE globals is only supported on writeable data segments.
+  return phdr->p_type == PT_LOAD && !(phdr->p_flags & PF_X) && (phdr->p_flags & PF_W);
+}
+
+/* When MTE globals are requested by the binary, and when the hardware supports
+ * it, remap the executable's PT_LOAD data pages to have PROT_MTE.
+ *
+ * Returns 0 on success, -1 on failure (error code in errno).
+ */
+int remap_memtag_globals_segments(const ElfW(Phdr) * phdr_table __unused,
+                                  size_t phdr_count __unused, ElfW(Addr) load_bias __unused) {
+#if defined(__aarch64__)
+  for (const ElfW(Phdr)* phdr = phdr_table; phdr < phdr_table + phdr_count; phdr++) {
+    if (!segment_needs_memtag_globals_remapping(phdr)) {
+      continue;
+    }
+
+    uintptr_t seg_page_start = page_start(phdr->p_vaddr) + load_bias;
+    uintptr_t seg_page_end = page_end(phdr->p_vaddr + phdr->p_memsz) + load_bias;
+    size_t seg_page_aligned_size = seg_page_end - seg_page_start;
+
+    int prot = PFLAGS_TO_PROT(phdr->p_flags);
+    // For anonymous private mappings, it may be possible to simply mprotect()
+    // the PROT_MTE flag over the top. For file-based mappings, this will fail,
+    // and we'll need to fall back. We also allow PROT_WRITE here to allow
+    // writing memory tags (in `soinfo::tag_globals()`), and set these sections
+    // back to read-only after tags are applied (similar to RELRO).
+    prot |= PROT_MTE;
+    if (mprotect(reinterpret_cast<void*>(seg_page_start), seg_page_aligned_size,
+                 prot | PROT_WRITE) == 0) {
+      continue;
+    }
+
+    void* mapping_copy = mmap(nullptr, seg_page_aligned_size, PROT_READ | PROT_WRITE,
+                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (mapping_copy == MAP_FAILED) return -1;
+    linker_memcpy(mapping_copy, reinterpret_cast<void*>(seg_page_start), seg_page_aligned_size);
+
+    void* seg_addr = mmap(reinterpret_cast<void*>(seg_page_start), seg_page_aligned_size,
+                          prot | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (seg_addr == MAP_FAILED) return -1;
+
+    linker_memcpy(seg_addr, mapping_copy, seg_page_aligned_size);
+    munmap(mapping_copy, seg_page_aligned_size);
+  }
+#endif  // defined(__aarch64__)
+  return 0;
+}
+
+void protect_memtag_globals_ro_segments(const ElfW(Phdr) * phdr_table __unused,
+                                        size_t phdr_count __unused, ElfW(Addr) load_bias __unused) {
+#if defined(__aarch64__)
+  for (const ElfW(Phdr)* phdr = phdr_table; phdr < phdr_table + phdr_count; phdr++) {
+    int prot = PFLAGS_TO_PROT(phdr->p_flags);
+    if (!segment_needs_memtag_globals_remapping(phdr) || (prot & PROT_WRITE)) {
+      continue;
+    }
+
+    prot |= PROT_MTE;
+
+    uintptr_t seg_page_start = page_start(phdr->p_vaddr) + load_bias;
+    uintptr_t seg_page_end = page_end(phdr->p_vaddr + phdr->p_memsz) + load_bias;
+    size_t seg_page_aligned_size = seg_page_end - seg_page_start;
+    mprotect(reinterpret_cast<void*>(seg_page_start), seg_page_aligned_size, prot);
+  }
+#endif  // defined(__aarch64__)
+}
+
+void name_memtag_globals_segments(const ElfW(Phdr) * phdr_table, size_t phdr_count,
+                                  ElfW(Addr) load_bias, const char* soname,
+                                  std::list<std::string>& vma_names) {
+  for (const ElfW(Phdr)* phdr = phdr_table; phdr < phdr_table + phdr_count; phdr++) {
+    if (!segment_needs_memtag_globals_remapping(phdr)) {
+      continue;
+    }
+
+    uintptr_t seg_page_start = page_start(phdr->p_vaddr) + load_bias;
+    uintptr_t seg_page_end = page_end(phdr->p_vaddr + phdr->p_memsz) + load_bias;
+    size_t seg_page_aligned_size = seg_page_end - seg_page_start;
+
+    // For file-based mappings that we're now forcing to be anonymous mappings, set the VMA name to
+    // make debugging easier.
+    // Once we are targeting only devices that run kernel 5.10 or newer (and thus include
+    // https://android-review.git.corp.google.com/c/kernel/common/+/1934723 which causes the
+    // VMA_ANON_NAME to be copied into the kernel), we can get rid of the storage here.
+    // For now, that is not the case:
+    // https://source.android.com/docs/core/architecture/kernel/android-common#compatibility-matrix
+    constexpr int kVmaNameLimit = 80;
+    std::string& vma_name = vma_names.emplace_back(kVmaNameLimit, '\0');
+    int full_vma_length =
+        async_safe_format_buffer(vma_name.data(), kVmaNameLimit, "mt:%s+%" PRIxPTR, soname,
+                                 page_start(phdr->p_vaddr)) +
+        /* include the null terminator */ 1;
+    // There's an upper limit of 80 characters, including the null terminator, in the anonymous VMA
+    // name. If we run over that limit, we end up truncating the segment offset and parts of the
+    // DSO's name, starting on the right hand side of the basename. Because the basename is the most
+    // important thing, chop off the soname from the left hand side first.
+    //
+    // Example (with '#' as the null terminator):
+    //   - "mt:/data/nativetest64/bionic-unit-tests/bionic-loader-test-libs/libdlext_test.so+e000#"
+    //     has a `full_vma_length` of 86.
+    //
+    // We need to left-truncate (86 - 80) 6 characters from the soname, plus the
+    // `vma_truncation_prefix`, so 9 characters total.
+    if (full_vma_length > kVmaNameLimit) {
+      const char vma_truncation_prefix[] = "...";
+      int soname_truncated_bytes =
+          full_vma_length - kVmaNameLimit + sizeof(vma_truncation_prefix) - 1;
+      async_safe_format_buffer(vma_name.data(), kVmaNameLimit, "mt:%s%s+%" PRIxPTR,
+                               vma_truncation_prefix, soname + soname_truncated_bytes,
+                               page_start(phdr->p_vaddr));
+    }
+    if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, reinterpret_cast<void*>(seg_page_start),
+              seg_page_aligned_size, vma_name.data()) != 0) {
+      DL_WARN("Failed to re-name memtag global segment.");
+    }
+  }
+}
+
 /* Change the protection of all loaded segments in memory to writable.
  * This is useful before performing relocations. Once completed, you
  * will have to call phdr_table_protect_segments to restore the original