Re-land linker support for MTE globals
The original patch (aosp/1845896) had a major bug: for out-of-bounds
RELR relocations, we'd unconditionally attempt to materialize the
address tag of the result. This caused crashes on non-MTE devices
(particularly in the WebView) when loading DSOs containing an OOB
RELR reloc.
This patch includes a fix: we only materialize the tag during RELR and
other relocations if the result points into a binary that uses MTE
globals. Note that this is not the same as the binary *containing* the
relocation using MTE globals (for RELR/RELATIVE relocations the binary
containing the relocation and the binary containing the result are the
same, but that's not the case for GLOB_DAT).
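As a sketch of the fixed behaviour (the helper names below,
soinfo_for_address() and should_tag_globals(), are illustrative, not
the actual linker API; get_tagged_address() is the LDG wrapper this
patch adds to libc/platform/bionic/mte.h):

    // Only materialize the address tag when the binary containing the
    // *result* of the relocation was loaded with MTE globals.
    static ElfW(Addr) materialize_tag_if_needed(ElfW(Addr) result) {
      soinfo* target = soinfo_for_address(result);  // hypothetical lookup
      if (target != nullptr && target->should_tag_globals()) {
        // LDG reads the allocation tag back from memory into the pointer.
        return reinterpret_cast<ElfW(Addr)>(
            get_tagged_address(reinterpret_cast<void*>(result)));
      }
      return result;  // non-MTE binary: leave the address untagged
    }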
Other than that fix, this re-lands the original patch: the bionic code
necessary for the linker to protect global data with MTE.
The implementation is described in the MemtagABI addendum to the
AArch64 ELF ABI:
https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst
In summary, this patch includes:
1. When MTE globals is requested, the linker maps writable SHF_ALLOC
sections as anonymous pages with PROT_MTE (copying the file contents
into the anonymous mapping), rather than using a file-backed private
mapping. This is required because file-backed mappings are not
necessarily backed by the kernel with tag-capable memory. For sections
already mapped by the kernel when the linker is invoked via PT_INTERP,
we unmap the contents, remap a PROT_MTE+anonymous mapping in its
place, and re-load the file contents from disk (see the first sketch
after this list).
2. When MTE globals is requested, the linker tags areas of global
memory (as defined in SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC) with random
tags, while ensuring that adjacent globals are never tagged with the
same memory tag, to provide deterministic overflow detection (see the
second sketch after this list).
3. Changes to the RELATIVE, ABS64, and GLOB_DAT relocations to load
and store tags in the right places. This ensures that address tags
are materialized into the GOT entries as well. These changes are a
functional no-op for existing binaries and on non-MTE-capable
hardware.
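A minimal sketch of the remapping in (1); remap_mte_anonymous() is a
hypothetical helper, assuming `start`/`size` describe a page-aligned
writable segment and `fd`/`file_offset` locate its bytes in the ELF
file (page rounding elided):

    static void remap_mte_anonymous(void* start, size_t size, int fd,
                                    off64_t file_offset) {
      // MAP_FIXED atomically replaces the existing file-backed mapping
      // with an anonymous, tag-capable (PROT_MTE) one.
      void* p = mmap(start, size, PROT_READ | PROT_WRITE | PROT_MTE,
                     MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
      if (p == MAP_FAILED) async_safe_fatal("remap failed: %m");
      // The old pages are discarded, so re-read the contents from disk.
      if (pread64(fd, p, size, file_offset) != static_cast<ssize_t>(size)) {
        async_safe_fatal("reload failed: %m");
      }
    }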
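And a sketch of the adjacent-tag exclusion in (2), built on the
insert_random_tag() (IRG) and set_memory_tag() (STG) helpers this
patch adds to libc/platform/bionic/mte.h; iterating the
SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC descriptors is omitted, and sizes
are assumed to be multiples of the 16-byte MTE granule:

    // Tags one global and returns an exclusion mask for its neighbour,
    // so adjacent globals never receive the same tag (deterministic
    // overflow detection).
    static uint64_t tag_global(void* addr, size_t size, uint64_t exclude) {
      void* tagged = insert_random_tag(addr, exclude);  // IRG, avoiding `exclude`
      for (size_t off = 0; off < size; off += 16) {
        set_memory_tag(static_cast<uint8_t*>(tagged) + off);  // STG one granule
      }
      uint8_t tag = (reinterpret_cast<uintptr_t>(tagged) >> 56) & 0xf;
      return 1ull << tag;  // exclude this tag when tagging the next global
    }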
Bug: 315182011
Test: On both an MTE-enabled and non-MTE-enabled device:
Test: atest libprocinfo_test bionic-unit-tests bionic-unit-tests-static CtsGwpAsanTestCases gwp_asan_unittest debuggerd_test memtag_stack_dlopen_test
Change-Id: Ibe203918f2e67b133b5ccdd57dbb07fe69a4c2ba
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 1b539f2..0d557f1 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -44,7 +44,7 @@
// Declared in "private/bionic_ssp.h".
uintptr_t __stack_chk_guard = 0;
-static pthread_internal_t main_thread;
+BIONIC_USED_BEFORE_LINKER_RELOCATES static pthread_internal_t main_thread;
// Setup for the main thread. For dynamic executables, this is called by the
// linker _before_ libc is mapped in memory. This means that all writes to
diff --git a/libc/bionic/bionic_call_ifunc_resolver.cpp b/libc/bionic/bionic_call_ifunc_resolver.cpp
index e44d998..d5a812c 100644
--- a/libc/bionic/bionic_call_ifunc_resolver.cpp
+++ b/libc/bionic/bionic_call_ifunc_resolver.cpp
@@ -31,6 +31,7 @@
#include <sys/hwprobe.h>
#include <sys/ifunc.h>
+#include "bionic/macros.h"
#include "private/bionic_auxv.h"
// This code is called in the linker before it has been relocated, so minimize calls into other
@@ -40,8 +41,8 @@
ElfW(Addr) __bionic_call_ifunc_resolver(ElfW(Addr) resolver_addr) {
#if defined(__aarch64__)
typedef ElfW(Addr) (*ifunc_resolver_t)(uint64_t, __ifunc_arg_t*);
- static __ifunc_arg_t arg;
- static bool initialized = false;
+ BIONIC_USED_BEFORE_LINKER_RELOCATES static __ifunc_arg_t arg;
+ BIONIC_USED_BEFORE_LINKER_RELOCATES static bool initialized = false;
if (!initialized) {
initialized = true;
arg._size = sizeof(__ifunc_arg_t);
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index 7c46113..ae86da1 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -344,19 +344,12 @@
// This function is called from the linker before the main executable is relocated.
__attribute__((no_sanitize("hwaddress", "memtag"))) void __libc_init_mte(
const memtag_dynamic_entries_t* memtag_dynamic_entries, const void* phdr_start, size_t phdr_ct,
- uintptr_t load_bias, void* stack_top) {
+ uintptr_t load_bias) {
bool memtag_stack = false;
HeapTaggingLevel level =
__get_tagging_level(memtag_dynamic_entries, phdr_start, phdr_ct, load_bias, &memtag_stack);
- // initial_memtag_stack is used by the linker (in linker.cpp) to communicate than any library
- // linked by this executable enables memtag-stack.
- // memtag_stack is also set for static executables if they request memtag stack via the note,
- // in which case it will differ from initial_memtag_stack.
- if (__libc_shared_globals()->initial_memtag_stack || memtag_stack) {
- memtag_stack = true;
- __libc_shared_globals()->initial_memtag_stack_abi = true;
- __get_bionic_tcb()->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, nullptr);
- }
+ __libc_shared_globals()->initial_memtag_stack = memtag_stack;
+
if (int64_t timed_upgrade = __get_memtag_upgrade_secs()) {
if (level == M_HEAP_TAGGING_LEVEL_ASYNC) {
async_safe_format_log(ANDROID_LOG_INFO, "libc",
@@ -380,15 +373,7 @@
if (prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_arg | PR_MTE_TCF_SYNC, 0, 0, 0) == 0 ||
prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_arg, 0, 0, 0) == 0) {
__libc_shared_globals()->initial_heap_tagging_level = level;
- __libc_shared_globals()->initial_memtag_stack = memtag_stack;
- if (memtag_stack) {
- void* pg_start =
- reinterpret_cast<void*>(page_start(reinterpret_cast<uintptr_t>(stack_top)));
- if (mprotect(pg_start, page_size(), PROT_READ | PROT_WRITE | PROT_MTE | PROT_GROWSDOWN)) {
- async_safe_fatal("error: failed to set PROT_MTE on main thread stack: %m");
- }
- }
struct sigaction action = {};
action.sa_flags = SA_SIGINFO | SA_RESTART;
action.sa_sigaction = __enable_mte_signal_handler;
@@ -404,11 +389,36 @@
}
// We did not enable MTE, so we do not need to arm the upgrade timer.
__libc_shared_globals()->heap_tagging_upgrade_timer_sec = 0;
- // We also didn't enable memtag_stack.
- __libc_shared_globals()->initial_memtag_stack = false;
}
+
+// Figure out whether we need to map the stack as PROT_MTE.
+// For dynamic executables, this has to be called after loading all
+// DT_NEEDED libraries, in case one of them needs stack MTE.
+__attribute__((no_sanitize("hwaddress", "memtag"))) void __libc_init_mte_stack(void* stack_top) {
+ if (!__libc_shared_globals()->initial_memtag_stack) {
+ return;
+ }
+
+ // Even if the device doesn't support MTE, we have to allocate stack
+ // history buffers for code compiled for stack MTE. That is because the
+ // codegen expects a buffer to be present in TLS_SLOT_STACK_MTE either
+ // way.
+ __libc_shared_globals()->initial_memtag_stack_abi = true;
+ __get_bionic_tcb()->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, nullptr);
+
+ if (!__libc_mte_enabled()) {
+ __libc_shared_globals()->initial_memtag_stack = false;
+ return;
+ }
+ void* pg_start = reinterpret_cast<void*>(page_start(reinterpret_cast<uintptr_t>(stack_top)));
+ if (mprotect(pg_start, page_size(), PROT_READ | PROT_WRITE | PROT_MTE | PROT_GROWSDOWN)) {
+ async_safe_fatal("error: failed to set PROT_MTE on main thread stack: %m");
+ }
+}
+
#else // __aarch64__
-void __libc_init_mte(const memtag_dynamic_entries_t*, const void*, size_t, uintptr_t, void*) {}
+void __libc_init_mte(const memtag_dynamic_entries_t*, const void*, size_t, uintptr_t) {}
+void __libc_init_mte_stack(void*) {}
#endif // __aarch64__
void __libc_init_profiling_handlers() {
@@ -436,7 +446,8 @@
__libc_init_common();
__libc_init_mte(/*memtag_dynamic_entries=*/nullptr,
reinterpret_cast<ElfW(Phdr)*>(getauxval(AT_PHDR)), getauxval(AT_PHNUM),
- /*load_bias = */ 0, /*stack_top = */ raw_args);
+ /*load_bias = */ 0);
+ __libc_init_mte_stack(/*stack_top = */ raw_args);
__libc_init_scudo();
__libc_init_profiling_handlers();
__libc_init_fork_handler();
@@ -508,6 +519,11 @@
// compiled with -ffreestanding to avoid implicit string.h function calls. (It shouldn't strictly
// be necessary, though.)
__LIBC_HIDDEN__ libc_shared_globals* __libc_shared_globals() {
- static libc_shared_globals globals;
+ BIONIC_USED_BEFORE_LINKER_RELOCATES static libc_shared_globals globals;
return &globals;
}
+
+__LIBC_HIDDEN__ bool __libc_mte_enabled() {
+ HeapTaggingLevel lvl = __libc_shared_globals()->initial_heap_tagging_level;
+ return lvl == M_HEAP_TAGGING_LEVEL_SYNC || lvl == M_HEAP_TAGGING_LEVEL_ASYNC;
+}
diff --git a/libc/platform/bionic/macros.h b/libc/platform/bionic/macros.h
index 93268c1..837583e 100644
--- a/libc/platform/bionic/macros.h
+++ b/libc/platform/bionic/macros.h
@@ -97,3 +97,26 @@
static inline T* _Nonnull untag_address(T* _Nonnull p) {
return reinterpret_cast<T*>(untag_address(reinterpret_cast<uintptr_t>(p)));
}
+
+// MTE globals protects internal and external global variables. One of the main
+// things that MTE globals does is force all global variable accesses to go
+// through the GOT. In the linker though, some global variables are accessed (or
+// address-taken) prior to relocations being processed. Because relocations
+// haven't run yet, the GOT entry hasn't been populated, and this leads to
+// crashes. Thus, any globals used by the linker prior to relocation should be
+// annotated with this attribute, which suppresses tagging of this global
+// variable, restoring the pc-relative address computation.
+//
+// A way to find global variables that need this attribute is to build the
+// linker/libc with `SANITIZE_TARGET=memtag_globals`, push them onto a device
+// (it doesn't have to be MTE capable), and then run an executable with
+// LD_LIBRARY_PATH set, using the linker in interpreter mode (e.g.
+// `LD_LIBRARY_PATH=/data/tmp/ /data/tmp/linker64 /data/tmp/my_binary`). A
+// good heuristic is that the global variable is in a file that should be
+// compiled with `-ffreestanding` (but there are global variables there that
+// don't need this attribute).
+#if __has_feature(memtag_globals)
+#define BIONIC_USED_BEFORE_LINKER_RELOCATES __attribute__((no_sanitize("memtag")))
+#else // __has_feature(memtag_globals)
+#define BIONIC_USED_BEFORE_LINKER_RELOCATES
+#endif // __has_feature(memtag_globals)
diff --git a/libc/platform/bionic/mte.h b/libc/platform/bionic/mte.h
index 98b3d27..423ed0f 100644
--- a/libc/platform/bionic/mte.h
+++ b/libc/platform/bionic/mte.h
@@ -28,6 +28,7 @@
#pragma once
+#include <stddef.h>
#include <sys/auxv.h>
#include <sys/mman.h>
#include <sys/prctl.h>
@@ -49,6 +50,36 @@
return supported;
}
+inline void* get_tagged_address(const void* ptr) {
+#if defined(__aarch64__)
+ if (mte_supported()) {
+ __asm__ __volatile__(".arch_extension mte; ldg %0, [%0]" : "+r"(ptr));
+ }
+#endif // aarch64
+ return const_cast<void*>(ptr);
+}
+
+// Inserts a random tag into `ptr`, using any of the set lower 16 bits in
+// `mask` to exclude the corresponding tag from being generated. Note: This does
+// not tag memory. This generates a pointer to be used with set_memory_tag.
+inline void* insert_random_tag(const void* ptr, __attribute__((unused)) uint64_t mask = 0) {
+#if defined(__aarch64__)
+ if (mte_supported() && ptr) {
+ __asm__ __volatile__(".arch_extension mte; irg %0, %0, %1" : "+r"(ptr) : "r"(mask));
+ }
+#endif // aarch64
+ return const_cast<void*>(ptr);
+}
+
+// Stores the address tag of `ptr` to memory, as the allocation tag at `ptr`.
+inline void set_memory_tag(__attribute__((unused)) void* ptr) {
+#if defined(__aarch64__)
+ if (mte_supported()) {
+ __asm__ __volatile__(".arch_extension mte; stg %0, [%0]" : "+r"(ptr));
+ }
+#endif // aarch64
+}
+
#ifdef __aarch64__
class ScopedDisableMTE {
size_t prev_tco_;
diff --git a/libc/private/bionic_globals.h b/libc/private/bionic_globals.h
index a1bebda..02713fc 100644
--- a/libc/private/bionic_globals.h
+++ b/libc/private/bionic_globals.h
@@ -157,6 +157,7 @@
};
__LIBC_HIDDEN__ libc_shared_globals* __libc_shared_globals();
+__LIBC_HIDDEN__ bool __libc_mte_enabled();
__LIBC_HIDDEN__ void __libc_init_fdsan();
__LIBC_HIDDEN__ void __libc_init_fdtrack();
__LIBC_HIDDEN__ void __libc_init_profiling_handlers();