bionic: Allocate a shadow call stack for each thread.
Bug: 112907825
Change-Id: I7c1479a0cd68696739bf6aa5e0700ba4f2a137ec
Merged-In: I7c1479a0cd68696739bf6aa5e0700ba4f2a137ec
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 758b295..be9d32e 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -101,5 +101,5 @@
__init_thread(&main_thread);
- __init_alternate_signal_stack(&main_thread);
+ __init_additional_stacks(&main_thread);
}
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 543fdc5..3ba787b 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -32,6 +32,7 @@
#include <string.h>
#include <sys/mman.h>
#include <sys/prctl.h>
+#include <sys/random.h>
#include <unistd.h>
#include "pthread_internal.h"
@@ -86,7 +87,7 @@
thread->tls[TLS_SLOT_STACK_GUARD] = reinterpret_cast<void*>(__stack_chk_guard);
}
-void __init_alternate_signal_stack(pthread_internal_t* thread) {
+static void __init_alternate_signal_stack(pthread_internal_t* thread) {
// Create and set an alternate signal stack.
void* stack_base = mmap(nullptr, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (stack_base != MAP_FAILED) {
@@ -109,6 +110,32 @@
}
}
+static void __init_shadow_call_stack(pthread_internal_t* thread) {
+ (void)thread;
+#ifdef __aarch64__
+ char* scs_guard_region = reinterpret_cast<char*>(
+ mmap(nullptr, SCS_GUARD_REGION_SIZE, 0, MAP_PRIVATE | MAP_ANON, -1, 0));
+ thread->shadow_call_stack_guard_region = scs_guard_region;
+
+  // We need to page align scs_offset and ensure that [scs_offset,scs_offset+SCS_SIZE) is in the
+  // guard region. We can't use arc4random_uniform in the init process (pid 1) because
+  // /dev/urandom might not have been created yet.
+ size_t scs_offset =
+ (getpid() == 1) ? 0 : (arc4random_uniform(SCS_GUARD_REGION_SIZE / SCS_SIZE) * SCS_SIZE);
+
+ // Allocate the stack and store its address in register x18. This is deliberately the only place
+ // where the address is stored.
+ __asm__ __volatile__(
+ "mov x18, %0" ::"r"(mmap(scs_guard_region + scs_offset, SCS_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0)));
+#endif
+}
+
+void __init_additional_stacks(pthread_internal_t* thread) {
+ __init_alternate_signal_stack(thread);
+ __init_shadow_call_stack(thread);
+}
+
int __init_thread(pthread_internal_t* thread) {
thread->cleanup_stack = nullptr;
@@ -252,7 +279,7 @@
// accesses previously made by the creating thread are visible to us.
thread->startup_handshake_lock.lock();
- __init_alternate_signal_stack(thread);
+ __init_additional_stacks(thread);
void* result = thread->start_routine(thread->start_routine_arg);
pthread_exit(result);
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 220f7a0..010cc06 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -103,6 +103,11 @@
thread->alternate_signal_stack = nullptr;
}
+#ifdef __aarch64__
+  // Unmap the guard region, which contains the shadow call stack.
+ munmap(thread->shadow_call_stack_guard_region, SCS_GUARD_REGION_SIZE);
+#endif
+
ThreadJoinState old_state = THREAD_NOT_JOINED;
while (old_state == THREAD_NOT_JOINED &&
!atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 65ec5ff..b7173a3 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -106,6 +106,29 @@
void* alternate_signal_stack;
+ // The start address of the shadow call stack's guard region (arm64 only).
+ // This address is only used to deallocate the shadow call stack on thread
+ // exit; the address of the stack itself is stored only in the x18 register.
+  // Because the protection offered by SCS relies on the secrecy of the stack
+  // address, storing the address here weakens the protection, but only
+  // slightly: it is relatively easy for an attacker to discover the address
+  // of the guard region anyway (e.g. by reference to other allocations), but
+  // not the address of the stack itself, which occupies <0.1% of the size of
+  // the guard region.
+ //
+ // There are at least two other options for discovering the start address of
+ // the guard region on thread exit, but they are not as simple as storing in
+ // TLS.
+ // 1) Derive it from the value of the x18 register. This is only possible in
+ // processes that do not contain legacy code that might clobber x18,
+ // therefore each process must declare early during process startup whether
+ // it might load legacy code.
+ // 2) Mark the guard region as such using prctl(PR_SET_VMA_ANON_NAME) and
+ // discover its address by reading /proc/self/maps. One issue with this is
+ // that reading /proc/self/maps can race with allocations, so we may need
+ // code to handle retries.
+ void* shadow_call_stack_guard_region;
+
Lock startup_handshake_lock;
size_t mmap_size;
@@ -129,7 +152,7 @@
__LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
__LIBC_HIDDEN__ bool __init_tls(pthread_internal_t* thread);
__LIBC_HIDDEN__ void __init_thread_stack_guard(pthread_internal_t* thread);
-__LIBC_HIDDEN__ void __init_alternate_signal_stack(pthread_internal_t*);
+__LIBC_HIDDEN__ void __init_additional_stacks(pthread_internal_t*);
__LIBC_HIDDEN__ pthread_t __pthread_internal_add(pthread_internal_t* thread);
__LIBC_HIDDEN__ pthread_internal_t* __pthread_internal_find(pthread_t pthread_id);
@@ -178,6 +201,13 @@
// Leave room for a guard page in the internally created signal stacks.
#define SIGNAL_STACK_SIZE (SIGNAL_STACK_SIZE_WITHOUT_GUARD + PTHREAD_GUARD_SIZE)
+// Size of the shadow call stack.
+#define SCS_SIZE (8 * 1024)
+
+// The shadow call stack is allocated at a random address within a guard region
+// of this size.
+#define SCS_GUARD_REGION_SIZE (16 * 1024 * 1024)
+
// Needed by fork.
__LIBC_HIDDEN__ extern void __bionic_atfork_run_prepare();
__LIBC_HIDDEN__ extern void __bionic_atfork_run_child();