bionic: Allocate a shadow call stack for each thread.

Instead of allocating the stack within a 16MB guard region as we
were doing before, just allocate the stack on its own. This isn't
as secure as with the guard region (since it means that an attacker
who can read the pthread_internal_t can determine the address of the
SCS), but it will at least allow us to discover more blockers until
a solution to b/118642754 is decided on.

Bug: 112907825
Bug: 118642754
Change-Id: Ibe5dffbad1b4700eaa0e24177eea792e7c329a61
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 758b295..be9d32e 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -101,5 +101,5 @@
 
   __init_thread(&main_thread);
 
-  __init_alternate_signal_stack(&main_thread);
+  __init_additional_stacks(&main_thread);
 }
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 543fdc5..720a3ae 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -32,6 +32,7 @@
 #include <string.h>
 #include <sys/mman.h>
 #include <sys/prctl.h>
+#include <sys/random.h>
 #include <unistd.h>
 
 #include "pthread_internal.h"
@@ -86,7 +87,7 @@
   thread->tls[TLS_SLOT_STACK_GUARD] = reinterpret_cast<void*>(__stack_chk_guard);
 }
 
-void __init_alternate_signal_stack(pthread_internal_t* thread) {
+static void __init_alternate_signal_stack(pthread_internal_t* thread) {
   // Create and set an alternate signal stack.
   void* stack_base = mmap(nullptr, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
   if (stack_base != MAP_FAILED) {
@@ -109,6 +110,25 @@
   }
 }
 
+static void __init_shadow_call_stack(pthread_internal_t* thread __unused) {
+#ifdef __aarch64__
+  // Allocate the stack and store its address in register x18. Trap if the mapping fails, so that
+  // MAP_FAILED is never installed in x18 or recorded in the pthread_internal_t.
+  // TODO(pcc): We ought to allocate a guard region here and then allocate the SCS at a random
+  // location within it, so that an attacker who can read the pthread_internal_t won't be able to
+  // discover the address of the SCS. However, doing so is blocked on a solution to b/118642754.
+  void* scs = mmap(nullptr, SCS_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+  if (scs == MAP_FAILED) __builtin_trap();
+  thread->shadow_call_stack_guard_region = reinterpret_cast<char*>(scs);
+  __asm__ __volatile__("mov x18, %0" ::"r"(scs));
+#endif
+}
+
+void __init_additional_stacks(pthread_internal_t* thread) {
+  __init_alternate_signal_stack(thread);  // Per-thread alternate signal stack (mmap-backed).
+  __init_shadow_call_stack(thread);       // Shadow call stack; compiles to a no-op unless __aarch64__.
+}
+
 int __init_thread(pthread_internal_t* thread) {
   thread->cleanup_stack = nullptr;
 
@@ -252,7 +272,7 @@
   // accesses previously made by the creating thread are visible to us.
   thread->startup_handshake_lock.lock();
 
-  __init_alternate_signal_stack(thread);
+  __init_additional_stacks(thread);
 
   void* result = thread->start_routine(thread->start_routine_arg);
   pthread_exit(result);
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 220f7a0..a884be5 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -103,6 +103,11 @@
     thread->alternate_signal_stack = nullptr;
   }
 
+#ifdef __aarch64__
+  // Free the shadow call stack (a plain SCS_SIZE mapping; no guard region is allocated yet).
+  munmap(thread->shadow_call_stack_guard_region, SCS_SIZE);
+#endif
+
   ThreadJoinState old_state = THREAD_NOT_JOINED;
   while (old_state == THREAD_NOT_JOINED &&
          !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index bb33054..3d58121 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -155,7 +155,7 @@
 __LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
 __LIBC_HIDDEN__ bool __init_tls(pthread_internal_t* thread);
 __LIBC_HIDDEN__ void __init_thread_stack_guard(pthread_internal_t* thread);
-__LIBC_HIDDEN__ void __init_alternate_signal_stack(pthread_internal_t*);
+__LIBC_HIDDEN__ void __init_additional_stacks(pthread_internal_t*);
 
 __LIBC_HIDDEN__ pthread_t           __pthread_internal_add(pthread_internal_t* thread);
 __LIBC_HIDDEN__ pthread_internal_t* __pthread_internal_find(pthread_t pthread_id);
@@ -209,6 +209,9 @@
 // Leave room for a guard page in the internally created signal stacks.
 #define SIGNAL_STACK_SIZE (SIGNAL_STACK_SIZE_WITHOUT_GUARD + PTHREAD_GUARD_SIZE)
 
+// Size of the shadow call stack.
+#define SCS_SIZE (8 * 1024)
+
 // Needed by fork.
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_prepare();
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_child();