Allocate a small guard region around the shadow call stack.

This lets us do two things:

1) Make setjmp and longjmp compatible with the shadow call stack.
   To avoid leaking the shadow call stack address into memory, only the
   lower log2(SCS_SIZE) bits of x18 are stored to jmp_buf; longjmp
   recombines them with the high bits of the current x18. This requires
   the SCS to be SCS_SIZE-aligned, so we over-allocate the mapping (the
   guard region is 2 * SCS_SIZE) so that we're guaranteed to be able to
   place a sufficiently aligned SCS inside it.

2) SCS overflow detection. Overflows now result in a SIGSEGV instead
   of corrupting the allocation that comes after it.

Change-Id: I04d6634f96162bf625684672a87fba8b402b7fd1
Test: bionic-unit-tests
diff --git a/libc/arch-arm64/bionic/setjmp.S b/libc/arch-arm64/bionic/setjmp.S
index 5e62c28..a2b2370 100644
--- a/libc/arch-arm64/bionic/setjmp.S
+++ b/libc/arch-arm64/bionic/setjmp.S
@@ -27,6 +27,7 @@
  */
 
 #include <private/bionic_asm.h>
+#include <private/bionic_constants.h>
 
 // According to AARCH64 PCS document we need to save the following
 // registers:
@@ -44,10 +45,12 @@
 // word   name            description
 // 0      sigflag/cookie  setjmp cookie in top 31 bits, signal mask flag in low bit
 // 1      sigmask         signal mask (not used with _setjmp / _longjmp)
-// 2      core_base       base of core registers (x19-x30, sp)
-// 15     float_base      base of float registers (d8-d15)
-// 23     checksum        checksum of core registers
-// 24     reserved        reserved entries (room to grow)
+// 2      core_base       base of core registers (x18-x30, sp)
+//                        (We only store the low bits of x18 to avoid leaking the
+//                        shadow call stack address into memory.)
+// 16     float_base      base of float registers (d8-d15)
+// 24     checksum        checksum of core registers
+// 25     reserved        reserved entries (room to grow)
 // 32
 
 #define _JB_SIGFLAG     0
@@ -58,18 +61,20 @@
 #define _JB_X24_X25     (_JB_X26_X27 + 2)
 #define _JB_X22_X23     (_JB_X24_X25 + 2)
 #define _JB_X20_X21     (_JB_X22_X23 + 2)
-#define _JB_X19         (_JB_X20_X21 + 2)
-#define _JB_D14_D15     (_JB_X19 + 1)
+#define _JB_SCS_X19     (_JB_X20_X21 + 2)
+#define _JB_D14_D15     (_JB_SCS_X19 + 2)
 #define _JB_D12_D13     (_JB_D14_D15 + 2)
 #define _JB_D10_D11     (_JB_D12_D13 + 2)
 #define _JB_D8_D9       (_JB_D10_D11 + 2)
 #define _JB_CHECKSUM    (_JB_D8_D9 + 2)
 
+#define SCS_MASK (SCS_SIZE - 1)
 #define MANGLE_REGISTERS 1
 #define USE_CHECKSUM 1
 
 .macro m_mangle_registers reg, sp_reg
 #if MANGLE_REGISTERS
+  eor x3, x3, \reg
   eor x19, x19, \reg
   eor x20, x20, \reg
   eor x21, x21, \reg
@@ -88,7 +93,7 @@
 
 .macro m_calculate_checksum dst, src, scratch
   mov \dst, #0
-  .irp i,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22
+  .irp i,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23
     ldr \scratch, [\src, #(\i * 8)]
     eor \dst, \dst, \scratch
   .endr
@@ -151,6 +156,9 @@
   // Mask off the signal flag bit.
   bic x1, x1, #1
 
+  // Mask off the high bits of the shadow call stack pointer.
+  and x3, x18, #SCS_MASK
+
   // Save core registers.
   mov x10, sp
   m_mangle_registers x1, sp_reg=x10
@@ -160,7 +168,7 @@
   stp x24, x25, [x0, #(_JB_X24_X25 * 8)]
   stp x22, x23, [x0, #(_JB_X22_X23 * 8)]
   stp x20, x21, [x0, #(_JB_X20_X21 * 8)]
-  str x19,      [x0, #(_JB_X19     * 8)]
+  stp x3,  x19, [x0, #(_JB_SCS_X19 * 8)]
   m_unmangle_registers x1, sp_reg=x10
 
   // Save floating point registers.
@@ -248,10 +256,14 @@
   ldp x24, x25, [x0, #(_JB_X24_X25 * 8)]
   ldp x22, x23, [x0, #(_JB_X22_X23 * 8)]
   ldp x20, x21, [x0, #(_JB_X20_X21 * 8)]
-  ldr x19,      [x0, #(_JB_X19     * 8)]
+  ldp x3,  x19, [x0, #(_JB_SCS_X19 * 8)]
   m_unmangle_registers x2, sp_reg=x10
   mov sp, x10
 
+  // Restore the low bits of the shadow call stack pointer.
+  and x18, x18, #~SCS_MASK
+  orr x18, x3, x18
+
   stp x0, x1, [sp, #-16]!
   .cfi_adjust_cfa_offset 16
   .cfi_rel_offset x0, 0
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 720a3ae..6f632e8 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -39,6 +39,7 @@
 
 #include <async_safe/log.h>
 
+#include "private/bionic_constants.h"
 #include "private/bionic_defs.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_ssp.h"
@@ -112,14 +113,28 @@
 
 static void __init_shadow_call_stack(pthread_internal_t* thread __unused) {
 #ifdef __aarch64__
-  // Allocate the stack and store its address in register x18.
-  // TODO(pcc): We ought to allocate a guard region here and then allocate the SCS at a random
-  // location within it. This will provide greater security since it would mean that an attacker who
-  // can read the pthread_internal_t won't be able to discover the address of the SCS. However,
-  // doing so is blocked on a solution to b/118642754.
-  char* scs = reinterpret_cast<char*>(
-      mmap(nullptr, SCS_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0));
-  thread->shadow_call_stack_guard_region = scs;
+  // Reserve an inaccessible guard region, make an SCS_SIZE-aligned window inside it writable, and
+  // store that window's address in register x18. The address is aligned to SCS_SIZE so that we
+  // only need to store the lower log2(SCS_SIZE) bits in jmp_buf; the rest of the region stays
+  // inaccessible so that overflowing the SCS faults instead of corrupting a neighboring mapping.
+  // TODO(pcc): We ought to allocate a larger guard region here and then allocate the SCS at a
+  // random location within it. This will provide greater security since it would mean that an
+  // attacker who can read the pthread_internal_t won't be able to discover the address of the SCS.
+  // However, doing so is blocked on a solution to b/118642754.
+  char* scs_guard_region = reinterpret_cast<char*>(
+      mmap(nullptr, SCS_GUARD_REGION_SIZE, 0, MAP_PRIVATE | MAP_ANON, -1, 0));
+  if (scs_guard_region == MAP_FAILED) {
+    // Never derive the SCS address from MAP_FAILED; fail loudly at the point of the error.
+    async_safe_fatal("failed to allocate shadow call stack guard region");
+  }
+  thread->shadow_call_stack_guard_region = scs_guard_region;
+
+  char* scs =
+      reinterpret_cast<char*>(align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
+  if (mprotect(scs, SCS_SIZE, PROT_READ | PROT_WRITE) != 0) {
+    // Without this the SCS window keeps the region's no-access protection and cannot be written.
+    async_safe_fatal("failed to make shadow call stack writable");
+  }
   __asm__ __volatile__("mov x18, %0" ::"r"(scs));
 #endif
 }
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index a884be5..2d4d6cf 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -33,6 +33,7 @@
 #include <string.h>
 #include <sys/mman.h>
 
+#include "private/bionic_constants.h"
 #include "private/bionic_defs.h"
 #include "private/ScopedSignalBlocker.h"
 #include "pthread_internal.h"
@@ -105,7 +106,7 @@
 
 #ifdef __aarch64__
   // Free the shadow call stack and guard pages.
-  munmap(thread->shadow_call_stack_guard_region, SCS_SIZE);
+  munmap(thread->shadow_call_stack_guard_region, SCS_GUARD_REGION_SIZE);
 #endif
 
   ThreadJoinState old_state = THREAD_NOT_JOINED;
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 3d58121..81b885a 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -209,9 +209,6 @@
 // Leave room for a guard page in the internally created signal stacks.
 #define SIGNAL_STACK_SIZE (SIGNAL_STACK_SIZE_WITHOUT_GUARD + PTHREAD_GUARD_SIZE)
 
-// Size of the shadow call stack.
-#define SCS_SIZE (8 * 1024)
-
 // Needed by fork.
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_prepare();
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_child();
diff --git a/libc/private/bionic_constants.h b/libc/private/bionic_constants.h
index 9ae1c8d..e64c826 100644
--- a/libc/private/bionic_constants.h
+++ b/libc/private/bionic_constants.h
@@ -19,4 +19,14 @@
 
 #define NS_PER_S 1000000000
 
+// Size of the shadow call stack. This must be a power of 2.
+#define SCS_SIZE (8 * 1024)
+
+// The shadow call stack is allocated at an aligned address within a guard region of this size. The
+// guard region must be large enough that we can allocate an SCS_SIZE-aligned SCS while ensuring
+// that there is at least one guard page after the SCS so that a stack overflow results in a SIGSEGV
+// instead of corrupting the allocation that comes after it.
+// TODO(b/118642754): Use a larger guard region.
+#define SCS_GUARD_REGION_SIZE (SCS_SIZE * 2)
+
 #endif // _BIONIC_CONSTANTS_H_
diff --git a/tests/Android.bp b/tests/Android.bp
index 6a644b6..899fc66 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -121,6 +121,7 @@
         "regex_test.cpp",
         "resolv_test.cpp",
         "sched_test.cpp",
+        "scs_test.cpp",
         "scsi_sg_test.cpp",
         "search_test.cpp",
         "semaphore_test.cpp",
diff --git a/tests/scs_test.cpp b/tests/scs_test.cpp
new file mode 100644
index 0000000..24cb347
--- /dev/null
+++ b/tests/scs_test.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#if __has_feature(shadow_call_stack)
+
+#include <gtest/gtest.h>
+
+#include <signal.h>
+
+#include "private/bionic_constants.h"
+
+// recurse1() and recurse2() call each other until |count| reaches zero. They are weak and
+// noinline, and the "+ 1" keeps the nested call out of tail position, so that (presumably) every
+// level of recursion is a real call that consumes a shadow call stack slot.
+int recurse2(int count);
+
+__attribute__((weak, noinline)) int recurse1(int count) {
+  if (count != 0) return recurse2(count - 1) + 1;
+  return 0;
+}
+
+__attribute__((weak, noinline)) int recurse2(int count) {
+  if (count != 0) return recurse1(count - 1) + 1;
+  return 0;
+}
+
+// Recursing SCS_SIZE levels deep must kill the process with SIGSEGV: the overflowing shadow call
+// stack is expected to hit a guard page instead of silently overwriting adjacent memory.
+TEST(scs_test, stack_overflow) {
+  ASSERT_EXIT(recurse1(SCS_SIZE), testing::KilledBySignal(SIGSEGV), "");
+}
+
+#endif  // __has_feature(shadow_call_stack)
diff --git a/tests/setjmp_test.cpp b/tests/setjmp_test.cpp
index dde0be1..44d8af1 100644
--- a/tests/setjmp_test.cpp
+++ b/tests/setjmp_test.cpp
@@ -264,3 +264,14 @@
     fprintf(stderr, "setjmp_cookie_checksum: longjmp succeeded?");
   }
 }
+
+__attribute__((noinline)) void call_longjmp(jmp_buf buf) {  // noinline: keep this a real call.
+  longjmp(buf, 123);  // Makes the matching setjmp() return 123.
+}
+
+TEST(setjmp, setjmp_stack) {  // longjmp() from a callee must resume here with the value 123.
+  jmp_buf buf;
+  int value = setjmp(buf);  // 0 on the direct call; 123 after call_longjmp() jumps back.
+  if (value == 0) call_longjmp(buf);
+  EXPECT_EQ(123, value);
+}