diff --git a/libc/bionic/pthread_attr.cpp b/libc/bionic/pthread_attr.cpp
index 914dd36..fc9e74a 100644
--- a/libc/bionic/pthread_attr.cpp
+++ b/libc/bionic/pthread_attr.cpp
@@ -43,7 +43,7 @@
   attr->flags = 0;
   attr->stack_base = NULL;
   attr->stack_size = PTHREAD_STACK_SIZE_DEFAULT;
-  attr->guard_size = PAGE_SIZE;
+  attr->guard_size = PTHREAD_GUARD_SIZE;
   attr->sched_policy = SCHED_NORMAL;
   attr->sched_priority = 0;
   return 0;
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 5010a64..09ae16c 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -55,15 +55,16 @@
   thread->tls[TLS_SLOT_SELF] = thread->tls;
   thread->tls[TLS_SLOT_THREAD_ID] = thread;
 
-  // Add a guard page before and after.
-  size_t allocation_size = BIONIC_TLS_SIZE + 2 * PAGE_SIZE;
+  // Add a guard before and after.
+  size_t allocation_size = BIONIC_TLS_SIZE + (2 * PTHREAD_GUARD_SIZE);
   void* allocation = mmap(nullptr, allocation_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   if (allocation == MAP_FAILED) {
     async_safe_fatal("failed to allocate TLS: %s", strerror(errno));
   }
-  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, allocation, allocation_size, "bionic TLS guard page");
+  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, allocation, allocation_size, "bionic TLS guard");
 
-  thread->bionic_tls = reinterpret_cast<bionic_tls*>(static_cast<char*>(allocation) + PAGE_SIZE);
+  thread->bionic_tls = reinterpret_cast<bionic_tls*>(static_cast<char*>(allocation) +
+                                                     PTHREAD_GUARD_SIZE);
   if (mprotect(thread->bionic_tls, BIONIC_TLS_SIZE, PROT_READ | PROT_WRITE) != 0) {
     async_safe_fatal("failed to mprotect TLS: %s", strerror(errno));
   }
@@ -79,15 +80,14 @@
   // Create and set an alternate signal stack.
   void* stack_base = mmap(NULL, SIGNAL_STACK_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
   if (stack_base != MAP_FAILED) {
-
-    // Create a guard page to catch stack overflows in signal handlers.
-    if (mprotect(stack_base, PAGE_SIZE, PROT_NONE) == -1) {
+    // Create a guard to catch stack overflows in signal handlers.
+    if (mprotect(stack_base, PTHREAD_GUARD_SIZE, PROT_NONE) == -1) {
       munmap(stack_base, SIGNAL_STACK_SIZE);
       return;
     }
     stack_t ss;
-    ss.ss_sp = reinterpret_cast<uint8_t*>(stack_base) + PAGE_SIZE;
-    ss.ss_size = SIGNAL_STACK_SIZE - PAGE_SIZE;
+    ss.ss_sp = reinterpret_cast<uint8_t*>(stack_base) + PTHREAD_GUARD_SIZE;
+    ss.ss_size = SIGNAL_STACK_SIZE - PTHREAD_GUARD_SIZE;
     ss.ss_flags = 0;
     sigaltstack(&ss, NULL);
     thread->alternate_signal_stack = stack_base;
@@ -95,7 +95,7 @@
     // We can only use const static allocated string for mapped region name, as Android kernel
     // uses the string pointer directly when dumping /proc/pid/maps.
     prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ss.ss_sp, ss.ss_size, "thread signal stack");
-    prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, stack_base, PAGE_SIZE, "thread signal stack guard page");
+    prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, stack_base, PTHREAD_GUARD_SIZE, "thread signal stack guard");
   }
 }
 
@@ -149,7 +149,7 @@
     munmap(space, mmap_size);
     return NULL;
   }
-  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, space, stack_guard_size, "thread stack guard page");
+  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, space, stack_guard_size, "thread stack guard");
 
   return space;
 }
@@ -161,7 +161,9 @@
   if (attr->stack_base == NULL) {
     // The caller didn't provide a stack, so allocate one.
-    // Make sure the stack size and guard size are multiples of PAGE_SIZE.
+    // Make sure the mapping size and guard size are multiples of PAGE_SIZE.
-    mmap_size = BIONIC_ALIGN(attr->stack_size + sizeof(pthread_internal_t), PAGE_SIZE);
+    if (__builtin_add_overflow(attr->stack_size, attr->guard_size, &mmap_size)) return EAGAIN;
+    if (__builtin_add_overflow(mmap_size, sizeof(pthread_internal_t), &mmap_size)) return EAGAIN;
+    mmap_size = BIONIC_ALIGN(mmap_size, PAGE_SIZE);
     attr->guard_size = BIONIC_ALIGN(attr->guard_size, PAGE_SIZE);
     attr->stack_base = __create_thread_mapped_space(mmap_size, attr->guard_size);
     if (attr->stack_base == NULL) {
@@ -176,7 +178,7 @@
 
   // Mapped space(or user allocated stack) is used for:
   //   pthread_internal_t
-  //   thread stack (including guard page)
+  //   thread stack (including guard)
 
   // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
   stack_top = reinterpret_cast<uint8_t*>(
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 9adf405..ac29c1d 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -93,8 +93,8 @@
   }
 
-  // Unmap the bionic TLS, including guard pages.
+  // Unmap the bionic TLS, including the guards on either side.
-  void* allocation = reinterpret_cast<char*>(thread->bionic_tls) - PAGE_SIZE;
-  munmap(allocation, BIONIC_TLS_SIZE + 2 * PAGE_SIZE);
+  void* allocation = reinterpret_cast<char*>(thread->bionic_tls) - PTHREAD_GUARD_SIZE;
+  munmap(allocation, BIONIC_TLS_SIZE + 2 * PTHREAD_GUARD_SIZE);
 
   ThreadJoinState old_state = THREAD_NOT_JOINED;
   while (old_state == THREAD_NOT_JOINED &&
diff --git a/libc/bionic/pthread_internal.cpp b/libc/bionic/pthread_internal.cpp
index abd403b..829194c 100644
--- a/libc/bionic/pthread_internal.cpp
+++ b/libc/bionic/pthread_internal.cpp
@@ -104,9 +104,13 @@
   // Check if we're looking for ourselves before acquiring the lock.
   if (thread == __get_thread()) return thread;
 
-  ScopedReadLock locker(&g_thread_list_lock);
-  for (pthread_internal_t* t = g_thread_list; t != nullptr; t = t->next) {
-    if (t == thread) return thread;
+  {
+    // Make sure to release the lock before the abort below. Otherwise,
+    // some apps might deadlock in their own crash handlers (see b/6565627).
+    ScopedReadLock locker(&g_thread_list_lock);
+    for (pthread_internal_t* t = g_thread_list; t != nullptr; t = t->next) {
+      if (t == thread) return thread;
+    }
   }
 
   // Historically we'd return null, but
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 6faf5a4..77bdd85 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -141,25 +141,29 @@
 
 __LIBC_HIDDEN__ void pthread_key_clean_all(void);
 
-// SIGSTKSZ (8kB) is not big enough.
-// snprintf to a stack buffer of size PATH_MAX consumes ~7kB of stack.
-// Also, on 64-bit, logging uses more than 8kB by itself:
-// https://code.google.com/p/android/issues/detail?id=187064
-#define SIGNAL_STACK_SIZE_WITHOUT_GUARD_PAGE (16 * 1024)
+// Address space is precious on LP32, so use the minimum unit: one page.
+// On LP64, we could use more but there's no obvious advantage to doing
+// so, and the various media processes use RLIMIT_AS as a way to limit
+// the amount of allocation they'll do.
+#define PTHREAD_GUARD_SIZE PAGE_SIZE
 
-/*
- * Traditionally we gave threads a 1MiB stack. When we started
- * allocating per-thread alternate signal stacks to ease debugging of
- * stack overflows, we subtracted the same amount we were using there
- * from the default thread stack size. This should keep memory usage
- * roughly constant.
- */
-#define PTHREAD_STACK_SIZE_DEFAULT ((1 * 1024 * 1024) - SIGNAL_STACK_SIZE_WITHOUT_GUARD_PAGE)
+// SIGSTKSZ (8KiB) is not big enough.
+// An snprintf to a stack buffer of size PATH_MAX consumes ~7KiB of stack.
+// Also, on 64-bit, logging uses more than 8KiB by itself:
+// https://code.google.com/p/android/issues/detail?id=187064
+#define SIGNAL_STACK_SIZE_WITHOUT_GUARD (16 * 1024)
+
+// Traditionally we gave threads a 1MiB stack. When we started
+// allocating per-thread alternate signal stacks to ease debugging of
+// stack overflows, we subtracted the same amount we were using there
+// from the default thread stack size. This should keep memory usage
+// roughly constant.
+#define PTHREAD_STACK_SIZE_DEFAULT ((1 * 1024 * 1024) - SIGNAL_STACK_SIZE_WITHOUT_GUARD)
 
-// Leave room for a guard page in the internally created signal stacks.
+// Leave room for a guard in the internally created signal stacks.
-#define SIGNAL_STACK_SIZE (SIGNAL_STACK_SIZE_WITHOUT_GUARD_PAGE + PAGE_SIZE)
+#define SIGNAL_STACK_SIZE (SIGNAL_STACK_SIZE_WITHOUT_GUARD + PTHREAD_GUARD_SIZE)
 
-/* Needed by fork. */
+// Needed by fork.
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_prepare();
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_child();
 __LIBC_HIDDEN__ extern void __bionic_atfork_run_parent();
diff --git a/libc/include/math.h b/libc/include/math.h
index 3d0ec49..6f8b863 100644
--- a/libc/include/math.h
+++ b/libc/include/math.h
@@ -145,7 +145,7 @@
 
 int ilogb(double __x) __attribute_const__;
 int ilogbf(float __x) __attribute_const__;
-int ilogbl(long double __x) __attribute_const__ __RENAME_LDBL(ilogb, 3, 3);
+int ilogbl(long double __x) __RENAME_LDBL(ilogb, 3, 3) __attribute_const__;
 
 double ldexp(double __x, int __exponent);
 float ldexpf(float __x, int __exponent);
@@ -190,7 +190,7 @@
 
 double fabs(double __x) __attribute_const__;
 float fabsf(float __x) __attribute_const__;
-long double fabsl(long double __x) __attribute_const__ __RENAME_LDBL(fabs, 3, 3);
+long double fabsl(long double __x) __RENAME_LDBL(fabs, 3, 3) __attribute_const__;
 
 double hypot(double __x, double __y);
 float hypotf(float __x, float __y);
@@ -274,11 +274,11 @@
 
 double copysign(double __value, double __sign) __attribute_const__;
 float copysignf(float __value, float __sign) __attribute_const__;
-long double copysignl(long double __value, long double __sign) __attribute_const__ __RENAME_LDBL(copysign, 3, 3);
+long double copysignl(long double __value, long double __sign) __RENAME_LDBL(copysign, 3, 3) __attribute_const__;
 
 double nan(const char* __kind) __attribute_const__ __INTRODUCED_IN_ARM(13) __INTRODUCED_IN_MIPS(13) __INTRODUCED_IN_X86(9);
 float nanf(const char* __kind) __attribute_const__ __INTRODUCED_IN_ARM(13) __INTRODUCED_IN_MIPS(13) __INTRODUCED_IN_X86(9);
-long double nanl(const char* __kind) __attribute_const__ __RENAME_LDBL(nan, 13, 13);
+long double nanl(const char* __kind) __RENAME_LDBL(nan, 13, 13) __attribute_const__;
 
 double nextafter(double __x, double __y);
 float nextafterf(float __x, float __y);
@@ -294,11 +294,11 @@
 
 double fmax(double __x, double __y) __attribute_const__;
 float fmaxf(float __x, float __y) __attribute_const__;
-long double fmaxl(long double __x, long double __y) __attribute_const__ __RENAME_LDBL(fmax, 3, 3);
+long double fmaxl(long double __x, long double __y) __RENAME_LDBL(fmax, 3, 3) __attribute_const__;
 
 double fmin(double __x, double __y) __attribute_const__;
 float fminf(float __x, float __y) __attribute_const__;
-long double fminl(long double __x, long double __y) __attribute_const__ __RENAME_LDBL(fmin, 3, 3);
+long double fminl(long double __x, long double __y) __RENAME_LDBL(fmin, 3, 3) __attribute_const__;
 
 double fma(double __x, double __y, double __z);
 float fmaf(float __x, float __y, float __z);
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index d64bc48..712a6d7 100755
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -660,30 +660,54 @@
   return result;
 }
 
-TEST(pthread, pthread_attr_setguardsize) {
+TEST(pthread, pthread_attr_setguardsize_tiny) {
   pthread_attr_t attributes;
   ASSERT_EQ(0, pthread_attr_init(&attributes));
 
-  // Get the default guard size.
-  size_t default_guard_size;
-  ASSERT_EQ(0, pthread_attr_getguardsize(&attributes, &default_guard_size));
-
   // No such thing as too small: will be rounded up to one page by pthread_create.
   ASSERT_EQ(0, pthread_attr_setguardsize(&attributes, 128));
   size_t guard_size;
   ASSERT_EQ(0, pthread_attr_getguardsize(&attributes, &guard_size));
   ASSERT_EQ(128U, guard_size);
   ASSERT_EQ(4096U, GetActualGuardSize(attributes));
+}
+
+TEST(pthread, pthread_attr_setguardsize_reasonable) {
+  pthread_attr_t attributes;
+  ASSERT_EQ(0, pthread_attr_init(&attributes));
 
   // Large enough and a multiple of the page size.
   ASSERT_EQ(0, pthread_attr_setguardsize(&attributes, 32*1024));
+  size_t guard_size;
   ASSERT_EQ(0, pthread_attr_getguardsize(&attributes, &guard_size));
   ASSERT_EQ(32*1024U, guard_size);
+  ASSERT_EQ(32*1024U, GetActualGuardSize(attributes));
+}
 
-  // Large enough but not a multiple of the page size; will be rounded up by pthread_create.
+TEST(pthread, pthread_attr_setguardsize_needs_rounding) {
+  pthread_attr_t attributes;
+  ASSERT_EQ(0, pthread_attr_init(&attributes));
+
+  // Large enough but not a multiple of the page size.
   ASSERT_EQ(0, pthread_attr_setguardsize(&attributes, 32*1024 + 1));
+  size_t guard_size;
   ASSERT_EQ(0, pthread_attr_getguardsize(&attributes, &guard_size));
   ASSERT_EQ(32*1024U + 1, guard_size);
+  ASSERT_EQ(36*1024U, GetActualGuardSize(attributes));
+}
+
+TEST(pthread, pthread_attr_setguardsize_enormous) {
+  pthread_attr_t attributes;
+  ASSERT_EQ(0, pthread_attr_init(&attributes));
+
+  // Larger than the stack itself. (Historically we mistakenly carved
+  // the guard out of the stack itself, rather than adding it after the
+  // end.)
+  ASSERT_EQ(0, pthread_attr_setguardsize(&attributes, 32*1024*1024));
+  size_t guard_size;
+  ASSERT_EQ(0, pthread_attr_getguardsize(&attributes, &guard_size));
+  ASSERT_EQ(32*1024*1024U, guard_size);
+  ASSERT_EQ(32*1024*1024U, GetActualGuardSize(attributes));
 }
 
 TEST(pthread, pthread_attr_setstacksize) {
