Reorganize static TLS memory for ELF TLS

For ELF TLS "local-exec" accesses, the static linker assumes that an
executable's TLS segment is located at a statically-known offset from the
thread pointer (i.e. "variant 1" for ARM and "variant 2" for x86).
Because these layouts are incompatible, Bionic generally needs to allocate
its TLS slots differently between different architectures.

To allow per-architecture TLS slots:
 - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are
   generally negative, while new x86 slots are generally positive.
 - Define a bionic_tcb struct that provides two things:
    - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field
    - an inline accessor function: void*& tls_slot(size_t tpindex);

For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots),
because the runtime linker doesn't know how large the static TLS area is
until after it has loaded all of the initial solibs.

To accommodate Golang, it's necessary to allocate the pthread keys at a
fixed, small, positive offset from the thread pointer.

This CL moves the pthread keys into bionic_tls, then allocates a single
mapping per thread that looks like so:
 - stack guard
 - stack [omitted for main thread and with pthread_attr_setstack]
 - static TLS:
    - bionic_tcb [exec TLS will either precede or follow the TCB]
    - bionic_tls [prefixed by the pthread keys]
    - [solib TLS segments will be placed here]
 - guard page

As before, if the new mapping includes a stack, the pthread_internal_t
is allocated on it.

At startup, Bionic allocates a temporary bionic_tcb object on the stack,
then allocates a temporary bionic_tls object using mmap. This mmap is
delayed because the linker can't currently call async_safe_fatal() before
relocating itself.

Later, Bionic allocates a stack-less thread mapping for the main thread,
and copies slots from the temporary TCB to the new TCB.
(See *::copy_from_bootstrap methods.)

Bug: http://b/78026329
Test: bionic unit tests
Test: verify that a Golang app still works
Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped
Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3
Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3
(cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
diff --git a/libc/Android.bp b/libc/Android.bp
index e74060c..e0d0fee 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -1199,6 +1199,7 @@
 cc_library_static {
     defaults: ["libc_defaults"],
     srcs: [
+        "bionic/bionic_elf_tls.cpp",
         "bionic/pthread_atfork.cpp",
         "bionic/pthread_attr.cpp",
         "bionic/pthread_barrier.cpp",
diff --git a/libc/NOTICE b/libc/NOTICE
index 40a5704..120c4fd 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -1068,6 +1068,34 @@
 
 -------------------------------------------------------------------
 
+Copyright (C) 2019 The Android Open Source Project
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+ * Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
+-------------------------------------------------------------------
+
 Copyright (c) 1980, 1983, 1988, 1993
    The Regents of the University of California.  All rights reserved.
 
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 5abdc07..2b90c90 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -28,9 +28,12 @@
 
 #include "libc_init_common.h"
 
+#include <async_safe/log.h>
+
 #include "private/KernelArgumentBlock.h"
 #include "private/bionic_arc4random.h"
 #include "private/bionic_defs.h"
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_globals.h"
 #include "private/bionic_ssp.h"
 #include "pthread_internal.h"
@@ -43,11 +46,6 @@
 
 static pthread_internal_t main_thread;
 
-__attribute__((no_sanitize("hwaddress")))
-pthread_internal_t* __get_main_thread() {
-  return &main_thread;
-}
-
 // Setup for the main thread. For dynamic executables, this is called by the
 // linker _before_ libc is mapped in memory. This means that all writes to
 // globals from this function will apply to linker-private copies and will not
@@ -69,35 +67,28 @@
 // linker, the linker binary hasn't been relocated yet, so certain kinds of code
 // are hazardous, such as accessing non-hidden global variables.
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
-void __libc_init_main_thread_early(KernelArgumentBlock& args) {
+extern "C" void __libc_init_main_thread_early(const KernelArgumentBlock& args,
+                                              bionic_tcb* temp_tcb) {
   __libc_shared_globals()->auxv = args.auxv;
 #if defined(__i386__)
-  __libc_init_sysinfo();
+  __libc_init_sysinfo(); // uses AT_SYSINFO auxv entry
 #endif
-  __set_tls(main_thread.tls);
-  __init_tls(&main_thread);
+  __init_tcb(temp_tcb, &main_thread);
+  __set_tls(&temp_tcb->tls_slot(0));
   main_thread.tid = __getpid();
   main_thread.set_cached_pid(main_thread.tid);
 }
 
 // Finish initializing the main thread.
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
-void __libc_init_main_thread_late() {
-  main_thread.bionic_tls = __allocate_bionic_tls();
-  if (main_thread.bionic_tls == nullptr) {
-    // Avoid strerror because it might need bionic_tls.
-    async_safe_fatal("failed to allocate bionic_tls: error %d", errno);
-  }
+extern "C" void __libc_init_main_thread_late() {
+  __init_bionic_tls_ptrs(__get_bionic_tcb(), __allocate_temp_bionic_tls());
 
   // Tell the kernel to clear our tid field when we exit, so we're like any other pthread.
+  // For threads created by pthread_create, this setup happens during the clone syscall (i.e.
+  // CLONE_CHILD_CLEARTID).
   __set_tid_address(&main_thread.tid);
 
-  // We don't want to free the main thread's stack even when the main thread exits
-  // because things like environment variables with global scope live on it.
-  // We also can't free the pthread_internal_t itself, since it is a static variable.
-  // The main thread has no mmap allocated space for stack or pthread_internal_t.
-  main_thread.mmap_size = 0;
-
   pthread_attr_init(&main_thread.attr);
   // We don't want to explicitly set the main thread's scheduler attributes (http://b/68328561).
   pthread_attr_setinheritsched(&main_thread.attr, PTHREAD_INHERIT_SCHED);
@@ -110,9 +101,40 @@
   // before we initialize the TLS. Dynamic executables will initialize their copy of the global
   // stack protector from the one in the main thread's TLS.
   __libc_safe_arc4random_buf(&__stack_chk_guard, sizeof(__stack_chk_guard));
-  __init_tls_stack_guard(&main_thread);
+  __init_tcb_stack_guard(__get_bionic_tcb());
 
   __init_thread(&main_thread);
 
   __init_additional_stacks(&main_thread);
 }
+
+// Once all ELF modules are loaded, allocate the final copy of the main thread's
+// static TLS memory.
+__BIONIC_WEAK_FOR_NATIVE_BRIDGE
+extern "C" void __libc_init_main_thread_final() {
+  bionic_tcb* temp_tcb = __get_bionic_tcb();
+  bionic_tls* temp_tls = &__get_bionic_tls();
+
+  // Allocate the main thread's static TLS. (This mapping doesn't include a
+  // stack.)
+  ThreadMapping mapping = __allocate_thread_mapping(0, PTHREAD_GUARD_SIZE);
+  if (mapping.mmap_base == nullptr) {
+    async_safe_fatal("failed to mmap main thread static TLS: %s", strerror(errno));
+  }
+
+  const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  auto new_tcb = reinterpret_cast<bionic_tcb*>(mapping.static_tls + layout.offset_bionic_tcb());
+  auto new_tls = reinterpret_cast<bionic_tls*>(mapping.static_tls + layout.offset_bionic_tls());
+
+  new_tcb->copy_from_bootstrap(temp_tcb);
+  new_tls->copy_from_bootstrap(temp_tls);
+  __init_tcb(new_tcb, &main_thread);
+  __init_bionic_tls_ptrs(new_tcb, new_tls);
+
+  main_thread.mmap_base = mapping.mmap_base;
+  main_thread.mmap_size = mapping.mmap_size;
+
+  __set_tls(&new_tcb->tls_slot(0));
+
+  __free_temp_bionic_tls(temp_tls);
+}
diff --git a/libc/bionic/bionic_elf_tls.cpp b/libc/bionic/bionic_elf_tls.cpp
new file mode 100644
index 0000000..55c2c31
--- /dev/null
+++ b/libc/bionic/bionic_elf_tls.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "private/bionic_elf_tls.h"
+
+#include <sys/param.h>
+
+#include "private/bionic_macros.h"
+#include "private/bionic_tls.h"
+
+void StaticTlsLayout::reserve_tcb() {
+  offset_bionic_tcb_ = reserve_type<bionic_tcb>();
+}
+
+void StaticTlsLayout::reserve_bionic_tls() {
+  offset_bionic_tls_ = reserve_type<bionic_tls>();
+}
+
+void StaticTlsLayout::finish_layout() {
+  // Round the offset up to the alignment.
+  offset_ = round_up_with_overflow_check(offset_, alignment_);
+}
+
+// The size is not required to be a multiple of the alignment. The alignment
+// must be a positive power-of-two.
+size_t StaticTlsLayout::reserve(size_t size, size_t alignment) {
+  offset_ = round_up_with_overflow_check(offset_, alignment);
+  const size_t result = offset_;
+  if (__builtin_add_overflow(offset_, size, &offset_)) overflowed_ = true;
+  alignment_ = MAX(alignment_, alignment);
+  return result;
+}
+
+size_t StaticTlsLayout::round_up_with_overflow_check(size_t value, size_t alignment) {
+  const size_t old_value = value;
+  value = __BIONIC_ALIGN(value, alignment);
+  if (value < old_value) overflowed_ = true;
+  return value;
+}
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index b2f8bbf..68650ed 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -39,6 +39,7 @@
 #include "libc_init_common.h"
 #include "pthread_internal.h"
 
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_page.h"
@@ -82,6 +83,13 @@
   }
 }
 
+static void layout_static_tls() {
+  StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  layout.reserve_tcb();         // TCB first: the thread pointer points into it.
+  layout.reserve_bionic_tls();  // Keys then sit at a small positive TP offset (Golang).
+  layout.finish_layout();
+}
+
 // The program startup function __libc_init() defined here is
 // used for static executables only (i.e. those that don't depend
 // on shared libraries). It is called from arch-$ARCH/bionic/crtbegin_static.S
@@ -92,16 +100,19 @@
 __noreturn static void __real_libc_init(void *raw_args,
                                         void (*onexit)(void) __unused,
                                         int (*slingshot)(int, char**, char**),
-                                        structors_array_t const * const structors) {
+                                        structors_array_t const * const structors,
+                                        bionic_tcb* temp_tcb) {
   BIONIC_STOP_UNWIND;
 
   // Initialize TLS early so system calls and errno work.
   KernelArgumentBlock args(raw_args);
-  __libc_init_main_thread_early(args);
+  __libc_init_main_thread_early(args, temp_tcb);
   __libc_init_main_thread_late();
   __libc_init_globals();
   __libc_shared_globals()->init_progname = args.argv[0];
   __libc_init_AT_SECURE(args.envp);
+  layout_static_tls();
+  __libc_init_main_thread_final();
   __libc_init_common();
 
   apply_gnu_relro();
@@ -129,16 +140,16 @@
                             void (*onexit)(void) __unused,
                             int (*slingshot)(int, char**, char**),
                             structors_array_t const * const structors) {
+  bionic_tcb temp_tcb = {};
 #if __has_feature(hwaddress_sanitizer)
   // Install main thread TLS early. It will be initialized later in __libc_init_main_thread. For now
-  // all we need is access to TLS_SLOT_TSAN.
-  pthread_internal_t* main_thread = __get_main_thread();
-  __set_tls(main_thread->tls);
-  // Initialize HWASan. This sets up TLS_SLOT_TSAN, among other things.
+  // all we need is access to TLS_SLOT_SANITIZER.
+  __set_tls(&temp_tcb.tls_slot(0));
+  // Initialize HWASan. This sets up TLS_SLOT_SANITIZER, among other things.
   __hwasan_init();
   // We are ready to run HWASan-instrumented code, proceed with libc initialization...
 #endif
-  __real_libc_init(raw_args, onexit, slingshot, structors);
+  __real_libc_init(raw_args, onexit, slingshot, structors, &temp_tcb);
 }
 
 static int g_target_sdk_version{__ANDROID_API__};
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 06d2ecb..8676a45 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -41,6 +41,7 @@
 
 #include "private/bionic_constants.h"
 #include "private/bionic_defs.h"
+#include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_ssp.h"
 #include "private/bionic_tls.h"
@@ -54,39 +55,43 @@
 
 // This code is used both by each new pthread and the code that initializes the main thread.
 __attribute__((no_stack_protector))
-void __init_tls(pthread_internal_t* thread) {
-  // Slot 0 must point to itself. The x86 Linux kernel reads the TLS from %fs:0.
-  thread->tls[TLS_SLOT_SELF] = thread->tls;
-  thread->tls[TLS_SLOT_THREAD_ID] = thread;
+void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread) {
+#ifdef TLS_SLOT_SELF
+  // On x86, slot 0 must point to itself so code can read the thread pointer by
+  // loading %fs:0 or %gs:0.
+  tcb->tls_slot(TLS_SLOT_SELF) = &tcb->tls_slot(TLS_SLOT_SELF);
+#endif
+  tcb->tls_slot(TLS_SLOT_THREAD_ID) = thread;
 }
 
 __attribute__((no_stack_protector))
-void __init_tls_stack_guard(pthread_internal_t* thread) {
+void __init_tcb_stack_guard(bionic_tcb* tcb) {
   // GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
-  thread->tls[TLS_SLOT_STACK_GUARD] = reinterpret_cast<void*>(__stack_chk_guard);
+  tcb->tls_slot(TLS_SLOT_STACK_GUARD) = reinterpret_cast<void*>(__stack_chk_guard);
 }
 
-bionic_tls* __allocate_bionic_tls() {
-  // Add a guard before and after.
-  size_t allocation_size = BIONIC_TLS_SIZE + (2 * PTHREAD_GUARD_SIZE);
-  void* allocation = mmap(nullptr, allocation_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
+  tcb->thread()->bionic_tls = tls;
+  tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;
+}
+
+// Allocate a temporary bionic_tls that the dynamic linker's main thread can
+// use while it's loading the initial set of ELF modules.
+bionic_tls* __allocate_temp_bionic_tls() {
+  size_t allocation_size = __BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE);
+  void* allocation = mmap(nullptr, allocation_size,
+                          PROT_READ | PROT_WRITE,
+                          MAP_PRIVATE | MAP_ANONYMOUS,
+                          -1, 0);
   if (allocation == MAP_FAILED) {
-    async_safe_format_log(ANDROID_LOG_WARN, "libc",
-                          "pthread_create failed: couldn't allocate TLS: %s", strerror(errno));
-    return nullptr;
+    // Avoid strerror because it might need bionic_tls.
+    async_safe_fatal("failed to allocate bionic_tls: error %d", errno);
   }
+  return static_cast<bionic_tls*>(allocation);
+}
 
-  // Carve out the writable TLS section.
-  bionic_tls* result = reinterpret_cast<bionic_tls*>(static_cast<char*>(allocation) +
-                                                     PTHREAD_GUARD_SIZE);
-  if (mprotect(result, BIONIC_TLS_SIZE, PROT_READ | PROT_WRITE) != 0) {
-    async_safe_format_log(ANDROID_LOG_WARN, "libc",
-                          "pthread_create failed: couldn't mprotect TLS: %s", strerror(errno));
-    munmap(allocation, allocation_size);
-    return nullptr;
-  }
-
-  return result;
+void __free_temp_bionic_tls(bionic_tls* tls) {
+  munmap(tls, __BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE));
 }
 
 static void __init_alternate_signal_stack(pthread_internal_t* thread) {
@@ -188,82 +193,112 @@
   return 0;
 }
 
-static void* __create_thread_mapped_space(size_t mmap_size, size_t stack_guard_size) {
-  // Create a new private anonymous map.
-  int prot = PROT_READ | PROT_WRITE;
-  int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
-  void* space = mmap(nullptr, mmap_size, prot, flags, -1, 0);
+
+// Allocate a thread's primary mapping. This mapping includes static TLS and
+// optionally a stack. Static TLS includes ELF TLS segments and the bionic_tls
+// struct.
+//
+// The stack_guard_size must be a multiple of the PAGE_SIZE.
+ThreadMapping __allocate_thread_mapping(size_t stack_size, size_t stack_guard_size) {
+  const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+
+  // Allocate in order: stack guard, stack, static TLS, guard page.
+  size_t mmap_size;
+  if (__builtin_add_overflow(stack_size, stack_guard_size, &mmap_size)) return {};
+  if (__builtin_add_overflow(mmap_size, layout.size(), &mmap_size)) return {};
+  if (__builtin_add_overflow(mmap_size, PTHREAD_GUARD_SIZE, &mmap_size)) return {};
+
+  // Align the result to a page size.
+  const size_t unaligned_size = mmap_size;
+  mmap_size = __BIONIC_ALIGN(mmap_size, PAGE_SIZE);
+  if (mmap_size < unaligned_size) return {};
+
+  // Create a new private anonymous map. Make the entire mapping PROT_NONE, then carve out a
+  // read+write area in the middle.
+  const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+  char* const space = static_cast<char*>(mmap(nullptr, mmap_size, PROT_NONE, flags, -1, 0));
   if (space == MAP_FAILED) {
     async_safe_format_log(ANDROID_LOG_WARN,
-                      "libc",
-                      "pthread_create failed: couldn't allocate %zu-bytes mapped space: %s",
-                      mmap_size, strerror(errno));
-    return nullptr;
+                          "libc",
+                          "pthread_create failed: couldn't allocate %zu-bytes mapped space: %s",
+                          mmap_size, strerror(errno));
+    return {};
+  }
+  const size_t writable_size = mmap_size - stack_guard_size - PTHREAD_GUARD_SIZE;
+  if (mprotect(space + stack_guard_size,
+               writable_size,
+               PROT_READ | PROT_WRITE) != 0) {
+    async_safe_format_log(ANDROID_LOG_WARN, "libc",
+                          "pthread_create failed: couldn't mprotect R+W %zu-byte thread mapping region: %s",
+                          writable_size, strerror(errno));
+    munmap(space, mmap_size);
+    return {};
   }
 
-  // Stack is at the lower end of mapped space, stack guard region is at the lower end of stack.
-  // Set the stack guard region to PROT_NONE, so we can detect thread stack overflow.
-  if (mprotect(space, stack_guard_size, PROT_NONE) == -1) {
-    async_safe_format_log(ANDROID_LOG_WARN, "libc",
-                          "pthread_create failed: couldn't mprotect PROT_NONE %zu-byte stack guard region: %s",
-                          stack_guard_size, strerror(errno));
-    munmap(space, mmap_size);
-    return nullptr;
-  }
-  return space;
+  ThreadMapping result = {};
+  result.mmap_base = space;
+  result.mmap_size = mmap_size;
+  result.static_tls = space + mmap_size - PTHREAD_GUARD_SIZE - layout.size();
+  result.stack_base = space;
+  result.stack_top = result.static_tls;
+  return result;
 }
 
-static int __allocate_thread(pthread_attr_t* attr, pthread_internal_t** threadp, void** child_stack) {
-  size_t mmap_size;
-  uint8_t* stack_top;
+static int __allocate_thread(pthread_attr_t* attr, bionic_tcb** tcbp, void** child_stack) {
+  ThreadMapping mapping;
+  char* stack_top;
+  bool stack_clean = false;
 
   if (attr->stack_base == nullptr) {
     // The caller didn't provide a stack, so allocate one.
-    // Make sure the stack size and guard size are multiples of PAGE_SIZE.
-    if (__builtin_add_overflow(attr->stack_size, attr->guard_size, &mmap_size)) return EAGAIN;
-    if (__builtin_add_overflow(mmap_size, sizeof(pthread_internal_t), &mmap_size)) return EAGAIN;
-    mmap_size = __BIONIC_ALIGN(mmap_size, PAGE_SIZE);
+
+    // Make sure the guard size is a multiple of PAGE_SIZE.
+    const size_t unaligned_guard_size = attr->guard_size;
     attr->guard_size = __BIONIC_ALIGN(attr->guard_size, PAGE_SIZE);
-    attr->stack_base = __create_thread_mapped_space(mmap_size, attr->guard_size);
-    if (attr->stack_base == nullptr) {
-      return EAGAIN;
-    }
-    stack_top = reinterpret_cast<uint8_t*>(attr->stack_base) + mmap_size;
+    if (attr->guard_size < unaligned_guard_size) return EAGAIN;
+
+    mapping = __allocate_thread_mapping(attr->stack_size, attr->guard_size);
+    if (mapping.mmap_base == nullptr) return EAGAIN;
+
+    stack_top = mapping.stack_top;
+    attr->stack_base = mapping.stack_base;
+    stack_clean = true;
   } else {
-    // Remember the mmap size is zero and we don't need to free it.
-    mmap_size = 0;
-    stack_top = reinterpret_cast<uint8_t*>(attr->stack_base) + attr->stack_size;
+    mapping = __allocate_thread_mapping(0, PTHREAD_GUARD_SIZE);
+    if (mapping.mmap_base == nullptr) return EAGAIN;
+
+    stack_top = static_cast<char*>(attr->stack_base) + attr->stack_size;
   }
 
-  // Mapped space(or user allocated stack) is used for:
-  //   pthread_internal_t
-  //   thread stack (including guard)
+  // Carve out space from the stack for the thread's pthread_internal_t. This
+  // memory isn't counted in pthread_attr_getstacksize.
 
   // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
-  stack_top = reinterpret_cast<uint8_t*>(
-                (reinterpret_cast<uintptr_t>(stack_top) - sizeof(pthread_internal_t)) & ~0xf);
+  stack_top = align_down(stack_top - sizeof(pthread_internal_t), 16);
 
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
-  if (mmap_size == 0) {
+  if (!stack_clean) {
     // If thread was not allocated by mmap(), it may not have been cleared to zero.
     // So assume the worst and zero it.
     memset(thread, 0, sizeof(pthread_internal_t));
   }
-  attr->stack_size = stack_top - reinterpret_cast<uint8_t*>(attr->stack_base);
 
-  thread->mmap_size = mmap_size;
+  // Locate static TLS structures within the mapped region.
+  const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  auto tcb = reinterpret_cast<bionic_tcb*>(mapping.static_tls + layout.offset_bionic_tcb());
+  auto tls = reinterpret_cast<bionic_tls*>(mapping.static_tls + layout.offset_bionic_tls());
+
+  // (Re)initialize TLS pointers.
+  __init_tcb(tcb, thread);
+  __init_tcb_stack_guard(tcb);
+  __init_bionic_tls_ptrs(tcb, tls);
+
+  attr->stack_size = stack_top - static_cast<char*>(attr->stack_base);
   thread->attr = *attr;
+  thread->mmap_base = mapping.mmap_base;
+  thread->mmap_size = mapping.mmap_size;
 
-  thread->bionic_tls = __allocate_bionic_tls();
-  if (thread->bionic_tls == nullptr) {
-    if (thread->mmap_size != 0) munmap(thread->attr.stack_base, thread->mmap_size);
-    return EAGAIN;
-  }
-
-  __init_tls(thread);
-  __init_tls_stack_guard(thread);
-
-  *threadp = thread;
+  *tcbp = tcb;
   *child_stack = stack_top;
   return 0;
 }
@@ -309,13 +344,15 @@
     attr = nullptr; // Prevent misuse below.
   }
 
-  pthread_internal_t* thread = nullptr;
+  bionic_tcb* tcb = nullptr;
   void* child_stack = nullptr;
-  int result = __allocate_thread(&thread_attr, &thread, &child_stack);
+  int result = __allocate_thread(&thread_attr, &tcb, &child_stack);
   if (result != 0) {
     return result;
   }
 
+  pthread_internal_t* thread = tcb->thread();
+
   // Create a lock for the thread to wait on once it starts so we can keep
   // it from doing anything until after we notify the debugger about it
   //
@@ -332,7 +369,7 @@
 
   int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
       CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
-  void* tls = reinterpret_cast<void*>(thread->tls);
+  void* tls = &tcb->tls_slot(0);
 #if defined(__i386__)
   // On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than
   // a pointer to the TLS itself.
@@ -348,7 +385,7 @@
     // reminder that you can't rewrite this function to use a ScopedPthreadMutexLocker.
     thread->startup_handshake_lock.unlock();
     if (thread->mmap_size != 0) {
-      munmap(thread->attr.stack_base, thread->mmap_size);
+      munmap(thread->mmap_base, thread->mmap_size);
     }
     async_safe_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s",
                           strerror(clone_errno));
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 2d4d6cf..84ea2e6 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -65,12 +65,6 @@
   }
 }
 
-static void __pthread_unmap_tls(pthread_internal_t* thread) {
-  // Unmap the bionic TLS, including guard pages.
-  void* allocation = reinterpret_cast<char*>(thread->bionic_tls) - PTHREAD_GUARD_SIZE;
-  munmap(allocation, BIONIC_TLS_SIZE + 2 * PTHREAD_GUARD_SIZE);
-}
-
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
 void pthread_exit(void* return_value) {
   // Call dtors for thread_local objects first.
@@ -131,15 +125,13 @@
       // We don't want to take a signal after we've unmapped the stack.
       // That's one last thing we can do before dropping to assembler.
       ScopedSignalBlocker ssb;
-      __pthread_unmap_tls(thread);
       __hwasan_thread_exit();
-      _exit_with_stack_teardown(thread->attr.stack_base, thread->mmap_size);
+      _exit_with_stack_teardown(thread->mmap_base, thread->mmap_size);
     }
   }
 
   // No need to free mapped space. Either there was no space mapped, or it is left for
   // the pthread_join caller to clean up.
-  __pthread_unmap_tls(thread);
   __hwasan_thread_exit();
   __exit(0);
 }
diff --git a/libc/bionic/pthread_internal.cpp b/libc/bionic/pthread_internal.cpp
index c058384..870a526 100644
--- a/libc/bionic/pthread_internal.cpp
+++ b/libc/bionic/pthread_internal.cpp
@@ -88,7 +88,7 @@
 static void __pthread_internal_free(pthread_internal_t* thread) {
   if (thread->mmap_size != 0) {
     // Free mapped space, including thread stack and pthread_internal_t.
-    munmap(thread->attr.stack_base, thread->mmap_size);
+    munmap(thread->mmap_base, thread->mmap_size);
   }
 }
 
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 4c13dcb..27ab3df 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -52,12 +52,6 @@
 #define PTHREAD_ATTR_FLAG_INHERIT 0x00000004
 #define PTHREAD_ATTR_FLAG_EXPLICIT 0x00000008
 
-class pthread_key_data_t {
- public:
-  uintptr_t seq; // Use uintptr_t just for alignment, as we use pointer below.
-  void* data;
-};
-
 enum ThreadJoinState {
   THREAD_NOT_JOINED,
   THREAD_EXITED_NOT_JOINED,
@@ -131,6 +125,7 @@
 
   Lock startup_handshake_lock;
 
+  void* mmap_base;
   size_t mmap_size;
 
   thread_local_dtor* thread_local_dtors;
@@ -146,42 +141,44 @@
   bionic_tls* bionic_tls;
 
   int errno_value;
-
-  // The thread pointer (__get_tls()) points at this field. This field must come last so that
-  // an executable's TLS segment can be allocated at a fixed offset after the thread pointer.
-  void* tls[BIONIC_TLS_SLOTS];
-
-  // The golang runtime currently expects this field to come after the slots.
-  pthread_key_data_t key_data[BIONIC_PTHREAD_KEY_COUNT];
 };
 
-__LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread);
-__LIBC_HIDDEN__ void __init_tls_stack_guard(pthread_internal_t* thread);
-__LIBC_HIDDEN__ bionic_tls* __allocate_bionic_tls();
+struct ThreadMapping {
+  char* mmap_base;
+  size_t mmap_size;
+
+  char* static_tls;
+  char* stack_base;
+  char* stack_top;
+};
+
+__LIBC_HIDDEN__ void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread);
+__LIBC_HIDDEN__ void __init_tcb_stack_guard(bionic_tcb* tcb);
+__LIBC_HIDDEN__ void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls);
+__LIBC_HIDDEN__ bionic_tls* __allocate_temp_bionic_tls();
+__LIBC_HIDDEN__ void __free_temp_bionic_tls(bionic_tls* tls);
 __LIBC_HIDDEN__ void __init_additional_stacks(pthread_internal_t*);
 __LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
+__LIBC_HIDDEN__ ThreadMapping __allocate_thread_mapping(size_t stack_size, size_t stack_guard_size);
 
 __LIBC_HIDDEN__ pthread_t           __pthread_internal_add(pthread_internal_t* thread);
 __LIBC_HIDDEN__ pthread_internal_t* __pthread_internal_find(pthread_t pthread_id);
 __LIBC_HIDDEN__ void                __pthread_internal_remove(pthread_internal_t* thread);
 __LIBC_HIDDEN__ void                __pthread_internal_remove_and_free(pthread_internal_t* thread);
 
+static inline __always_inline bionic_tcb* __get_bionic_tcb() {
+  return reinterpret_cast<bionic_tcb*>(&__get_tls()[MIN_TLS_SLOT]);
+}
+
 // Make __get_thread() inlined for performance reason. See http://b/19825434.
 static inline __always_inline pthread_internal_t* __get_thread() {
-  void** tls = __get_tls();
-  if (__predict_true(tls)) {
-    return reinterpret_cast<pthread_internal_t*>(tls[TLS_SLOT_THREAD_ID]);
-  }
-
-  // This happens when called during libc initialization before TLS has been initialized.
-  return nullptr;
+  return static_cast<pthread_internal_t*>(__get_tls()[TLS_SLOT_THREAD_ID]);
 }
 
 static inline __always_inline bionic_tls& __get_bionic_tls() {
-  return *__get_thread()->bionic_tls;
+  return *static_cast<bionic_tls*>(__get_tls()[TLS_SLOT_BIONIC_TLS]);
 }
 
-extern __LIBC_HIDDEN__ pthread_internal_t* __get_main_thread();
 extern "C" __LIBC_HIDDEN__ int __set_tls(void* ptr);
 
 __LIBC_HIDDEN__ void pthread_key_clean_all(void);
diff --git a/libc/bionic/pthread_key.cpp b/libc/bionic/pthread_key.cpp
index 0356ccc..53f0f11 100644
--- a/libc/bionic/pthread_key.cpp
+++ b/libc/bionic/pthread_key.cpp
@@ -70,7 +70,7 @@
 }
 
 static inline pthread_key_data_t* get_thread_key_data() {
-  return __get_thread()->key_data;
+  return __get_bionic_tls().key_data;  // key data is a member of bionic_tls
 }
 
 // Called from pthread_exit() to remove all pthread keys. This must call the destructor of
diff --git a/libc/private/bionic_asm_tls.h b/libc/private/bionic_asm_tls.h
new file mode 100644
index 0000000..06e3dce
--- /dev/null
+++ b/libc/private/bionic_asm_tls.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#pragma once
+
+/** WARNING WARNING WARNING
+ **
+ ** This header file is *NOT* part of the public Bionic ABI/API and should not
+ ** be used/included by user-serviceable parts of the system (e.g.
+ ** applications).
+ **
+ ** It is only provided here for the benefit of Android components that need a
+ ** pre-allocated slot for performance reasons (including ART, the OpenGL
+ ** subsystem, and sanitizers).
+ **/
+
+// Bionic TCB / TLS slots:
+//
+//  - TLS_SLOT_SELF: On x86-{32,64}, the kernel makes TLS memory available via
+//    the gs/fs segments. So that code can compute the address of a TLS
+//    variable, the first slot of TLS memory (accessed using %gs:0 / %fs:0)
+//    holds the address of the gs/fs segment. This slot is used by:
+//     - OpenGL and compiler-rt
+//     - Accesses of x86 ELF TLS variables
+//
+//  - TLS_SLOT_OPENGL and TLS_SLOT_OPENGL_API: These two aren't used by bionic
+//    itself, but allow the graphics code to access TLS directly rather than
+//    using the pthread API.
+//
+//  - TLS_SLOT_STACK_GUARD: Used for -fstack-protector by:
+//     - Clang targeting Android/arm64
+//     - gcc targeting Linux/x86-{32,64}
+//
+//  - TLS_SLOT_SANITIZER: Lets sanitizers avoid using pthread_getspecific for
+//    finding the current thread state.
+//
+//  - TLS_SLOT_DTV: Pointer to ELF TLS dynamic thread vector.
+//
+//  - TLS_SLOT_ART_THREAD_SELF: Fast storage for Thread::Current() in ART.
+//
+//  - TLS_SLOT_BIONIC_TLS: Saves one load on each access to bionic_tls compared
+//    with finding it through __get_thread().
+
+#if defined(__arm__) || defined(__aarch64__)
+
+// The ARM ELF TLS ABI specifies[1] that the thread pointer points at a 2-word
+// TCB followed by the executable's TLS segment. Both the TCB and the
+// executable's segment are aligned according to the segment, so Bionic requires
+// a minimum segment alignment, which effectively reserves an 8-word TCB. The
+// ARM spec allocates the first TCB word to the DTV.
+//
+// [1] "Addenda to, and Errata in, the ABI for the ARM Architecture". Section 3.
+// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045e/IHI0045E_ABI_addenda.pdf
+
+#define MIN_TLS_SLOT            (-1) // lowest slot index; update this value when reserving a slot
+#define TLS_SLOT_BIONIC_TLS     (-1) // negative values are parenthesized to be safe in any expression
+#define TLS_SLOT_DTV              0
+#define TLS_SLOT_THREAD_ID        1
+// Slot 2 is free (was historically used for TLS_SLOT_ERRNO)
+#define TLS_SLOT_OPENGL           3
+#define TLS_SLOT_OPENGL_API       4
+#define TLS_SLOT_STACK_GUARD      5
+#define TLS_SLOT_SANITIZER        6 // was historically used for dlerror
+#define TLS_SLOT_ART_THREAD_SELF  7
+#define TLS_SLOT_TSAN             8 // should be replaced with TLS_SLOT_SANITIZER
+
+// The maximum slot is fixed by the minimum TLS alignment in Bionic executables.
+// It should be changed to 7 once TLS_SLOT_TSAN is removed.
+#define MAX_TLS_SLOT              8
+
+#elif defined(__i386__) || defined(__x86_64__)
+
+// x86 uses variant 2 ELF TLS layout, which places the executable's TLS segment
+// immediately before the thread pointer. New slots are allocated at positive
+// offsets from the thread pointer.
+
+#define MIN_TLS_SLOT              0 // lowest slot index; no negative slots on x86
+
+#define TLS_SLOT_SELF             0 // holds the address of the gs/fs segment (%gs:0 / %fs:0)
+#define TLS_SLOT_THREAD_ID        1 // read by __get_thread() as a pthread_internal_t*
+// Slot 2 is free (was historically used for TLS_SLOT_ERRNO)
+#define TLS_SLOT_OPENGL           3
+#define TLS_SLOT_OPENGL_API       4
+#define TLS_SLOT_STACK_GUARD      5 // used for -fstack-protector
+#define TLS_SLOT_SANITIZER        6 // was historically used for dlerror
+#define TLS_SLOT_ART_THREAD_SELF  7 // fast storage for Thread::Current() in ART
+#define TLS_SLOT_TSAN             8 // should be replaced with TLS_SLOT_SANITIZER
+#define TLS_SLOT_DTV              9 // pointer to the ELF TLS dynamic thread vector
+#define TLS_SLOT_BIONIC_TLS       10 // read by __get_bionic_tls() as a bionic_tls*
+#define MAX_TLS_SLOT              10 // update this value when reserving a slot
+
+#endif
+
+#define BIONIC_TLS_SLOTS (MAX_TLS_SLOT - MIN_TLS_SLOT + 1) // slot count, both ends inclusive
diff --git a/libc/private/bionic_elf_tls.h b/libc/private/bionic_elf_tls.h
new file mode 100644
index 0000000..e847669
--- /dev/null
+++ b/libc/private/bionic_elf_tls.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+struct StaticTlsLayout {  // Tracks the size, alignment and struct offsets of static TLS.
+  constexpr StaticTlsLayout() {}
+
+private:
+  size_t offset_ = 0;  // reported by size()
+  size_t alignment_ = 1;  // reported by alignment()
+  bool overflowed_ = false;  // reported by overflowed()
+
+  // Offsets to various Bionic TLS structs from the beginning of static TLS.
+  size_t offset_bionic_tcb_ = SIZE_MAX;  // NOTE(review): SIZE_MAX presumably means "not reserved yet"
+  size_t offset_bionic_tls_ = SIZE_MAX;  // NOTE(review): same sentinel assumption -- confirm
+
+public:
+  size_t offset_bionic_tcb() const { return offset_bionic_tcb_; }
+  size_t offset_bionic_tls() const { return offset_bionic_tls_; }
+
+  size_t size() const { return offset_; }
+  size_t alignment() const { return alignment_; }
+  bool overflowed() const { return overflowed_; }
+
+  void reserve_tcb();
+  void reserve_bionic_tls();
+  void finish_layout();
+
+private:
+  size_t reserve(size_t size, size_t alignment);
+
+  template <typename T> size_t reserve_type() {  // reserve() with T's size and alignment
+    return reserve(sizeof(T), alignof(T));
+  }
+
+  size_t round_up_with_overflow_check(size_t value, size_t alignment);
+};
diff --git a/libc/private/bionic_globals.h b/libc/private/bionic_globals.h
index ceda38a..b5e677e 100644
--- a/libc/private/bionic_globals.h
+++ b/libc/private/bionic_globals.h
@@ -33,6 +33,7 @@
 #include <link.h>
 #include <pthread.h>
 
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_fdsan.h"
 #include "private/bionic_malloc_dispatch.h"
 #include "private/bionic_vdso.h"
@@ -67,6 +68,8 @@
   pthread_mutex_t abort_msg_lock = PTHREAD_MUTEX_INITIALIZER;
   abort_msg_t* abort_msg = nullptr;
 
+  StaticTlsLayout static_tls_layout;
+
   // Values passed from the linker to libc.so.
   const char* init_progname = nullptr;
   char** init_environ = nullptr;
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index 4749cee..90914c3 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -26,8 +26,7 @@
  * SUCH DAMAGE.
  */
 
-#ifndef __BIONIC_PRIVATE_BIONIC_TLS_H_
-#define __BIONIC_PRIVATE_BIONIC_TLS_H_
+#pragma once
 
 #include <locale.h>
 #include <mntent.h>
@@ -35,73 +34,48 @@
 #include <sys/cdefs.h>
 #include <sys/param.h>
 
+#include "bionic_asm_tls.h"
 #include "bionic_macros.h"
 #include "__get_tls.h"
 #include "grp_pwd.h"
 
-__BEGIN_DECLS
-
 /** WARNING WARNING WARNING
  **
- ** This header file is *NOT* part of the public Bionic ABI/API
- ** and should not be used/included by user-serviceable parts of
- ** the system (e.g. applications).
- **
- ** It is only provided here for the benefit of the system dynamic
- ** linker and the OpenGL sub-system (which needs to access the
- ** pre-allocated slot directly for performance reason).
+ ** This header file is *NOT* part of the public Bionic ABI/API and should not
+ ** be used/included by user-serviceable parts of the system (e.g.
+ ** applications).
  **/
 
-// Well-known TLS slots. What data goes in which slot is arbitrary unless otherwise noted.
-enum {
-  TLS_SLOT_SELF = 0, // The kernel requires this specific slot for x86.
-  TLS_SLOT_THREAD_ID,
+class pthread_internal_t;
 
-  // TLS slot 2 was used for errno but is now free.
+// This struct is small, so the linker can allocate a temporary copy on its
+// stack. It can't be combined with pthread_internal_t because:
+//  - Native bridge requires pthread_internal_t to have the same layout across
+//    architectures.
+//  - On x86, this struct would have to be placed at the front of
+//    pthread_internal_t, moving fields like `tid`.
+//  - We'd like to avoid having a temporary pthread_internal_t object that
+//    needs to be transferred once the final size of static TLS is known.
+struct bionic_tcb {
+  void* raw_slots_storage[BIONIC_TLS_SLOTS];
 
-  // These two aren't used by bionic itself, but allow the graphics code to
-  // access TLS directly rather than using the pthread API.
-  TLS_SLOT_OPENGL_API = 3,
-  TLS_SLOT_OPENGL = 4,
+  // Return a reference to a slot given its TP-relative TLS_SLOT_xxx index.
+  // The thread pointer (i.e. __get_tls()) points at &tls_slot(0).
+  void*& tls_slot(size_t tpindex) {  // a negative slot index arrives wrapped as size_t
+    return raw_slots_storage[tpindex - MIN_TLS_SLOT];  // rebase so MIN_TLS_SLOT maps to [0]
+  }
 
-  TLS_SLOT_STACK_GUARD = 5, // GCC requires this specific slot for x86.
+  // Initialize the main thread's final object using its bootstrap object.
+  void copy_from_bootstrap(const bionic_tcb* boot) {  // boot must be non-null; it is dereferenced
+    // Copy everything. Problematic slots will be reinitialized.
+    *this = *boot;  // whole-struct assignment, so every raw slot is copied
+  }
 
-  // Lets sanitizers avoid using pthread_getspecific for finding the current
-  // thread state. (Slot 6 was historically used for dlerror instead.)
-  TLS_SLOT_SANITIZER = 6,
-
-  // Fast storage for Thread::Current() in ART.
-  TLS_SLOT_ART_THREAD_SELF = 7,
-
-  // Lets TSAN avoid using pthread_getspecific for finding the current thread
-  // state.
-  TLS_SLOT_TSAN = 8,
-
-  BIONIC_TLS_SLOTS // Must come last!
+  pthread_internal_t* thread() {  // value of this TCB's TLS_SLOT_THREAD_ID slot
+    return static_cast<pthread_internal_t*>(tls_slot(TLS_SLOT_THREAD_ID));
+  }
 };
 
-// ~3 pages.
-struct bionic_tls {
-  locale_t locale;
-
-  char basename_buf[MAXPATHLEN];
-  char dirname_buf[MAXPATHLEN];
-
-  mntent mntent_buf;
-  char mntent_strings[BUFSIZ];
-
-  char ptsname_buf[32];
-  char ttyname_buf[64];
-
-  char strerror_buf[NL_TEXTMAX];
-  char strsignal_buf[NL_TEXTMAX];
-
-  group_state_t group;
-  passwd_state_t passwd;
-};
-
-#define BIONIC_TLS_SIZE (__BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE))
-
 /*
  * Bionic uses some pthread keys internally. All pthread keys used internally
  * should be created in constructors, except for keys that may be used in or
@@ -126,12 +100,42 @@
  */
 #define BIONIC_PTHREAD_KEY_COUNT (BIONIC_PTHREAD_KEY_RESERVED_COUNT + PTHREAD_KEYS_MAX)
 
-__END_DECLS
+class pthread_key_data_t {  // element type of bionic_tls::key_data
+ public:
+  uintptr_t seq; // uintptr_t is used only for alignment, since a pointer follows.
+  void* data;
+};
 
-#if defined(__cplusplus)
+// ~3 pages. This struct is allocated as static TLS memory (i.e. at a fixed
+// offset from the thread pointer).
+struct bionic_tls {
+  pthread_key_data_t key_data[BIONIC_PTHREAD_KEY_COUNT];  // keep first: Golang scans from the TP
+
+  locale_t locale;
+
+  char basename_buf[MAXPATHLEN];
+  char dirname_buf[MAXPATHLEN];
+
+  mntent mntent_buf;
+  char mntent_strings[BUFSIZ];
+
+  char ptsname_buf[32];
+  char ttyname_buf[64];
+
+  char strerror_buf[NL_TEXTMAX];
+  char strsignal_buf[NL_TEXTMAX];
+
+  group_state_t group;
+  passwd_state_t passwd;
+
+  // Initialize the main thread's final object using its bootstrap object.
+  void copy_from_bootstrap(const bionic_tls* boot __attribute__((unused))) {  // deliberate no-op
+    // Nothing in bionic_tls needs to be preserved in the transition to the
+    // final TLS objects, so don't copy anything.
+  }
+};
+
 class KernelArgumentBlock;
-extern void __libc_init_main_thread_early(KernelArgumentBlock& args);
-extern void __libc_init_main_thread_late();
-#endif
-
-#endif /* __BIONIC_PRIVATE_BIONIC_TLS_H_ */
+extern "C" void __libc_init_main_thread_early(const KernelArgumentBlock& args, bionic_tcb* temp_tcb);
+extern "C" void __libc_init_main_thread_late();
+extern "C" void __libc_init_main_thread_final();
diff --git a/libc/private/linker_native_bridge.h b/libc/private/linker_native_bridge.h
new file mode 100644
index 0000000..bfd0153
--- /dev/null
+++ b/libc/private/linker_native_bridge.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#pragma once
+
+extern "C" void __linker_reserve_bionic_tls_in_static_tls();
diff --git a/linker/Android.bp b/linker/Android.bp
index 0e3484a..38a53f8 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -86,6 +86,7 @@
         "linker_phdr.cpp",
         "linker_sdk_versions.cpp",
         "linker_soinfo.cpp",
+        "linker_tls.cpp",
         "linker_utils.cpp",
         "rt.cpp",
     ],
diff --git a/linker/linker_main.cpp b/linker/linker_main.cpp
index 3318c2c..9b4ce47 100644
--- a/linker/linker_main.cpp
+++ b/linker/linker_main.cpp
@@ -36,6 +36,7 @@
 #include "linker_gdb_support.h"
 #include "linker_globals.h"
 #include "linker_phdr.h"
+#include "linker_tls.h"
 #include "linker_utils.h"
 
 #include "private/bionic_globals.h"
@@ -51,6 +52,7 @@
 
 #include <async_safe/log.h>
 #include <bionic/libc_init_common.h>
+#include <bionic/pthread_internal.h>
 
 #include <vector>
 
@@ -450,6 +452,10 @@
     si->increment_ref_count();
   }
 
+  layout_linker_static_tls();
+
+  __libc_init_main_thread_final();
+
   if (!get_cfi_shadow()->InitialLinkDone(solist)) __linker_cannot_link(g_argv[0]);
 
   si->call_pre_init_constructors();
@@ -557,7 +563,8 @@
 extern "C" ElfW(Addr) __linker_init(void* raw_args) {
   // Initialize TLS early so system calls and errno work.
   KernelArgumentBlock args(raw_args);
-  __libc_init_main_thread_early(args);
+  bionic_tcb temp_tcb = {};
+  __libc_init_main_thread_early(args, &temp_tcb);
 
   // When the linker is run by itself (rather than as an interpreter for
   // another program), AT_BASE is 0.
diff --git a/linker/linker_tls.cpp b/linker/linker_tls.cpp
new file mode 100644
index 0000000..3327453
--- /dev/null
+++ b/linker/linker_tls.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "linker_tls.h"
+
+#include "private/bionic_defs.h"
+#include "private/bionic_elf_tls.h"
+#include "private/bionic_globals.h"
+#include "private/linker_native_bridge.h"
+
+__BIONIC_WEAK_FOR_NATIVE_BRIDGE  // NOTE(review): presumably lets native bridge override this -- confirm
+extern "C" void __linker_reserve_bionic_tls_in_static_tls() {
+  __libc_shared_globals()->static_tls_layout.reserve_bionic_tls();  // acts on the shared layout
+}
+
+// Stub for linker static TLS layout: reserves the TCB and then bionic_tls, nothing else yet.
+void layout_linker_static_tls() {
+  StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  layout.reserve_tcb();  // the TCB is reserved before bionic_tls
+
+  // The pthread key data is located at the very front of bionic_tls. As a
+  // temporary workaround, allocate bionic_tls just after the thread pointer so
+  // Golang can find its pthread key, as long as the executable's TLS segment is
+  // small enough. Specifically, Golang scans forward 384 words from the TP on
+  // ARM.
+  //  - http://b/118381796
+  //  - https://groups.google.com/d/msg/golang-dev/yVrkFnYrYPE/2G3aFzYqBgAJ
+  __linker_reserve_bionic_tls_in_static_tls();
+
+  layout.finish_layout();  // called last, after both reservations
+}
diff --git a/linker/linker_tls.h b/linker/linker_tls.h
new file mode 100644
index 0000000..2f0a57d
--- /dev/null
+++ b/linker/linker_tls.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#pragma once
+
+void layout_linker_static_tls();
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index 84ce531..1c57264 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -2582,9 +2582,9 @@
     ASSERT_EQ(0, munmap(pages[i], kPageSize));
   }
 
-  // Creating a thread uses at least six VMAs: the stack, the TLS, and a guard each side of both.
-  // So we should have seen at least six failures.
-  ASSERT_GE(EAGAIN_count, 6U);
+  // Creating a thread uses at least three VMAs: the combined stack and TLS, and a guard on each
+  // side. So we should have seen at least three failures.
+  ASSERT_GE(EAGAIN_count, 3U);
 
   for (; i < pages.size(); ++i) {
     ASSERT_EQ(0, munmap(pages[i], kPageSize));