Merge "Track movement of ICU .dat file"
diff --git a/libc/Android.bp b/libc/Android.bp
index 01c0cad..e0d0fee 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -1199,6 +1199,7 @@
 cc_library_static {
     defaults: ["libc_defaults"],
     srcs: [
+        "bionic/bionic_elf_tls.cpp",
         "bionic/pthread_atfork.cpp",
         "bionic/pthread_attr.cpp",
         "bionic/pthread_barrier.cpp",
@@ -1606,6 +1607,11 @@
             version_script: ":libc.x86_64.map",
         },
     },
+
+    stubs: {
+        symbol_file: "libc.map.txt",
+        versions: ["10000"],
+    },
 }
 
 genrule {
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 5abdc07..2b90c90 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -28,9 +28,12 @@
 
 #include "libc_init_common.h"
 
+#include <async_safe/log.h>
+
 #include "private/KernelArgumentBlock.h"
 #include "private/bionic_arc4random.h"
 #include "private/bionic_defs.h"
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_globals.h"
 #include "private/bionic_ssp.h"
 #include "pthread_internal.h"
@@ -43,11 +46,6 @@
 
 static pthread_internal_t main_thread;
 
-__attribute__((no_sanitize("hwaddress")))
-pthread_internal_t* __get_main_thread() {
-  return &main_thread;
-}
-
 // Setup for the main thread. For dynamic executables, this is called by the
 // linker _before_ libc is mapped in memory. This means that all writes to
 // globals from this function will apply to linker-private copies and will not
@@ -69,35 +67,28 @@
 // linker, the linker binary hasn't been relocated yet, so certain kinds of code
 // are hazardous, such as accessing non-hidden global variables.
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
-void __libc_init_main_thread_early(KernelArgumentBlock& args) {
+extern "C" void __libc_init_main_thread_early(const KernelArgumentBlock& args,
+                                              bionic_tcb* temp_tcb) {
   __libc_shared_globals()->auxv = args.auxv;
 #if defined(__i386__)
-  __libc_init_sysinfo();
+  __libc_init_sysinfo(); // uses AT_SYSINFO auxv entry
 #endif
-  __set_tls(main_thread.tls);
-  __init_tls(&main_thread);
+  __init_tcb(temp_tcb, &main_thread);
+  __set_tls(&temp_tcb->tls_slot(0));
   main_thread.tid = __getpid();
   main_thread.set_cached_pid(main_thread.tid);
 }
 
 // Finish initializing the main thread.
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
-void __libc_init_main_thread_late() {
-  main_thread.bionic_tls = __allocate_bionic_tls();
-  if (main_thread.bionic_tls == nullptr) {
-    // Avoid strerror because it might need bionic_tls.
-    async_safe_fatal("failed to allocate bionic_tls: error %d", errno);
-  }
+extern "C" void __libc_init_main_thread_late() {
+  __init_bionic_tls_ptrs(__get_bionic_tcb(), __allocate_temp_bionic_tls());
 
   // Tell the kernel to clear our tid field when we exit, so we're like any other pthread.
+  // For threads created by pthread_create, this setup happens during the clone syscall (i.e.
+  // CLONE_CHILD_CLEARTID).
   __set_tid_address(&main_thread.tid);
 
-  // We don't want to free the main thread's stack even when the main thread exits
-  // because things like environment variables with global scope live on it.
-  // We also can't free the pthread_internal_t itself, since it is a static variable.
-  // The main thread has no mmap allocated space for stack or pthread_internal_t.
-  main_thread.mmap_size = 0;
-
   pthread_attr_init(&main_thread.attr);
   // We don't want to explicitly set the main thread's scheduler attributes (http://b/68328561).
   pthread_attr_setinheritsched(&main_thread.attr, PTHREAD_INHERIT_SCHED);
@@ -110,9 +101,40 @@
   // before we initialize the TLS. Dynamic executables will initialize their copy of the global
   // stack protector from the one in the main thread's TLS.
   __libc_safe_arc4random_buf(&__stack_chk_guard, sizeof(__stack_chk_guard));
-  __init_tls_stack_guard(&main_thread);
+  __init_tcb_stack_guard(__get_bionic_tcb());
 
   __init_thread(&main_thread);
 
   __init_additional_stacks(&main_thread);
 }
+
+// Once all ELF modules are loaded, move the main thread off its temporary TCB and
+// bionic_tls and onto the final copies located in its static TLS memory.
+__BIONIC_WEAK_FOR_NATIVE_BRIDGE
+extern "C" void __libc_init_main_thread_final() {
+  bionic_tcb* temp_tcb = __get_bionic_tcb();  // TCB currently behind the thread pointer.
+  bionic_tls* temp_tls = &__get_bionic_tls();
+
+  // Allocate the main thread's static TLS. (This mapping doesn't include a
+  // stack.)
+  ThreadMapping mapping = __allocate_thread_mapping(0, PTHREAD_GUARD_SIZE);
+  if (mapping.mmap_base == nullptr) {
+    async_safe_fatal("failed to mmap main thread static TLS: %s", strerror(errno));
+  }
+
+  const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  auto new_tcb = reinterpret_cast<bionic_tcb*>(mapping.static_tls + layout.offset_bionic_tcb());
+  auto new_tls = reinterpret_cast<bionic_tls*>(mapping.static_tls + layout.offset_bionic_tls());
+
+  new_tcb->copy_from_bootstrap(temp_tcb);
+  new_tls->copy_from_bootstrap(temp_tls);
+  __init_tcb(new_tcb, &main_thread);         // Thread-id slot (and self slot, if any).
+  __init_bionic_tls_ptrs(new_tcb, new_tls);  // main_thread and the TCB now refer to new_tls.
+
+  main_thread.mmap_base = mapping.mmap_base;
+  main_thread.mmap_size = mapping.mmap_size;
+
+  __set_tls(&new_tcb->tls_slot(0));  // Switch the thread pointer over to the new TCB.
+
+  __free_temp_bionic_tls(temp_tls);  // Unmap the temporary bionic_tls.
+}
diff --git a/libc/bionic/bionic_elf_tls.cpp b/libc/bionic/bionic_elf_tls.cpp
new file mode 100644
index 0000000..55c2c31
--- /dev/null
+++ b/libc/bionic/bionic_elf_tls.cpp
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "private/bionic_elf_tls.h"
+
+#include <sys/param.h>
+
+#include "private/bionic_macros.h"
+#include "private/bionic_tls.h"
+
+void StaticTlsLayout::reserve_tcb() {
+  offset_bionic_tcb_ = reserve_type<bionic_tcb>();  // Save the offset given to the bionic_tcb.
+}
+
+void StaticTlsLayout::reserve_bionic_tls() {
+  offset_bionic_tls_ = reserve_type<bionic_tls>();  // Save the offset given to the bionic_tls.
+}
+
+void StaticTlsLayout::finish_layout() {
+  // Round the running offset up to alignment_, the largest alignment passed to reserve().
+  offset_ = round_up_with_overflow_check(offset_, alignment_);
+}
+
+// Reserves `size` bytes at the next `alignment`-aligned offset and returns that offset. `size`
+// need not be a multiple of `alignment`; `alignment` must be a positive power of two.
+size_t StaticTlsLayout::reserve(size_t size, size_t alignment) {
+  const size_t start = round_up_with_overflow_check(offset_, alignment);
+  // On overflow the wrapped sum is still stored; overflowed_ records that it is bogus.
+  if (__builtin_add_overflow(start, size, &offset_)) overflowed_ = true;
+  if (alignment_ < alignment) alignment_ = alignment;
+  return start;
+}
+
+size_t StaticTlsLayout::round_up_with_overflow_check(size_t value, size_t alignment) {
+  const size_t rounded = __BIONIC_ALIGN(value, alignment);
+  // Rounding up can never shrink the value, so a smaller result means it wrapped around.
+  if (rounded < value) overflowed_ = true;
+  return rounded;
+}
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index b2f8bbf..68650ed 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -39,6 +39,7 @@
 #include "libc_init_common.h"
 #include "pthread_internal.h"
 
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_page.h"
@@ -82,6 +83,13 @@
   }
 }
 
+static void layout_static_tls() {
+  StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  layout.reserve_bionic_tls();  // bionic_tls is reserved before the bionic_tcb.
+  layout.reserve_tcb();
+  layout.finish_layout();       // Round the final offset up to the layout's alignment.
+}
+
 // The program startup function __libc_init() defined here is
 // used for static executables only (i.e. those that don't depend
 // on shared libraries). It is called from arch-$ARCH/bionic/crtbegin_static.S
@@ -92,16 +100,19 @@
 __noreturn static void __real_libc_init(void *raw_args,
                                         void (*onexit)(void) __unused,
                                         int (*slingshot)(int, char**, char**),
-                                        structors_array_t const * const structors) {
+                                        structors_array_t const * const structors,
+                                        bionic_tcb* temp_tcb) {
   BIONIC_STOP_UNWIND;
 
   // Initialize TLS early so system calls and errno work.
   KernelArgumentBlock args(raw_args);
-  __libc_init_main_thread_early(args);
+  __libc_init_main_thread_early(args, temp_tcb);
   __libc_init_main_thread_late();
   __libc_init_globals();
   __libc_shared_globals()->init_progname = args.argv[0];
   __libc_init_AT_SECURE(args.envp);
+  layout_static_tls();
+  __libc_init_main_thread_final();
   __libc_init_common();
 
   apply_gnu_relro();
@@ -129,16 +140,16 @@
                             void (*onexit)(void) __unused,
                             int (*slingshot)(int, char**, char**),
                             structors_array_t const * const structors) {
+  bionic_tcb temp_tcb = {};
 #if __has_feature(hwaddress_sanitizer)
   // Install main thread TLS early. It will be initialized later in __libc_init_main_thread. For now
-  // all we need is access to TLS_SLOT_TSAN.
-  pthread_internal_t* main_thread = __get_main_thread();
-  __set_tls(main_thread->tls);
-  // Initialize HWASan. This sets up TLS_SLOT_TSAN, among other things.
+  // all we need is access to TLS_SLOT_SANITIZER.
+  __set_tls(&temp_tcb.tls_slot(0));
+  // Initialize HWASan. This sets up TLS_SLOT_SANITIZER, among other things.
   __hwasan_init();
   // We are ready to run HWASan-instrumented code, proceed with libc initialization...
 #endif
-  __real_libc_init(raw_args, onexit, slingshot, structors);
+  __real_libc_init(raw_args, onexit, slingshot, structors, &temp_tcb);
 }
 
 static int g_target_sdk_version{__ANDROID_API__};
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 06d2ecb..8676a45 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -41,6 +41,7 @@
 
 #include "private/bionic_constants.h"
 #include "private/bionic_defs.h"
+#include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_ssp.h"
 #include "private/bionic_tls.h"
@@ -54,39 +55,43 @@
 
 // This code is used both by each new pthread and the code that initializes the main thread.
 __attribute__((no_stack_protector))
-void __init_tls(pthread_internal_t* thread) {
-  // Slot 0 must point to itself. The x86 Linux kernel reads the TLS from %fs:0.
-  thread->tls[TLS_SLOT_SELF] = thread->tls;
-  thread->tls[TLS_SLOT_THREAD_ID] = thread;
+void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread) {
+#ifdef TLS_SLOT_SELF
+  // On x86, slot 0 must point to itself so code can read the thread pointer by
+  // loading %fs:0 or %gs:0.
+  tcb->tls_slot(TLS_SLOT_SELF) = &tcb->tls_slot(TLS_SLOT_SELF);
+#endif
+  tcb->tls_slot(TLS_SLOT_THREAD_ID) = thread;
 }
 
 __attribute__((no_stack_protector))
-void __init_tls_stack_guard(pthread_internal_t* thread) {
+void __init_tcb_stack_guard(bionic_tcb* tcb) {
   // GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
-  thread->tls[TLS_SLOT_STACK_GUARD] = reinterpret_cast<void*>(__stack_chk_guard);
+  tcb->tls_slot(TLS_SLOT_STACK_GUARD) = reinterpret_cast<void*>(__stack_chk_guard);
 }
 
-bionic_tls* __allocate_bionic_tls() {
-  // Add a guard before and after.
-  size_t allocation_size = BIONIC_TLS_SIZE + (2 * PTHREAD_GUARD_SIZE);
-  void* allocation = mmap(nullptr, allocation_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
+  tcb->thread()->bionic_tls = tls;           // Pointer kept in the thread's pthread_internal_t.
+  tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;  // Slot that __get_bionic_tls() reads.
+}
+
+// Allocate a temporary bionic_tls that the dynamic linker's main thread can
+// use while it's loading the initial set of ELF modules.
+bionic_tls* __allocate_temp_bionic_tls() {
+  size_t allocation_size = __BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE);
+  void* allocation = mmap(nullptr, allocation_size,
+                          PROT_READ | PROT_WRITE,
+                          MAP_PRIVATE | MAP_ANONYMOUS,
+                          -1, 0);
   if (allocation == MAP_FAILED) {
-    async_safe_format_log(ANDROID_LOG_WARN, "libc",
-                          "pthread_create failed: couldn't allocate TLS: %s", strerror(errno));
-    return nullptr;
+    // Avoid strerror because it might need bionic_tls.
+    async_safe_fatal("failed to allocate bionic_tls: error %d", errno);
   }
+  return static_cast<bionic_tls*>(allocation);
+}
 
-  // Carve out the writable TLS section.
-  bionic_tls* result = reinterpret_cast<bionic_tls*>(static_cast<char*>(allocation) +
-                                                     PTHREAD_GUARD_SIZE);
-  if (mprotect(result, BIONIC_TLS_SIZE, PROT_READ | PROT_WRITE) != 0) {
-    async_safe_format_log(ANDROID_LOG_WARN, "libc",
-                          "pthread_create failed: couldn't mprotect TLS: %s", strerror(errno));
-    munmap(allocation, allocation_size);
-    return nullptr;
-  }
-
-  return result;
+void __free_temp_bionic_tls(bionic_tls* tls) {
+  munmap(tls, __BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE));
 }
 
 static void __init_alternate_signal_stack(pthread_internal_t* thread) {
@@ -188,82 +193,112 @@
   return 0;
 }
 
-static void* __create_thread_mapped_space(size_t mmap_size, size_t stack_guard_size) {
-  // Create a new private anonymous map.
-  int prot = PROT_READ | PROT_WRITE;
-  int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
-  void* space = mmap(nullptr, mmap_size, prot, flags, -1, 0);
+
+// Allocate a thread's primary mapping. This mapping includes static TLS and
+// optionally a stack. Static TLS includes ELF TLS segments and the bionic_tls
+// struct.
+//
+// stack_guard_size must be a multiple of PAGE_SIZE.
+ThreadMapping __allocate_thread_mapping(size_t stack_size, size_t stack_guard_size) {
+  const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+
+  // Allocate in order: stack guard, stack, static TLS, guard page.
+  size_t mmap_size;
+  if (__builtin_add_overflow(stack_size, stack_guard_size, &mmap_size)) return {};
+  if (__builtin_add_overflow(mmap_size, layout.size(), &mmap_size)) return {};
+  if (__builtin_add_overflow(mmap_size, PTHREAD_GUARD_SIZE, &mmap_size)) return {};
+
+  // Align the result to a page size.
+  const size_t unaligned_size = mmap_size;
+  mmap_size = __BIONIC_ALIGN(mmap_size, PAGE_SIZE);
+  if (mmap_size < unaligned_size) return {};
+
+  // Create a new private anonymous map. Make the entire mapping PROT_NONE, then carve out a
+  // read+write area in the middle.
+  const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+  char* const space = static_cast<char*>(mmap(nullptr, mmap_size, PROT_NONE, flags, -1, 0));
   if (space == MAP_FAILED) {
     async_safe_format_log(ANDROID_LOG_WARN,
-                      "libc",
-                      "pthread_create failed: couldn't allocate %zu-bytes mapped space: %s",
-                      mmap_size, strerror(errno));
-    return nullptr;
+                          "libc",
+                          "pthread_create failed: couldn't allocate %zu-bytes mapped space: %s",
+                          mmap_size, strerror(errno));
+    return {};
+  }
+  const size_t writable_size = mmap_size - stack_guard_size - PTHREAD_GUARD_SIZE;
+  if (mprotect(space + stack_guard_size,
+               writable_size,
+               PROT_READ | PROT_WRITE) != 0) {
+    async_safe_format_log(ANDROID_LOG_WARN, "libc",
+                          "pthread_create failed: couldn't mprotect R+W %zu-byte thread mapping region: %s",
+                          writable_size, strerror(errno));
+    munmap(space, mmap_size);
+    return {};
   }
 
-  // Stack is at the lower end of mapped space, stack guard region is at the lower end of stack.
-  // Set the stack guard region to PROT_NONE, so we can detect thread stack overflow.
-  if (mprotect(space, stack_guard_size, PROT_NONE) == -1) {
-    async_safe_format_log(ANDROID_LOG_WARN, "libc",
-                          "pthread_create failed: couldn't mprotect PROT_NONE %zu-byte stack guard region: %s",
-                          stack_guard_size, strerror(errno));
-    munmap(space, mmap_size);
-    return nullptr;
-  }
-  return space;
+  ThreadMapping result = {};
+  result.mmap_base = space;
+  result.mmap_size = mmap_size;
+  result.static_tls = space + mmap_size - PTHREAD_GUARD_SIZE - layout.size();
+  result.stack_base = space;
+  result.stack_top = result.static_tls;
+  return result;
 }
 
-static int __allocate_thread(pthread_attr_t* attr, pthread_internal_t** threadp, void** child_stack) {
-  size_t mmap_size;
-  uint8_t* stack_top;
+static int __allocate_thread(pthread_attr_t* attr, bionic_tcb** tcbp, void** child_stack) {
+  ThreadMapping mapping;
+  char* stack_top;
+  bool stack_clean = false;
 
   if (attr->stack_base == nullptr) {
     // The caller didn't provide a stack, so allocate one.
-    // Make sure the stack size and guard size are multiples of PAGE_SIZE.
-    if (__builtin_add_overflow(attr->stack_size, attr->guard_size, &mmap_size)) return EAGAIN;
-    if (__builtin_add_overflow(mmap_size, sizeof(pthread_internal_t), &mmap_size)) return EAGAIN;
-    mmap_size = __BIONIC_ALIGN(mmap_size, PAGE_SIZE);
+
+    // Make sure the guard size is a multiple of PAGE_SIZE.
+    const size_t unaligned_guard_size = attr->guard_size;
     attr->guard_size = __BIONIC_ALIGN(attr->guard_size, PAGE_SIZE);
-    attr->stack_base = __create_thread_mapped_space(mmap_size, attr->guard_size);
-    if (attr->stack_base == nullptr) {
-      return EAGAIN;
-    }
-    stack_top = reinterpret_cast<uint8_t*>(attr->stack_base) + mmap_size;
+    if (attr->guard_size < unaligned_guard_size) return EAGAIN;
+
+    mapping = __allocate_thread_mapping(attr->stack_size, attr->guard_size);
+    if (mapping.mmap_base == nullptr) return EAGAIN;
+
+    stack_top = mapping.stack_top;
+    attr->stack_base = mapping.stack_base;
+    stack_clean = true;
   } else {
-    // Remember the mmap size is zero and we don't need to free it.
-    mmap_size = 0;
-    stack_top = reinterpret_cast<uint8_t*>(attr->stack_base) + attr->stack_size;
+    mapping = __allocate_thread_mapping(0, PTHREAD_GUARD_SIZE);
+    if (mapping.mmap_base == nullptr) return EAGAIN;
+
+    stack_top = static_cast<char*>(attr->stack_base) + attr->stack_size;
   }
 
-  // Mapped space(or user allocated stack) is used for:
-  //   pthread_internal_t
-  //   thread stack (including guard)
+  // Carve out space from the stack for the thread's pthread_internal_t. This
+  // memory isn't counted in pthread_attr_getstacksize.
 
   // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
-  stack_top = reinterpret_cast<uint8_t*>(
-                (reinterpret_cast<uintptr_t>(stack_top) - sizeof(pthread_internal_t)) & ~0xf);
+  stack_top = align_down(stack_top - sizeof(pthread_internal_t), 16);
 
   pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
-  if (mmap_size == 0) {
+  if (!stack_clean) {
     // If thread was not allocated by mmap(), it may not have been cleared to zero.
     // So assume the worst and zero it.
     memset(thread, 0, sizeof(pthread_internal_t));
   }
-  attr->stack_size = stack_top - reinterpret_cast<uint8_t*>(attr->stack_base);
 
-  thread->mmap_size = mmap_size;
+  // Locate static TLS structures within the mapped region.
+  const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  auto tcb = reinterpret_cast<bionic_tcb*>(mapping.static_tls + layout.offset_bionic_tcb());
+  auto tls = reinterpret_cast<bionic_tls*>(mapping.static_tls + layout.offset_bionic_tls());
+
+  // (Re)initialize TLS pointers.
+  __init_tcb(tcb, thread);
+  __init_tcb_stack_guard(tcb);
+  __init_bionic_tls_ptrs(tcb, tls);
+
+  attr->stack_size = stack_top - static_cast<char*>(attr->stack_base);
   thread->attr = *attr;
+  thread->mmap_base = mapping.mmap_base;
+  thread->mmap_size = mapping.mmap_size;
 
-  thread->bionic_tls = __allocate_bionic_tls();
-  if (thread->bionic_tls == nullptr) {
-    if (thread->mmap_size != 0) munmap(thread->attr.stack_base, thread->mmap_size);
-    return EAGAIN;
-  }
-
-  __init_tls(thread);
-  __init_tls_stack_guard(thread);
-
-  *threadp = thread;
+  *tcbp = tcb;
   *child_stack = stack_top;
   return 0;
 }
@@ -309,13 +344,15 @@
     attr = nullptr; // Prevent misuse below.
   }
 
-  pthread_internal_t* thread = nullptr;
+  bionic_tcb* tcb = nullptr;
   void* child_stack = nullptr;
-  int result = __allocate_thread(&thread_attr, &thread, &child_stack);
+  int result = __allocate_thread(&thread_attr, &tcb, &child_stack);
   if (result != 0) {
     return result;
   }
 
+  pthread_internal_t* thread = tcb->thread();
+
   // Create a lock for the thread to wait on once it starts so we can keep
   // it from doing anything until after we notify the debugger about it
   //
@@ -332,7 +369,7 @@
 
   int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
       CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
-  void* tls = reinterpret_cast<void*>(thread->tls);
+  void* tls = &tcb->tls_slot(0);
 #if defined(__i386__)
   // On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than
   // a pointer to the TLS itself.
@@ -348,7 +385,7 @@
     // reminder that you can't rewrite this function to use a ScopedPthreadMutexLocker.
     thread->startup_handshake_lock.unlock();
     if (thread->mmap_size != 0) {
-      munmap(thread->attr.stack_base, thread->mmap_size);
+      munmap(thread->mmap_base, thread->mmap_size);
     }
     async_safe_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %s",
                           strerror(clone_errno));
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 2d4d6cf..84ea2e6 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -65,12 +65,6 @@
   }
 }
 
-static void __pthread_unmap_tls(pthread_internal_t* thread) {
-  // Unmap the bionic TLS, including guard pages.
-  void* allocation = reinterpret_cast<char*>(thread->bionic_tls) - PTHREAD_GUARD_SIZE;
-  munmap(allocation, BIONIC_TLS_SIZE + 2 * PTHREAD_GUARD_SIZE);
-}
-
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
 void pthread_exit(void* return_value) {
   // Call dtors for thread_local objects first.
@@ -131,15 +125,13 @@
       // We don't want to take a signal after we've unmapped the stack.
       // That's one last thing we can do before dropping to assembler.
       ScopedSignalBlocker ssb;
-      __pthread_unmap_tls(thread);
       __hwasan_thread_exit();
-      _exit_with_stack_teardown(thread->attr.stack_base, thread->mmap_size);
+      _exit_with_stack_teardown(thread->mmap_base, thread->mmap_size);
     }
   }
 
   // No need to free mapped space. Either there was no space mapped, or it is left for
   // the pthread_join caller to clean up.
-  __pthread_unmap_tls(thread);
   __hwasan_thread_exit();
   __exit(0);
 }
diff --git a/libc/bionic/pthread_internal.cpp b/libc/bionic/pthread_internal.cpp
index c058384..870a526 100644
--- a/libc/bionic/pthread_internal.cpp
+++ b/libc/bionic/pthread_internal.cpp
@@ -88,7 +88,7 @@
 static void __pthread_internal_free(pthread_internal_t* thread) {
   if (thread->mmap_size != 0) {
     // Free mapped space, including thread stack and pthread_internal_t.
-    munmap(thread->attr.stack_base, thread->mmap_size);
+    munmap(thread->mmap_base, thread->mmap_size);
   }
 }
 
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 4c13dcb..27ab3df 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -52,12 +52,6 @@
 #define PTHREAD_ATTR_FLAG_INHERIT 0x00000004
 #define PTHREAD_ATTR_FLAG_EXPLICIT 0x00000008
 
-class pthread_key_data_t {
- public:
-  uintptr_t seq; // Use uintptr_t just for alignment, as we use pointer below.
-  void* data;
-};
-
 enum ThreadJoinState {
   THREAD_NOT_JOINED,
   THREAD_EXITED_NOT_JOINED,
@@ -131,6 +125,7 @@
 
   Lock startup_handshake_lock;
 
+  void* mmap_base;
   size_t mmap_size;
 
   thread_local_dtor* thread_local_dtors;
@@ -146,42 +141,44 @@
   bionic_tls* bionic_tls;
 
   int errno_value;
-
-  // The thread pointer (__get_tls()) points at this field. This field must come last so that
-  // an executable's TLS segment can be allocated at a fixed offset after the thread pointer.
-  void* tls[BIONIC_TLS_SLOTS];
-
-  // The golang runtime currently expects this field to come after the slots.
-  pthread_key_data_t key_data[BIONIC_PTHREAD_KEY_COUNT];
 };
 
-__LIBC_HIDDEN__ void __init_tls(pthread_internal_t* thread);
-__LIBC_HIDDEN__ void __init_tls_stack_guard(pthread_internal_t* thread);
-__LIBC_HIDDEN__ bionic_tls* __allocate_bionic_tls();
+struct ThreadMapping {
+  char* mmap_base;   // Start of the whole mmap'd region.
+  size_t mmap_size;  // Page-aligned length of the region.
+
+  char* static_tls;  // Start of static TLS inside the region.
+  char* stack_base;  // Lowest stack address; set to mmap_base, where the stack guard begins.
+  char* stack_top;   // End of the stack; set to the same address as static_tls.
+};
+
+__LIBC_HIDDEN__ void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread);
+__LIBC_HIDDEN__ void __init_tcb_stack_guard(bionic_tcb* tcb);
+__LIBC_HIDDEN__ void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls);
+__LIBC_HIDDEN__ bionic_tls* __allocate_temp_bionic_tls();
+__LIBC_HIDDEN__ void __free_temp_bionic_tls(bionic_tls* tls);
 __LIBC_HIDDEN__ void __init_additional_stacks(pthread_internal_t*);
 __LIBC_HIDDEN__ int __init_thread(pthread_internal_t* thread);
+__LIBC_HIDDEN__ ThreadMapping __allocate_thread_mapping(size_t stack_size, size_t stack_guard_size);
 
 __LIBC_HIDDEN__ pthread_t           __pthread_internal_add(pthread_internal_t* thread);
 __LIBC_HIDDEN__ pthread_internal_t* __pthread_internal_find(pthread_t pthread_id);
 __LIBC_HIDDEN__ void                __pthread_internal_remove(pthread_internal_t* thread);
 __LIBC_HIDDEN__ void                __pthread_internal_remove_and_free(pthread_internal_t* thread);
 
+static inline __always_inline bionic_tcb* __get_bionic_tcb() {  // Calling thread's TCB.
+  return reinterpret_cast<bionic_tcb*>(&__get_tls()[MIN_TLS_SLOT]);
+}
+
 // Make __get_thread() inlined for performance reason. See http://b/19825434.
 static inline __always_inline pthread_internal_t* __get_thread() {
-  void** tls = __get_tls();
-  if (__predict_true(tls)) {
-    return reinterpret_cast<pthread_internal_t*>(tls[TLS_SLOT_THREAD_ID]);
-  }
-
-  // This happens when called during libc initialization before TLS has been initialized.
-  return nullptr;
+  return static_cast<pthread_internal_t*>(__get_tls()[TLS_SLOT_THREAD_ID]);
 }
 
 static inline __always_inline bionic_tls& __get_bionic_tls() {
-  return *__get_thread()->bionic_tls;
+  return *static_cast<bionic_tls*>(__get_tls()[TLS_SLOT_BIONIC_TLS]);
 }
 
-extern __LIBC_HIDDEN__ pthread_internal_t* __get_main_thread();
 extern "C" __LIBC_HIDDEN__ int __set_tls(void* ptr);
 
 __LIBC_HIDDEN__ void pthread_key_clean_all(void);
diff --git a/libc/bionic/pthread_key.cpp b/libc/bionic/pthread_key.cpp
index f3878a6..53f0f11 100644
--- a/libc/bionic/pthread_key.cpp
+++ b/libc/bionic/pthread_key.cpp
@@ -69,12 +69,16 @@
   return (key < (KEY_VALID_FLAG | BIONIC_PTHREAD_KEY_COUNT));
 }
 
+static inline pthread_key_data_t* get_thread_key_data() {
+  return __get_bionic_tls().key_data;  // Key slots are stored in the calling thread's bionic_tls.
+}
+
 // Called from pthread_exit() to remove all pthread keys. This must call the destructor of
 // all keys that have a non-NULL data value and a non-NULL destructor.
 __LIBC_HIDDEN__ void pthread_key_clean_all() {
   // Because destructors can do funky things like deleting/creating other keys,
   // we need to implement this in a loop.
-  pthread_key_data_t* key_data = __get_thread()->key_data;
+  pthread_key_data_t* key_data = get_thread_key_data();
   for (size_t rounds = PTHREAD_DESTRUCTOR_ITERATIONS; rounds > 0; --rounds) {
     size_t called_destructor_count = 0;
     for (size_t i = 0; i < BIONIC_PTHREAD_KEY_COUNT; ++i) {
@@ -158,7 +162,7 @@
   }
   key &= ~KEY_VALID_FLAG;
   uintptr_t seq = atomic_load_explicit(&key_map[key].seq, memory_order_relaxed);
-  pthread_key_data_t* data = &(__get_thread()->key_data[key]);
+  pthread_key_data_t* data = &get_thread_key_data()[key];
   // It is user's responsibility to synchornize between the creation and use of pthread keys,
   // so we use memory_order_relaxed when checking the sequence number.
   if (__predict_true(SeqOfKeyInUse(seq) && data->seq == seq)) {
@@ -178,7 +182,7 @@
   key &= ~KEY_VALID_FLAG;
   uintptr_t seq = atomic_load_explicit(&key_map[key].seq, memory_order_relaxed);
   if (__predict_true(SeqOfKeyInUse(seq))) {
-    pthread_key_data_t* data = &(__get_thread()->key_data[key]);
+    pthread_key_data_t* data = &get_thread_key_data()[key];
     data->seq = seq;
     data->data = const_cast<void*>(ptr);
     return 0;
diff --git a/libc/kernel/tools/cpp.py b/libc/kernel/tools/cpp.py
index 336a9c8..1ada59e 100755
--- a/libc/kernel/tools/cpp.py
+++ b/libc/kernel/tools/cpp.py
@@ -1037,11 +1037,14 @@
             if t.id == '{':
                 buf += ' {'
                 result.append(strip_space(buf))
-                indent += 2
+                # Do not indent if this is extern "C" {
+                if i < 2 or tokens[i-2].id != 'extern' or tokens[i-1].id != '"C"':
+                    indent += 2
                 buf = ''
                 newline = True
             elif t.id == '}':
-                indent -= 2
+                if indent >= 2:
+                    indent -= 2
                 if not newline:
                     result.append(strip_space(buf))
                 # Look ahead to determine if it's the end of line.
@@ -1221,133 +1224,140 @@
         function declarations are removed. We only accept typedefs and
         enum/structs/union declarations.
 
+        In addition, remove any macro invocations that expand in the headers.
+        Usually, these macros expand to static inline functions, which is why
+        they are removed.
+
         However, we keep the definitions corresponding to the set of known
         static inline functions in the set 'keep', which is useful
         for optimized byteorder swap functions and stuff like that.
         """
 
-        # NOTE: It's also removing function-like macros, such as __SYSCALL(...)
-        # in uapi/asm-generic/unistd.h, or KEY_FIELD(...) in linux/bcache.h.
-        # It could be problematic when we have function-like macros but without
-        # '}' following them. It will skip all the tokens/blocks until seeing a
-        # '}' as the function end. Fortunately we don't have such cases in the
-        # current kernel headers.
+        # state = NORMAL => normal (i.e. LN + spaces)
+        # state = OTHER_DECL => typedef/struct encountered, ends with ";"
+        # state = VAR_DECL => '(' seen at depth 0 (function-like), ends with "}"/";"
+        # state = FUNC_DECL => ';' seen in NORMAL state (variable-like), ends there
+        NORMAL = 0
+        OTHER_DECL = 1
+        VAR_DECL = 2
+        FUNC_DECL = 3
 
-        # state = 0 => normal (i.e. LN + spaces)
-        # state = 1 => typedef/struct encountered, ends with ";"
-        # state = 2 => var declaration encountered, ends with ";"
-        # state = 3 => func declaration encountered, ends with "}"
-
-        state = 0
+        state = NORMAL
         depth = 0
-        blocks2 = []
-        skipTokens = False
-        for b in self.blocks:
-            if b.isDirective():
-                blocks2.append(b)
-            else:
-                n = len(b.tokens)
-                i = 0
-                if skipTokens:
-                    first = n
-                else:
-                    first = 0
-                while i < n:
-                    tok = b.tokens[i]
-                    tokid = tok.id
-                    # If we are not looking for the start of a new
-                    # type/var/func, then skip over tokens until
-                    # we find our terminator, managing the depth of
-                    # accolades as we go.
-                    if state > 0:
-                        terminator = False
-                        if tokid == '{':
-                            depth += 1
-                        elif tokid == '}':
-                            if depth > 0:
-                                depth -= 1
-                            if (depth == 0) and (state == 3):
-                                terminator = True
-                        elif tokid == ';' and depth == 0:
-                            terminator = True
-
-                        if terminator:
-                            # we found the terminator
-                            state = 0
-                            if skipTokens:
-                                skipTokens = False
-                                first = i + 1
-
-                        i += 1
-                        continue
-
-                    # Is it a new type definition, then start recording it
-                    if tok.id in ['struct', 'typedef', 'enum', 'union',
-                                  '__extension__']:
-                        state = 1
-                        i += 1
-                        continue
-
-                    # Is it a variable or function definition. If so, first
-                    # try to determine which type it is, and also extract
-                    # its name.
-                    #
-                    # We're going to parse the next tokens of the same block
-                    # until we find a semicolon or a left parenthesis.
-                    #
-                    # The semicolon corresponds to a variable definition,
-                    # the left-parenthesis to a function definition.
-                    #
-                    # We also assume that the var/func name is the last
-                    # identifier before the terminator.
-                    #
-                    j = i + 1
-                    ident = ""
-                    while j < n:
-                        tokid = b.tokens[j].id
-                        if tokid == '(':  # a function declaration
-                            state = 3
-                            break
-                        elif tokid == ';':  # a variable declaration
-                            state = 2
-                            break
-                        if b.tokens[j].kind == TokenKind.IDENTIFIER:
-                            ident = b.tokens[j].id
-                        j += 1
-
-                    if j >= n:
-                        # This can only happen when the declaration
-                        # does not end on the current block (e.g. with
-                        # a directive mixed inside it.
-                        #
-                        # We will treat it as malformed because
-                        # it's very hard to recover from this case
-                        # without making our parser much more
-                        # complex.
-                        #
-                        logging.debug("### skip unterminated static '%s'",
-                                      ident)
-                        break
-
-                    if ident in keep:
-                        logging.debug("### keep var/func '%s': %s", ident,
-                                      repr(b.tokens[i:j]))
+        blocksToKeep = []
+        blocksInProgress = []
+        blocksOfDirectives = []
+        ident = ""
+        state_token = ""
+        macros = set()
+        for block in self.blocks:
+            if block.isDirective():
+                # Record all macros.
+                if block.directive == 'define':
+                    macro_name = block.define_id
+                    paren_index = macro_name.find('(')
+                    if paren_index == -1:
+                        macros.add(macro_name)
                     else:
-                        # We're going to skip the tokens for this declaration
-                        logging.debug("### skip var/func '%s': %s", ident,
-                                      repr(b.tokens[i:j]))
-                        if i > first:
-                            blocks2.append(Block(b.tokens[first:i]))
-                        skipTokens = True
-                        first = n
+                        macros.add(macro_name[0:paren_index])
+                blocksInProgress.append(block)
+                # If this is in a function/variable declaration, we might need
+                # to emit the directives alone, so save them separately.
+                blocksOfDirectives.append(block)
+                continue
 
-                    i += 1
+            numTokens = len(block.tokens)
+            lastTerminatorIndex = 0
+            i = 0
+            while i < numTokens:
+                token_id = block.tokens[i].id
+                terminator = False
+                if token_id == '{':
+                    depth += 1
+                    if (i >= 2 and block.tokens[i-2].id == 'extern' and
+                        block.tokens[i-1].id == '"C"'):
+                        # For an extern "C" { pretend as though this is depth 0.
+                        depth -= 1
+                elif token_id == '}':
+                    if depth > 0:
+                        depth -= 1
+                    if depth == 0:
+                        if state == OTHER_DECL:
+                            # Loop through until we hit the ';'
+                            i += 1
+                            while i < numTokens:
+                                if block.tokens[i].id == ';':
+                                    token_id = ';'
+                                    break
+                                i += 1
+                            # If we didn't hit the ';', just consider this the
+                            # terminator anyway.
+                        terminator = True
+                elif depth == 0:
+                    if token_id == ';':
+                        if state == NORMAL:
+                            blocksToKeep.extend(blocksInProgress)
+                            blocksInProgress = []
+                            blocksOfDirectives = []
+                            state = FUNC_DECL
+                        terminator = True
+                    elif (state == NORMAL and token_id == '(' and i >= 1 and
+                          block.tokens[i-1].kind == TokenKind.IDENTIFIER and
+                          block.tokens[i-1].id in macros):
+                        # This is a plain macro being expanded in the header
+                        # which needs to be removed.
+                        blocksToKeep.extend(blocksInProgress)
+                        if lastTerminatorIndex < i - 1:
+                            blocksToKeep.append(Block(block.tokens[lastTerminatorIndex:i-1]))
+                        blocksInProgress = []
+                        blocksOfDirectives = []
 
-                if i > first:
-                    #print "### final '%s'" % repr(b.tokens[first:i])
-                    blocks2.append(Block(b.tokens[first:i]))
+                        # Skip until we see the terminating ')'
+                        i += 1
+                        paren_depth = 1
+                        while i < numTokens:
+                            if block.tokens[i].id == ')':
+                                paren_depth -= 1
+                                if paren_depth == 0:
+                                    break
+                            elif block.tokens[i].id == '(':
+                                paren_depth += 1
+                            i += 1
+                        lastTerminatorIndex = i + 1
+                    elif (state != FUNC_DECL and token_id == '(' and
+                          state_token != 'typedef'):
+                        blocksToKeep.extend(blocksInProgress)
+                        blocksInProgress = []
+                        blocksOfDirectives = []
+                        state = VAR_DECL
+                    elif state == NORMAL and token_id in ['struct', 'typedef',
+                                                          'enum', 'union',
+                                                          '__extension__']:
+                        state = OTHER_DECL
+                        state_token = token_id
+                    elif block.tokens[i].kind == TokenKind.IDENTIFIER:
+                        if state != VAR_DECL or ident == "":
+                            ident = token_id
 
-        self.blocks = blocks2
+                if terminator:
+                    if state != VAR_DECL and state != FUNC_DECL or ident in keep:
+                        blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:i+1]))
+                        blocksToKeep.extend(blocksInProgress)
+                    else:
+                        # Only keep the directives found.
+                        blocksToKeep.extend(blocksOfDirectives)
+                    lastTerminatorIndex = i + 1
+                    blocksInProgress = []
+                    blocksOfDirectives = []
+                    state = NORMAL
+                    ident = ""
+                    state_token = ""
+                i += 1
+            if lastTerminatorIndex < numTokens:
+                blocksInProgress.append(Block(block.tokens[lastTerminatorIndex:numTokens]))
+        if len(blocksInProgress) > 0:
+            blocksToKeep.extend(blocksInProgress)
+        self.blocks = blocksToKeep
 
     def replaceTokens(self, replacements):
         """Replace tokens according to the given dict."""
@@ -1938,6 +1948,299 @@
         expected = ""
         self.assertEqual(self.parse(text), expected)
 
+class FullPathTest(unittest.TestCase):
+    """Test of the full path parsing."""
+
+    def parse(self, text, keep=None):
+        if not keep:
+            keep = set()
+        out = utils.StringOutput()
+        blocks = BlockParser().parse(CppStringTokenizer(text))
+        blocks.removeVarsAndFuncs(keep)
+        blocks.replaceTokens(kernel_token_replacements)
+        blocks.optimizeAll(None)
+        blocks.write(out)
+        return out.get()
+
+    def test_function_removed(self):
+        text = """\
+static inline __u64 function()
+{
+}
+"""
+        expected = ""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_function_removed_with_struct(self):
+        text = """\
+static inline struct something* function()
+{
+}
+"""
+        expected = ""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_function_kept(self):
+        text = """\
+static inline __u64 function()
+{
+}
+"""
+        expected = """\
+static inline __u64 function() {
+}
+"""
+        self.assertEqual(self.parse(text, set(["function"])), expected)
+
+    def test_var_removed(self):
+        text = "__u64 variable;"
+        expected = ""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_var_kept(self):
+        text = "__u64 variable;"
+        expected = "__u64 variable;\n"
+        self.assertEqual(self.parse(text, set(["variable"])), expected)
+
+    def test_keep_function_typedef(self):
+        text = "typedef void somefunction_t(void);"
+        expected = "typedef void somefunction_t(void);\n"
+        self.assertEqual(self.parse(text), expected)
+
+    def test_struct_keep_attribute(self):
+        text = """\
+struct something_s {
+  __u32 s1;
+  __u32 s2;
+} __attribute__((packed));
+"""
+        expected = """\
+struct something_s {
+  __u32 s1;
+  __u32 s2;
+} __attribute__((packed));
+"""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_function_keep_attribute_structs(self):
+        text = """\
+static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
+}
+"""
+        expected = """\
+static __inline__ struct some_struct1 * function(struct some_struct2 * e) {
+}
+"""
+        self.assertEqual(self.parse(text, set(["function"])), expected)
+
+    def test_struct_after_struct(self):
+        text = """\
+struct first {
+};
+
+struct second {
+  unsigned short s1;
+#define SOMETHING 8
+  unsigned short s2;
+};
+"""
+        expected = """\
+struct first {
+};
+struct second {
+  unsigned short s1;
+#define SOMETHING 8
+  unsigned short s2;
+};
+"""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_other_not_removed(self):
+        text = """\
+typedef union {
+  __u64 tu1;
+  __u64 tu2;
+} typedef_name;
+
+union {
+  __u64 u1;
+  __u64 u2;
+};
+
+struct {
+  __u64 s1;
+  __u64 s2;
+};
+
+enum {
+  ENUM1 = 0,
+  ENUM2,
+};
+
+__extension__ typedef __signed__ long long __s64;
+"""
+        expected = """\
+typedef union {
+  __u64 tu1;
+  __u64 tu2;
+} typedef_name;
+union {
+  __u64 u1;
+  __u64 u2;
+};
+struct {
+  __u64 s1;
+  __u64 s2;
+};
+enum {
+  ENUM1 = 0,
+  ENUM2,
+};
+__extension__ typedef __signed__ long long __s64;
+"""
+
+        self.assertEqual(self.parse(text), expected)
+
+    def test_semicolon_after_function(self):
+        text = """\
+static inline __u64 function()
+{
+};
+
+struct should_see {
+        __u32                           field;
+};
+"""
+        expected = """\
+struct should_see {
+  __u32 field;
+};
+"""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_define_in_middle_keep(self):
+        text = """\
+enum {
+  ENUM0 = 0x10,
+  ENUM1 = 0x20,
+#define SOMETHING SOMETHING_ELSE
+  ENUM2 = 0x40,
+};
+"""
+        expected = """\
+enum {
+  ENUM0 = 0x10,
+  ENUM1 = 0x20,
+#define SOMETHING SOMETHING_ELSE
+  ENUM2 = 0x40,
+};
+"""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_define_in_middle_remove(self):
+        text = """\
+static inline function() {
+#define SOMETHING1 SOMETHING_ELSE1
+  i = 0;
+  {
+    i = 1;
+  }
+#define SOMETHING2 SOMETHING_ELSE2
+}
+"""
+        expected = """\
+#define SOMETHING1 SOMETHING_ELSE1
+#define SOMETHING2 SOMETHING_ELSE2
+"""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_define_in_middle_force_keep(self):
+        text = """\
+static inline function() {
+#define SOMETHING1 SOMETHING_ELSE1
+  i = 0;
+  {
+    i = 1;
+  }
+#define SOMETHING2 SOMETHING_ELSE2
+}
+"""
+        expected = """\
+static inline function() {
+#define SOMETHING1 SOMETHING_ELSE1
+  i = 0;
+ {
+    i = 1;
+  }
+#define SOMETHING2 SOMETHING_ELSE2
+}
+"""
+        self.assertEqual(self.parse(text, set(["function"])), expected)
+
+    def test_define_before_remove(self):
+        text = """\
+#define SHOULD_BE_KEPT NOTHING1
+#define ANOTHER_TO_KEEP NOTHING2
+static inline function() {
+#define SOMETHING1 SOMETHING_ELSE1
+  i = 0;
+  {
+    i = 1;
+  }
+#define SOMETHING2 SOMETHING_ELSE2
+}
+"""
+        expected = """\
+#define SHOULD_BE_KEPT NOTHING1
+#define ANOTHER_TO_KEEP NOTHING2
+#define SOMETHING1 SOMETHING_ELSE1
+#define SOMETHING2 SOMETHING_ELSE2
+"""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_extern_C(self):
+        text = """\
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+struct something {
+};
+
+#if defined(__cplusplus)
+}
+#endif
+"""
+        expected = """\
+#ifdef __cplusplus
+extern "C" {
+#endif
+struct something {
+};
+#ifdef __cplusplus
+}
+#endif
+"""
+        self.assertEqual(self.parse(text), expected)
+
+    def test_macro_definition_removed(self):
+        text = """\
+#define MACRO_FUNCTION_NO_PARAMS static inline some_func() {}
+MACRO_FUNCTION_NO_PARAMS()
+
+#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
+MACRO_FUNCTION_PARAMS(a = 1)
+
+something that should still be kept
+MACRO_FUNCTION_PARAMS(b)
+"""
+        expected = """\
+#define MACRO_FUNCTION_NO_PARAMS static inline some_func() { }
+#define MACRO_FUNCTION_PARAMS(a) static inline some_func() { a; }
+something that should still be kept
+"""
+        self.assertEqual(self.parse(text), expected)
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/libc/kernel/uapi/drm/amdgpu_drm.h b/libc/kernel/uapi/drm/amdgpu_drm.h
index bdf59d6..2013fa7 100644
--- a/libc/kernel/uapi/drm/amdgpu_drm.h
+++ b/libc/kernel/uapi/drm/amdgpu_drm.h
@@ -20,6 +20,7 @@
 #define __AMDGPU_DRM_H__
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_AMDGPU_GEM_CREATE 0x00
 #define DRM_AMDGPU_GEM_MMAP 0x01
@@ -593,5 +594,6 @@
 #define AMDGPU_FAMILY_AI 141
 #define AMDGPU_FAMILY_RV 142
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/armada_drm.h b/libc/kernel/uapi/drm/armada_drm.h
index ea1f37d..aabd23b 100644
--- a/libc/kernel/uapi/drm/armada_drm.h
+++ b/libc/kernel/uapi/drm/armada_drm.h
@@ -20,6 +20,7 @@
 #define DRM_ARMADA_IOCTL_H
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_ARMADA_GEM_CREATE 0x00
 #define DRM_ARMADA_GEM_MMAP 0x02
@@ -46,5 +47,6 @@
 };
 #define DRM_IOCTL_ARMADA_GEM_PWRITE ARMADA_IOCTL(IOW, GEM_PWRITE, gem_pwrite)
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/drm.h b/libc/kernel/uapi/drm/drm.h
index 21f23f4..ef64ed7 100644
--- a/libc/kernel/uapi/drm/drm.h
+++ b/libc/kernel/uapi/drm/drm.h
@@ -37,6 +37,7 @@
 typedef unsigned long drm_handle_t;
 #endif
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_NAME "drm"
 #define DRM_MIN_ORDER 5
@@ -429,9 +430,11 @@
   __u64 user_data;
 };
 #ifdef __cplusplus
+}
 #endif
 #include "drm_mode.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_IOCTL_BASE 'd'
 #define DRM_IO(nr) _IO(DRM_IOCTL_BASE, nr)
@@ -603,5 +606,6 @@
 typedef struct drm_scatter_gather drm_scatter_gather_t;
 typedef struct drm_set_version drm_set_version_t;
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/drm_fourcc.h b/libc/kernel/uapi/drm/drm_fourcc.h
index 9634e99..4589cfe 100644
--- a/libc/kernel/uapi/drm/drm_fourcc.h
+++ b/libc/kernel/uapi/drm/drm_fourcc.h
@@ -20,6 +20,7 @@
 #define DRM_FOURCC_H
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define fourcc_code(a,b,c,d) ((__u32) (a) | ((__u32) (b) << 8) | ((__u32) (c) << 16) | ((__u32) (d) << 24))
 #define DRM_FORMAT_BIG_ENDIAN (1 << 31)
@@ -156,5 +157,6 @@
 #define AFBC_FORMAT_MOD_TILED (1ULL << 8)
 #define AFBC_FORMAT_MOD_SC (1ULL << 9)
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/drm_mode.h b/libc/kernel/uapi/drm/drm_mode.h
index 06c26e4..dff9f34 100644
--- a/libc/kernel/uapi/drm/drm_mode.h
+++ b/libc/kernel/uapi/drm/drm_mode.h
@@ -20,6 +20,7 @@
 #define _DRM_MODE_H
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_DISPLAY_INFO_LEN 32
 #define DRM_CONNECTOR_NAME_LEN 32
@@ -463,5 +464,6 @@
   __u32 lessee_id;
 };
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/drm_sarea.h b/libc/kernel/uapi/drm/drm_sarea.h
index 03317b9..a0c7f3a 100644
--- a/libc/kernel/uapi/drm/drm_sarea.h
+++ b/libc/kernel/uapi/drm/drm_sarea.h
@@ -20,6 +20,7 @@
 #define _DRM_SAREA_H_
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #ifdef __alpha__
 #define SAREA_MAX 0x2000U
@@ -54,5 +55,6 @@
 typedef struct drm_sarea_frame drm_sarea_frame_t;
 typedef struct drm_sarea drm_sarea_t;
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/etnaviv_drm.h b/libc/kernel/uapi/drm/etnaviv_drm.h
index bb502d9..4c09e6c 100644
--- a/libc/kernel/uapi/drm/etnaviv_drm.h
+++ b/libc/kernel/uapi/drm/etnaviv_drm.h
@@ -20,6 +20,7 @@
 #define __ETNAVIV_DRM_H__
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 struct drm_etnaviv_timespec {
   __s64 tv_sec;
@@ -193,5 +194,6 @@
 #define DRM_IOCTL_ETNAVIV_PM_QUERY_DOM DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_PM_QUERY_DOM, struct drm_etnaviv_pm_domain)
 #define DRM_IOCTL_ETNAVIV_PM_QUERY_SIG DRM_IOWR(DRM_COMMAND_BASE + DRM_ETNAVIV_PM_QUERY_SIG, struct drm_etnaviv_pm_signal)
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/exynos_drm.h b/libc/kernel/uapi/drm/exynos_drm.h
index 8b27cbb..4918035 100644
--- a/libc/kernel/uapi/drm/exynos_drm.h
+++ b/libc/kernel/uapi/drm/exynos_drm.h
@@ -20,6 +20,7 @@
 #define _UAPI_EXYNOS_DRM_H_
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 struct drm_exynos_gem_create {
   __u64 size;
@@ -225,5 +226,6 @@
   __u64 reserved;
 };
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/i810_drm.h b/libc/kernel/uapi/drm/i810_drm.h
index 1fd3c46..e33387d 100644
--- a/libc/kernel/uapi/drm/i810_drm.h
+++ b/libc/kernel/uapi/drm/i810_drm.h
@@ -20,6 +20,7 @@
 #define _I810_DRM_H_
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #ifndef _I810_DEFINES_
 #define _I810_DEFINES_
@@ -214,5 +215,6 @@
   unsigned int last_render;
 } drm_i810_mc_t;
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/i915_drm.h b/libc/kernel/uapi/drm/i915_drm.h
index 4c1d87f..53d1548 100644
--- a/libc/kernel/uapi/drm/i915_drm.h
+++ b/libc/kernel/uapi/drm/i915_drm.h
@@ -20,6 +20,7 @@
 #define _UAPI_I915_DRM_H_
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
 #define I915_ERROR_UEVENT "ERROR"
@@ -775,5 +776,6 @@
   __u8 data[];
 };
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/mga_drm.h b/libc/kernel/uapi/drm/mga_drm.h
index 4d5ad0c..4959502 100644
--- a/libc/kernel/uapi/drm/mga_drm.h
+++ b/libc/kernel/uapi/drm/mga_drm.h
@@ -20,6 +20,7 @@
 #define __MGA_DRM_H__
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #ifndef __MGA_SAREA_DEFINES__
 #define __MGA_SAREA_DEFINES__
@@ -240,5 +241,6 @@
   void __user * value;
 } drm_mga_getparam_t;
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/msm_drm.h b/libc/kernel/uapi/drm/msm_drm.h
index 1d53c5d..df8119f 100644
--- a/libc/kernel/uapi/drm/msm_drm.h
+++ b/libc/kernel/uapi/drm/msm_drm.h
@@ -20,6 +20,7 @@
 #define __MSM_DRM_H__
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define MSM_PIPE_NONE 0x00
 #define MSM_PIPE_2D0 0x01
@@ -158,5 +159,6 @@
 #define DRM_IOCTL_MSM_SUBMITQUEUE_NEW DRM_IOWR(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_NEW, struct drm_msm_submitqueue)
 #define DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE DRM_IOW(DRM_COMMAND_BASE + DRM_MSM_SUBMITQUEUE_CLOSE, __u32)
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/nouveau_drm.h b/libc/kernel/uapi/drm/nouveau_drm.h
index f9c3447..54c3b97 100644
--- a/libc/kernel/uapi/drm/nouveau_drm.h
+++ b/libc/kernel/uapi/drm/nouveau_drm.h
@@ -21,6 +21,7 @@
 #define DRM_NOUVEAU_EVENT_NVIF 0x80000000
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define NOUVEAU_GEM_DOMAIN_CPU (1 << 0)
 #define NOUVEAU_GEM_DOMAIN_VRAM (1 << 1)
@@ -122,5 +123,6 @@
 #define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI DRM_IOW(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini)
 #define DRM_IOCTL_NOUVEAU_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info)
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/omap_drm.h b/libc/kernel/uapi/drm/omap_drm.h
index 54b539a..3c2fc08 100644
--- a/libc/kernel/uapi/drm/omap_drm.h
+++ b/libc/kernel/uapi/drm/omap_drm.h
@@ -20,6 +20,7 @@
 #define __OMAP_DRM_H__
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define OMAP_PARAM_CHIPSET_ID 1
 struct drm_omap_param {
@@ -84,5 +85,6 @@
 #define DRM_IOCTL_OMAP_GEM_CPU_FINI DRM_IOW(DRM_COMMAND_BASE + DRM_OMAP_GEM_CPU_FINI, struct drm_omap_gem_cpu_fini)
 #define DRM_IOCTL_OMAP_GEM_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_OMAP_GEM_INFO, struct drm_omap_gem_info)
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/qxl_drm.h b/libc/kernel/uapi/drm/qxl_drm.h
index 5942635..e9521c4 100644
--- a/libc/kernel/uapi/drm/qxl_drm.h
+++ b/libc/kernel/uapi/drm/qxl_drm.h
@@ -20,6 +20,7 @@
 #define QXL_DRM_H
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define QXL_GEM_DOMAIN_CPU 0
 #define QXL_GEM_DOMAIN_VRAM 1
@@ -97,5 +98,6 @@
 #define DRM_IOCTL_QXL_CLIENTCAP DRM_IOW(DRM_COMMAND_BASE + DRM_QXL_CLIENTCAP, struct drm_qxl_clientcap)
 #define DRM_IOCTL_QXL_ALLOC_SURF DRM_IOWR(DRM_COMMAND_BASE + DRM_QXL_ALLOC_SURF, struct drm_qxl_alloc_surf)
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/r128_drm.h b/libc/kernel/uapi/drm/r128_drm.h
index 85dea7e..618b6dc 100644
--- a/libc/kernel/uapi/drm/r128_drm.h
+++ b/libc/kernel/uapi/drm/r128_drm.h
@@ -20,6 +20,7 @@
 #define __R128_DRM_H__
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #ifndef __R128_SAREA_DEFINES__
 #define __R128_SAREA_DEFINES__
@@ -229,5 +230,6 @@
   void __user * value;
 } drm_r128_getparam_t;
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/radeon_drm.h b/libc/kernel/uapi/drm/radeon_drm.h
index fe8fe67..9dc69ad 100644
--- a/libc/kernel/uapi/drm/radeon_drm.h
+++ b/libc/kernel/uapi/drm/radeon_drm.h
@@ -20,6 +20,7 @@
 #define __RADEON_DRM_H__
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #ifndef __RADEON_SAREA_DEFINES__
 #define __RADEON_SAREA_DEFINES__
@@ -801,5 +802,6 @@
 #define SI_TILE_MODE_DEPTH_STENCIL_2D_8AA 2
 #define CIK_TILE_MODE_DEPTH_STENCIL_1D 5
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/savage_drm.h b/libc/kernel/uapi/drm/savage_drm.h
index 8c5a172..ae87d21 100644
--- a/libc/kernel/uapi/drm/savage_drm.h
+++ b/libc/kernel/uapi/drm/savage_drm.h
@@ -20,6 +20,7 @@
 #define __SAVAGE_DRM_H__
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #ifndef __SAVAGE_SAREA_DEFINES__
 #define __SAVAGE_SAREA_DEFINES__
@@ -151,5 +152,6 @@
   } clear1;
 };
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/sis_drm.h b/libc/kernel/uapi/drm/sis_drm.h
index ba88ea9..1606a85 100644
--- a/libc/kernel/uapi/drm/sis_drm.h
+++ b/libc/kernel/uapi/drm/sis_drm.h
@@ -20,6 +20,7 @@
 #define __SIS_DRM_H__
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define NOT_USED_0_3
 #define DRM_SIS_FB_ALLOC 0x04
@@ -48,5 +49,6 @@
   unsigned long offset, size;
 } drm_sis_fb_t;
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/tegra_drm.h b/libc/kernel/uapi/drm/tegra_drm.h
index ee111dc..5244a27 100644
--- a/libc/kernel/uapi/drm/tegra_drm.h
+++ b/libc/kernel/uapi/drm/tegra_drm.h
@@ -20,6 +20,7 @@
 #define _UAPI_TEGRA_DRM_H_
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_TEGRA_GEM_CREATE_TILED (1 << 0)
 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1)
@@ -163,5 +164,6 @@
 #define DRM_IOCTL_TEGRA_GEM_SET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_SET_FLAGS, struct drm_tegra_gem_set_flags)
 #define DRM_IOCTL_TEGRA_GEM_GET_FLAGS DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GEM_GET_FLAGS, struct drm_tegra_gem_get_flags)
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/v3d_drm.h b/libc/kernel/uapi/drm/v3d_drm.h
index c3e58cb..8865911 100644
--- a/libc/kernel/uapi/drm/v3d_drm.h
+++ b/libc/kernel/uapi/drm/v3d_drm.h
@@ -20,6 +20,7 @@
 #define _V3D_DRM_H_
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_V3D_SUBMIT_CL 0x00
 #define DRM_V3D_WAIT_BO 0x01
@@ -83,5 +84,6 @@
   __u32 offset;
 };
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/vc4_drm.h b/libc/kernel/uapi/drm/vc4_drm.h
index 29eb872..fde443f 100644
--- a/libc/kernel/uapi/drm/vc4_drm.h
+++ b/libc/kernel/uapi/drm/vc4_drm.h
@@ -20,6 +20,7 @@
 #define _UAPI_VC4_DRM_H_
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_VC4_SUBMIT_CL 0x00
 #define DRM_VC4_WAIT_SEQNO 0x01
@@ -233,5 +234,6 @@
   __u64 values_ptr;
 };
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/vgem_drm.h b/libc/kernel/uapi/drm/vgem_drm.h
index 7b0ebc9..b33452b 100644
--- a/libc/kernel/uapi/drm/vgem_drm.h
+++ b/libc/kernel/uapi/drm/vgem_drm.h
@@ -20,6 +20,7 @@
 #define _UAPI_VGEM_DRM_H_
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_VGEM_FENCE_ATTACH 0x1
 #define DRM_VGEM_FENCE_SIGNAL 0x2
@@ -37,5 +38,6 @@
   __u32 flags;
 };
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/via_drm.h b/libc/kernel/uapi/drm/via_drm.h
index fd4948d..9ef645a 100644
--- a/libc/kernel/uapi/drm/via_drm.h
+++ b/libc/kernel/uapi/drm/via_drm.h
@@ -20,6 +20,7 @@
 #define _VIA_DRM_H_
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #ifndef _VIA_DEFINES_
 #define _VIA_DEFINES_
@@ -196,5 +197,6 @@
   drm_via_blitsync_t sync;
 } drm_via_dmablit_t;
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/virtgpu_drm.h b/libc/kernel/uapi/drm/virtgpu_drm.h
index 6b7fb0b..84986e4 100644
--- a/libc/kernel/uapi/drm/virtgpu_drm.h
+++ b/libc/kernel/uapi/drm/virtgpu_drm.h
@@ -20,6 +20,7 @@
 #define VIRTGPU_DRM_H
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_VIRTGPU_MAP 0x01
 #define DRM_VIRTGPU_EXECBUFFER 0x02
@@ -113,5 +114,6 @@
 #define DRM_IOCTL_VIRTGPU_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_WAIT, struct drm_virtgpu_3d_wait)
 #define DRM_IOCTL_VIRTGPU_GET_CAPS DRM_IOWR(DRM_COMMAND_BASE + DRM_VIRTGPU_GET_CAPS, struct drm_virtgpu_get_caps)
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/drm/vmwgfx_drm.h b/libc/kernel/uapi/drm/vmwgfx_drm.h
index 7df722a..bb1f36d 100644
--- a/libc/kernel/uapi/drm/vmwgfx_drm.h
+++ b/libc/kernel/uapi/drm/vmwgfx_drm.h
@@ -20,6 +20,7 @@
 #define __VMWGFX_DRM_H__
 #include "drm.h"
 #ifdef __cplusplus
+extern "C" {
 #endif
 #define DRM_VMW_MAX_SURFACE_FACES 6
 #define DRM_VMW_MAX_MIP_LEVELS 24
@@ -350,5 +351,6 @@
   struct drm_vmw_surface_arg req;
 };
 #ifdef __cplusplus
+}
 #endif
 #endif
diff --git a/libc/kernel/uapi/linux/nilfs2_api.h b/libc/kernel/uapi/linux/nilfs2_api.h
index 2f2692e..d6a6b8f 100644
--- a/libc/kernel/uapi/linux/nilfs2_api.h
+++ b/libc/kernel/uapi/linux/nilfs2_api.h
@@ -39,6 +39,16 @@
 #define NILFS_CPINFO_FNS(flag,name) static inline int nilfs_cpinfo_ ##name(const struct nilfs_cpinfo * cpinfo) \
 { return ! ! (cpinfo->ci_flags & (1UL << NILFS_CPINFO_ ##flag)); \
 }
+struct nilfs_suinfo {
+  __u64 sui_lastmod;
+  __u32 sui_nblocks;
+  __u32 sui_flags;
+};
+enum {
+  NILFS_SUINFO_ACTIVE,
+  NILFS_SUINFO_DIRTY,
+  NILFS_SUINFO_ERROR,
+};
 #define NILFS_SUINFO_FNS(flag,name) static inline int nilfs_suinfo_ ##name(const struct nilfs_suinfo * si) \
 { return si->sui_flags & (1UL << NILFS_SUINFO_ ##flag); \
 }
@@ -61,6 +71,15 @@
 } static inline int nilfs_suinfo_update_ ##name(const struct nilfs_suinfo_update * sup) \
 { return ! ! (sup->sup_flags & (1UL << NILFS_SUINFO_UPDATE_ ##flag)); \
 }
+enum {
+  NILFS_CHECKPOINT,
+  NILFS_SNAPSHOT,
+};
+struct nilfs_cpmode {
+  __u64 cm_cno;
+  __u32 cm_mode;
+  __u32 cm_pad;
+};
 struct nilfs_argv {
   __u64 v_base;
   __u32 v_nmembs;
diff --git a/libc/kernel/uapi/linux/nilfs2_ondisk.h b/libc/kernel/uapi/linux/nilfs2_ondisk.h
index d70b75f..e9995a1 100644
--- a/libc/kernel/uapi/linux/nilfs2_ondisk.h
+++ b/libc/kernel/uapi/linux/nilfs2_ondisk.h
@@ -254,6 +254,11 @@
 } static inline int nilfs_checkpoint_ ##name(const struct nilfs_checkpoint * cp) \
 { return ! ! (le32_to_cpu(cp->cp_flags) & (1UL << NILFS_CHECKPOINT_ ##flag)); \
 }
+struct nilfs_cpfile_header {
+  __le64 ch_ncheckpoints;
+  __le64 ch_nsnapshots;
+  struct nilfs_snapshot_list ch_snapshot_list;
+};
 #define NILFS_CPFILE_FIRST_CHECKPOINT_OFFSET ((sizeof(struct nilfs_cpfile_header) + sizeof(struct nilfs_checkpoint) - 1) / sizeof(struct nilfs_checkpoint))
 struct nilfs_segment_usage {
   __le64 su_lastmod;
diff --git a/libc/libc.map.txt b/libc/libc.map.txt
index f78014d..9dfdbc0 100644
--- a/libc/libc.map.txt
+++ b/libc/libc.map.txt
@@ -1451,16 +1451,44 @@
     android_fdsan_close_with_tag;
     android_fdsan_create_owner_tag;
     android_fdsan_exchange_owner_tag;
+    android_fdsan_get_error_level;
     android_fdsan_get_owner_tag;
     android_fdsan_get_tag_type;
     android_fdsan_get_tag_value;
-    android_fdsan_get_error_level;
     android_fdsan_set_error_level;
     android_get_device_api_level;
     getloadavg;
     pthread_sigqueue;
     reallocarray;
     timespec_get;
+
+    # Used by libselinux
+    __system_properties_init; # apex
+
+    # Used by libmemunreachable
+    malloc_backtrace; # apex
+    malloc_disable; # apex
+    malloc_enable; # apex
+    malloc_iterate; # apex
+
+    # Used by libmediautils
+    write_malloc_leak_info; # apex
+    free_malloc_leak_info; # apex
+    get_malloc_leak_info; # apex
+
+    # Used by libandroid_net
+    android_getaddrinfofornet; # apex
+
+    # Used by libandroid_runtime
+    gMallocLeakZygoteChild; # apex
+
+    # TODO(b/120266448) hide these symbols again
+    # Used by libndk_translation
+    __getdents64; # arm x86 mips apex
+    tkill; # arm x86 mips apex
+    # Used by PtsBionicDeviceTestCases
+    __bionic_brk; # arm x86 mips apex
+    __system_property_add; # apex
 } LIBC_P;
 
 LIBC_PRIVATE {
@@ -1537,7 +1565,6 @@
     __arm_fadvise64_64; # arm
     __ashldi3; # arm
     __ashrdi3; # arm
-    __bionic_brk; # arm x86 mips
     __bionic_libgcc_compat_symbols; # arm x86
     __cmpdf2; # arm
     __divdf3; # arm
@@ -1563,7 +1590,6 @@
     __gedf2; # arm
     __get_thread; # arm x86 mips
     __get_tls; # arm x86 mips
-    __getdents64; # arm x86 mips
     __gnu_ldivmod_helper; # arm
     __gnu_uldivmod_helper; # arm
     __gnu_Unwind_Backtrace; # arm
@@ -1640,7 +1666,6 @@
     _Unwind_VRS_Get; # arm
     _Unwind_VRS_Pop; # arm
     _Unwind_VRS_Set; # arm
-    android_getaddrinfofornet;
     android_getaddrinfofornetcontext;
     android_gethostbyaddrfornet;
     android_gethostbyaddrfornetcontext;
@@ -1655,12 +1680,9 @@
     dlmalloc_inspect_all; # arm x86 mips
     dlmalloc_trim; # arm x86 mips
     dlmalloc_usable_size; # arm x86 mips
-    free_malloc_leak_info;
     ftime; # arm x86 mips
-    get_malloc_leak_info;
     getdents; # arm x86 mips
     getdtablesize; # arm x86 mips
-    gMallocLeakZygoteChild;
     index; # arm x86 mips
     issetugid; # arm x86 mips
     memswap; # arm x86 mips
@@ -1675,10 +1697,8 @@
     strntoumax; # arm x86 mips
     strtotimeval; # arm x86 mips
     sysv_signal; # arm x86 mips
-    tkill; # arm x86 mips
     wait3; # arm x86 mips
     wcswcs; # arm x86 mips
-    write_malloc_leak_info;
 } LIBC_Q;
 
 LIBC_DEPRECATED {
@@ -1688,9 +1708,7 @@
 
 LIBC_PLATFORM {
   global:
-    __system_properties_init;
     __system_property_area__; # var
-    __system_property_add;
     __system_property_area_init;
     __system_property_set_filename;
     __system_property_update;
@@ -1698,8 +1716,4 @@
     android_net_res_stats_get_info_for_net;
     android_net_res_stats_aggregate;
     android_net_res_stats_get_usable_servers;
-    malloc_backtrace;
-    malloc_disable;
-    malloc_enable;
-    malloc_iterate;
 } LIBC_Q;
diff --git a/libc/private/bionic_asm_tls.h b/libc/private/bionic_asm_tls.h
new file mode 100644
index 0000000..06e3dce
--- /dev/null
+++ b/libc/private/bionic_asm_tls.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#pragma once
+
+/** WARNING WARNING WARNING
+ **
+ ** This header file is *NOT* part of the public Bionic ABI/API and should not
+ ** be used/included by user-serviceable parts of the system (e.g.
+ ** applications).
+ **
+ ** It is only provided here for the benefit of Android components that need a
+ ** pre-allocated slot for performance reasons (including ART, the OpenGL
+ ** subsystem, and sanitizers).
+ **/
+
+// Bionic TCB / TLS slots:
+//
+//  - TLS_SLOT_SELF: On x86-{32,64}, the kernel makes TLS memory available via
+//    the gs/fs segments. To get the address of a TLS variable, the first slot
+//    of TLS memory (accessed using %gs:0 / %fs:0) holds the address of the
+//    gs/fs segment. This slot is used by:
+//     - OpenGL and compiler-rt
+//     - Accesses of x86 ELF TLS variables
+//
+//  - TLS_SLOT_OPENGL and TLS_SLOT_OPENGL_API: These two aren't used by bionic
+//    itself, but allow the graphics code to access TLS directly rather than
+//    using the pthread API.
+//
+//  - TLS_SLOT_STACK_GUARD: Used for -fstack-protector by:
+//     - Clang targeting Android/arm64
+//     - gcc targeting Linux/x86-{32,64}
+//
+//  - TLS_SLOT_SANITIZER: Lets sanitizers avoid using pthread_getspecific for
+//    finding the current thread state.
+//
+//  - TLS_SLOT_DTV: Pointer to ELF TLS dynamic thread vector.
+//
+//  - TLS_SLOT_ART_THREAD_SELF: Fast storage for Thread::Current() in ART.
+//
+//  - TLS_SLOT_BIONIC_TLS: Saves one load when accessing bionic_tls, compared
+//    with finding it through __get_thread().
+
+#if defined(__arm__) || defined(__aarch64__)
+
+// The ARM ELF TLS ABI specifies[1] that the thread pointer points at a 2-word
+// TCB followed by the executable's TLS segment. Both the TCB and the
+// executable's segment are aligned according to the segment, so Bionic requires
+// a minimum segment alignment, which effectively reserves an 8-word TCB. The
+// ARM spec allocates the first TCB word to the DTV.
+//
+// [1] "Addenda to, and Errata in, the ABI for the ARM Architecture". Section 3.
+// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0045e/IHI0045E_ABI_addenda.pdf
+
+#define MIN_TLS_SLOT            (-1) // update this value when reserving a slot
+#define TLS_SLOT_BIONIC_TLS     (-1)
+#define TLS_SLOT_DTV              0
+#define TLS_SLOT_THREAD_ID        1
+// Slot 2 is free (was historically used for TLS_SLOT_ERRNO)
+#define TLS_SLOT_OPENGL           3
+#define TLS_SLOT_OPENGL_API       4
+#define TLS_SLOT_STACK_GUARD      5
+#define TLS_SLOT_SANITIZER        6 // was historically used for dlerror
+#define TLS_SLOT_ART_THREAD_SELF  7
+#define TLS_SLOT_TSAN             8 // should be replaced with TLS_SLOT_SANITIZER
+
+// The maximum slot is fixed by the minimum TLS alignment in Bionic executables.
+// It should be changed to 7 once TLS_SLOT_TSAN is removed.
+#define MAX_TLS_SLOT              8
+
+#elif defined(__i386__) || defined(__x86_64__)
+
+// x86 uses variant 2 ELF TLS layout, which places the executable's TLS segment
+// immediately before the thread pointer. New slots are allocated at positive
+// offsets from the thread pointer.
+
+#define MIN_TLS_SLOT              0
+
+#define TLS_SLOT_SELF             0
+#define TLS_SLOT_THREAD_ID        1
+// Slot 2 is free (was historically used for TLS_SLOT_ERRNO)
+#define TLS_SLOT_OPENGL           3
+#define TLS_SLOT_OPENGL_API       4
+#define TLS_SLOT_STACK_GUARD      5
+#define TLS_SLOT_SANITIZER        6 // was historically used for dlerror
+#define TLS_SLOT_ART_THREAD_SELF  7
+#define TLS_SLOT_TSAN             8 // should be replaced with TLS_SLOT_SANITIZER
+#define TLS_SLOT_DTV              9
+#define TLS_SLOT_BIONIC_TLS       10
+#define MAX_TLS_SLOT              10 // update this value when reserving a slot
+
+#endif
+
+#define BIONIC_TLS_SLOTS (MAX_TLS_SLOT - MIN_TLS_SLOT + 1)
diff --git a/libc/private/bionic_elf_tls.h b/libc/private/bionic_elf_tls.h
new file mode 100644
index 0000000..e847669
--- /dev/null
+++ b/libc/private/bionic_elf_tls.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Describes the layout of static TLS memory: its total size, its alignment,
+// and the offsets of the Bionic structs placed inside it.
+struct StaticTlsLayout {
+  constexpr StaticTlsLayout() {}
+
+private:
+  size_t offset_ = 0;
+  size_t alignment_ = 1;
+  bool overflowed_ = false;
+
+  // Offsets to various Bionic TLS structs from the beginning of static TLS.
+  // They start as SIZE_MAX, presumably marking a struct not yet reserved.
+  size_t offset_bionic_tcb_ = SIZE_MAX;
+  size_t offset_bionic_tls_ = SIZE_MAX;
+
+public:
+  size_t offset_bionic_tcb() const { return offset_bionic_tcb_; }
+  size_t offset_bionic_tls() const { return offset_bionic_tls_; }
+
+  // The size of the layout is the running offset_.
+  size_t size() const { return offset_; }
+  size_t alignment() const { return alignment_; }
+  bool overflowed() const { return overflowed_; }
+
+  // Layout steps, defined out of line. The linker's layout_linker_static_tls()
+  // calls reserve_tcb(), then (indirectly) reserve_bionic_tls(), then finish_layout().
+  void reserve_tcb();
+  void reserve_bionic_tls();
+  void finish_layout();
+
+private:
+  size_t reserve(size_t size, size_t alignment);
+
+  // Forwards sizeof(T) and alignof(T) to reserve().
+  template <typename T> size_t reserve_type() {
+    return reserve(sizeof(T), alignof(T));
+  }
+
+  size_t round_up_with_overflow_check(size_t value, size_t alignment);
+};
diff --git a/libc/private/bionic_globals.h b/libc/private/bionic_globals.h
index ceda38a..b5e677e 100644
--- a/libc/private/bionic_globals.h
+++ b/libc/private/bionic_globals.h
@@ -33,6 +33,7 @@
 #include <link.h>
 #include <pthread.h>
 
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_fdsan.h"
 #include "private/bionic_malloc_dispatch.h"
 #include "private/bionic_vdso.h"
@@ -67,6 +68,8 @@
   pthread_mutex_t abort_msg_lock = PTHREAD_MUTEX_INITIALIZER;
   abort_msg_t* abort_msg = nullptr;
 
+  StaticTlsLayout static_tls_layout;
+
   // Values passed from the linker to libc.so.
   const char* init_progname = nullptr;
   char** init_environ = nullptr;
diff --git a/libc/private/bionic_tls.h b/libc/private/bionic_tls.h
index 4749cee..90914c3 100644
--- a/libc/private/bionic_tls.h
+++ b/libc/private/bionic_tls.h
@@ -26,8 +26,7 @@
  * SUCH DAMAGE.
  */
 
-#ifndef __BIONIC_PRIVATE_BIONIC_TLS_H_
-#define __BIONIC_PRIVATE_BIONIC_TLS_H_
+#pragma once
 
 #include <locale.h>
 #include <mntent.h>
@@ -35,73 +34,48 @@
 #include <sys/cdefs.h>
 #include <sys/param.h>
 
+#include "bionic_asm_tls.h"
 #include "bionic_macros.h"
 #include "__get_tls.h"
 #include "grp_pwd.h"
 
-__BEGIN_DECLS
-
 /** WARNING WARNING WARNING
  **
- ** This header file is *NOT* part of the public Bionic ABI/API
- ** and should not be used/included by user-serviceable parts of
- ** the system (e.g. applications).
- **
- ** It is only provided here for the benefit of the system dynamic
- ** linker and the OpenGL sub-system (which needs to access the
- ** pre-allocated slot directly for performance reason).
+ ** This header file is *NOT* part of the public Bionic ABI/API and should not
+ ** be used/included by user-serviceable parts of the system (e.g.
+ ** applications).
  **/
 
-// Well-known TLS slots. What data goes in which slot is arbitrary unless otherwise noted.
-enum {
-  TLS_SLOT_SELF = 0, // The kernel requires this specific slot for x86.
-  TLS_SLOT_THREAD_ID,
+class pthread_internal_t;
 
-  // TLS slot 2 was used for errno but is now free.
+// This struct is small, so the linker can allocate a temporary copy on its
+// stack. It can't be combined with pthread_internal_t because:
+//  - Native bridge requires pthread_internal_t to have the same layout across
+//    architectures.
+//  - On x86, this struct would have to be placed at the front of
+//    pthread_internal_t, moving fields like `tid`.
+//  - We'd like to avoid having a temporary pthread_internal_t object that
+//    needs to be transferred once the final size of static TLS is known.
+struct bionic_tcb {
+  void* raw_slots_storage[BIONIC_TLS_SLOTS];
 
-  // These two aren't used by bionic itself, but allow the graphics code to
-  // access TLS directly rather than using the pthread API.
-  TLS_SLOT_OPENGL_API = 3,
-  TLS_SLOT_OPENGL = 4,
+  // Return a reference to a slot given its TP-relative TLS_SLOT_xxx index.
+  // The thread pointer (i.e. __get_tls()) points at &tls_slot(0).
+  void*& tls_slot(size_t tpindex) {
+    return raw_slots_storage[tpindex - MIN_TLS_SLOT];
+  }
 
-  TLS_SLOT_STACK_GUARD = 5, // GCC requires this specific slot for x86.
+  // Initialize the main thread's final object using its bootstrap object.
+  void copy_from_bootstrap(const bionic_tcb* boot) {
+    // Copy everything. Problematic slots will be reinitialized.
+    *this = *boot;
+  }
 
-  // Lets sanitizers avoid using pthread_getspecific for finding the current
-  // thread state. (Slot 6 was historically used for dlerror instead.)
-  TLS_SLOT_SANITIZER = 6,
-
-  // Fast storage for Thread::Current() in ART.
-  TLS_SLOT_ART_THREAD_SELF = 7,
-
-  // Lets TSAN avoid using pthread_getspecific for finding the current thread
-  // state.
-  TLS_SLOT_TSAN = 8,
-
-  BIONIC_TLS_SLOTS // Must come last!
+  pthread_internal_t* thread() {
+    return static_cast<pthread_internal_t*>(tls_slot(TLS_SLOT_THREAD_ID));
+  }
 };
 
-// ~3 pages.
-struct bionic_tls {
-  locale_t locale;
-
-  char basename_buf[MAXPATHLEN];
-  char dirname_buf[MAXPATHLEN];
-
-  mntent mntent_buf;
-  char mntent_strings[BUFSIZ];
-
-  char ptsname_buf[32];
-  char ttyname_buf[64];
-
-  char strerror_buf[NL_TEXTMAX];
-  char strsignal_buf[NL_TEXTMAX];
-
-  group_state_t group;
-  passwd_state_t passwd;
-};
-
-#define BIONIC_TLS_SIZE (__BIONIC_ALIGN(sizeof(bionic_tls), PAGE_SIZE))
-
 /*
  * Bionic uses some pthread keys internally. All pthread keys used internally
  * should be created in constructors, except for keys that may be used in or
@@ -126,12 +100,42 @@
  */
 #define BIONIC_PTHREAD_KEY_COUNT (BIONIC_PTHREAD_KEY_RESERVED_COUNT + PTHREAD_KEYS_MAX)
 
-__END_DECLS
+class pthread_key_data_t {
+ public:
+  uintptr_t seq; // Use uintptr_t just for alignment, as we use pointer below.
+  void* data;
+};
 
-#if defined(__cplusplus)
+// ~3 pages. This struct is allocated as static TLS memory (i.e. at a fixed
+// offset from the thread pointer).
+struct bionic_tls {
+  pthread_key_data_t key_data[BIONIC_PTHREAD_KEY_COUNT];
+
+  locale_t locale;
+
+  char basename_buf[MAXPATHLEN];
+  char dirname_buf[MAXPATHLEN];
+
+  mntent mntent_buf;
+  char mntent_strings[BUFSIZ];
+
+  char ptsname_buf[32];
+  char ttyname_buf[64];
+
+  char strerror_buf[NL_TEXTMAX];
+  char strsignal_buf[NL_TEXTMAX];
+
+  group_state_t group;
+  passwd_state_t passwd;
+
+  // Initialize the main thread's final object using its bootstrap object.
+  void copy_from_bootstrap(const bionic_tls* boot __attribute__((unused))) {
+    // Nothing in bionic_tls needs to be preserved in the transition to the
+    // final TLS objects, so don't copy anything.
+  }
+};
+
 class KernelArgumentBlock;
-extern void __libc_init_main_thread_early(KernelArgumentBlock& args);
-extern void __libc_init_main_thread_late();
-#endif
-
-#endif /* __BIONIC_PRIVATE_BIONIC_TLS_H_ */
+extern "C" void __libc_init_main_thread_early(const KernelArgumentBlock& args, bionic_tcb* temp_tcb);
+extern "C" void __libc_init_main_thread_late();
+extern "C" void __libc_init_main_thread_final();
diff --git a/libc/private/linker_native_bridge.h b/libc/private/linker_native_bridge.h
new file mode 100644
index 0000000..bfd0153
--- /dev/null
+++ b/libc/private/linker_native_bridge.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#pragma once
+
+extern "C" void __linker_reserve_bionic_tls_in_static_tls();
diff --git a/libdl/Android.bp b/libdl/Android.bp
index c4078a0..262da6c 100644
--- a/libdl/Android.bp
+++ b/libdl/Android.bp
@@ -100,6 +100,11 @@
     sanitize: {
         never: true,
     },
+
+    stubs: {
+        symbol_file: "libdl.map.txt",
+        versions: ["10000"],
+    },
 }
 
 ndk_library {
diff --git a/libdl/libdl.map.txt b/libdl/libdl.map.txt
index c5d1be4..1514827 100644
--- a/libdl/libdl.map.txt
+++ b/libdl/libdl.map.txt
@@ -41,15 +41,19 @@
     __cfi_slowpath_diag;
 } LIBC_N;
 
+LIBC_Q { # introduced=29
+  global:
+    android_create_namespace; # apex
+    android_dlwarning; # apex
+    android_get_LD_LIBRARY_PATH; # apex
+    android_get_exported_namespace; # apex
+    android_init_anonymous_namespace; # apex
+    android_link_namespaces; # apex
+    android_set_application_target_sdk_version; # apex
+} LIBC_OMR1;
+
 LIBC_PLATFORM {
   global:
     __cfi_init;
-    android_dlwarning;
-    android_set_application_target_sdk_version;
-    android_get_LD_LIBRARY_PATH;
     android_update_LD_LIBRARY_PATH;
-    android_init_anonymous_namespace;
-    android_create_namespace;
-    android_link_namespaces;
-    android_get_exported_namespace;
-} LIBC_OMR1;
+} LIBC_Q;
diff --git a/libm/Android.bp b/libm/Android.bp
index 2463dfb..28cf1fd 100644
--- a/libm/Android.bp
+++ b/libm/Android.bp
@@ -510,6 +510,11 @@
 
     // TODO(ivanlozano): Remove after b/118321713
     xom: false,
+
+    stubs: {
+        symbol_file: "libm.map.txt",
+        versions: ["10000"],
+    },
 }
 
 ndk_library {
diff --git a/linker/Android.bp b/linker/Android.bp
index 0e3484a..38a53f8 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -86,6 +86,7 @@
         "linker_phdr.cpp",
         "linker_sdk_versions.cpp",
         "linker_soinfo.cpp",
+        "linker_tls.cpp",
         "linker_utils.cpp",
         "rt.cpp",
     ],
diff --git a/linker/linker_main.cpp b/linker/linker_main.cpp
index 3318c2c..9b4ce47 100644
--- a/linker/linker_main.cpp
+++ b/linker/linker_main.cpp
@@ -36,6 +36,7 @@
 #include "linker_gdb_support.h"
 #include "linker_globals.h"
 #include "linker_phdr.h"
+#include "linker_tls.h"
 #include "linker_utils.h"
 
 #include "private/bionic_globals.h"
@@ -51,6 +52,7 @@
 
 #include <async_safe/log.h>
 #include <bionic/libc_init_common.h>
+#include <bionic/pthread_internal.h>
 
 #include <vector>
 
@@ -450,6 +452,10 @@
     si->increment_ref_count();
   }
 
+  layout_linker_static_tls();
+
+  __libc_init_main_thread_final();
+
   if (!get_cfi_shadow()->InitialLinkDone(solist)) __linker_cannot_link(g_argv[0]);
 
   si->call_pre_init_constructors();
@@ -557,7 +563,8 @@
 extern "C" ElfW(Addr) __linker_init(void* raw_args) {
   // Initialize TLS early so system calls and errno work.
   KernelArgumentBlock args(raw_args);
-  __libc_init_main_thread_early(args);
+  bionic_tcb temp_tcb = {};
+  __libc_init_main_thread_early(args, &temp_tcb);
 
   // When the linker is run by itself (rather than as an interpreter for
   // another program), AT_BASE is 0.
diff --git a/linker/linker_tls.cpp b/linker/linker_tls.cpp
new file mode 100644
index 0000000..3327453
--- /dev/null
+++ b/linker/linker_tls.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include "linker_tls.h"
+
+#include "private/bionic_defs.h"
+#include "private/bionic_elf_tls.h"
+#include "private/bionic_globals.h"
+#include "private/linker_native_bridge.h"
+
+__BIONIC_WEAK_FOR_NATIVE_BRIDGE
+extern "C" void __linker_reserve_bionic_tls_in_static_tls() {
+  __libc_shared_globals()->static_tls_layout.reserve_bionic_tls();
+}
+
+// Stub for linker static TLS layout.
+void layout_linker_static_tls() {
+  StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  layout.reserve_tcb();
+
+  // The pthread key data is located at the very front of bionic_tls. As a
+  // temporary workaround, allocate bionic_tls just after the thread pointer so
+  // Golang can find its pthread key, as long as the executable's TLS segment is
+  // small enough. Specifically, Golang scans forward 384 words from the TP on
+  // ARM.
+  //  - http://b/118381796
+  //  - https://groups.google.com/d/msg/golang-dev/yVrkFnYrYPE/2G3aFzYqBgAJ
+  __linker_reserve_bionic_tls_in_static_tls();
+
+  layout.finish_layout();
+}
diff --git a/linker/linker_tls.h b/linker/linker_tls.h
new file mode 100644
index 0000000..2f0a57d
--- /dev/null
+++ b/linker/linker_tls.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#pragma once
+
+void layout_linker_static_tls();
diff --git a/tests/Android.bp b/tests/Android.bp
index 7d56a22..beed07a 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -42,6 +42,7 @@
     sanitize: {
         never: true,
     },
+    bootstrap: true,
 }
 
 // -----------------------------------------------------------------------------
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index 84ce531..1c57264 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -2582,9 +2582,9 @@
     ASSERT_EQ(0, munmap(pages[i], kPageSize));
   }
 
-  // Creating a thread uses at least six VMAs: the stack, the TLS, and a guard each side of both.
-  // So we should have seen at least six failures.
-  ASSERT_GE(EAGAIN_count, 6U);
+  // Creating a thread uses at least three VMAs: the combined stack and TLS, and a guard on each
+  // side. So we should have seen at least three failures.
+  ASSERT_GE(EAGAIN_count, 3U);
 
   for (; i < pages.size(); ++i) {
     ASSERT_EQ(0, munmap(pages[i], kPageSize));