Merge "Do not dlclose after failed reinit."
diff --git a/libc/Android.bp b/libc/Android.bp
index 6f2e347..226a81f 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -1014,6 +1014,7 @@
         "bionic/arpa_inet.cpp",
         "bionic/assert.cpp",
         "bionic/atof.cpp",
+        "bionic/bionic_allocator.cpp",
         "bionic/bionic_arc4random.cpp",
         "bionic/bionic_futex.cpp",
         "bionic/bionic_netlink.cpp",
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 6279e65..4984e38 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -74,6 +74,7 @@
   __libc_init_sysinfo(); // uses AT_SYSINFO auxv entry
 #endif
   __init_tcb(temp_tcb, &main_thread);
+  __init_tcb_dtv(temp_tcb);
   __set_tls(&temp_tcb->tls_slot(0));
   main_thread.tid = __getpid();
   main_thread.set_cached_pid(main_thread.tid);
diff --git a/linker/linker_allocator.cpp b/libc/bionic/bionic_allocator.cpp
similarity index 77%
rename from linker/linker_allocator.cpp
rename to libc/bionic/bionic_allocator.cpp
index 015768a..d9302ad 100644
--- a/linker/linker_allocator.cpp
+++ b/libc/bionic/bionic_allocator.cpp
@@ -26,21 +26,25 @@
  * SUCH DAMAGE.
  */
 
-#include "linker_allocator.h"
-#include "linker_debug.h"
-#include "linker.h"
+#include "private/bionic_allocator.h"
 
 #include <stdlib.h>
+#include <string.h>
 #include <sys/mman.h>
+#include <sys/param.h>
 #include <sys/prctl.h>
 #include <unistd.h>
 
+#include <new>
+
 #include <async_safe/log.h>
 
+#include "private/bionic_macros.h"
+#include "private/bionic_page.h"
+
 //
-// LinkerMemeoryAllocator is general purpose allocator
-// designed to provide the same functionality as the malloc/free/realloc
-// libc functions.
+// BionicAllocator is a general purpose allocator designed to provide the same
+// functionality as the malloc/free/realloc libc functions.
 //
 // On alloc:
 // If size is >= 1k allocator proxies malloc call directly to mmap
@@ -90,7 +94,7 @@
   return result;
 }
 
-LinkerSmallObjectAllocator::LinkerSmallObjectAllocator(uint32_t type,
+BionicSmallObjectAllocator::BionicSmallObjectAllocator(uint32_t type,
                                                        size_t block_size)
     : type_(type),
       block_size_(block_size),
@@ -99,7 +103,7 @@
       free_pages_cnt_(0),
       page_list_(nullptr) {}
 
-void* LinkerSmallObjectAllocator::alloc() {
+void* BionicSmallObjectAllocator::alloc() {
   CHECK(block_size_ != 0);
 
   if (page_list_ == nullptr) {
@@ -141,7 +145,7 @@
   return block_record;
 }
 
-void LinkerSmallObjectAllocator::free_page(small_object_page_info* page) {
+void BionicSmallObjectAllocator::free_page(small_object_page_info* page) {
   CHECK(page->free_blocks_cnt == blocks_per_page_);
   if (page->prev_page) {
     page->prev_page->next_page = page->next_page;
@@ -156,7 +160,7 @@
   free_pages_cnt_--;
 }
 
-void LinkerSmallObjectAllocator::free(void* ptr) {
+void BionicSmallObjectAllocator::free(void* ptr) {
   small_object_page_info* const page =
       reinterpret_cast<small_object_page_info*>(
           PAGE_START(reinterpret_cast<uintptr_t>(ptr)));
@@ -186,7 +190,7 @@
   }
 }
 
-void LinkerSmallObjectAllocator::alloc_page() {
+void BionicSmallObjectAllocator::alloc_page() {
   void* const map_ptr = mmap(nullptr, PAGE_SIZE, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   if (map_ptr == MAP_FAILED) {
@@ -194,7 +198,7 @@
   }
 
   prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, map_ptr, PAGE_SIZE,
-        "linker_alloc_small_objects");
+        "bionic_alloc_small_objects");
 
   small_object_page_info* const page =
       reinterpret_cast<small_object_page_info*>(map_ptr);
@@ -220,7 +224,7 @@
   free_pages_cnt_++;
 }
 
-void LinkerSmallObjectAllocator::add_to_page_list(small_object_page_info* page) {
+void BionicSmallObjectAllocator::add_to_page_list(small_object_page_info* page) {
   page->next_page = page_list_;
   page->prev_page = nullptr;
   if (page_list_) {
@@ -229,7 +233,7 @@
   page_list_ = page;
 }
 
-void LinkerSmallObjectAllocator::remove_from_page_list(
+void BionicSmallObjectAllocator::remove_from_page_list(
     small_object_page_info* page) {
   if (page->prev_page) {
     page->prev_page->next_page = page->next_page;
@@ -244,24 +248,30 @@
   page->next_page = nullptr;
 }
 
-void LinkerMemoryAllocator::initialize_allocators() {
+void BionicAllocator::initialize_allocators() {
   if (allocators_ != nullptr) {
     return;
   }
 
-  LinkerSmallObjectAllocator* allocators =
-      reinterpret_cast<LinkerSmallObjectAllocator*>(allocators_buf_);
+  BionicSmallObjectAllocator* allocators =
+      reinterpret_cast<BionicSmallObjectAllocator*>(allocators_buf_);
 
   for (size_t i = 0; i < kSmallObjectAllocatorsCount; ++i) {
     uint32_t type = i + kSmallObjectMinSizeLog2;
-    new (allocators + i) LinkerSmallObjectAllocator(type, 1 << type);
+    new (allocators + i) BionicSmallObjectAllocator(type, 1 << type);
   }
 
   allocators_ = allocators;
 }
 
-void* LinkerMemoryAllocator::alloc_mmap(size_t size) {
-  size_t allocated_size = PAGE_END(size + kPageInfoSize);
+void* BionicAllocator::alloc_mmap(size_t align, size_t size) {
+  size_t header_size = __BIONIC_ALIGN(kPageInfoSize, align);
+  size_t allocated_size;
+  if (__builtin_add_overflow(header_size, size, &allocated_size) ||
+      PAGE_END(allocated_size) < allocated_size) {
+    async_safe_fatal("overflow trying to alloc %zu bytes", size);
+  }
+  allocated_size = PAGE_END(allocated_size);
   void* map_ptr = mmap(nullptr, allocated_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
                        -1, 0);
 
@@ -269,25 +279,21 @@
     async_safe_fatal("mmap failed: %s", strerror(errno));
   }
 
-  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, map_ptr, allocated_size, "linker_alloc_lob");
+  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, map_ptr, allocated_size, "bionic_alloc_lob");
 
-  page_info* info = reinterpret_cast<page_info*>(map_ptr);
+  void* result = static_cast<char*>(map_ptr) + header_size;
+  page_info* info = get_page_info_unchecked(result);
   memcpy(info->signature, kSignature, sizeof(kSignature));
   info->type = kLargeObject;
   info->allocated_size = allocated_size;
 
-  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(info) +
-                                 kPageInfoSize);
+  return result;
 }
 
-void* LinkerMemoryAllocator::alloc(size_t size) {
-  // treat alloc(0) as alloc(1)
-  if (size == 0) {
-    size = 1;
-  }
 
+inline void* BionicAllocator::alloc_impl(size_t align, size_t size) {
   if (size > kSmallObjectMaxSize) {
-    return alloc_mmap(size);
+    return alloc_mmap(align, size);
   }
 
   uint16_t log2_size = log2(size);
@@ -299,8 +305,33 @@
   return get_small_object_allocator(log2_size)->alloc();
 }
 
-page_info* LinkerMemoryAllocator::get_page_info(void* ptr) {
-  page_info* info = reinterpret_cast<page_info*>(PAGE_START(reinterpret_cast<size_t>(ptr)));
+void* BionicAllocator::alloc(size_t size) {
+  // treat alloc(0) as alloc(1)
+  if (size == 0) {
+    size = 1;
+  }
+  return alloc_impl(16, size);
+}
+
+void* BionicAllocator::memalign(size_t align, size_t size) {
+  // The Bionic allocator only supports alignment up to one page (larger
+  // requests are clamped to PAGE_SIZE), which is good enough for ELF TLS.
+  align = MIN(align, PAGE_SIZE);
+  align = MAX(align, 16);
+  if (!powerof2(align)) {
+    align = BIONIC_ROUND_UP_POWER_OF_2(align);
+  }
+  size = MAX(size, align);
+  return alloc_impl(align, size);
+}
+
+inline page_info* BionicAllocator::get_page_info_unchecked(void* ptr) {
+  uintptr_t header_page = PAGE_START(reinterpret_cast<size_t>(ptr) - kPageInfoSize);
+  return reinterpret_cast<page_info*>(header_page);
+}
+
+inline page_info* BionicAllocator::get_page_info(void* ptr) {
+  page_info* info = get_page_info_unchecked(ptr);
   if (memcmp(info->signature, kSignature, sizeof(kSignature)) != 0) {
     async_safe_fatal("invalid pointer %p (page signature mismatch)", ptr);
   }
@@ -308,7 +339,7 @@
   return info;
 }
 
-void* LinkerMemoryAllocator::realloc(void* ptr, size_t size) {
+void* BionicAllocator::realloc(void* ptr, size_t size) {
   if (ptr == nullptr) {
     return alloc(size);
   }
@@ -323,9 +354,9 @@
   size_t old_size = 0;
 
   if (info->type == kLargeObject) {
-    old_size = info->allocated_size - kPageInfoSize;
+    old_size = info->allocated_size - (static_cast<char*>(ptr) - reinterpret_cast<char*>(info));
   } else {
-    LinkerSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
+    BionicSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
     if (allocator != info->allocator_addr) {
       async_safe_fatal("invalid pointer %p (page signature mismatch)", ptr);
     }
@@ -343,7 +374,7 @@
   return ptr;
 }
 
-void LinkerMemoryAllocator::free(void* ptr) {
+void BionicAllocator::free(void* ptr) {
   if (ptr == nullptr) {
     return;
   }
@@ -353,7 +384,7 @@
   if (info->type == kLargeObject) {
     munmap(info, info->allocated_size);
   } else {
-    LinkerSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
+    BionicSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
     if (allocator != info->allocator_addr) {
       async_safe_fatal("invalid pointer %p (invalid allocator address for the page)", ptr);
     }
@@ -362,7 +393,7 @@
   }
 }
 
-LinkerSmallObjectAllocator* LinkerMemoryAllocator::get_small_object_allocator(uint32_t type) {
+BionicSmallObjectAllocator* BionicAllocator::get_small_object_allocator(uint32_t type) {
   if (type < kSmallObjectMinSizeLog2 || type > kSmallObjectMaxSizeLog2) {
     async_safe_fatal("invalid type: %u", type);
   }
diff --git a/libc/bionic/bionic_elf_tls.cpp b/libc/bionic/bionic_elf_tls.cpp
index 4253b97..3fa5182 100644
--- a/libc/bionic/bionic_elf_tls.cpp
+++ b/libc/bionic/bionic_elf_tls.cpp
@@ -34,9 +34,22 @@
 #include <unistd.h>
 
 #include "private/ScopedRWLock.h"
+#include "private/ScopedSignalBlocker.h"
 #include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_tls.h"
+#include "pthread_internal.h"
+
+// Every call to __tls_get_addr needs to check the generation counter, so
+// accesses to the counter need to be as fast as possible. Keep a copy of it in
+// a hidden variable, which can be accessed without using the GOT. The linker
+// will update this variable when it updates its counter.
+//
+// To allow the linker to update this variable, libc.so's constructor passes its
+// address to the linker. To accommodate a possible __tls_get_addr call before
+// libc.so's constructor, this local copy is initialized to SIZE_MAX, forcing
+// __tls_get_addr to initially use the slow path.
+__LIBC_HIDDEN__ _Atomic(size_t) __libc_tls_generation_copy = SIZE_MAX;
 
 // Search for a TLS segment in the given phdr table. Returns true if it has a
 // TLS segment and false otherwise.
@@ -168,6 +181,7 @@
   // moving the initial part. If this locking is too slow, we can duplicate the
   // static part of the table.
   TlsModules& modules = __libc_shared_globals()->tls_modules;
+  ScopedSignalBlocker ssb;
   ScopedReadLock locker(&modules.rwlock);
 
   for (size_t i = 0; i < modules.module_count; ++i) {
@@ -187,3 +201,166 @@
            module.segment.init_size);
   }
 }
+
+static inline size_t dtv_size_in_bytes(size_t module_count) {
+  return sizeof(TlsDtv) + module_count * sizeof(void*);
+}
+
+// Calculates the number of module slots to allocate in a new DTV. For small
+// objects (up to 1KiB), the TLS allocator allocates memory in power-of-2 sizes,
+// so for better space usage, ensure that the DTV size (header + slots) is a
+// power of 2.
+//
+// The lock on TlsModules must be held.
+static size_t calculate_new_dtv_count() {
+  size_t loaded_cnt = __libc_shared_globals()->tls_modules.module_count;
+  size_t bytes = dtv_size_in_bytes(MAX(1, loaded_cnt));
+  if (!powerof2(bytes)) {
+    bytes = BIONIC_ROUND_UP_POWER_OF_2(bytes);
+  }
+  return (bytes - sizeof(TlsDtv)) / sizeof(void*);
+}
+
+// This function must be called with signals blocked and a write lock on
+// TlsModules held.
+static void update_tls_dtv(bionic_tcb* tcb) {
+  const TlsModules& modules = __libc_shared_globals()->tls_modules;
+  BionicAllocator& allocator = __libc_shared_globals()->tls_allocator;
+
+  // Use the generation counter from the shared globals instead of the local
+  // copy, which won't be initialized yet if __tls_get_addr is called before
+  // libc.so's constructor.
+  if (__get_tcb_dtv(tcb)->generation == atomic_load(&modules.generation)) {
+    return;
+  }
+
+  const size_t old_cnt = __get_tcb_dtv(tcb)->count;
+
+  // If the DTV isn't large enough, allocate a larger one. Because a signal
+  // handler could interrupt the fast path of __tls_get_addr, we don't free the
+  // old DTV. Instead, we add the old DTV to a list, then free all of a thread's
+  // DTVs at thread-exit. Each time the DTV is reallocated, its size at least
+  // doubles.
+  if (modules.module_count > old_cnt) {
+    size_t new_cnt = calculate_new_dtv_count();
+    TlsDtv* const old_dtv = __get_tcb_dtv(tcb);
+    TlsDtv* const new_dtv = static_cast<TlsDtv*>(allocator.alloc(dtv_size_in_bytes(new_cnt)));
+    memcpy(new_dtv, old_dtv, dtv_size_in_bytes(old_cnt));
+    new_dtv->count = new_cnt;
+    new_dtv->next = old_dtv;
+    __set_tcb_dtv(tcb, new_dtv);
+  }
+
+  TlsDtv* const dtv = __get_tcb_dtv(tcb);
+
+  const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  char* static_tls = reinterpret_cast<char*>(tcb) - layout.offset_bionic_tcb();
+
+  // Initialize static TLS modules and free unloaded modules.
+  for (size_t i = 0; i < dtv->count; ++i) {
+    if (i < modules.module_count) {
+      const TlsModule& mod = modules.module_table[i];
+      if (mod.static_offset != SIZE_MAX) {
+        dtv->modules[i] = static_tls + mod.static_offset;
+        continue;
+      }
+      if (mod.first_generation != kTlsGenerationNone &&
+          mod.first_generation <= dtv->generation) {
+        continue;
+      }
+    }
+    allocator.free(dtv->modules[i]);
+    dtv->modules[i] = nullptr;
+  }
+
+  dtv->generation = atomic_load(&modules.generation);
+}
+
+__attribute__((noinline)) static void* tls_get_addr_slow_path(const TlsIndex* ti) {
+  TlsModules& modules = __libc_shared_globals()->tls_modules;
+  bionic_tcb* tcb = __get_bionic_tcb();
+
+  // Block signals and lock TlsModules. We may need the allocator, so take
+  // a write lock.
+  ScopedSignalBlocker ssb;
+  ScopedWriteLock locker(&modules.rwlock);
+
+  update_tls_dtv(tcb);
+
+  TlsDtv* dtv = __get_tcb_dtv(tcb);
+  const size_t module_idx = __tls_module_id_to_idx(ti->module_id);
+  void* mod_ptr = dtv->modules[module_idx];
+  if (mod_ptr == nullptr) {
+    const TlsSegment& segment = modules.module_table[module_idx].segment;
+    mod_ptr = __libc_shared_globals()->tls_allocator.memalign(segment.alignment, segment.size);
+    if (segment.init_size > 0) {
+      memcpy(mod_ptr, segment.init_ptr, segment.init_size);
+    }
+    dtv->modules[module_idx] = mod_ptr;
+  }
+
+  return static_cast<char*>(mod_ptr) + ti->offset;
+}
+
+// Returns the address of a thread's TLS memory given a module ID and an offset
+// into that module's TLS segment. This function is called on every access to a
+// dynamic TLS variable on targets that don't use TLSDESC. arm64 uses TLSDESC,
+// so it only calls this function on a thread's first access to a module's TLS
+// segment.
+//
+// On most targets, this accessor function is __tls_get_addr and
+// TLS_GET_ADDR_CCONV is unset. 32-bit x86 uses ___tls_get_addr instead and a
+// regparm() calling convention.
+extern "C" void* TLS_GET_ADDR(const TlsIndex* ti) TLS_GET_ADDR_CCONV {
+  TlsDtv* dtv = __get_tcb_dtv(__get_bionic_tcb());
+
+  // TODO: See if we can use a relaxed memory ordering here instead.
+  size_t generation = atomic_load(&__libc_tls_generation_copy);
+  if (__predict_true(generation == dtv->generation)) {
+    void* mod_ptr = dtv->modules[__tls_module_id_to_idx(ti->module_id)];
+    if (__predict_true(mod_ptr != nullptr)) {
+      return static_cast<char*>(mod_ptr) + ti->offset;
+    }
+  }
+
+  return tls_get_addr_slow_path(ti);
+}
+
+// This function frees:
+//  - TLS modules referenced by the current DTV.
+//  - The list of DTV objects associated with the current thread.
+//
+// The caller must have already blocked signals.
+void __free_dynamic_tls(bionic_tcb* tcb) {
+  TlsModules& modules = __libc_shared_globals()->tls_modules;
+  BionicAllocator& allocator = __libc_shared_globals()->tls_allocator;
+
+  // If we didn't allocate any dynamic memory, skip out early without taking
+  // the lock.
+  TlsDtv* dtv = __get_tcb_dtv(tcb);
+  if (dtv->generation == kTlsGenerationNone) {
+    return;
+  }
+
+  // We need the write lock to use the allocator.
+  ScopedWriteLock locker(&modules.rwlock);
+
+  // First free everything in the current DTV.
+  for (size_t i = 0; i < dtv->count; ++i) {
+    if (i < modules.module_count && modules.module_table[i].static_offset != SIZE_MAX) {
+      // This module's TLS memory is allocated statically, so don't free it here.
+      continue;
+    }
+    allocator.free(dtv->modules[i]);
+  }
+
+  // Now free the thread's list of DTVs.
+  while (dtv->generation != kTlsGenerationNone) {
+    TlsDtv* next = dtv->next;
+    allocator.free(dtv);
+    dtv = next;
+  }
+
+  // Clear the DTV slot. The DTV must not be used again with this thread.
+  tcb->tls_slot(TLS_SLOT_DTV) = nullptr;
+}
diff --git a/libc/bionic/bionic_systrace.cpp b/libc/bionic/bionic_systrace.cpp
index bac3d88..6182ed8 100644
--- a/libc/bionic/bionic_systrace.cpp
+++ b/libc/bionic/bionic_systrace.cpp
@@ -82,7 +82,7 @@
     return;
   }
 
-  TEMP_FAILURE_RETRY(write(trace_marker_fd, "E", 1));
+  TEMP_FAILURE_RETRY(write(trace_marker_fd, "E|", 2));
 }
 
 ScopedTrace::ScopedTrace(const char* message) : called_end_(false) {
diff --git a/libc/bionic/libc_init_dynamic.cpp b/libc/bionic/libc_init_dynamic.cpp
index af1b847..7140776 100644
--- a/libc/bionic/libc_init_dynamic.cpp
+++ b/libc/bionic/libc_init_dynamic.cpp
@@ -51,6 +51,7 @@
 #include <elf.h>
 #include "libc_init_common.h"
 
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_ssp.h"
@@ -82,6 +83,12 @@
   __libc_init_sysinfo();
 #endif
 
+  // Register libc.so's copy of the TLS generation variable so the linker can
+  // update it when it loads or unloads a shared object.
+  TlsModules& tls_modules = __libc_shared_globals()->tls_modules;
+  tls_modules.generation_libc_so = &__libc_tls_generation_copy;
+  __libc_tls_generation_copy = tls_modules.generation;
+
   __libc_init_globals();
   __libc_init_common();
 
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index 8fbc20e..514423d 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -92,19 +92,22 @@
   size_t phdr_ct = getauxval(AT_PHNUM);
 
   static TlsModule mod;
+  TlsModules& modules = __libc_shared_globals()->tls_modules;
   if (__bionic_get_tls_segment(phdr_start, phdr_ct, 0, &mod.segment)) {
     if (!__bionic_check_tls_alignment(&mod.segment.alignment)) {
       async_safe_fatal("error: TLS segment alignment in \"%s\" is not a power of 2: %zu\n",
                        progname, mod.segment.alignment);
     }
     mod.static_offset = layout.reserve_exe_segment_and_tcb(&mod.segment, progname);
-    mod.first_generation = 1;
-    __libc_shared_globals()->tls_modules.generation = 1;
-    __libc_shared_globals()->tls_modules.module_count = 1;
-    __libc_shared_globals()->tls_modules.module_table = &mod;
+    mod.first_generation = kTlsGenerationFirst;
+
+    modules.module_count = 1;
+    modules.module_table = &mod;
   } else {
     layout.reserve_exe_segment_and_tcb(nullptr, progname);
   }
+  // Enable the fast path in __tls_get_addr.
+  __libc_tls_generation_copy = modules.generation;
 
   layout.finish_layout();
 }
diff --git a/libc/bionic/ndk_cruft.cpp b/libc/bionic/ndk_cruft.cpp
index dbacf18..2c3299f 100644
--- a/libc/bionic/ndk_cruft.cpp
+++ b/libc/bionic/ndk_cruft.cpp
@@ -355,9 +355,14 @@
   return malloc(size);
 }
 
+} // extern "C"
+
 #define __get_thread __real_get_thread
 #include "pthread_internal.h"
 #undef __get_thread
+
+extern "C" {
+
 // Various third-party apps contain a backport of our pthread_rwlock implementation that uses this.
 pthread_internal_t* __get_thread() {
   return __real_get_thread();
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 31e0378..b8784b8 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -44,6 +44,7 @@
 #include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_ssp.h"
+#include "private/bionic_systrace.h"
 #include "private/bionic_tls.h"
 #include "private/ErrnoRestorer.h"
 
@@ -70,6 +71,14 @@
   tcb->tls_slot(TLS_SLOT_STACK_GUARD) = reinterpret_cast<void*>(__stack_chk_guard);
 }
 
+__attribute__((no_stack_protector))
+void __init_tcb_dtv(bionic_tcb* tcb) {
+  // Initialize the DTV slot to a statically-allocated empty DTV. The first
+  // access to a dynamic TLS variable allocates a new DTV.
+  static const TlsDtv zero_dtv = {};
+  __set_tcb_dtv(tcb, const_cast<TlsDtv*>(&zero_dtv));
+}
+
 void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
   tcb->thread()->bionic_tls = tls;
   tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;
@@ -291,6 +300,7 @@
   // Initialize TLS memory.
   __init_static_tls(mapping.static_tls);
   __init_tcb(tcb, thread);
+  __init_tcb_dtv(tcb);
   __init_tcb_stack_guard(tcb);
   __init_bionic_tls_ptrs(tcb, tls);
 
@@ -338,6 +348,7 @@
   ErrnoRestorer errno_restorer;
 
   pthread_attr_t thread_attr;
+  ScopedTrace trace("pthread_create");
   if (attr == nullptr) {
     pthread_attr_init(&thread_attr);
   } else {
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 84ea2e6..3b873b3 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -98,15 +98,22 @@
     thread->alternate_signal_stack = nullptr;
   }
 
+  ThreadJoinState old_state = THREAD_NOT_JOINED;
+  while (old_state == THREAD_NOT_JOINED &&
+         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
+  }
+
+  // We don't want to take a signal after unmapping the stack, the shadow call
+  // stack, or dynamic TLS memory.
+  ScopedSignalBlocker ssb;
+
 #ifdef __aarch64__
   // Free the shadow call stack and guard pages.
   munmap(thread->shadow_call_stack_guard_region, SCS_GUARD_REGION_SIZE);
 #endif
 
-  ThreadJoinState old_state = THREAD_NOT_JOINED;
-  while (old_state == THREAD_NOT_JOINED &&
-         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
-  }
+  // Free the ELF TLS DTV and all dynamically-allocated ELF TLS memory.
+  __free_dynamic_tls(__get_bionic_tcb());
 
   if (old_state == THREAD_DETACHED) {
     // The thread is detached, no one will use pthread_internal_t after pthread_exit.
@@ -121,10 +128,6 @@
     if (thread->mmap_size != 0) {
       // We need to free mapped space for detached threads when they exit.
       // That's not something we can do in C.
-
-      // We don't want to take a signal after we've unmapped the stack.
-      // That's one last thing we can do before dropping to assembler.
-      ScopedSignalBlocker ssb;
       __hwasan_thread_exit();
       _exit_with_stack_teardown(thread->mmap_base, thread->mmap_size);
     }
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 27ab3df..cbcdadf 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -38,6 +38,7 @@
 #define __hwasan_thread_exit()
 #endif
 
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_lock.h"
 #include "private/bionic_tls.h"
 
@@ -154,6 +155,7 @@
 
 __LIBC_HIDDEN__ void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread);
 __LIBC_HIDDEN__ void __init_tcb_stack_guard(bionic_tcb* tcb);
+__LIBC_HIDDEN__ void __init_tcb_dtv(bionic_tcb* tcb);
 __LIBC_HIDDEN__ void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls);
 __LIBC_HIDDEN__ bionic_tls* __allocate_temp_bionic_tls();
 __LIBC_HIDDEN__ void __free_temp_bionic_tls(bionic_tls* tls);
@@ -179,6 +181,15 @@
   return *static_cast<bionic_tls*>(__get_tls()[TLS_SLOT_BIONIC_TLS]);
 }
 
+static inline __always_inline TlsDtv* __get_tcb_dtv(bionic_tcb* tcb) {
+  uintptr_t dtv_slot = reinterpret_cast<uintptr_t>(tcb->tls_slot(TLS_SLOT_DTV));
+  return reinterpret_cast<TlsDtv*>(dtv_slot - offsetof(TlsDtv, generation));
+}
+
+static inline void __set_tcb_dtv(bionic_tcb* tcb, TlsDtv* val) {
+  tcb->tls_slot(TLS_SLOT_DTV) = &val->generation;
+}
+
 extern "C" __LIBC_HIDDEN__ int __set_tls(void* ptr);
 
 __LIBC_HIDDEN__ void pthread_key_clean_all(void);
diff --git a/libc/bionic/pthread_join.cpp b/libc/bionic/pthread_join.cpp
index 9aad458..8e4ca59 100644
--- a/libc/bionic/pthread_join.cpp
+++ b/libc/bionic/pthread_join.cpp
@@ -30,10 +30,12 @@
 
 #include "private/bionic_defs.h"
 #include "private/bionic_futex.h"
+#include "private/bionic_systrace.h"
 #include "pthread_internal.h"
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
 int pthread_join(pthread_t t, void** return_value) {
+  ScopedTrace trace("pthread_join");
   if (t == pthread_self()) {
     return EDEADLK;
   }
diff --git a/libc/libc.map.txt b/libc/libc.map.txt
index 8d67b9e..6a6ea7d 100644
--- a/libc/libc.map.txt
+++ b/libc/libc.map.txt
@@ -1446,8 +1446,10 @@
 
 LIBC_Q { # introduced=Q
   global:
+    ___tls_get_addr; # x86
     __aeabi_read_tp; # arm
     __res_randomid;
+    __tls_get_addr; # arm x86_64
     android_fdsan_close_with_tag;
     android_fdsan_create_owner_tag;
     android_fdsan_exchange_owner_tag;
diff --git a/linker/linker_allocator.h b/libc/private/bionic_allocator.h
similarity index 80%
rename from linker/linker_allocator.h
rename to libc/private/bionic_allocator.h
index 44a8b0d..c705ce4 100644
--- a/linker/linker_allocator.h
+++ b/libc/private/bionic_allocator.h
@@ -36,13 +36,11 @@
 #include <stddef.h>
 #include <unistd.h>
 
-#include <async_safe/log.h>
-
 const uint32_t kSmallObjectMaxSizeLog2 = 10;
 const uint32_t kSmallObjectMinSizeLog2 = 4;
 const uint32_t kSmallObjectAllocatorsCount = kSmallObjectMaxSizeLog2 - kSmallObjectMinSizeLog2 + 1;
 
-class LinkerSmallObjectAllocator;
+class BionicSmallObjectAllocator;
 
 // This structure is placed at the beginning of each addressable page
 // and has all information we need to find the corresponding memory allocator.
@@ -53,7 +51,7 @@
     // we use allocated_size for large objects allocator
     size_t allocated_size;
     // and allocator_addr for small ones.
-    LinkerSmallObjectAllocator* allocator_addr;
+    BionicSmallObjectAllocator* allocator_addr;
   };
 };
 
@@ -63,14 +61,14 @@
 };
 
 // This structure is placed at the beginning of each page managed by
-// LinkerSmallObjectAllocator.  Note that a page_info struct is expected at the
+// BionicSmallObjectAllocator.  Note that a page_info struct is expected at the
 // beginning of each page as well, and therefore this structure contains a
 // page_info as its *first* field.
 struct small_object_page_info {
   page_info info;  // Must be the first field.
 
   // Doubly linked list for traversing all pages allocated by a
-  // LinkerSmallObjectAllocator.
+  // BionicSmallObjectAllocator.
   small_object_page_info* next_page;
   small_object_page_info* prev_page;
 
@@ -81,9 +79,9 @@
   size_t free_blocks_cnt;
 };
 
-class LinkerSmallObjectAllocator {
+class BionicSmallObjectAllocator {
  public:
-  LinkerSmallObjectAllocator(uint32_t type, size_t block_size);
+  BionicSmallObjectAllocator(uint32_t type, size_t block_size);
   void* alloc();
   void free(void* ptr);
 
@@ -103,20 +101,23 @@
   small_object_page_info* page_list_;
 };
 
-class LinkerMemoryAllocator {
+class BionicAllocator {
  public:
-  constexpr LinkerMemoryAllocator() : allocators_(nullptr), allocators_buf_() {}
+  constexpr BionicAllocator() : allocators_(nullptr), allocators_buf_() {}
   void* alloc(size_t size);
+  void* memalign(size_t align, size_t size);
 
   // Note that this implementation of realloc never shrinks allocation
   void* realloc(void* ptr, size_t size);
   void free(void* ptr);
  private:
-  void* alloc_mmap(size_t size);
-  page_info* get_page_info(void* ptr);
-  LinkerSmallObjectAllocator* get_small_object_allocator(uint32_t type);
+  void* alloc_mmap(size_t align, size_t size);
+  inline void* alloc_impl(size_t align, size_t size);
+  inline page_info* get_page_info_unchecked(void* ptr);
+  inline page_info* get_page_info(void* ptr);
+  BionicSmallObjectAllocator* get_small_object_allocator(uint32_t type);
   void initialize_allocators();
 
-  LinkerSmallObjectAllocator* allocators_;
-  uint8_t allocators_buf_[sizeof(LinkerSmallObjectAllocator)*kSmallObjectAllocatorsCount];
+  BionicSmallObjectAllocator* allocators_;
+  uint8_t allocators_buf_[sizeof(BionicSmallObjectAllocator)*kSmallObjectAllocatorsCount];
 };
diff --git a/libc/private/bionic_elf_tls.h b/libc/private/bionic_elf_tls.h
index 09e1958..fa1af76 100644
--- a/libc/private/bionic_elf_tls.h
+++ b/libc/private/bionic_elf_tls.h
@@ -34,6 +34,8 @@
 #include <stdint.h>
 #include <sys/cdefs.h>
 
+__LIBC_HIDDEN__ extern _Atomic(size_t) __libc_tls_generation_copy;
+
 struct TlsSegment {
   size_t size = 0;
   size_t alignment = 1;
@@ -84,6 +86,16 @@
   size_t round_up_with_overflow_check(size_t value, size_t alignment);
 };
 
+static constexpr size_t kTlsGenerationNone = 0;
+static constexpr size_t kTlsGenerationFirst = 1;
+
+// The first ELF TLS module has ID 1. Zero is reserved for the first word of
+// the DTV, a generation count. Unresolved weak symbols also use module ID 0.
+static constexpr size_t kTlsUninitializedModuleId = 0;
+
+static inline size_t __tls_module_id_to_idx(size_t id) { return id - 1; }
+static inline size_t __tls_module_idx_to_id(size_t idx) { return idx + 1; }
+
 // A descriptor for a single ELF TLS module.
 struct TlsModule {
   TlsSegment segment;
@@ -93,7 +105,7 @@
 
   // The generation in which this module was loaded. Dynamic TLS lookups use
   // this field to detect when a module has been unloaded.
-  size_t first_generation = 0;
+  size_t first_generation = kTlsGenerationNone;
 
   // Used by the dynamic linker to track the associated soinfo* object.
   void* soinfo_ptr = nullptr;
@@ -105,9 +117,10 @@
 struct TlsModules {
   constexpr TlsModules() {}
 
-  // A generation counter. The value is incremented each time an solib is loaded
-  // or unloaded.
-  _Atomic(size_t) generation = 0;
+  // A generation counter, followed by a pointer to libc.so's copy of it. The
+  // counter is incremented each time an solib is loaded or unloaded.
+  _Atomic(size_t) generation = kTlsGenerationFirst;
+  _Atomic(size_t) *generation_libc_so = nullptr;
 
   // Access to the TlsModule[] table requires taking this lock.
   pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
@@ -119,3 +132,46 @@
 };
 
 void __init_static_tls(void* static_tls);
+
+// Dynamic Thread Vector. Each thread has a different DTV. For each module
+// (executable or solib), the DTV has a pointer to that module's TLS memory. The
+// DTV is initially empty and is allocated on-demand. It grows as more modules
+// are dlopen'ed. See https://www.akkadia.org/drepper/tls.pdf.
+//
+// The layout of the DTV is specified in various documents, but it is not part
+// of Bionic's public ABI. A compiler can't generate code to access it directly,
+// because it can't access libc's global generation counter.
+struct TlsDtv {
+  // Number of elements in this object's modules field.
+  size_t count;
+
+  // A pointer to an older TlsDtv object that should be freed when the thread
+  // exits. The objects aren't immediately freed because a DTV could be
+  // reallocated by a signal handler that interrupted __tls_get_addr's fast
+  // path.
+  TlsDtv* next;
+
+  // The DTV slot points at this field, which allows omitting an add instruction
+  // on the fast path for a TLS lookup. The arm64 tlsdesc_resolver.S depends on
+  // the layout of fields past this point.
+  size_t generation;
+  void* modules[];
+};
+
+struct TlsIndex {
+  size_t module_id;
+  size_t offset;
+};
+
+#if defined(__i386__)
+#define TLS_GET_ADDR_CCONV __attribute__((regparm(1)))
+#define TLS_GET_ADDR ___tls_get_addr
+#else
+#define TLS_GET_ADDR_CCONV
+#define TLS_GET_ADDR __tls_get_addr
+#endif
+
+extern "C" void* TLS_GET_ADDR(const TlsIndex* ti) TLS_GET_ADDR_CCONV;
+
+struct bionic_tcb;
+void __free_dynamic_tls(bionic_tcb* tcb);
diff --git a/libc/private/bionic_globals.h b/libc/private/bionic_globals.h
index 4d40476..21a2a24 100644
--- a/libc/private/bionic_globals.h
+++ b/libc/private/bionic_globals.h
@@ -33,6 +33,7 @@
 #include <link.h>
 #include <pthread.h>
 
+#include "private/bionic_allocator.h"
 #include "private/bionic_elf_tls.h"
 #include "private/bionic_fdsan.h"
 #include "private/bionic_malloc_dispatch.h"
@@ -70,6 +71,7 @@
 
   StaticTlsLayout static_tls_layout;
   TlsModules tls_modules;
+  BionicAllocator tls_allocator;
 
   // Values passed from the linker to libc.so.
   const char* init_progname = nullptr;
diff --git a/linker/Android.bp b/linker/Android.bp
index 4991935..033860a 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -4,7 +4,6 @@
     recovery_available: true,
 
     srcs: [
-        "linker_allocator.cpp",
         "linker_memory.cpp",
     ],
     cflags: [
@@ -104,6 +103,7 @@
     name: "linker_sources_arm64",
     srcs: [
         "arch/arm64/begin.S",
+        "arch/arm64/tlsdesc_resolver.S",
     ],
 }
 
diff --git a/linker/arch/arm64/tlsdesc_resolver.S b/linker/arch/arm64/tlsdesc_resolver.S
new file mode 100644
index 0000000..ef46839
--- /dev/null
+++ b/linker/arch/arm64/tlsdesc_resolver.S
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <private/bionic_asm.h>
+#include <private/bionic_asm_tls.h>
+
+.globl __tls_get_addr
+
+// These resolver functions must preserve every register except x0. They set x0
+// to the offset of the TLS symbol relative to the thread pointer.
+
+ENTRY_PRIVATE(tlsdesc_resolver_static)
+  ldr x0, [x0, #8]              // TlsDescriptor::arg: precomputed offset from the thread pointer
+  ret
+END(tlsdesc_resolver_static)
+
+ENTRY_PRIVATE(tlsdesc_resolver_dynamic)
+  stp x19, x20, [sp, #-32]!
+  .cfi_def_cfa_offset 32
+  .cfi_rel_offset x19, 0
+  .cfi_rel_offset x20, 8
+  stp x21, x22, [sp, #16]
+  .cfi_rel_offset x21, 16
+  .cfi_rel_offset x22, 24
+
+  mrs x19, tpidr_el0            // __get_tls()
+  ldr x20, [x19, #(TLS_SLOT_DTV * 8)]  // DTV slot: points at TlsDtv::generation
+  ldr x21, [x20]                // TlsDtv::generation
+
+  ldr x0, [x0, #8]              // TlsDynamicResolverArg*
+  ldr x22, [x0]                 // TlsDynamicResolverArg::generation
+
+  cmp x21, x22                  // DTV generation vs. the generation recorded in the resolver arg
+  b.lo .fallback                // DTV generation is lower: take the slow path
+
+  ldr x21, [x0, #8]             // TlsIndex::module_id
+  ldr x22, [x0, #16]            // TlsIndex::offset
+  ldr x21, [x20, x21, lsl #3]   // TlsDtv::modules[module_id - 1] (x20 points at generation)
+  cbz x21, .fallback            // null DTV entry: take the slow path
+  add x0, x21, x22              // address of the TLS symbol
+  sub x0, x0, x19               // convert to an offset relative to the thread pointer
+
+  ldp x21, x22, [sp, #16]
+  .cfi_remember_state
+  .cfi_restore x21
+  .cfi_restore x22
+  ldp x19, x20, [sp], #32
+  .cfi_adjust_cfa_offset -32
+  .cfi_restore x19
+  .cfi_restore x20
+  ret
+
+.fallback:
+  .cfi_restore_state
+  ldp x21, x22, [sp, #16]
+  .cfi_restore x21
+  .cfi_restore x22
+  ldp x19, x20, [sp], #32
+  .cfi_adjust_cfa_offset -32
+  .cfi_restore x19
+  .cfi_restore x20
+  b tlsdesc_resolver_dynamic_slow_path  // tail call; x0 still holds the TlsDynamicResolverArg*
+END(tlsdesc_resolver_dynamic)
+
+#define SAVE_REG(x, slot)                 \
+    str x, [sp, #((slot) * 8)];           \
+    .cfi_rel_offset x, (slot) * 8;        \
+
+#define SAVE_GPR_PAIR(x, y, slot)         \
+    stp x, y, [sp, #((slot) * 8)];        \
+    .cfi_rel_offset x, (slot) * 8;        \
+    .cfi_rel_offset y, ((slot) + 1) * 8;  \
+
+#define SAVE_VEC_PAIR(x, y, slot)         \
+    stp x, y, [sp, #((slot) * 8)];        \
+    .cfi_rel_offset x, (slot) * 8;        \
+    .cfi_rel_offset y, ((slot) + 2) * 8;  \
+
+#define RESTORE_REG(x, slot)              \
+    ldr x, [sp, #((slot) * 8)];           \
+    .cfi_restore x;                       \
+
+#define RESTORE_REG_PAIR(x, y, slot)      \
+    ldp x, y, [sp, #((slot) * 8)];        \
+    .cfi_restore x;                       \
+    .cfi_restore y;                       \
+
+// On entry, x0 is the address of a TlsDynamicResolverArg object rather than
+// the TlsDescriptor address passed to the original resolver function.
+ENTRY_PRIVATE(tlsdesc_resolver_dynamic_slow_path)
+  sub sp, sp, #(8 * 84)
+  .cfi_def_cfa_offset (8 * 84)
+  SAVE_GPR_PAIR(x29, x30, 0)
+  mov x29, sp
+
+  // Avoid leaking the contents of the shadow call stack register (x18) into
+  // memory. x19 through x29 are callee-save registers, so we do not need to
+  // save them.
+  SAVE_GPR_PAIR(x1,  x2,  2)
+  SAVE_GPR_PAIR(x3,  x4,  4)
+  SAVE_GPR_PAIR(x5,  x6,  6)
+  SAVE_GPR_PAIR(x7,  x8,  8)
+  SAVE_GPR_PAIR(x9,  x10, 10)
+  SAVE_GPR_PAIR(x11, x12, 12)
+  SAVE_GPR_PAIR(x13, x14, 14)
+  SAVE_GPR_PAIR(x15, x16, 16)
+  SAVE_REG(x17, 18)
+
+  SAVE_VEC_PAIR(q0,  q1,  20)
+  SAVE_VEC_PAIR(q2,  q3,  24)
+  SAVE_VEC_PAIR(q4,  q5,  28)
+  SAVE_VEC_PAIR(q6,  q7,  32)
+  SAVE_VEC_PAIR(q8,  q9,  36)
+  SAVE_VEC_PAIR(q10, q11, 40)
+  SAVE_VEC_PAIR(q12, q13, 44)
+  SAVE_VEC_PAIR(q14, q15, 48)
+  SAVE_VEC_PAIR(q16, q17, 52)
+  SAVE_VEC_PAIR(q18, q19, 56)
+  SAVE_VEC_PAIR(q20, q21, 60)
+  SAVE_VEC_PAIR(q22, q23, 64)
+  SAVE_VEC_PAIR(q24, q25, 68)
+  SAVE_VEC_PAIR(q26, q27, 72)
+  SAVE_VEC_PAIR(q28, q29, 76)
+  SAVE_VEC_PAIR(q30, q31, 80)
+
+  add x0, x0, #8
+  bl __tls_get_addr
+  mrs x1, tpidr_el0 // __get_tls()
+  sub x0, x0, x1
+
+  RESTORE_REG_PAIR(q30, q31, 80)
+  RESTORE_REG_PAIR(q28, q29, 76)
+  RESTORE_REG_PAIR(q26, q27, 72)
+  RESTORE_REG_PAIR(q24, q25, 68)
+  RESTORE_REG_PAIR(q22, q23, 64)
+  RESTORE_REG_PAIR(q20, q21, 60)
+  RESTORE_REG_PAIR(q18, q19, 56)
+  RESTORE_REG_PAIR(q16, q17, 52)
+  RESTORE_REG_PAIR(q14, q15, 48)
+  RESTORE_REG_PAIR(q12, q13, 44)
+  RESTORE_REG_PAIR(q10, q11, 40)
+  RESTORE_REG_PAIR(q8,  q9,  36)
+  RESTORE_REG_PAIR(q6,  q7,  32)
+  RESTORE_REG_PAIR(q4,  q5,  28)
+  RESTORE_REG_PAIR(q2,  q3,  24)
+  RESTORE_REG_PAIR(q0,  q1,  20)
+
+  RESTORE_REG(x17, 18)
+  RESTORE_REG_PAIR(x15, x16, 16)
+  RESTORE_REG_PAIR(x13, x14, 14)
+  RESTORE_REG_PAIR(x11, x12, 12)
+  RESTORE_REG_PAIR(x9,  x10, 10)
+  RESTORE_REG_PAIR(x7,  x8,  8)
+  RESTORE_REG_PAIR(x5,  x6,  6)
+  RESTORE_REG_PAIR(x3,  x4,  4)
+  RESTORE_REG_PAIR(x1,  x2,  2)
+
+  RESTORE_REG_PAIR(x29, x30, 0)
+  add sp, sp, #(8 * 84)
+  .cfi_def_cfa_offset 0
+  ret
+END(tlsdesc_resolver_dynamic_slow_path)
+
+// The address of an unresolved weak TLS symbol evaluates to NULL with TLSDESC.
+// The value returned by this function is added to the thread pointer, so return
+// the descriptor's argument minus the thread pointer to cancel it out.
+ENTRY_PRIVATE(tlsdesc_resolver_unresolved_weak)
+  str x19, [sp, #-16]!
+  .cfi_def_cfa_offset 16
+  .cfi_rel_offset x19, 0
+  ldr x19, [x0, #8]             // TlsDescriptor::arg
+  mrs x0, tpidr_el0             // __get_tls()
+  sub x0, x19, x0               // arg - thread pointer
+  ldr x19, [sp], #16
+  .cfi_def_cfa_offset 0
+  .cfi_restore x19
+  ret
+END(tlsdesc_resolver_unresolved_weak)
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 412b8eb..d0c740b 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -2698,6 +2698,7 @@
 bool soinfo::relocate(const VersionTracker& version_tracker, ElfRelIteratorT&& rel_iterator,
                       const soinfo_list_t& global_group, const soinfo_list_t& local_group) {
   const size_t tls_tp_base = __libc_shared_globals()->static_tls_layout.offset_thread_pointer();
+  std::vector<std::pair<TlsDescriptor*, size_t>> deferred_tlsdesc_relocs;
 
   for (size_t idx = 0; rel_iterator.has_next(); ++idx) {
     const auto rel = rel_iterator.next();
@@ -2722,7 +2723,11 @@
     soinfo* lsi = nullptr;
 
     if (sym == 0) {
-      // Do nothing.
+      // By convention in ld.bfd and lld, an omitted symbol on a TLS relocation
+      // is a reference to the current module.
+      if (is_tls_reloc(type)) {
+        lsi = this;
+      }
     } else if (ELF_ST_BIND(symtab_[sym].st_info) == STB_LOCAL && is_tls_reloc(type)) {
       // In certain situations, the Gold linker accesses a TLS symbol using a
       // relocation to an STB_LOCAL symbol in .dynsym of either STT_SECTION or
@@ -2830,6 +2835,11 @@
                    sym_name, get_realpath());
             return false;
           }
+          if (lsi->get_tls() == nullptr) {
+            DL_ERR("TLS relocation refers to symbol \"%s\" in solib \"%s\" with no TLS segment",
+                   sym_name, lsi->get_realpath());
+            return false;
+          }
           sym_addr = s->st_value;
         } else {
           if (ELF_ST_TYPE(s->st_info) == STT_TLS) {
@@ -2916,16 +2926,12 @@
         MARK(rel->r_offset);
         {
           ElfW(Addr) tpoff = 0;
-          if (sym == 0) {
-            // By convention in ld.bfd and lld, an omitted symbol
-            // (ELFW(R_SYM) == 0) refers to the local module.
-            lsi = this;
-          }
           if (lsi == nullptr) {
             // Unresolved weak relocation. Leave tpoff at 0 to resolve
             // &weak_tls_symbol to __get_tls().
-          } else if (soinfo_tls* lsi_tls = lsi->get_tls()) {
-            const TlsModule& mod = get_tls_module(lsi_tls->module_id);
+          } else {
+            CHECK(lsi->get_tls() != nullptr); // We rejected a missing TLS segment above.
+            const TlsModule& mod = get_tls_module(lsi->get_tls()->module_id);
             if (mod.static_offset != SIZE_MAX) {
               tpoff += mod.static_offset - tls_tp_base;
             } else {
@@ -2933,10 +2939,6 @@
                      sym_name, lsi->get_realpath(), get_realpath());
               return false;
             }
-          } else {
-            DL_ERR("TLS relocation refers to symbol \"%s\" in solib \"%s\" with no TLS segment",
-                   sym_name, lsi->get_realpath());
-            return false;
           }
           tpoff += sym_addr + addend;
           TRACE_TYPE(RELO, "RELO TLS_TPREL %16p <- %16p %s\n",
@@ -2946,6 +2948,78 @@
         }
         break;
 
+#if !defined(__aarch64__)
+      // Omit support for DTPMOD/DTPREL on arm64, at least until
+      // http://b/123385182 is fixed. arm64 uses TLSDESC instead.
+      case R_GENERIC_TLS_DTPMOD:
+        count_relocation(kRelocRelative);
+        MARK(rel->r_offset);
+        {
+          size_t module_id = 0;
+          if (lsi == nullptr) {
+            // Unresolved weak relocation. Evaluate the module ID to 0.
+          } else {
+            CHECK(lsi->get_tls() != nullptr); // We rejected a missing TLS segment above.
+            module_id = lsi->get_tls()->module_id;
+          }
+          TRACE_TYPE(RELO, "RELO TLS_DTPMOD %16p <- %zu %s\n",
+                     reinterpret_cast<void*>(reloc), module_id, sym_name);
+          *reinterpret_cast<ElfW(Addr)*>(reloc) = module_id;
+        }
+        break;
+      case R_GENERIC_TLS_DTPREL:
+        count_relocation(kRelocRelative);
+        MARK(rel->r_offset);
+        TRACE_TYPE(RELO, "RELO TLS_DTPREL %16p <- %16p %s\n",
+                   reinterpret_cast<void*>(reloc),
+                   reinterpret_cast<void*>(sym_addr + addend), sym_name);
+        *reinterpret_cast<ElfW(Addr)*>(reloc) = sym_addr + addend;
+        break;
+#endif  // !defined(__aarch64__)
+
+#if defined(__aarch64__)
+      // Bionic currently only implements TLSDESC for arm64. This implementation should work with
+      // other architectures, as long as the resolver functions are implemented.
+      case R_GENERIC_TLSDESC:
+        count_relocation(kRelocRelative);
+        MARK(rel->r_offset);
+        {
+          TlsDescriptor* desc = reinterpret_cast<TlsDescriptor*>(reloc);
+          if (lsi == nullptr) {
+            // Unresolved weak relocation.
+            desc->func = tlsdesc_resolver_unresolved_weak;
+            desc->arg = addend;
+            TRACE_TYPE(RELO, "RELO TLSDESC %16p <- unresolved weak 0x%zx %s\n",
+                       reinterpret_cast<void*>(reloc), static_cast<size_t>(addend), sym_name);
+          } else {
+            CHECK(lsi->get_tls() != nullptr); // We rejected a missing TLS segment above.
+            size_t module_id = lsi->get_tls()->module_id;
+            const TlsModule& mod = get_tls_module(module_id);
+            if (mod.static_offset != SIZE_MAX) {
+              desc->func = tlsdesc_resolver_static;
+              desc->arg = mod.static_offset - tls_tp_base + sym_addr + addend;
+              TRACE_TYPE(RELO, "RELO TLSDESC %16p <- static (0x%zx - 0x%zx + 0x%zx + 0x%zx) %s\n",
+                         reinterpret_cast<void*>(reloc), mod.static_offset, tls_tp_base,
+                         static_cast<size_t>(sym_addr), static_cast<size_t>(addend), sym_name);
+            } else {
+              tlsdesc_args_.push_back({
+                .generation = mod.first_generation,
+                .index.module_id = module_id,
+                .index.offset = sym_addr + addend,
+              });
+              // Defer the TLSDESC relocation until the address of the TlsDynamicResolverArg object
+              // is finalized: tlsdesc_args_ is a std::vector and can still grow and reallocate.
+              deferred_tlsdesc_relocs.push_back({ desc, tlsdesc_args_.size() - 1 });
+              const TlsDynamicResolverArg& desc_arg = tlsdesc_args_.back();
+              TRACE_TYPE(RELO, "RELO TLSDESC %16p <- dynamic (gen %zu, mod %zu, off %zu) %s",
+                         reinterpret_cast<void*>(reloc), desc_arg.generation,
+                         desc_arg.index.module_id, desc_arg.index.offset, sym_name);
+            }
+          }
+        }
+        break;
+#endif  // defined(__aarch64__)
+
 #if defined(__aarch64__)
       case R_AARCH64_ABS64:
         count_relocation(kRelocAbsolute);
@@ -3115,6 +3189,13 @@
         return false;
     }
   }
+
+  for (const std::pair<TlsDescriptor*, size_t>& pair : deferred_tlsdesc_relocs) {
+    TlsDescriptor* desc = pair.first;
+    desc->func = tlsdesc_resolver_dynamic;
+    desc->arg = reinterpret_cast<size_t>(&tlsdesc_args_[pair.second]);
+  }
+
   return true;
 }
 #endif  // !defined(__mips__)
@@ -3528,13 +3609,14 @@
         // this is parsed after we have strtab initialized (see below).
         break;
 
+      case DT_TLSDESC_GOT:
+      case DT_TLSDESC_PLT:
+        // These DT entries are used for lazy TLSDESC relocations. Bionic
+        // resolves everything eagerly, so these can be ignored.
+        break;
+
       default:
         if (!relocating_linker) {
-          if (d->d_tag == DT_TLSDESC_GOT || d->d_tag == DT_TLSDESC_PLT) {
-            DL_ERR("unsupported ELF TLS DT entry in \"%s\"", get_realpath());
-            return false;
-          }
-
           const char* tag_name;
           if (d->d_tag == DT_RPATH) {
             tag_name = "DT_RPATH";
diff --git a/linker/linker_block_allocator.h b/linker/linker_block_allocator.h
index 85e6bd9..458d092 100644
--- a/linker/linker_block_allocator.h
+++ b/linker/linker_block_allocator.h
@@ -68,18 +68,18 @@
  * of a single fixed-size type. Allocations are backed by page-sized private
  * anonymous mmaps.
  *
- * The differences between this allocator and LinkerMemoryAllocator are:
- * 1. This allocator manages space more efficiently. LinkerMemoryAllocator
- *    operates in power-of-two sized blocks up to 1k, when this implementation
- *    splits the page to aligned size of structure; For example for structures
- *    with size 513 this allocator will use 516 (520 for lp64) bytes of data
- *    where generalized implementation is going to use 1024 sized blocks.
+ * The differences between this allocator and BionicAllocator are:
+ * 1. This allocator manages space more efficiently. BionicAllocator operates in
+ *    power-of-two sized blocks up to 1k, when this implementation splits the
+ *    page to aligned size of structure; For example for structures with size
+ *    513 this allocator will use 516 (520 for lp64) bytes of data where
+ *    generalized implementation is going to use 1024 sized blocks.
  *
  * 2. Unless all allocated memory is freed, this allocator does not munmap
- *    allocated memory, where LinkerMemoryAllocator does.
+ *    allocated memory, where BionicAllocator does.
  *
- * 3. This allocator provides mprotect services to the user, where LinkerMemoryAllocator
- *    always treats it's memory as READ|WRITE.
+ * 3. This allocator provides mprotect services to the user, where BionicAllocator
+ *    always treats its memory as READ|WRITE.
  */
 template<typename T>
 class LinkerTypeAllocator {
diff --git a/linker/linker_memory.cpp b/linker/linker_memory.cpp
index f2cce01..ce29997 100644
--- a/linker/linker_memory.cpp
+++ b/linker/linker_memory.cpp
@@ -26,7 +26,7 @@
  * SUCH DAMAGE.
  */
 
-#include "linker_allocator.h"
+#include "private/bionic_allocator.h"
 
 #include <stdlib.h>
 #include <sys/cdefs.h>
@@ -36,7 +36,7 @@
 
 #include <async_safe/log.h>
 
-static LinkerMemoryAllocator g_linker_allocator;
+static BionicAllocator g_bionic_allocator;
 static std::atomic<pid_t> fallback_tid(0);
 
 // Used by libdebuggerd_handler to switch allocators during a crash dump, in
@@ -56,16 +56,16 @@
   }
 }
 
-static LinkerMemoryAllocator& get_fallback_allocator() {
-  static LinkerMemoryAllocator fallback_allocator;
+static BionicAllocator& get_fallback_allocator() {
+  static BionicAllocator fallback_allocator;
   return fallback_allocator;
 }
 
-static LinkerMemoryAllocator& get_allocator() {
+static BionicAllocator& get_allocator() {
   if (__predict_false(fallback_tid) && __predict_false(gettid() == fallback_tid)) {
     return get_fallback_allocator();
   }
-  return g_linker_allocator;
+  return g_bionic_allocator;
 }
 
 void* malloc(size_t byte_count) {
diff --git a/linker/linker_soinfo.h b/linker/linker_soinfo.h
index 14571de..dd9c6aa 100644
--- a/linker/linker_soinfo.h
+++ b/linker/linker_soinfo.h
@@ -32,9 +32,11 @@
 
 #include <memory>
 #include <string>
+#include <vector>
 
 #include "private/bionic_elf_tls.h"
 #include "linker_namespaces.h"
+#include "linker_tls.h"
 
 #define FLAG_LINKED           0x00000001
 #define FLAG_EXE              0x00000004 // The main executable
@@ -102,14 +104,9 @@
 // TODO(dimitry): remove reference from soinfo member functions to this class.
 class VersionTracker;
 
-// The first ELF TLS module has ID 1. Zero is reserved for the first word of
-// the DTV, a generation count, and unresolved weak symbols also use module
-// ID 0.
-static constexpr size_t kUninitializedModuleId = 0;
-
 struct soinfo_tls {
   TlsSegment segment;
-  size_t module_id = kUninitializedModuleId;
+  size_t module_id = kTlsUninitializedModuleId;
 };
 
 #if defined(__work_around_b_24465209__)
@@ -383,6 +380,7 @@
 
   // version >= 5
   std::unique_ptr<soinfo_tls> tls_;
+  std::vector<TlsDynamicResolverArg> tlsdesc_args_;
 };
 
 // This function is used by dlvsym() to calculate hash of sym_ver
diff --git a/linker/linker_tls.cpp b/linker/linker_tls.cpp
index 0d1796b..a3aa9bf 100644
--- a/linker/linker_tls.cpp
+++ b/linker/linker_tls.cpp
@@ -31,6 +31,7 @@
 #include <vector>
 
 #include "private/ScopedRWLock.h"
+#include "private/ScopedSignalBlocker.h"
 #include "private/bionic_defs.h"
 #include "private/bionic_elf_tls.h"
 #include "private/bionic_globals.h"
@@ -41,9 +42,6 @@
 static bool g_static_tls_finished;
 static std::vector<TlsModule> g_tls_modules;
 
-static inline size_t module_id_to_idx(size_t id) { return id - 1; }
-static inline size_t module_idx_to_id(size_t idx) { return idx + 1; }
-
 static size_t get_unused_module_index() {
   for (size_t i = 0; i < g_tls_modules.size(); ++i) {
     if (g_tls_modules[i].soinfo_ptr == nullptr) {
@@ -57,37 +55,47 @@
 }
 
 static void register_tls_module(soinfo* si, size_t static_offset) {
+  TlsModules& libc_modules = __libc_shared_globals()->tls_modules;
+
   // The global TLS module table points at the std::vector of modules declared
   // in this file, so acquire a write lock before modifying the std::vector.
-  ScopedWriteLock locker(&__libc_shared_globals()->tls_modules.rwlock);
+  ScopedSignalBlocker ssb;
+  ScopedWriteLock locker(&libc_modules.rwlock);
 
   size_t module_idx = get_unused_module_index();
 
   soinfo_tls* si_tls = si->get_tls();
-  si_tls->module_id = module_idx_to_id(module_idx);
+  si_tls->module_id = __tls_module_idx_to_id(module_idx);
+
+  const size_t new_generation = ++libc_modules.generation;
+  __libc_tls_generation_copy = new_generation;
+  if (libc_modules.generation_libc_so != nullptr) {
+    *libc_modules.generation_libc_so = new_generation;
+  }
 
   g_tls_modules[module_idx] = {
     .segment = si_tls->segment,
     .static_offset = static_offset,
-    .first_generation = ++__libc_shared_globals()->tls_modules.generation,
+    .first_generation = new_generation,
     .soinfo_ptr = si,
   };
 }
 
 static void unregister_tls_module(soinfo* si) {
+  ScopedSignalBlocker ssb;
   ScopedWriteLock locker(&__libc_shared_globals()->tls_modules.rwlock);
 
   soinfo_tls* si_tls = si->get_tls();
-  TlsModule& mod = g_tls_modules[module_id_to_idx(si_tls->module_id)];
+  TlsModule& mod = g_tls_modules[__tls_module_id_to_idx(si_tls->module_id)];
   CHECK(mod.static_offset == SIZE_MAX);
   CHECK(mod.soinfo_ptr == si);
   mod = {};
-  si_tls->module_id = kUninitializedModuleId;
+  si_tls->module_id = kTlsUninitializedModuleId;
 }
 
 // The reference is valid until a TLS module is registered or unregistered.
 const TlsModule& get_tls_module(size_t module_id) {
-  size_t module_idx = module_id_to_idx(module_id);
+  size_t module_idx = __tls_module_id_to_idx(module_id);
   CHECK(module_idx < g_tls_modules.size());
   return g_tls_modules[module_idx];
 }
@@ -123,7 +131,7 @@
 
 void register_soinfo_tls(soinfo* si) {
   soinfo_tls* si_tls = si->get_tls();
-  if (si_tls == nullptr || si_tls->module_id != kUninitializedModuleId) {
+  if (si_tls == nullptr || si_tls->module_id != kTlsUninitializedModuleId) {
     return;
   }
   size_t static_offset = SIZE_MAX;
@@ -136,7 +144,7 @@
 
 void unregister_soinfo_tls(soinfo* si) {
   soinfo_tls* si_tls = si->get_tls();
-  if (si_tls == nullptr || si_tls->module_id == kUninitializedModuleId) {
+  if (si_tls == nullptr || si_tls->module_id == kTlsUninitializedModuleId) {
     return;
   }
   return unregister_tls_module(si);
diff --git a/linker/linker_tls.h b/linker/linker_tls.h
index fbb1dcf..87e1f0d 100644
--- a/linker/linker_tls.h
+++ b/linker/linker_tls.h
@@ -30,6 +30,8 @@
 
 #include <stdlib.h>
 
+#include "private/bionic_elf_tls.h"
+
 struct TlsModule;
 struct soinfo;
 
@@ -40,3 +42,24 @@
 void unregister_soinfo_tls(soinfo* si);
 
 const TlsModule& get_tls_module(size_t module_id);
+
+typedef size_t TlsDescResolverFunc(size_t);
+
+struct TlsDescriptor {
+#if defined(__arm__)
+  size_t arg;
+  TlsDescResolverFunc* func;
+#else
+  TlsDescResolverFunc* func;
+  size_t arg;
+#endif
+};
+
+struct TlsDynamicResolverArg {
+  size_t generation;
+  TlsIndex index;
+};
+
+__LIBC_HIDDEN__ extern "C" size_t tlsdesc_resolver_static(size_t);
+__LIBC_HIDDEN__ extern "C" size_t tlsdesc_resolver_dynamic(size_t);
+__LIBC_HIDDEN__ extern "C" size_t tlsdesc_resolver_unresolved_weak(size_t);
diff --git a/linker/tests/Android.mk b/linker/tests/Android.mk
index 9268e31..63e0555 100644
--- a/linker/tests/Android.mk
+++ b/linker/tests/Android.mk
@@ -43,10 +43,8 @@
   linker_config_test.cpp \
   linker_globals.cpp \
   linked_list_test.cpp \
-  linker_memory_allocator_test.cpp \
   linker_sleb128_test.cpp \
   linker_utils_test.cpp \
-  ../linker_allocator.cpp \
   ../linker_block_allocator.cpp \
   ../linker_config.cpp \
   ../linker_utils.cpp \
diff --git a/tests/Android.bp b/tests/Android.bp
index 0a45a8e..b374c27 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -533,7 +533,6 @@
         "libdl_preempt_test_1",
         "libdl_preempt_test_2",
         "libdl_test_df_1_global",
-        "libelf-tls-library",
         "libgnu-hash-table-library",
         "libsysv-hash-table-library",
         "libtestshared",
@@ -571,6 +570,10 @@
         "libtest_dlsym_from_this",
         "libtest_dlsym_weak_func",
         "libtest_dt_runpath_d",
+        "libtest_elftls_dynamic",
+        "libtest_elftls_dynamic_filler_1",
+        "libtest_elftls_dynamic_filler_2",
+        "libtest_elftls_dynamic_filler_3",
         "libtest_elftls_shared_var",
         "libtest_elftls_shared_var_ie",
         "libtest_elftls_tprel",
@@ -661,6 +664,13 @@
         "gtest_preinit_debuggerd.cpp",
         "gtest_globals.cpp",
         "gtest_main.cpp",
+
+        // The Bionic allocator has its own C++ API. It isn't packaged into its
+        // own library, so it can only be tested when it's part of libc.a.
+        "bionic_allocator_test.cpp",
+    ],
+    include_dirs: [
+        "bionic/libc",
     ],
     whole_static_libs: [
         "libBionicTests",
diff --git a/linker/tests/linker_memory_allocator_test.cpp b/tests/bionic_allocator_test.cpp
similarity index 75%
rename from linker/tests/linker_memory_allocator_test.cpp
rename to tests/bionic_allocator_test.cpp
index c284eaa..f710907 100644
--- a/linker/tests/linker_memory_allocator_test.cpp
+++ b/tests/bionic_allocator_test.cpp
@@ -32,7 +32,7 @@
 
 #include <gtest/gtest.h>
 
-#include "../linker_allocator.h"
+#include "private/bionic_allocator.h"
 
 #include <unistd.h>
 
@@ -61,20 +61,20 @@
 
 static size_t kPageSize = sysconf(_SC_PAGE_SIZE);
 
-TEST(linker_memory, test_alloc_0) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_alloc_0) {
+  BionicAllocator allocator;
   void* ptr = allocator.alloc(0);
   ASSERT_TRUE(ptr != nullptr);
   allocator.free(ptr);
 }
 
-TEST(linker_memory, test_free_nullptr) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_free_nullptr) {
+  BionicAllocator allocator;
   allocator.free(nullptr);
 }
 
-TEST(linker_memory, test_realloc) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_realloc) {
+  BionicAllocator allocator;
   uint32_t* array = reinterpret_cast<uint32_t*>(allocator.alloc(512));
   const size_t array_size = 512 / sizeof(uint32_t);
 
@@ -127,8 +127,8 @@
   ASSERT_EQ(nullptr, allocator.realloc(reallocated_ptr, 0));
 }
 
-TEST(linker_memory, test_small_smoke) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_small_smoke) {
+  BionicAllocator allocator;
 
   uint8_t zeros[16];
   memset(zeros, 0, sizeof(zeros));
@@ -150,8 +150,8 @@
   allocator.free(ptr2);
 }
 
-TEST(linker_memory, test_huge_smoke) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_huge_smoke) {
+  BionicAllocator allocator;
 
   // this should trigger proxy-to-mmap
   test_struct_huge* ptr1 =
@@ -170,8 +170,8 @@
   allocator.free(ptr1);
 }
 
-TEST(linker_memory, test_large) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_large) {
+  BionicAllocator allocator;
 
   test_struct_large* ptr1 =
       reinterpret_cast<test_struct_large*>(allocator.alloc(sizeof(test_struct_large)));
@@ -212,4 +212,49 @@
   allocator.free(ptr_to_free);
 }
 
+TEST(bionic_allocator, test_memalign_small) {
+  BionicAllocator allocator;
+  void* ptr;
 
+  // simple case
+  ptr = allocator.memalign(0x100, 0x100);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x100);
+  allocator.free(ptr);
+
+  // small objects are automatically aligned to their size.
+  ptr = allocator.alloc(0x200);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x200);
+  allocator.free(ptr);
+
+  // the size (0x10) is bumped up to the alignment (0x100)
+  ptr = allocator.memalign(0x100, 0x10);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x100);
+  allocator.free(ptr);
+}
+
+TEST(bionic_allocator, test_memalign_large) {
+  BionicAllocator allocator;
+  void* ptr;
+
+  // a large object with alignment < PAGE_SIZE
+  ptr = allocator.memalign(0x100, 0x2000);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x100);
+  allocator.free(ptr);
+
+  // a large object with alignment == PAGE_SIZE
+  ptr = allocator.memalign(0x1000, 0x2000);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x1000);
+  allocator.free(ptr);
+
+  // A large object with alignment > PAGE_SIZE is only guaranteed to have page
+  // alignment.
+  ptr = allocator.memalign(0x2000, 0x4000);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x1000);
+  allocator.free(ptr);
+}
diff --git a/tests/dlfcn_test.cpp b/tests/dlfcn_test.cpp
index 176a6f8..8a3b6f3 100644
--- a/tests/dlfcn_test.cpp
+++ b/tests/dlfcn_test.cpp
@@ -1082,13 +1082,6 @@
   ASSERT_SUBSTR("libsysv-hash-table-library.so", dlinfo.dli_fname);
 }
 
-TEST(dlfcn, dlopen_library_with_ELF_TLS) {
-  dlerror(); // Clear any pending errors.
-  void* handle = dlopen("libelf-tls-library.so", RTLD_NOW);
-  ASSERT_TRUE(handle == nullptr);
-  ASSERT_SUBSTR("unknown reloc type ", dlerror());
-}
-
 TEST(dlfcn, dlopen_bad_flags) {
   dlerror(); // Clear any pending errors.
   void* handle;
diff --git a/tests/elftls_dl_test.cpp b/tests/elftls_dl_test.cpp
index 0a97c28..e908fb9 100644
--- a/tests/elftls_dl_test.cpp
+++ b/tests/elftls_dl_test.cpp
@@ -34,6 +34,10 @@
 #include "gtest_globals.h"
 #include "utils.h"
 
+#if defined(__BIONIC__)
+#include "bionic/pthread_internal.h"
+#endif
+
 // Access libtest_elftls_shared_var.so's TLS variable using an IE access.
 __attribute__((tls_model("initial-exec"))) extern "C" __thread int elftls_shared_var;
 
@@ -78,3 +82,175 @@
   eth.SetArgs({ helper.c_str(), nullptr });
   eth.Run([&]() { execve(helper.c_str(), eth.GetArgs(), eth.GetEnv()); }, 0, error.c_str());
 }
+
+// Use a GD access (__tls_get_addr or TLSDESC) to modify a variable in static
+// TLS memory.
+TEST(elftls_dl, access_static_tls) {
+  void* lib = dlopen("libtest_elftls_dynamic.so", RTLD_LOCAL | RTLD_NOW);
+  ASSERT_NE(nullptr, lib);
+
+  auto bump_shared_var = reinterpret_cast<int(*)()>(dlsym(lib, "bump_shared_var"));
+  ASSERT_NE(nullptr, bump_shared_var);
+
+  ASSERT_EQ(21, ++elftls_shared_var);  // IE access from this file; presumably the variable starts at 20
+  ASSERT_EQ(22, bump_shared_var());  // the dlopen'ed library must increment the same variable
+
+  std::thread([bump_shared_var] {  // a new thread is expected to see the same sequence again
+    ASSERT_EQ(21, ++elftls_shared_var);
+    ASSERT_EQ(22, bump_shared_var());
+  }).join();
+}
+
+TEST(elftls_dl, bump_local_vars) {
+  // The library's TLS variables start at 15 and 25, so a thread's first bump returns 42.
+  void* handle = dlopen("libtest_elftls_dynamic.so", RTLD_LOCAL | RTLD_NOW);
+  ASSERT_NE(nullptr, handle);
+  using BumpFn = int (*)();
+  const auto bump = reinterpret_cast<BumpFn>(dlsym(handle, "bump_local_vars"));
+  ASSERT_NE(nullptr, bump);
+
+  ASSERT_EQ(42, bump());
+  std::thread worker([bump] { ASSERT_EQ(42, bump()); });
+  worker.join();
+}
+
+// The behavior of accessing an unresolved weak TLS symbol using a dynamic TLS
+// relocation depends on which kind of implementation the target uses. With
+// TLSDESC, the result is NULL. With __tls_get_addr, the result is the
+// generation count (or possibly undefined behavior), so only TLSDESC is tested.
+TEST(elftls_dl, missing_weak) {
+#if defined(__aarch64__)  // the only target this test treats as TLSDESC-based
+  void* lib = dlopen("libtest_elftls_dynamic.so", RTLD_LOCAL | RTLD_NOW);
+  ASSERT_NE(nullptr, lib);
+
+  auto missing_weak_dyn_tls_addr = reinterpret_cast<int*(*)()>(dlsym(lib, "missing_weak_dyn_tls_addr"));
+  ASSERT_NE(nullptr, missing_weak_dyn_tls_addr);
+
+  ASSERT_EQ(nullptr, missing_weak_dyn_tls_addr());  // address of the unresolved weak variable
+  std::thread([missing_weak_dyn_tls_addr] {  // repeat the check on a second thread
+    ASSERT_EQ(nullptr, missing_weak_dyn_tls_addr());
+  }).join();
+#else
+  GTEST_LOG_(INFO) << "This test is only run on TLSDESC-based targets.\n";
+#endif
+}
+
+TEST(elftls_dl, dtv_resize) {  // checks how the DTV count changes as TLS modules are dlopen'ed
+#if defined(__BIONIC__)  // LOAD_LIB(soname): dlopen soname, evaluate to its "bump" symbol as int(*)()
+#define LOAD_LIB(soname) ({                           \
+    auto lib = dlopen(soname, RTLD_LOCAL | RTLD_NOW); \
+    ASSERT_NE(nullptr, lib);                          \
+    reinterpret_cast<int(*)()>(dlsym(lib, "bump"));   \
+  })
+
+  auto dtv = []() -> TlsDtv* { return __get_tcb_dtv(__get_bionic_tcb()); };  // re-read on every call
+
+  static_assert(sizeof(TlsDtv) == 3 * sizeof(void*),
+                "This test assumes that the Dtv has a 3-word header");
+
+  // Initially there are 3 modules:
+  //  - the main test executable
+  //  - libtest_elftls_shared_var
+  //  - libtest_elftls_tprel
+
+  // The initial DTV is an empty DTV with no generation and a size of 0.
+  TlsDtv* zero_dtv = dtv();
+  ASSERT_EQ(0u, zero_dtv->count);
+  ASSERT_EQ(nullptr, zero_dtv->next);
+  ASSERT_EQ(kTlsGenerationNone, zero_dtv->generation);
+
+  // Load the fourth module.
+  auto func1 = LOAD_LIB("libtest_elftls_dynamic_filler_1.so");
+  ASSERT_EQ(101, func1());  // filler_1 is built with -DTLS_FILLER=100 and bump() pre-increments
+
+  // After loading one module, the DTV should be initialized to the next
+  // power-of-2 size (including the header).
+  TlsDtv* initial_dtv = dtv();
+  ASSERT_EQ(5u, initial_dtv->count);  // consistent with 8 words total: 3 header words + 5 slots
+  ASSERT_EQ(zero_dtv, initial_dtv->next);  // the previous DTV stays linked through next
+  ASSERT_LT(0u, initial_dtv->generation);
+
+  // Load module 5.
+  auto func2 = LOAD_LIB("libtest_elftls_dynamic_filler_2.so");
+  ASSERT_EQ(102, func1());
+  ASSERT_EQ(201, func2());
+  ASSERT_EQ(initial_dtv, dtv());  // a fifth module must not replace the DTV
+  ASSERT_EQ(5u, initial_dtv->count);
+
+  // Load module 6.
+  auto func3 = LOAD_LIB("libtest_elftls_dynamic_filler_3.so");
+  ASSERT_EQ(103, func1());  // modules 4 and 5 are accessed before module 6 is touched
+  ASSERT_EQ(202, func2());
+
+#if defined(__aarch64__)
+  // The arm64 TLSDESC resolver doesn't update the DTV if it is new enough for
+  // the given access.
+  ASSERT_EQ(5u, dtv()->count);
+#else
+  // __tls_get_addr updates the DTV anytime the generation counter changes.
+  ASSERT_EQ(13u, dtv()->count);  // consistent with 16 words total: 3 header words + 13 slots
+#endif
+
+  ASSERT_EQ(301, func3());  // after this first access to module 6, every target expects count 13
+
+  TlsDtv* new_dtv = dtv();
+  ASSERT_EQ(13u, new_dtv->count);
+  ASSERT_NE(initial_dtv, new_dtv);
+  ASSERT_EQ(initial_dtv, new_dtv->next);  // the replaced DTV is chained behind the new one
+
+#undef LOAD_LIB
+#else
+  GTEST_LOG_(INFO) << "This test is skipped for glibc because it tests Bionic internals.";
+#endif
+}
+
+// Load, bump and unload the library twice: the second pass must observe the
+// initial TLS values again instead of the values left behind by the first.
+TEST(elftls_dl, dlclose_resets_values) {
+  using BumpFn = int (*)();
+  constexpr int kRounds = 2;
+  for (int pass = 0; pass != kRounds; ++pass) {
+    void* handle = dlopen("libtest_elftls_dynamic.so", RTLD_LOCAL | RTLD_NOW);
+    ASSERT_NE(nullptr, handle);
+    const auto bump = reinterpret_cast<BumpFn>(dlsym(handle, "bump_local_vars"));
+    ASSERT_NE(nullptr, bump);
+
+    ASSERT_EQ(42, bump());  // first call on freshly initialized variables
+    ASSERT_EQ(44, bump());  // each call adds 1 to both variables
+    ASSERT_EQ(0, dlclose(handle));
+  }
+}
+
+// Calling dlclose should remove the entry for the solib from the global list of
+// ELF TLS modules. Test that repeatedly loading and unloading a library doesn't
+// increase the DTV size.
+TEST(elftls_dl, dlclose_removes_entry) {
+#if defined(__BIONIC__)
+  auto dtv = []() -> TlsDtv* { return __get_tcb_dtv(__get_bionic_tcb()); };
+
+  bool first = true;  // true until the baseline DTV count has been recorded
+  size_t count = 0;   // DTV count observed in the first round
+
+  // Use a large number of rounds in case the DTV is initially larger than
+  // expected.
+  for (int round = 0; round < 32; ++round) {
+    void* lib = dlopen("libtest_elftls_dynamic.so", RTLD_LOCAL | RTLD_NOW);
+    ASSERT_NE(nullptr, lib);
+
+    auto bump_local_vars = reinterpret_cast<int(*)()>(dlsym(lib, "bump_local_vars"));
+    ASSERT_NE(nullptr, bump_local_vars);
+
+    ASSERT_EQ(42, bump_local_vars());  // access the library's TLS before reading the DTV count
+    if (first) {
+      first = false;
+      count = dtv()->count;
+    } else {
+      ASSERT_EQ(count, dtv()->count);  // later rounds must not have grown the DTV
+    }
+    // A failed dlclose would leave the module loaded and make the check above vacuous.
+    ASSERT_EQ(0, dlclose(lib));
+  }
+#else
+  GTEST_LOG_(INFO) << "This test is skipped for glibc because it tests Bionic internals.";
+#endif
+}
diff --git a/tests/elftls_test.cpp b/tests/elftls_test.cpp
index 11d41ce..2d83d70 100644
--- a/tests/elftls_test.cpp
+++ b/tests/elftls_test.cpp
@@ -83,3 +83,17 @@
     ASSERT_EQ(8, bump_static_tls_var_2());
   }).join();
 }
+
+// Because this C++ source file is built with -fpic, the compiler will access
+// this variable using a GD model. Typically, the static linker will relax the
+// GD to LE, but the arm32 linker doesn't do TLS relaxations, so we can test
+// calling __tls_get_addr in a static executable. The static linker knows that
+// the main executable's TlsIndex::module_id is 1 and writes that into the GOT.
+__thread int tlsvar_general = 30;
+
+TEST(elftls, general) {
+  ASSERT_EQ(31, ++tlsvar_general);  // 30 + 1 on the thread running the test
+  std::thread([] {
+    ASSERT_EQ(31, ++tlsvar_general);  // the new thread's copy must start from 30 again
+  }).join();
+}
diff --git a/tests/headers/posix/signal_h.c b/tests/headers/posix/signal_h.c
index 661b55e..c2e544e 100644
--- a/tests/headers/posix/signal_h.c
+++ b/tests/headers/posix/signal_h.c
@@ -53,7 +53,7 @@
   STRUCT_MEMBER(struct sigevent, int, sigev_signo);
   STRUCT_MEMBER(struct sigevent, union sigval, sigev_value);
   STRUCT_MEMBER_FUNCTION_POINTER(struct sigevent, void (*f)(union sigval), sigev_notify_function);
-#if defined(__BIONIC__) || defined(__GLIBC__)
+#if defined(__BIONIC__)
   STRUCT_MEMBER(struct sigevent, void*, sigev_notify_attributes);
 #else
   STRUCT_MEMBER(struct sigevent, pthread_attr_t*, sigev_notify_attributes);
diff --git a/tests/libs/Android.bp b/tests/libs/Android.bp
index 05d1ed2..d58b6b8 100644
--- a/tests/libs/Android.bp
+++ b/tests/libs/Android.bp
@@ -43,14 +43,6 @@
 // Libraries and helper binaries for ELF TLS
 // -----------------------------------------------------------------------------
 cc_test_library {
-    name: "libelf-tls-library",
-    defaults: ["bionic_testlib_defaults"],
-    srcs: ["elf_tls_test_library.cpp"],
-    cflags: ["-fno-emulated-tls"],
-    allow_undefined_symbols: true, // __tls_get_addr is undefined.
-}
-
-cc_test_library {
     name: "libtest_elftls_shared_var",
     defaults: ["bionic_testlib_defaults"],
     srcs: ["elftls_shared_var.cpp"],
@@ -79,6 +71,35 @@
     ldflags: ["-Wl,--rpath,${ORIGIN}/.."],
 }
 
+cc_test_library {
+    name: "libtest_elftls_dynamic", // dlopen'ed by tests/elftls_dl_test.cpp
+    defaults: ["bionic_testlib_defaults"],
+    srcs: ["elftls_dynamic.cpp"],
+    cflags: ["-fno-emulated-tls"],
+    shared_libs: ["libtest_elftls_shared_var"], // provides elftls_shared_var used by elftls_dynamic.cpp
+}
+
+cc_test_library {
+    name: "libtest_elftls_dynamic_filler_1", // filler libraries share one source file
+    defaults: ["bionic_testlib_defaults"],
+    srcs: ["elftls_dynamic_filler.cpp"],
+    cflags: ["-fno-emulated-tls", "-DTLS_FILLER=100"], // TLS_FILLER: initial value of the library's TLS variable
+}
+
+cc_test_library {
+    name: "libtest_elftls_dynamic_filler_2",
+    defaults: ["bionic_testlib_defaults"],
+    srcs: ["elftls_dynamic_filler.cpp"],
+    cflags: ["-fno-emulated-tls", "-DTLS_FILLER=200"], // TLS_FILLER: initial value of the library's TLS variable
+}
+
+cc_test_library {
+    name: "libtest_elftls_dynamic_filler_3",
+    defaults: ["bionic_testlib_defaults"],
+    srcs: ["elftls_dynamic_filler.cpp"],
+    cflags: ["-fno-emulated-tls", "-DTLS_FILLER=300"], // TLS_FILLER: initial value of the library's TLS variable
+}
+
 // -----------------------------------------------------------------------------
 // Library to test gnu-styled hash
 // -----------------------------------------------------------------------------
diff --git a/tests/libs/elf_tls_test_library.cpp b/tests/libs/elf_tls_test_library.cpp
deleted file mode 100644
index 56d0171..0000000
--- a/tests/libs/elf_tls_test_library.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-thread_local int elf_tls_variable;
-
-extern "C" int* get() { return &elf_tls_variable; }
diff --git a/tests/libs/elftls_dynamic.cpp b/tests/libs/elftls_dynamic.cpp
new file mode 100644
index 0000000..7fa239c
--- /dev/null
+++ b/tests/libs/elftls_dynamic.cpp
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// This shared object test library is dlopen'ed by the main test executable.
+// The variable below comes from libtest_elftls_shared_var.so, which is part of
+// static TLS. Verify that a GD-model access can access the variable.
+//
+// Accessing the static TLS variable from an solib prevents the static linker
+// from relaxing the GD access to IE and lets us test that __tls_get_addr and
+// the tlsdesc resolver handle a static TLS variable.
+extern "C" __thread int elftls_shared_var;
+
+extern "C" int bump_shared_var() {
+  return ++elftls_shared_var;  // returns the value after the increment
+}
+
+// The static linker denotes the current module by omitting the symbol from
+// the DTPMOD/TLSDESC relocations.
+static __thread int local_var_1 = 15;
+static __thread int local_var_2 = 25;
+
+extern "C" int bump_local_vars() {
+  return ++local_var_1 + ++local_var_2;  // sum after incrementing both: 16 + 26 = 42 on first call
+}
+
+__attribute__((weak)) extern "C" __thread int missing_weak_dyn_tls;  // declared weak; no definition in this file
+
+extern "C" int* missing_weak_dyn_tls_addr() {
+  return &missing_weak_dyn_tls;  // the missing_weak test expects nullptr here on TLSDESC targets
+}
diff --git a/tests/libs/elftls_dynamic_filler.cpp b/tests/libs/elftls_dynamic_filler.cpp
new file mode 100644
index 0000000..9c00ab0
--- /dev/null
+++ b/tests/libs/elftls_dynamic_filler.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2019 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+__thread int var = TLS_FILLER;  // TLS_FILLER is set per library via -DTLS_FILLER=<n> in tests/libs/Android.bp
+
+extern "C" int bump() {
+  return ++var;  // returns the value after the increment
+}
diff --git a/tests/pthread_test.cpp b/tests/pthread_test.cpp
index 1c57264..9c6b975 100644
--- a/tests/pthread_test.cpp
+++ b/tests/pthread_test.cpp
@@ -25,6 +25,7 @@
 #include <stdio.h>
 #include <sys/mman.h>
 #include <sys/prctl.h>
+#include <sys/resource.h>
 #include <sys/syscall.h>
 #include <time.h>
 #include <unistd.h>