Merge "Make trace end conform with other trace end prints"
diff --git a/libc/Android.bp b/libc/Android.bp
index 6f2e347..226a81f 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -1014,6 +1014,7 @@
         "bionic/arpa_inet.cpp",
         "bionic/assert.cpp",
         "bionic/atof.cpp",
+        "bionic/bionic_allocator.cpp",
         "bionic/bionic_arc4random.cpp",
         "bionic/bionic_futex.cpp",
         "bionic/bionic_netlink.cpp",
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 6279e65..4984e38 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -74,6 +74,7 @@
   __libc_init_sysinfo(); // uses AT_SYSINFO auxv entry
 #endif
   __init_tcb(temp_tcb, &main_thread);
+  __init_tcb_dtv(temp_tcb);
   __set_tls(&temp_tcb->tls_slot(0));
   main_thread.tid = __getpid();
   main_thread.set_cached_pid(main_thread.tid);
diff --git a/linker/linker_allocator.cpp b/libc/bionic/bionic_allocator.cpp
similarity index 77%
rename from linker/linker_allocator.cpp
rename to libc/bionic/bionic_allocator.cpp
index 015768a..d9302ad 100644
--- a/linker/linker_allocator.cpp
+++ b/libc/bionic/bionic_allocator.cpp
@@ -26,21 +26,25 @@
  * SUCH DAMAGE.
  */
 
-#include "linker_allocator.h"
-#include "linker_debug.h"
-#include "linker.h"
+#include "private/bionic_allocator.h"
 
 #include <stdlib.h>
+#include <string.h>
 #include <sys/mman.h>
+#include <sys/param.h>
 #include <sys/prctl.h>
 #include <unistd.h>
 
+#include <new>
+
 #include <async_safe/log.h>
 
+#include "private/bionic_macros.h"
+#include "private/bionic_page.h"
+
 //
-// LinkerMemeoryAllocator is general purpose allocator
-// designed to provide the same functionality as the malloc/free/realloc
-// libc functions.
+// BionicAllocator is a general purpose allocator designed to provide the same
+// functionality as the malloc/free/realloc libc functions.
 //
 // On alloc:
 // If size is >= 1k allocator proxies malloc call directly to mmap
@@ -90,7 +94,7 @@
   return result;
 }
 
-LinkerSmallObjectAllocator::LinkerSmallObjectAllocator(uint32_t type,
+BionicSmallObjectAllocator::BionicSmallObjectAllocator(uint32_t type,
                                                        size_t block_size)
     : type_(type),
       block_size_(block_size),
@@ -99,7 +103,7 @@
       free_pages_cnt_(0),
       page_list_(nullptr) {}
 
-void* LinkerSmallObjectAllocator::alloc() {
+void* BionicSmallObjectAllocator::alloc() {
   CHECK(block_size_ != 0);
 
   if (page_list_ == nullptr) {
@@ -141,7 +145,7 @@
   return block_record;
 }
 
-void LinkerSmallObjectAllocator::free_page(small_object_page_info* page) {
+void BionicSmallObjectAllocator::free_page(small_object_page_info* page) {
   CHECK(page->free_blocks_cnt == blocks_per_page_);
   if (page->prev_page) {
     page->prev_page->next_page = page->next_page;
@@ -156,7 +160,7 @@
   free_pages_cnt_--;
 }
 
-void LinkerSmallObjectAllocator::free(void* ptr) {
+void BionicSmallObjectAllocator::free(void* ptr) {
   small_object_page_info* const page =
       reinterpret_cast<small_object_page_info*>(
           PAGE_START(reinterpret_cast<uintptr_t>(ptr)));
@@ -186,7 +190,7 @@
   }
 }
 
-void LinkerSmallObjectAllocator::alloc_page() {
+void BionicSmallObjectAllocator::alloc_page() {
   void* const map_ptr = mmap(nullptr, PAGE_SIZE, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   if (map_ptr == MAP_FAILED) {
@@ -194,7 +198,7 @@
   }
 
   prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, map_ptr, PAGE_SIZE,
-        "linker_alloc_small_objects");
+        "bionic_alloc_small_objects");
 
   small_object_page_info* const page =
       reinterpret_cast<small_object_page_info*>(map_ptr);
@@ -220,7 +224,7 @@
   free_pages_cnt_++;
 }
 
-void LinkerSmallObjectAllocator::add_to_page_list(small_object_page_info* page) {
+void BionicSmallObjectAllocator::add_to_page_list(small_object_page_info* page) {
   page->next_page = page_list_;
   page->prev_page = nullptr;
   if (page_list_) {
@@ -229,7 +233,7 @@
   page_list_ = page;
 }
 
-void LinkerSmallObjectAllocator::remove_from_page_list(
+void BionicSmallObjectAllocator::remove_from_page_list(
     small_object_page_info* page) {
   if (page->prev_page) {
     page->prev_page->next_page = page->next_page;
@@ -244,24 +248,30 @@
   page->next_page = nullptr;
 }
 
-void LinkerMemoryAllocator::initialize_allocators() {
+void BionicAllocator::initialize_allocators() {
   if (allocators_ != nullptr) {
     return;
   }
 
-  LinkerSmallObjectAllocator* allocators =
-      reinterpret_cast<LinkerSmallObjectAllocator*>(allocators_buf_);
+  BionicSmallObjectAllocator* allocators =
+      reinterpret_cast<BionicSmallObjectAllocator*>(allocators_buf_);
 
   for (size_t i = 0; i < kSmallObjectAllocatorsCount; ++i) {
     uint32_t type = i + kSmallObjectMinSizeLog2;
-    new (allocators + i) LinkerSmallObjectAllocator(type, 1 << type);
+    new (allocators + i) BionicSmallObjectAllocator(type, 1 << type);
   }
 
   allocators_ = allocators;
 }
 
-void* LinkerMemoryAllocator::alloc_mmap(size_t size) {
-  size_t allocated_size = PAGE_END(size + kPageInfoSize);
+void* BionicAllocator::alloc_mmap(size_t align, size_t size) {
+  size_t header_size = __BIONIC_ALIGN(kPageInfoSize, align);
+  size_t allocated_size;
+  if (__builtin_add_overflow(header_size, size, &allocated_size) ||
+      PAGE_END(allocated_size) < allocated_size) {
+    async_safe_fatal("overflow trying to alloc %zu bytes", size);
+  }
+  allocated_size = PAGE_END(allocated_size);
   void* map_ptr = mmap(nullptr, allocated_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
                        -1, 0);
 
@@ -269,25 +279,21 @@
     async_safe_fatal("mmap failed: %s", strerror(errno));
   }
 
-  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, map_ptr, allocated_size, "linker_alloc_lob");
+  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, map_ptr, allocated_size, "bionic_alloc_lob");
 
-  page_info* info = reinterpret_cast<page_info*>(map_ptr);
+  void* result = static_cast<char*>(map_ptr) + header_size;
+  page_info* info = get_page_info_unchecked(result);
   memcpy(info->signature, kSignature, sizeof(kSignature));
   info->type = kLargeObject;
   info->allocated_size = allocated_size;
 
-  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(info) +
-                                 kPageInfoSize);
+  return result;
 }
 
-void* LinkerMemoryAllocator::alloc(size_t size) {
-  // treat alloc(0) as alloc(1)
-  if (size == 0) {
-    size = 1;
-  }
 
+inline void* BionicAllocator::alloc_impl(size_t align, size_t size) {
   if (size > kSmallObjectMaxSize) {
-    return alloc_mmap(size);
+    return alloc_mmap(align, size);
   }
 
   uint16_t log2_size = log2(size);
@@ -299,8 +305,33 @@
   return get_small_object_allocator(log2_size)->alloc();
 }
 
-page_info* LinkerMemoryAllocator::get_page_info(void* ptr) {
-  page_info* info = reinterpret_cast<page_info*>(PAGE_START(reinterpret_cast<size_t>(ptr)));
+void* BionicAllocator::alloc(size_t size) {
+  // treat alloc(0) as alloc(1)
+  if (size == 0) {
+    size = 1;
+  }
+  return alloc_impl(16, size);
+}
+
+void* BionicAllocator::memalign(size_t align, size_t size) {
+  // The Bionic allocator only supports alignment up to one page, which is good
+  // enough for ELF TLS.
+  align = MIN(align, PAGE_SIZE);
+  align = MAX(align, 16);
+  if (!powerof2(align)) {
+    align = BIONIC_ROUND_UP_POWER_OF_2(align);
+  }
+  size = MAX(size, align);
+  return alloc_impl(align, size);
+}
+
+inline page_info* BionicAllocator::get_page_info_unchecked(void* ptr) {
+  uintptr_t header_page = PAGE_START(reinterpret_cast<size_t>(ptr) - kPageInfoSize);
+  return reinterpret_cast<page_info*>(header_page);
+}
+
+inline page_info* BionicAllocator::get_page_info(void* ptr) {
+  page_info* info = get_page_info_unchecked(ptr);
   if (memcmp(info->signature, kSignature, sizeof(kSignature)) != 0) {
     async_safe_fatal("invalid pointer %p (page signature mismatch)", ptr);
   }
@@ -308,7 +339,7 @@
   return info;
 }
 
-void* LinkerMemoryAllocator::realloc(void* ptr, size_t size) {
+void* BionicAllocator::realloc(void* ptr, size_t size) {
   if (ptr == nullptr) {
     return alloc(size);
   }
@@ -323,9 +354,9 @@
   size_t old_size = 0;
 
   if (info->type == kLargeObject) {
-    old_size = info->allocated_size - kPageInfoSize;
+    old_size = info->allocated_size - (static_cast<char*>(ptr) - reinterpret_cast<char*>(info));
   } else {
-    LinkerSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
+    BionicSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
     if (allocator != info->allocator_addr) {
       async_safe_fatal("invalid pointer %p (page signature mismatch)", ptr);
     }
@@ -343,7 +374,7 @@
   return ptr;
 }
 
-void LinkerMemoryAllocator::free(void* ptr) {
+void BionicAllocator::free(void* ptr) {
   if (ptr == nullptr) {
     return;
   }
@@ -353,7 +384,7 @@
   if (info->type == kLargeObject) {
     munmap(info, info->allocated_size);
   } else {
-    LinkerSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
+    BionicSmallObjectAllocator* allocator = get_small_object_allocator(info->type);
     if (allocator != info->allocator_addr) {
       async_safe_fatal("invalid pointer %p (invalid allocator address for the page)", ptr);
     }
@@ -362,7 +393,7 @@
   }
 }
 
-LinkerSmallObjectAllocator* LinkerMemoryAllocator::get_small_object_allocator(uint32_t type) {
+BionicSmallObjectAllocator* BionicAllocator::get_small_object_allocator(uint32_t type) {
   if (type < kSmallObjectMinSizeLog2 || type > kSmallObjectMaxSizeLog2) {
     async_safe_fatal("invalid type: %u", type);
   }
diff --git a/libc/bionic/bionic_elf_tls.cpp b/libc/bionic/bionic_elf_tls.cpp
index 4253b97..3fa5182 100644
--- a/libc/bionic/bionic_elf_tls.cpp
+++ b/libc/bionic/bionic_elf_tls.cpp
@@ -34,9 +34,22 @@
 #include <unistd.h>
 
 #include "private/ScopedRWLock.h"
+#include "private/ScopedSignalBlocker.h"
 #include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_tls.h"
+#include "pthread_internal.h"
+
+// Every call to __tls_get_addr needs to check the generation counter, so
+// accesses to the counter need to be as fast as possible. Keep a copy of it in
+// a hidden variable, which can be accessed without using the GOT. The linker
+// will update this variable when it updates its counter.
+//
+// To allow the linker to update this variable, libc.so's constructor passes its
+// address to the linker. To accommodate a possible __tls_get_addr call before
+// libc.so's constructor, this local copy is initialized to SIZE_MAX, forcing
+// __tls_get_addr to initially use the slow path.
+__LIBC_HIDDEN__ _Atomic(size_t) __libc_tls_generation_copy = SIZE_MAX;
 
 // Search for a TLS segment in the given phdr table. Returns true if it has a
 // TLS segment and false otherwise.
@@ -168,6 +181,7 @@
   // moving the initial part. If this locking is too slow, we can duplicate the
   // static part of the table.
   TlsModules& modules = __libc_shared_globals()->tls_modules;
+  ScopedSignalBlocker ssb;
   ScopedReadLock locker(&modules.rwlock);
 
   for (size_t i = 0; i < modules.module_count; ++i) {
@@ -187,3 +201,166 @@
            module.segment.init_size);
   }
 }
+
+static inline size_t dtv_size_in_bytes(size_t module_count) {
+  return sizeof(TlsDtv) + module_count * sizeof(void*);
+}
+
+// Calculates the number of module slots to allocate in a new DTV. For small
+// objects (up to 1KiB), the TLS allocator allocates memory in power-of-2 sizes,
+// so for better space usage, ensure that the DTV size (header + slots) is a
+// power of 2.
+//
+// The lock on TlsModules must be held.
+static size_t calculate_new_dtv_count() {
+  size_t loaded_cnt = __libc_shared_globals()->tls_modules.module_count;
+  size_t bytes = dtv_size_in_bytes(MAX(1, loaded_cnt));
+  if (!powerof2(bytes)) {
+    bytes = BIONIC_ROUND_UP_POWER_OF_2(bytes);
+  }
+  return (bytes - sizeof(TlsDtv)) / sizeof(void*);
+}
+
+// This function must be called with signals blocked and a write lock on
+// TlsModules held.
+static void update_tls_dtv(bionic_tcb* tcb) {
+  const TlsModules& modules = __libc_shared_globals()->tls_modules;
+  BionicAllocator& allocator = __libc_shared_globals()->tls_allocator;
+
+  // Use the generation counter from the shared globals instead of the local
+  // copy, which won't be initialized yet if __tls_get_addr is called before
+  // libc.so's constructor.
+  if (__get_tcb_dtv(tcb)->generation == atomic_load(&modules.generation)) {
+    return;
+  }
+
+  const size_t old_cnt = __get_tcb_dtv(tcb)->count;
+
+  // If the DTV isn't large enough, allocate a larger one. Because a signal
+  // handler could interrupt the fast path of __tls_get_addr, we don't free the
+  // old DTV. Instead, we add the old DTV to a list, then free all of a thread's
+  // DTVs at thread-exit. Each time the DTV is reallocated, its size at least
+  // doubles.
+  if (modules.module_count > old_cnt) {
+    size_t new_cnt = calculate_new_dtv_count();
+    TlsDtv* const old_dtv = __get_tcb_dtv(tcb);
+    TlsDtv* const new_dtv = static_cast<TlsDtv*>(allocator.alloc(dtv_size_in_bytes(new_cnt)));
+    memcpy(new_dtv, old_dtv, dtv_size_in_bytes(old_cnt));
+    new_dtv->count = new_cnt;
+    new_dtv->next = old_dtv;
+    __set_tcb_dtv(tcb, new_dtv);
+  }
+
+  TlsDtv* const dtv = __get_tcb_dtv(tcb);
+
+  const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  char* static_tls = reinterpret_cast<char*>(tcb) - layout.offset_bionic_tcb();
+
+  // Initialize static TLS modules and free unloaded modules.
+  for (size_t i = 0; i < dtv->count; ++i) {
+    if (i < modules.module_count) {
+      const TlsModule& mod = modules.module_table[i];
+      if (mod.static_offset != SIZE_MAX) {
+        dtv->modules[i] = static_tls + mod.static_offset;
+        continue;
+      }
+      if (mod.first_generation != kTlsGenerationNone &&
+          mod.first_generation <= dtv->generation) {
+        continue;
+      }
+    }
+    allocator.free(dtv->modules[i]);
+    dtv->modules[i] = nullptr;
+  }
+
+  dtv->generation = atomic_load(&modules.generation);
+}
+
+__attribute__((noinline)) static void* tls_get_addr_slow_path(const TlsIndex* ti) {
+  TlsModules& modules = __libc_shared_globals()->tls_modules;
+  bionic_tcb* tcb = __get_bionic_tcb();
+
+  // Block signals and lock TlsModules. We may need the allocator, so take
+  // a write lock.
+  ScopedSignalBlocker ssb;
+  ScopedWriteLock locker(&modules.rwlock);
+
+  update_tls_dtv(tcb);
+
+  TlsDtv* dtv = __get_tcb_dtv(tcb);
+  const size_t module_idx = __tls_module_id_to_idx(ti->module_id);
+  void* mod_ptr = dtv->modules[module_idx];
+  if (mod_ptr == nullptr) {
+    const TlsSegment& segment = modules.module_table[module_idx].segment;
+    mod_ptr = __libc_shared_globals()->tls_allocator.memalign(segment.alignment, segment.size);
+    if (segment.init_size > 0) {
+      memcpy(mod_ptr, segment.init_ptr, segment.init_size);
+    }
+    dtv->modules[module_idx] = mod_ptr;
+  }
+
+  return static_cast<char*>(mod_ptr) + ti->offset;
+}
+
+// Returns the address of a thread's TLS memory given a module ID and an offset
+// into that module's TLS segment. This function is called on every access to a
+// dynamic TLS variable on targets that don't use TLSDESC. arm64 uses TLSDESC,
+// so it only calls this function on a thread's first access to a module's TLS
+// segment.
+//
+// On most targets, this accessor function is __tls_get_addr and
+// TLS_GET_ADDR_CCONV is unset. 32-bit x86 uses ___tls_get_addr instead and a
+// regparm() calling convention.
+extern "C" void* TLS_GET_ADDR(const TlsIndex* ti) TLS_GET_ADDR_CCONV {
+  TlsDtv* dtv = __get_tcb_dtv(__get_bionic_tcb());
+
+  // TODO: See if we can use a relaxed memory ordering here instead.
+  size_t generation = atomic_load(&__libc_tls_generation_copy);
+  if (__predict_true(generation == dtv->generation)) {
+    void* mod_ptr = dtv->modules[__tls_module_id_to_idx(ti->module_id)];
+    if (__predict_true(mod_ptr != nullptr)) {
+      return static_cast<char*>(mod_ptr) + ti->offset;
+    }
+  }
+
+  return tls_get_addr_slow_path(ti);
+}
+
+// This function frees:
+//  - TLS modules referenced by the current DTV.
+//  - The list of DTV objects associated with the current thread.
+//
+// The caller must have already blocked signals.
+void __free_dynamic_tls(bionic_tcb* tcb) {
+  TlsModules& modules = __libc_shared_globals()->tls_modules;
+  BionicAllocator& allocator = __libc_shared_globals()->tls_allocator;
+
+  // If we didn't allocate any dynamic memory, skip out early without taking
+  // the lock.
+  TlsDtv* dtv = __get_tcb_dtv(tcb);
+  if (dtv->generation == kTlsGenerationNone) {
+    return;
+  }
+
+  // We need the write lock to use the allocator.
+  ScopedWriteLock locker(&modules.rwlock);
+
+  // First free everything in the current DTV.
+  for (size_t i = 0; i < dtv->count; ++i) {
+    if (i < modules.module_count && modules.module_table[i].static_offset != SIZE_MAX) {
+      // This module's TLS memory is allocated statically, so don't free it here.
+      continue;
+    }
+    allocator.free(dtv->modules[i]);
+  }
+
+  // Now free the thread's list of DTVs.
+  while (dtv->generation != kTlsGenerationNone) {
+    TlsDtv* next = dtv->next;
+    allocator.free(dtv);
+    dtv = next;
+  }
+
+  // Clear the DTV slot. The DTV must not be used again with this thread.
+  tcb->tls_slot(TLS_SLOT_DTV) = nullptr;
+}
diff --git a/libc/bionic/libc_init_dynamic.cpp b/libc/bionic/libc_init_dynamic.cpp
index af1b847..7140776 100644
--- a/libc/bionic/libc_init_dynamic.cpp
+++ b/libc/bionic/libc_init_dynamic.cpp
@@ -51,6 +51,7 @@
 #include <elf.h>
 #include "libc_init_common.h"
 
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_ssp.h"
@@ -82,6 +83,12 @@
   __libc_init_sysinfo();
 #endif
 
+  // Register libc.so's copy of the TLS generation variable so the linker can
+  // update it when it loads or unloads a shared object.
+  TlsModules& tls_modules = __libc_shared_globals()->tls_modules;
+  tls_modules.generation_libc_so = &__libc_tls_generation_copy;
+  __libc_tls_generation_copy = tls_modules.generation;
+
   __libc_init_globals();
   __libc_init_common();
 
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index 8fbc20e..514423d 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -92,19 +92,22 @@
   size_t phdr_ct = getauxval(AT_PHNUM);
 
   static TlsModule mod;
+  TlsModules& modules = __libc_shared_globals()->tls_modules;
   if (__bionic_get_tls_segment(phdr_start, phdr_ct, 0, &mod.segment)) {
     if (!__bionic_check_tls_alignment(&mod.segment.alignment)) {
       async_safe_fatal("error: TLS segment alignment in \"%s\" is not a power of 2: %zu\n",
                        progname, mod.segment.alignment);
     }
     mod.static_offset = layout.reserve_exe_segment_and_tcb(&mod.segment, progname);
-    mod.first_generation = 1;
-    __libc_shared_globals()->tls_modules.generation = 1;
-    __libc_shared_globals()->tls_modules.module_count = 1;
-    __libc_shared_globals()->tls_modules.module_table = &mod;
+    mod.first_generation = kTlsGenerationFirst;
+
+    modules.module_count = 1;
+    modules.module_table = &mod;
   } else {
     layout.reserve_exe_segment_and_tcb(nullptr, progname);
   }
+  // Enable the fast path in __tls_get_addr.
+  __libc_tls_generation_copy = modules.generation;
 
   layout.finish_layout();
 }
diff --git a/libc/bionic/malloc_common.cpp b/libc/bionic/malloc_common.cpp
index d1aa1ea..fc65e15 100644
--- a/libc/bionic/malloc_common.cpp
+++ b/libc/bionic/malloc_common.cpp
@@ -562,13 +562,7 @@
   }
 }
 
-static void* LoadSharedLibrary(const char* shared_lib, const char* prefix, MallocDispatch* dispatch_table) {
-  void* impl_handle = dlopen(shared_lib, RTLD_NOW | RTLD_LOCAL);
-  if (impl_handle == nullptr) {
-    error_log("%s: Unable to open shared library %s: %s", getprogname(), shared_lib, dlerror());
-    return nullptr;
-  }
-
+static bool InitSharedLibrary(void* impl_handle, const char* shared_lib, const char* prefix, MallocDispatch* dispatch_table) {
   static constexpr const char* names[] = {
     "initialize",
     "finalize",
@@ -583,48 +577,61 @@
     g_functions[i] = dlsym(impl_handle, symbol);
     if (g_functions[i] == nullptr) {
       error_log("%s: %s routine not found in %s", getprogname(), symbol, shared_lib);
-      dlclose(impl_handle);
       ClearGlobalFunctions();
-      return nullptr;
+      return false;
     }
   }
 
   if (!InitMallocFunctions(impl_handle, dispatch_table, prefix)) {
-    dlclose(impl_handle);
     ClearGlobalFunctions();
+    return false;
+  }
+  return true;
+}
+
+static void* LoadSharedLibrary(const char* shared_lib, const char* prefix, MallocDispatch* dispatch_table) {
+  void* impl_handle = dlopen(shared_lib, RTLD_NOW | RTLD_LOCAL);
+  if (impl_handle == nullptr) {
+    error_log("%s: Unable to open shared library %s: %s", getprogname(), shared_lib, dlerror());
     return nullptr;
   }
 
+  if (!InitSharedLibrary(impl_handle, shared_lib, prefix, dispatch_table)) {
+    dlclose(impl_handle);
+    impl_handle = nullptr;
+  }
+
   return impl_handle;
 }
 
 // A function pointer to heapprofds init function. Used to re-initialize
 // heapprofd. This will start a new profiling session and tear down the old
 // one in case it is still active.
-static _Atomic init_func_t g_heapprofd_init_func = nullptr;
+static _Atomic (void*) g_heapprofd_handle = nullptr;
 
 static void install_hooks(libc_globals* globals, const char* options,
                           const char* prefix, const char* shared_lib) {
-  init_func_t init_func = atomic_load(&g_heapprofd_init_func);
-  if (init_func != nullptr) {
-    init_func(&__libc_malloc_default_dispatch, &gMallocLeakZygoteChild, options);
-    info_log("%s: malloc %s re-enabled", getprogname(), prefix);
-    return;
-  }
-
   MallocDispatch dispatch_table;
-  void* impl_handle = LoadSharedLibrary(shared_lib, prefix, &dispatch_table);
-  if (impl_handle == nullptr) {
-    return;
+
+  void* impl_handle = atomic_load(&g_heapprofd_handle);
+  if (impl_handle != nullptr) {
+    if (!InitSharedLibrary(impl_handle, shared_lib, prefix, &dispatch_table)) {
+      return;
+    }
+  } else {
+    impl_handle = LoadSharedLibrary(shared_lib, prefix, &dispatch_table);
+    if (impl_handle == nullptr) {
+      return;
+    }
   }
-  init_func = reinterpret_cast<init_func_t>(g_functions[FUNC_INITIALIZE]);
+  init_func_t init_func = reinterpret_cast<init_func_t>(g_functions[FUNC_INITIALIZE]);
   if (!init_func(&__libc_malloc_default_dispatch, &gMallocLeakZygoteChild, options)) {
+    error_log("%s: failed to enable malloc %s", getprogname(), prefix);
     dlclose(impl_handle);
     ClearGlobalFunctions();
     return;
   }
 
-  atomic_store(&g_heapprofd_init_func, init_func);
   // We assign free  first explicitly to prevent the case where we observe a
   // alloc, but miss the corresponding free because of initialization order.
   //
@@ -636,6 +643,7 @@
   // _Atomic. Assigning to an _Atomic is an atomic_store operation.
   // The assignment is done in declaration order.
   globals->malloc_dispatch = dispatch_table;
+  atomic_store(&g_heapprofd_handle, impl_handle);
 
   info_log("%s: malloc %s enabled", getprogname(), prefix);
 
@@ -761,6 +769,18 @@
 // =============================================================================
 
 #if !defined(LIBC_STATIC)
+bool MallocDispatchReset() {
+  if (!atomic_exchange(&g_heapprofd_init_in_progress, true)) {
+    __libc_globals.mutate([](libc_globals* globals) {
+      globals->malloc_dispatch = __libc_malloc_default_dispatch;
+    });
+    atomic_store(&g_heapprofd_init_in_progress, false);
+    return true;
+  }
+  errno = EAGAIN;
+  return false;
+}
+
 // Marks this process as a profileable zygote child.
 bool HandleInitZygoteChildProfiling() {
   atomic_store_explicit(&gMallocZygoteChildProfileable, true,
@@ -777,6 +797,10 @@
 
 #else
 
+bool MallocDispatchReset() {
+  return true;
+}
+
 bool HandleInitZygoteChildProfiling() {
   return true;
 }
@@ -791,6 +815,13 @@
     }
     return HandleInitZygoteChildProfiling();
   }
+  if (opcode == M_RESET_HOOKS) {
+    if (arg != nullptr || arg_size != 0) {
+      errno = EINVAL;
+      return false;
+    }
+    return MallocDispatchReset();
+  }
 
   errno = ENOTSUP;
   return false;
diff --git a/libc/bionic/ndk_cruft.cpp b/libc/bionic/ndk_cruft.cpp
index dbacf18..2c3299f 100644
--- a/libc/bionic/ndk_cruft.cpp
+++ b/libc/bionic/ndk_cruft.cpp
@@ -355,9 +355,14 @@
   return malloc(size);
 }
 
+} // extern "C"
+
 #define __get_thread __real_get_thread
 #include "pthread_internal.h"
 #undef __get_thread
+
+extern "C" {
+
 // Various third-party apps contain a backport of our pthread_rwlock implementation that uses this.
 pthread_internal_t* __get_thread() {
   return __real_get_thread();
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 31e0378..b8784b8 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -44,6 +44,7 @@
 #include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_ssp.h"
+#include "private/bionic_systrace.h"
 #include "private/bionic_tls.h"
 #include "private/ErrnoRestorer.h"
 
@@ -70,6 +71,14 @@
   tcb->tls_slot(TLS_SLOT_STACK_GUARD) = reinterpret_cast<void*>(__stack_chk_guard);
 }
 
+__attribute__((no_stack_protector))
+void __init_tcb_dtv(bionic_tcb* tcb) {
+  // Initialize the DTV slot to a statically-allocated empty DTV. The first
+  // access to a dynamic TLS variable allocates a new DTV.
+  static const TlsDtv zero_dtv = {};
+  __set_tcb_dtv(tcb, const_cast<TlsDtv*>(&zero_dtv));
+}
+
 void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
   tcb->thread()->bionic_tls = tls;
   tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;
@@ -291,6 +300,7 @@
   // Initialize TLS memory.
   __init_static_tls(mapping.static_tls);
   __init_tcb(tcb, thread);
+  __init_tcb_dtv(tcb);
   __init_tcb_stack_guard(tcb);
   __init_bionic_tls_ptrs(tcb, tls);
 
@@ -338,6 +348,7 @@
   ErrnoRestorer errno_restorer;
 
   pthread_attr_t thread_attr;
+  ScopedTrace trace("pthread_create");
   if (attr == nullptr) {
     pthread_attr_init(&thread_attr);
   } else {
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index 84ea2e6..3b873b3 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -98,15 +98,22 @@
     thread->alternate_signal_stack = nullptr;
   }
 
+  ThreadJoinState old_state = THREAD_NOT_JOINED;
+  while (old_state == THREAD_NOT_JOINED &&
+         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
+  }
+
+  // We don't want to take a signal after unmapping the stack, the shadow call
+  // stack, or dynamic TLS memory.
+  ScopedSignalBlocker ssb;
+
 #ifdef __aarch64__
   // Free the shadow call stack and guard pages.
   munmap(thread->shadow_call_stack_guard_region, SCS_GUARD_REGION_SIZE);
 #endif
 
-  ThreadJoinState old_state = THREAD_NOT_JOINED;
-  while (old_state == THREAD_NOT_JOINED &&
-         !atomic_compare_exchange_weak(&thread->join_state, &old_state, THREAD_EXITED_NOT_JOINED)) {
-  }
+  // Free the ELF TLS DTV and all dynamically-allocated ELF TLS memory.
+  __free_dynamic_tls(__get_bionic_tcb());
 
   if (old_state == THREAD_DETACHED) {
     // The thread is detached, no one will use pthread_internal_t after pthread_exit.
@@ -121,10 +128,6 @@
     if (thread->mmap_size != 0) {
       // We need to free mapped space for detached threads when they exit.
       // That's not something we can do in C.
-
-      // We don't want to take a signal after we've unmapped the stack.
-      // That's one last thing we can do before dropping to assembler.
-      ScopedSignalBlocker ssb;
       __hwasan_thread_exit();
       _exit_with_stack_teardown(thread->mmap_base, thread->mmap_size);
     }
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 27ab3df..cbcdadf 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -38,6 +38,7 @@
 #define __hwasan_thread_exit()
 #endif
 
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_lock.h"
 #include "private/bionic_tls.h"
 
@@ -154,6 +155,7 @@
 
 __LIBC_HIDDEN__ void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread);
 __LIBC_HIDDEN__ void __init_tcb_stack_guard(bionic_tcb* tcb);
+__LIBC_HIDDEN__ void __init_tcb_dtv(bionic_tcb* tcb);
 __LIBC_HIDDEN__ void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls);
 __LIBC_HIDDEN__ bionic_tls* __allocate_temp_bionic_tls();
 __LIBC_HIDDEN__ void __free_temp_bionic_tls(bionic_tls* tls);
@@ -179,6 +181,15 @@
   return *static_cast<bionic_tls*>(__get_tls()[TLS_SLOT_BIONIC_TLS]);
 }
 
+static inline __always_inline TlsDtv* __get_tcb_dtv(bionic_tcb* tcb) {
+  uintptr_t dtv_slot = reinterpret_cast<uintptr_t>(tcb->tls_slot(TLS_SLOT_DTV));
+  return reinterpret_cast<TlsDtv*>(dtv_slot - offsetof(TlsDtv, generation));
+}
+
+static inline void __set_tcb_dtv(bionic_tcb* tcb, TlsDtv* val) {
+  tcb->tls_slot(TLS_SLOT_DTV) = &val->generation;
+}
+
 extern "C" __LIBC_HIDDEN__ int __set_tls(void* ptr);
 
 __LIBC_HIDDEN__ void pthread_key_clean_all(void);
diff --git a/libc/bionic/pthread_join.cpp b/libc/bionic/pthread_join.cpp
index 9aad458..8e4ca59 100644
--- a/libc/bionic/pthread_join.cpp
+++ b/libc/bionic/pthread_join.cpp
@@ -30,10 +30,12 @@
 
 #include "private/bionic_defs.h"
 #include "private/bionic_futex.h"
+#include "private/bionic_systrace.h"
 #include "pthread_internal.h"
 
 __BIONIC_WEAK_FOR_NATIVE_BRIDGE
 int pthread_join(pthread_t t, void** return_value) {
+  ScopedTrace trace("pthread_join");
   if (t == pthread_self()) {
     return EDEADLK;
   }
diff --git a/libc/bionic/sigaction.cpp b/libc/bionic/sigaction.cpp
index 42dcccd..96e6f3c 100644
--- a/libc/bionic/sigaction.cpp
+++ b/libc/bionic/sigaction.cpp
@@ -43,7 +43,8 @@
   if (bionic_new_action != nullptr) {
     kernel_new_action.sa_flags = bionic_new_action->sa_flags;
     kernel_new_action.sa_handler = bionic_new_action->sa_handler;
-    kernel_new_action.sa_mask = filter_reserved_signals(bionic_new_action->sa_mask, SIG_SETMASK);
+    // Don't filter signals here; if the caller asked for everything to be blocked, we should obey.
+    kernel_new_action.sa_mask = bionic_new_action->sa_mask;
 #if defined(SA_RESTORER)
     kernel_new_action.sa_restorer = bionic_new_action->sa_restorer;
 #if defined(__aarch64__)
@@ -95,6 +96,7 @@
 #if defined(SA_RESTORER)
     kernel_new.sa_restorer = bionic_new->sa_restorer;
 #endif
+    // Don't filter signals here; if the caller asked for everything to be blocked, we should obey.
     memcpy(&kernel_new.sa_mask, &bionic_new->sa_mask, sizeof(bionic_new->sa_mask));
   }
 
@@ -122,7 +124,8 @@
       kernel_new.sa_restorer = (kernel_new.sa_flags & SA_SIGINFO) ? &__restore_rt : &__restore;
     }
 #endif
-    kernel_new.sa_mask = filter_reserved_signals(kernel_new.sa_mask, SIG_SETMASK);
+    // Don't filter signals here; if the caller asked for everything to be blocked, we should obey.
+    kernel_new.sa_mask = kernel_new.sa_mask;
   }
 
   return __rt_sigaction(signal,
diff --git a/libc/libc.map.txt b/libc/libc.map.txt
index 8d67b9e..6a6ea7d 100644
--- a/libc/libc.map.txt
+++ b/libc/libc.map.txt
@@ -1446,8 +1446,10 @@
 
 LIBC_Q { # introduced=Q
   global:
+    ___tls_get_addr; # x86
     __aeabi_read_tp; # arm
     __res_randomid;
+    __tls_get_addr; # arm x86_64
     android_fdsan_close_with_tag;
     android_fdsan_create_owner_tag;
     android_fdsan_exchange_owner_tag;
diff --git a/linker/linker_allocator.h b/libc/private/bionic_allocator.h
similarity index 80%
rename from linker/linker_allocator.h
rename to libc/private/bionic_allocator.h
index 44a8b0d..c705ce4 100644
--- a/linker/linker_allocator.h
+++ b/libc/private/bionic_allocator.h
@@ -36,13 +36,11 @@
 #include <stddef.h>
 #include <unistd.h>
 
-#include <async_safe/log.h>
-
 const uint32_t kSmallObjectMaxSizeLog2 = 10;
 const uint32_t kSmallObjectMinSizeLog2 = 4;
 const uint32_t kSmallObjectAllocatorsCount = kSmallObjectMaxSizeLog2 - kSmallObjectMinSizeLog2 + 1;
 
-class LinkerSmallObjectAllocator;
+class BionicSmallObjectAllocator;
 
 // This structure is placed at the beginning of each addressable page
 // and has all information we need to find the corresponding memory allocator.
@@ -53,7 +51,7 @@
     // we use allocated_size for large objects allocator
     size_t allocated_size;
     // and allocator_addr for small ones.
-    LinkerSmallObjectAllocator* allocator_addr;
+    BionicSmallObjectAllocator* allocator_addr;
   };
 };
 
@@ -63,14 +61,14 @@
 };
 
 // This structure is placed at the beginning of each page managed by
-// LinkerSmallObjectAllocator.  Note that a page_info struct is expected at the
+// BionicSmallObjectAllocator.  Note that a page_info struct is expected at the
 // beginning of each page as well, and therefore this structure contains a
 // page_info as its *first* field.
 struct small_object_page_info {
   page_info info;  // Must be the first field.
 
   // Doubly linked list for traversing all pages allocated by a
-  // LinkerSmallObjectAllocator.
+  // BionicSmallObjectAllocator.
   small_object_page_info* next_page;
   small_object_page_info* prev_page;
 
@@ -81,9 +79,9 @@
   size_t free_blocks_cnt;
 };
 
-class LinkerSmallObjectAllocator {
+class BionicSmallObjectAllocator {
  public:
-  LinkerSmallObjectAllocator(uint32_t type, size_t block_size);
+  BionicSmallObjectAllocator(uint32_t type, size_t block_size);
   void* alloc();
   void free(void* ptr);
 
@@ -103,20 +101,23 @@
   small_object_page_info* page_list_;
 };
 
-class LinkerMemoryAllocator {
+class BionicAllocator {
  public:
-  constexpr LinkerMemoryAllocator() : allocators_(nullptr), allocators_buf_() {}
+  constexpr BionicAllocator() : allocators_(nullptr), allocators_buf_() {}
   void* alloc(size_t size);
+  void* memalign(size_t align, size_t size);
 
   // Note that this implementation of realloc never shrinks allocation
   void* realloc(void* ptr, size_t size);
   void free(void* ptr);
  private:
-  void* alloc_mmap(size_t size);
-  page_info* get_page_info(void* ptr);
-  LinkerSmallObjectAllocator* get_small_object_allocator(uint32_t type);
+  void* alloc_mmap(size_t align, size_t size);
+  inline void* alloc_impl(size_t align, size_t size);
+  inline page_info* get_page_info_unchecked(void* ptr);
+  inline page_info* get_page_info(void* ptr);
+  BionicSmallObjectAllocator* get_small_object_allocator(uint32_t type);
   void initialize_allocators();
 
-  LinkerSmallObjectAllocator* allocators_;
-  uint8_t allocators_buf_[sizeof(LinkerSmallObjectAllocator)*kSmallObjectAllocatorsCount];
+  BionicSmallObjectAllocator* allocators_;
+  uint8_t allocators_buf_[sizeof(BionicSmallObjectAllocator)*kSmallObjectAllocatorsCount];
 };
diff --git a/libc/private/bionic_elf_tls.h b/libc/private/bionic_elf_tls.h
index 09e1958..fa1af76 100644
--- a/libc/private/bionic_elf_tls.h
+++ b/libc/private/bionic_elf_tls.h
@@ -34,6 +34,8 @@
 #include <stdint.h>
 #include <sys/cdefs.h>
 
+__LIBC_HIDDEN__ extern _Atomic(size_t) __libc_tls_generation_copy;
+
 struct TlsSegment {
   size_t size = 0;
   size_t alignment = 1;
@@ -84,6 +86,16 @@
   size_t round_up_with_overflow_check(size_t value, size_t alignment);
 };
 
+static constexpr size_t kTlsGenerationNone = 0;
+static constexpr size_t kTlsGenerationFirst = 1;
+
+// The first ELF TLS module has ID 1. Zero is reserved for the first word of
+// the DTV, a generation count. Unresolved weak symbols also use module ID 0.
+static constexpr size_t kTlsUninitializedModuleId = 0;
+
+static inline size_t __tls_module_id_to_idx(size_t id) { return id - 1; }
+static inline size_t __tls_module_idx_to_id(size_t idx) { return idx + 1; }
+
 // A descriptor for a single ELF TLS module.
 struct TlsModule {
   TlsSegment segment;
@@ -93,7 +105,7 @@
 
   // The generation in which this module was loaded. Dynamic TLS lookups use
   // this field to detect when a module has been unloaded.
-  size_t first_generation = 0;
+  size_t first_generation = kTlsGenerationNone;
 
   // Used by the dynamic linker to track the associated soinfo* object.
   void* soinfo_ptr = nullptr;
@@ -105,9 +117,10 @@
 struct TlsModules {
   constexpr TlsModules() {}
 
-  // A generation counter. The value is incremented each time an solib is loaded
-  // or unloaded.
-  _Atomic(size_t) generation = 0;
+  // A pointer to the TLS generation counter in libc.so. The counter is
+  // incremented each time an solib is loaded or unloaded.
+  _Atomic(size_t) generation = kTlsGenerationFirst;
+  _Atomic(size_t) *generation_libc_so = nullptr;
 
   // Access to the TlsModule[] table requires taking this lock.
   pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;
@@ -119,3 +132,46 @@
 };
 
 void __init_static_tls(void* static_tls);
+
+// Dynamic Thread Vector. Each thread has a different DTV. For each module
+// (executable or solib), the DTV has a pointer to that module's TLS memory. The
+// DTV is initially empty and is allocated on-demand. It grows as more modules
+// are dlopen'ed. See https://www.akkadia.org/drepper/tls.pdf.
+//
+// The layout of the DTV is specified in various documents, but it is not part
+// of Bionic's public ABI. A compiler can't generate code to access it directly,
+// because it can't access libc's global generation counter.
+struct TlsDtv {
+  // Number of elements in this object's modules field.
+  size_t count;
+
+  // A pointer to an older TlsDtv object that should be freed when the thread
+  // exits. The objects aren't immediately freed because a DTV could be
+  // reallocated by a signal handler that interrupted __tls_get_addr's fast
+  // path.
+  TlsDtv* next;
+
+  // The DTV slot points at this field, which allows omitting an add instruction
+  // on the fast path for a TLS lookup. The arm64 tlsdesc_resolver.S depends on
+  // the layout of fields past this point.
+  size_t generation;
+  void* modules[];
+};
+
+struct TlsIndex {
+  size_t module_id;
+  size_t offset;
+};
+
+#if defined(__i386__)
+#define TLS_GET_ADDR_CCONV __attribute__((regparm(1)))
+#define TLS_GET_ADDR ___tls_get_addr
+#else
+#define TLS_GET_ADDR_CCONV
+#define TLS_GET_ADDR __tls_get_addr
+#endif
+
+extern "C" void* TLS_GET_ADDR(const TlsIndex* ti) TLS_GET_ADDR_CCONV;
+
+struct bionic_tcb;
+void __free_dynamic_tls(bionic_tcb* tcb);
diff --git a/libc/private/bionic_globals.h b/libc/private/bionic_globals.h
index 4d40476..21a2a24 100644
--- a/libc/private/bionic_globals.h
+++ b/libc/private/bionic_globals.h
@@ -33,6 +33,7 @@
 #include <link.h>
 #include <pthread.h>
 
+#include "private/bionic_allocator.h"
 #include "private/bionic_elf_tls.h"
 #include "private/bionic_fdsan.h"
 #include "private/bionic_malloc_dispatch.h"
@@ -70,6 +71,7 @@
 
   StaticTlsLayout static_tls_layout;
   TlsModules tls_modules;
+  BionicAllocator tls_allocator;
 
   // Values passed from the linker to libc.so.
   const char* init_progname = nullptr;
diff --git a/libc/private/bionic_malloc.h b/libc/private/bionic_malloc.h
index a9fa22d..5f4a75d 100644
--- a/libc/private/bionic_malloc.h
+++ b/libc/private/bionic_malloc.h
@@ -32,11 +32,13 @@
 
 // Opcodes for android_mallopt.
 
-// Marks the calling process as a profileable zygote child, possibly
-// initializing profiling infrastructure.
 enum {
+  // Marks the calling process as a profileable zygote child, possibly
+  // initializing profiling infrastructure.
   M_INIT_ZYGOTE_CHILD_PROFILING = 1,
 #define M_INIT_ZYGOTE_CHILD_PROFILING M_INIT_ZYGOTE_CHILD_PROFILING
+  M_RESET_HOOKS = 2,
+#define M_RESET_HOOKS M_RESET_HOOKS
 };
 
 // Manipulates bionic-specific handling of memory allocation APIs such as
diff --git a/libc/symbol_ordering b/libc/symbol_ordering
index 5b365f0..b672b35 100644
--- a/libc/symbol_ordering
+++ b/libc/symbol_ordering
@@ -86,7 +86,6 @@
 __realloc_hook
 __free_hook
 __memalign_hook
-_ZL21g_heapprofd_init_func
 je_malloc_conf
 malloc_initializer
 a0
diff --git a/linker/Android.bp b/linker/Android.bp
index 4991935..5ae09ba 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -4,7 +4,6 @@
     recovery_available: true,
 
     srcs: [
-        "linker_allocator.cpp",
         "linker_memory.cpp",
     ],
     cflags: [
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 412b8eb..4dcdf7e 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -2722,7 +2722,11 @@
     soinfo* lsi = nullptr;
 
     if (sym == 0) {
-      // Do nothing.
+      // By convention in ld.bfd and lld, an omitted symbol on a TLS relocation
+      // is a reference to the current module.
+      if (is_tls_reloc(type)) {
+        lsi = this;
+      }
     } else if (ELF_ST_BIND(symtab_[sym].st_info) == STB_LOCAL && is_tls_reloc(type)) {
       // In certain situations, the Gold linker accesses a TLS symbol using a
       // relocation to an STB_LOCAL symbol in .dynsym of either STT_SECTION or
@@ -2830,6 +2834,11 @@
                    sym_name, get_realpath());
             return false;
           }
+          if (lsi->get_tls() == nullptr) {
+            DL_ERR("TLS relocation refers to symbol \"%s\" in solib \"%s\" with no TLS segment",
+                   sym_name, lsi->get_realpath());
+            return false;
+          }
           sym_addr = s->st_value;
         } else {
           if (ELF_ST_TYPE(s->st_info) == STT_TLS) {
@@ -2916,16 +2925,12 @@
         MARK(rel->r_offset);
         {
           ElfW(Addr) tpoff = 0;
-          if (sym == 0) {
-            // By convention in ld.bfd and lld, an omitted symbol
-            // (ELFW(R_SYM) == 0) refers to the local module.
-            lsi = this;
-          }
           if (lsi == nullptr) {
             // Unresolved weak relocation. Leave tpoff at 0 to resolve
             // &weak_tls_symbol to __get_tls().
-          } else if (soinfo_tls* lsi_tls = lsi->get_tls()) {
-            const TlsModule& mod = get_tls_module(lsi_tls->module_id);
+          } else {
+            CHECK(lsi->get_tls() != nullptr); // We rejected a missing TLS segment above.
+            const TlsModule& mod = get_tls_module(lsi->get_tls()->module_id);
             if (mod.static_offset != SIZE_MAX) {
               tpoff += mod.static_offset - tls_tp_base;
             } else {
@@ -2933,10 +2938,6 @@
                      sym_name, lsi->get_realpath(), get_realpath());
               return false;
             }
-          } else {
-            DL_ERR("TLS relocation refers to symbol \"%s\" in solib \"%s\" with no TLS segment",
-                   sym_name, lsi->get_realpath());
-            return false;
           }
           tpoff += sym_addr + addend;
           TRACE_TYPE(RELO, "RELO TLS_TPREL %16p <- %16p %s\n",
@@ -2946,6 +2947,35 @@
         }
         break;
 
+#if !defined(__aarch64__)
+      // Omit support for DTPMOD/DTPREL on arm64, at least until
+      // http://b/123385182 is fixed. arm64 uses TLSDESC instead.
+      case R_GENERIC_TLS_DTPMOD:
+        count_relocation(kRelocRelative);
+        MARK(rel->r_offset);
+        {
+          size_t module_id = 0;
+          if (lsi == nullptr) {
+            // Unresolved weak relocation. Evaluate the module ID to 0.
+          } else {
+            CHECK(lsi->get_tls() != nullptr); // We rejected a missing TLS segment above.
+            module_id = lsi->get_tls()->module_id;
+          }
+          TRACE_TYPE(RELO, "RELO TLS_DTPMOD %16p <- %zu %s\n",
+                     reinterpret_cast<void*>(reloc), module_id, sym_name);
+          *reinterpret_cast<ElfW(Addr)*>(reloc) = module_id;
+        }
+        break;
+      case R_GENERIC_TLS_DTPREL:
+        count_relocation(kRelocRelative);
+        MARK(rel->r_offset);
+        TRACE_TYPE(RELO, "RELO TLS_DTPREL %16p <- %16p %s\n",
+                   reinterpret_cast<void*>(reloc),
+                   reinterpret_cast<void*>(sym_addr + addend), sym_name);
+        *reinterpret_cast<ElfW(Addr)*>(reloc) = sym_addr + addend;
+        break;
+#endif  // !defined(__aarch64__)
+
 #if defined(__aarch64__)
       case R_AARCH64_ABS64:
         count_relocation(kRelocAbsolute);
@@ -3528,13 +3558,14 @@
         // this is parsed after we have strtab initialized (see below).
         break;
 
+      case DT_TLSDESC_GOT:
+      case DT_TLSDESC_PLT:
+        // These DT entries are used for lazy TLSDESC relocations. Bionic
+        // resolves everything eagerly, so these can be ignored.
+        break;
+
       default:
         if (!relocating_linker) {
-          if (d->d_tag == DT_TLSDESC_GOT || d->d_tag == DT_TLSDESC_PLT) {
-            DL_ERR("unsupported ELF TLS DT entry in \"%s\"", get_realpath());
-            return false;
-          }
-
           const char* tag_name;
           if (d->d_tag == DT_RPATH) {
             tag_name = "DT_RPATH";
diff --git a/linker/linker_block_allocator.cpp b/linker/linker_block_allocator.cpp
index dca944e..27f1e38 100644
--- a/linker/linker_block_allocator.cpp
+++ b/linker/linker_block_allocator.cpp
@@ -33,6 +33,9 @@
 #include <sys/prctl.h>
 #include <unistd.h>
 
+static constexpr size_t kAllocateSize = PAGE_SIZE * 100;
+static_assert(kAllocateSize % PAGE_SIZE == 0, "Invalid kAllocateSize.");
+
 // the multiplier should be power of 2
 static constexpr size_t round_up(size_t size, size_t multiplier) {
   return (size + (multiplier - 1)) & ~(multiplier-1);
@@ -40,7 +43,7 @@
 
 struct LinkerBlockAllocatorPage {
   LinkerBlockAllocatorPage* next;
-  uint8_t bytes[PAGE_SIZE - 16] __attribute__((aligned(16)));
+  uint8_t bytes[kAllocateSize - 16] __attribute__((aligned(16)));
 };
 
 struct FreeBlockInfo {
@@ -52,7 +55,8 @@
   : block_size_(
       round_up(block_size < sizeof(FreeBlockInfo) ? sizeof(FreeBlockInfo) : block_size, 16)),
     page_list_(nullptr),
-    free_block_list_(nullptr)
+    free_block_list_(nullptr),
+    allocated_(0)
 {}
 
 void* LinkerBlockAllocator::alloc() {
@@ -73,6 +77,8 @@
 
   memset(block_info, 0, block_size_);
 
+  ++allocated_;
+
   return block_info;
 }
 
@@ -101,32 +107,37 @@
   block_info->num_free_blocks = 1;
 
   free_block_list_ = block_info;
+
+  --allocated_;
+  if (allocated_ == 0) {
+    free_all_pages();
+  }
 }
 
 void LinkerBlockAllocator::protect_all(int prot) {
   for (LinkerBlockAllocatorPage* page = page_list_; page != nullptr; page = page->next) {
-    if (mprotect(page, PAGE_SIZE, prot) == -1) {
+    if (mprotect(page, kAllocateSize, prot) == -1) {
       abort();
     }
   }
 }
 
 void LinkerBlockAllocator::create_new_page() {
-  static_assert(sizeof(LinkerBlockAllocatorPage) == PAGE_SIZE,
+  static_assert(sizeof(LinkerBlockAllocatorPage) == kAllocateSize,
                 "Invalid sizeof(LinkerBlockAllocatorPage)");
 
   LinkerBlockAllocatorPage* page = reinterpret_cast<LinkerBlockAllocatorPage*>(
-      mmap(nullptr, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0));
+      mmap(nullptr, kAllocateSize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0));
 
   if (page == MAP_FAILED) {
     abort(); // oom
   }
 
-  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, page, PAGE_SIZE, "linker_alloc");
+  prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, page, kAllocateSize, "linker_alloc");
 
   FreeBlockInfo* first_block = reinterpret_cast<FreeBlockInfo*>(page->bytes);
   first_block->next_block = free_block_list_;
-  first_block->num_free_blocks = (PAGE_SIZE - sizeof(LinkerBlockAllocatorPage*))/block_size_;
+  first_block->num_free_blocks = (kAllocateSize - sizeof(LinkerBlockAllocatorPage*))/block_size_;
 
   free_block_list_ = first_block;
 
@@ -142,7 +153,7 @@
   LinkerBlockAllocatorPage* page = page_list_;
   while (page != nullptr) {
     const uint8_t* page_ptr = reinterpret_cast<const uint8_t*>(page);
-    if (block >= (page_ptr + sizeof(page->next)) && block < (page_ptr + PAGE_SIZE)) {
+    if (block >= (page_ptr + sizeof(page->next)) && block < (page_ptr + kAllocateSize)) {
       return page;
     }
 
@@ -151,3 +162,18 @@
 
   abort();
 }
+
+void LinkerBlockAllocator::free_all_pages() {
+  if (allocated_) {
+    abort();
+  }
+
+  LinkerBlockAllocatorPage* page = page_list_;
+  while (page) {
+    LinkerBlockAllocatorPage* next = page->next;
+    munmap(page, kAllocateSize);
+    page = next;
+  }
+  page_list_ = nullptr;
+  free_block_list_ = nullptr;
+}
diff --git a/linker/linker_block_allocator.h b/linker/linker_block_allocator.h
index bd44fc8..458d092 100644
--- a/linker/linker_block_allocator.h
+++ b/linker/linker_block_allocator.h
@@ -53,10 +53,12 @@
  private:
   void create_new_page();
   LinkerBlockAllocatorPage* find_page(void* block);
+  void free_all_pages();
 
   size_t block_size_;
   LinkerBlockAllocatorPage* page_list_;
   void* free_block_list_;
+  size_t allocated_;
 
   DISALLOW_COPY_AND_ASSIGN(LinkerBlockAllocator);
 };
@@ -66,17 +68,18 @@
  * of a single fixed-size type. Allocations are backed by page-sized private
  * anonymous mmaps.
  *
- * The differences between this allocator and LinkerMemoryAllocator are:
- * 1. This allocator manages space more efficiently. LinkerMemoryAllocator
- *    operates in power-of-two sized blocks up to 1k, when this implementation
- *    splits the page to aligned size of structure; For example for structures
- *    with size 513 this allocator will use 516 (520 for lp64) bytes of data
- *    where generalized implementation is going to use 1024 sized blocks.
+ * The differences between this allocator and BionicAllocator are:
+ * 1. This allocator manages space more efficiently. BionicAllocator operates in
+ *    power-of-two sized blocks up to 1k, when this implementation splits the
+ *    page to aligned size of structure; For example for structures with size
+ *    513 this allocator will use 516 (520 for lp64) bytes of data where
+ *    generalized implementation is going to use 1024 sized blocks.
  *
- * 2. This allocator does not munmap allocated memory, where LinkerMemoryAllocator does.
+ * 2. Unless all allocated memory is freed, this allocator does not munmap
+ *    allocated memory, where BionicAllocator does.
  *
- * 3. This allocator provides mprotect services to the user, where LinkerMemoryAllocator
- *    always treats it's memory as READ|WRITE.
+ * 3. This allocator provides mprotect services to the user, where BionicAllocator
+ *    always treats its memory as READ|WRITE.
  */
 template<typename T>
 class LinkerTypeAllocator {
diff --git a/linker/linker_config.cpp b/linker/linker_config.cpp
index 0e75c85..5a728d3 100644
--- a/linker/linker_config.cpp
+++ b/linker/linker_config.cpp
@@ -41,6 +41,7 @@
 
 #include <limits.h>
 #include <stdlib.h>
+#include <unistd.h>
 
 #include <string>
 #include <unordered_map>
@@ -238,9 +239,17 @@
       // If the path can be resolved, resolve it
       char buf[PATH_MAX];
       std::string resolved_path;
-      if (realpath(value.c_str(), buf)) {
+      if (access(value.c_str(), R_OK) != 0) {
+        if (errno == ENOENT) {
+          // no need to test for non-existing path. skip.
+          continue;
+        }
+        // If not accessible, don't call realpath as it will just cause
+        // SELinux denial spam. Use the path unresolved.
+        resolved_path = value;
+      } else if (realpath(value.c_str(), buf)) {
         resolved_path = buf;
-      } else if (errno != ENOENT)  {
+      } else {
         // realpath is expected to fail with EPERM in some situations, so log
         // the failure with INFO rather than DL_WARN. e.g. A binary in
         // /data/local/tmp may attempt to stat /postinstall. See
@@ -251,9 +260,6 @@
              value.c_str(),
              strerror(errno));
         resolved_path = value;
-      } else {
-        // ENOENT: no need to test if binary is under the path
-        continue;
       }
 
       if (file_is_under_dir(binary_realpath, resolved_path)) {
diff --git a/linker/linker_memory.cpp b/linker/linker_memory.cpp
index f2cce01..ce29997 100644
--- a/linker/linker_memory.cpp
+++ b/linker/linker_memory.cpp
@@ -26,7 +26,7 @@
  * SUCH DAMAGE.
  */
 
-#include "linker_allocator.h"
+#include "private/bionic_allocator.h"
 
 #include <stdlib.h>
 #include <sys/cdefs.h>
@@ -36,7 +36,7 @@
 
 #include <async_safe/log.h>
 
-static LinkerMemoryAllocator g_linker_allocator;
+static BionicAllocator g_bionic_allocator;
 static std::atomic<pid_t> fallback_tid(0);
 
 // Used by libdebuggerd_handler to switch allocators during a crash dump, in
@@ -56,16 +56,16 @@
   }
 }
 
-static LinkerMemoryAllocator& get_fallback_allocator() {
-  static LinkerMemoryAllocator fallback_allocator;
+static BionicAllocator& get_fallback_allocator() {
+  static BionicAllocator fallback_allocator;
   return fallback_allocator;
 }
 
-static LinkerMemoryAllocator& get_allocator() {
+static BionicAllocator& get_allocator() {
   if (__predict_false(fallback_tid) && __predict_false(gettid() == fallback_tid)) {
     return get_fallback_allocator();
   }
-  return g_linker_allocator;
+  return g_bionic_allocator;
 }
 
 void* malloc(size_t byte_count) {
diff --git a/linker/linker_soinfo.h b/linker/linker_soinfo.h
index 14571de..3499cf7 100644
--- a/linker/linker_soinfo.h
+++ b/linker/linker_soinfo.h
@@ -35,6 +35,7 @@
 
 #include "private/bionic_elf_tls.h"
 #include "linker_namespaces.h"
+#include "linker_tls.h"
 
 #define FLAG_LINKED           0x00000001
 #define FLAG_EXE              0x00000004 // The main executable
@@ -102,14 +103,9 @@
 // TODO(dimitry): remove reference from soinfo member functions to this class.
 class VersionTracker;
 
-// The first ELF TLS module has ID 1. Zero is reserved for the first word of
-// the DTV, a generation count, and unresolved weak symbols also use module
-// ID 0.
-static constexpr size_t kUninitializedModuleId = 0;
-
 struct soinfo_tls {
   TlsSegment segment;
-  size_t module_id = kUninitializedModuleId;
+  size_t module_id = kTlsUninitializedModuleId;
 };
 
 #if defined(__work_around_b_24465209__)
diff --git a/linker/linker_tls.cpp b/linker/linker_tls.cpp
index 0d1796b..a3aa9bf 100644
--- a/linker/linker_tls.cpp
+++ b/linker/linker_tls.cpp
@@ -31,6 +31,7 @@
 #include <vector>
 
 #include "private/ScopedRWLock.h"
+#include "private/ScopedSignalBlocker.h"
 #include "private/bionic_defs.h"
 #include "private/bionic_elf_tls.h"
 #include "private/bionic_globals.h"
@@ -41,9 +42,6 @@
 static bool g_static_tls_finished;
 static std::vector<TlsModule> g_tls_modules;
 
-static inline size_t module_id_to_idx(size_t id) { return id - 1; }
-static inline size_t module_idx_to_id(size_t idx) { return idx + 1; }
-
 static size_t get_unused_module_index() {
   for (size_t i = 0; i < g_tls_modules.size(); ++i) {
     if (g_tls_modules[i].soinfo_ptr == nullptr) {
@@ -57,37 +55,47 @@
 }
 
 static void register_tls_module(soinfo* si, size_t static_offset) {
+  TlsModules& libc_modules = __libc_shared_globals()->tls_modules;
+
   // The global TLS module table points at the std::vector of modules declared
   // in this file, so acquire a write lock before modifying the std::vector.
-  ScopedWriteLock locker(&__libc_shared_globals()->tls_modules.rwlock);
+  ScopedSignalBlocker ssb;
+  ScopedWriteLock locker(&libc_modules.rwlock);
 
   size_t module_idx = get_unused_module_index();
 
   soinfo_tls* si_tls = si->get_tls();
-  si_tls->module_id = module_idx_to_id(module_idx);
+  si_tls->module_id = __tls_module_idx_to_id(module_idx);
+
+  const size_t new_generation = ++libc_modules.generation;
+  __libc_tls_generation_copy = new_generation;
+  if (libc_modules.generation_libc_so != nullptr) {
+    *libc_modules.generation_libc_so = new_generation;
+  }
 
   g_tls_modules[module_idx] = {
     .segment = si_tls->segment,
     .static_offset = static_offset,
-    .first_generation = ++__libc_shared_globals()->tls_modules.generation,
+    .first_generation = new_generation,
     .soinfo_ptr = si,
   };
 }
 
 static void unregister_tls_module(soinfo* si) {
+  ScopedSignalBlocker ssb;
   ScopedWriteLock locker(&__libc_shared_globals()->tls_modules.rwlock);
 
   soinfo_tls* si_tls = si->get_tls();
-  TlsModule& mod = g_tls_modules[module_id_to_idx(si_tls->module_id)];
+  TlsModule& mod = g_tls_modules[__tls_module_id_to_idx(si_tls->module_id)];
   CHECK(mod.static_offset == SIZE_MAX);
   CHECK(mod.soinfo_ptr == si);
   mod = {};
-  si_tls->module_id = kUninitializedModuleId;
+  si_tls->module_id = kTlsUninitializedModuleId;
 }
 
 // The reference is valid until a TLS module is registered or unregistered.
 const TlsModule& get_tls_module(size_t module_id) {
-  size_t module_idx = module_id_to_idx(module_id);
+  size_t module_idx = __tls_module_id_to_idx(module_id);
   CHECK(module_idx < g_tls_modules.size());
   return g_tls_modules[module_idx];
 }
@@ -123,7 +131,7 @@
 
 void register_soinfo_tls(soinfo* si) {
   soinfo_tls* si_tls = si->get_tls();
-  if (si_tls == nullptr || si_tls->module_id != kUninitializedModuleId) {
+  if (si_tls == nullptr || si_tls->module_id != kTlsUninitializedModuleId) {
     return;
   }
   size_t static_offset = SIZE_MAX;
@@ -136,7 +144,7 @@
 
 void unregister_soinfo_tls(soinfo* si) {
   soinfo_tls* si_tls = si->get_tls();
-  if (si_tls == nullptr || si_tls->module_id == kUninitializedModuleId) {
+  if (si_tls == nullptr || si_tls->module_id == kTlsUninitializedModuleId) {
     return;
   }
   return unregister_tls_module(si);
diff --git a/linker/tests/Android.mk b/linker/tests/Android.mk
index 9268e31..63e0555 100644
--- a/linker/tests/Android.mk
+++ b/linker/tests/Android.mk
@@ -43,10 +43,8 @@
   linker_config_test.cpp \
   linker_globals.cpp \
   linked_list_test.cpp \
-  linker_memory_allocator_test.cpp \
   linker_sleb128_test.cpp \
   linker_utils_test.cpp \
-  ../linker_allocator.cpp \
   ../linker_block_allocator.cpp \
   ../linker_config.cpp \
   ../linker_utils.cpp \
diff --git a/tests/Android.bp b/tests/Android.bp
index 8ac0531..8b921d8 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -384,7 +384,7 @@
                 "libdl_test.cpp",
             ],
             static_libs: [
-                "libpagemap",
+                "libmeminfo",
                 "libziparchive",
                 "libLLVMObject",
                 "libLLVMBitReader",
@@ -469,7 +469,7 @@
             static_libs: [
                 // The order of these libraries matters, do not shuffle them.
                 "libbase",
-                "libpagemap",
+                "libmeminfo",
                 "libziparchive",
                 "libz",
                 "libutils",
@@ -661,6 +661,13 @@
         "gtest_preinit_debuggerd.cpp",
         "gtest_globals.cpp",
         "gtest_main.cpp",
+
+        // The Bionic allocator has its own C++ API. It isn't packaged into its
+        // own library, so it can only be tested when it's part of libc.a.
+        "bionic_allocator_test.cpp",
+    ],
+    include_dirs: [
+        "bionic/libc",
     ],
     whole_static_libs: [
         "libBionicTests",
diff --git a/linker/tests/linker_memory_allocator_test.cpp b/tests/bionic_allocator_test.cpp
similarity index 75%
rename from linker/tests/linker_memory_allocator_test.cpp
rename to tests/bionic_allocator_test.cpp
index c284eaa..f710907 100644
--- a/linker/tests/linker_memory_allocator_test.cpp
+++ b/tests/bionic_allocator_test.cpp
@@ -32,7 +32,7 @@
 
 #include <gtest/gtest.h>
 
-#include "../linker_allocator.h"
+#include "private/bionic_allocator.h"
 
 #include <unistd.h>
 
@@ -61,20 +61,20 @@
 
 static size_t kPageSize = sysconf(_SC_PAGE_SIZE);
 
-TEST(linker_memory, test_alloc_0) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_alloc_0) {
+  BionicAllocator allocator;
   void* ptr = allocator.alloc(0);
   ASSERT_TRUE(ptr != nullptr);
   allocator.free(ptr);
 }
 
-TEST(linker_memory, test_free_nullptr) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_free_nullptr) {
+  BionicAllocator allocator;
   allocator.free(nullptr);
 }
 
-TEST(linker_memory, test_realloc) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_realloc) {
+  BionicAllocator allocator;
   uint32_t* array = reinterpret_cast<uint32_t*>(allocator.alloc(512));
   const size_t array_size = 512 / sizeof(uint32_t);
 
@@ -127,8 +127,8 @@
   ASSERT_EQ(nullptr, allocator.realloc(reallocated_ptr, 0));
 }
 
-TEST(linker_memory, test_small_smoke) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_small_smoke) {
+  BionicAllocator allocator;
 
   uint8_t zeros[16];
   memset(zeros, 0, sizeof(zeros));
@@ -150,8 +150,8 @@
   allocator.free(ptr2);
 }
 
-TEST(linker_memory, test_huge_smoke) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_huge_smoke) {
+  BionicAllocator allocator;
 
   // this should trigger proxy-to-mmap
   test_struct_huge* ptr1 =
@@ -170,8 +170,8 @@
   allocator.free(ptr1);
 }
 
-TEST(linker_memory, test_large) {
-  LinkerMemoryAllocator allocator;
+TEST(bionic_allocator, test_large) {
+  BionicAllocator allocator;
 
   test_struct_large* ptr1 =
       reinterpret_cast<test_struct_large*>(allocator.alloc(sizeof(test_struct_large)));
@@ -212,4 +212,49 @@
   allocator.free(ptr_to_free);
 }
 
+TEST(bionic_allocator, test_memalign_small) {
+  BionicAllocator allocator;
+  void* ptr;
 
+  // simple case
+  ptr = allocator.memalign(0x100, 0x100);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x100);
+  allocator.free(ptr);
+
+  // small objects are automatically aligned to their size.
+  ptr = allocator.alloc(0x200);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x200);
+  allocator.free(ptr);
+
+  // the size (0x10) is bumped up to the alignment (0x100)
+  ptr = allocator.memalign(0x100, 0x10);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x100);
+  allocator.free(ptr);
+}
+
+TEST(bionic_allocator, test_memalign_large) {
+  BionicAllocator allocator;
+  void* ptr;
+
+  // a large object with alignment < PAGE_SIZE
+  ptr = allocator.memalign(0x100, 0x2000);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x100);
+  allocator.free(ptr);
+
+  // a large object with alignment == PAGE_SIZE
+  ptr = allocator.memalign(0x1000, 0x2000);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x1000);
+  allocator.free(ptr);
+
+  // A large object with alignment > PAGE_SIZE is only guaranteed to have page
+  // alignment.
+  ptr = allocator.memalign(0x2000, 0x4000);
+  ASSERT_TRUE(ptr != nullptr);
+  ASSERT_EQ(0U, reinterpret_cast<uintptr_t>(ptr) % 0x1000);
+  allocator.free(ptr);
+}
diff --git a/tests/dlext_test.cpp b/tests/dlext_test.cpp
index 34013a7..c9ecd2e 100644
--- a/tests/dlext_test.cpp
+++ b/tests/dlext_test.cpp
@@ -36,7 +36,7 @@
 #include <sys/vfs.h>
 #include <sys/wait.h>
 
-#include <pagemap/pagemap.h>
+#include <meminfo/procmeminfo.h>
 #include <ziparchive/zip_archive.h>
 
 #include "gtest_globals.h"
@@ -488,33 +488,23 @@
 
 void GetPss(bool shared_relro, const char* lib, const char* relro_file, pid_t pid,
             size_t* total_pss) {
-  pm_kernel_t* kernel;
-  ASSERT_EQ(0, pm_kernel_create(&kernel));
-
-  pm_process_t* process;
-  ASSERT_EQ(0, pm_process_create(kernel, pid, &process));
-
-  pm_map_t** maps;
-  size_t num_maps;
-  ASSERT_EQ(0, pm_process_maps(process, &maps, &num_maps));
+  android::meminfo::ProcMemInfo proc_mem(pid);
+  const std::vector<android::meminfo::Vma>& maps = proc_mem.Maps();
+  ASSERT_GT(maps.size(), 0UL);
 
   // Calculate total PSS of the library.
   *total_pss = 0;
   bool saw_relro_file = false;
-  for (size_t i = 0; i < num_maps; ++i) {
-    if (android::base::EndsWith(maps[i]->name, lib) || strcmp(maps[i]->name, relro_file) == 0) {
-      if (strcmp(maps[i]->name, relro_file) == 0) saw_relro_file = true;
+  for (auto& vma : maps) {
+    if (android::base::EndsWith(vma.name, lib) || (vma.name == relro_file)) {
+      if (vma.name == relro_file) {
+          saw_relro_file = true;
+      }
 
-      pm_memusage_t usage;
-      ASSERT_EQ(0, pm_map_usage(maps[i], &usage));
-      *total_pss += usage.pss;
+      *total_pss += vma.usage.pss;
     }
   }
 
-  free(maps);
-  pm_process_destroy(process);
-  pm_kernel_destroy(kernel);
-
   if (shared_relro) ASSERT_TRUE(saw_relro_file);
 }
 
diff --git a/tests/dlfcn_test.cpp b/tests/dlfcn_test.cpp
index 176a6f8..e3ba227 100644
--- a/tests/dlfcn_test.cpp
+++ b/tests/dlfcn_test.cpp
@@ -1083,10 +1083,16 @@
 }
 
 TEST(dlfcn, dlopen_library_with_ELF_TLS) {
+// TODO: Remove this test. Once ELF TLS is implemented, this test will be
+// replaced with a larger set of tests. Removing the test requires matching CLs
+// in CTS and in internal test suites.
+#if 0
   dlerror(); // Clear any pending errors.
   void* handle = dlopen("libelf-tls-library.so", RTLD_NOW);
   ASSERT_TRUE(handle == nullptr);
   ASSERT_SUBSTR("unknown reloc type ", dlerror());
+#endif
+  GTEST_LOG_(INFO) << "This test is disabled pending replacement with dynamic ELF TLS tests.\n";
 }
 
 TEST(dlfcn, dlopen_bad_flags) {
diff --git a/tests/signal_test.cpp b/tests/signal_test.cpp
index dd27aef..77b004f 100644
--- a/tests/signal_test.cpp
+++ b/tests/signal_test.cpp
@@ -392,11 +392,19 @@
   static uint64_t sigset;
   struct sigaction sa = {};
   sa.sa_handler = [](int) { sigset = GetSignalMask(); };
+  sa.sa_flags = SA_ONSTACK | SA_NODEFER;
   sigfillset(&sa.sa_mask);
   sigaction(SIGUSR1, &sa, nullptr);
   raise(SIGUSR1);
-  ASSERT_NE(0ULL, sigset);
-  TestSignalMaskFiltered(sigset);
+
+  // On LP32, struct sigaction::sa_mask is only 32-bits wide.
+  unsigned long expected_sigset = ~0UL;
+
+  // SIGKILL and SIGSTOP are always blocked.
+  expected_sigset &= ~(1UL << (SIGKILL - 1));
+  expected_sigset &= ~(1UL << (SIGSTOP - 1));
+
+  ASSERT_EQ(static_cast<uint64_t>(expected_sigset), sigset);
 }
 
 TEST(signal, sigaction64_filter) {
@@ -404,11 +412,18 @@
   static uint64_t sigset;
   struct sigaction64 sa = {};
   sa.sa_handler = [](int) { sigset = GetSignalMask(); };
+  sa.sa_flags = SA_ONSTACK | SA_NODEFER;
   sigfillset64(&sa.sa_mask);
   sigaction64(SIGUSR1, &sa, nullptr);
   raise(SIGUSR1);
-  ASSERT_NE(0ULL, sigset);
-  TestSignalMaskFiltered(sigset);
+
+  uint64_t expected_sigset = ~0ULL;
+
+  // SIGKILL and SIGSTOP are always blocked.
+  expected_sigset &= ~(1ULL << (SIGKILL - 1));
+  expected_sigset &= ~(1ULL << (SIGSTOP - 1));
+
+  ASSERT_EQ(expected_sigset, sigset);
 }
 
 TEST(signal, sigprocmask_setmask_filter) {