Merge "linker: Process RELR relocations before ANDROID_REL[A]." into main
diff --git a/libc/NOTICE b/libc/NOTICE
index 91cd335..dfd93ff 100644
--- a/libc/NOTICE
+++ b/libc/NOTICE
@@ -4640,7 +4640,9 @@
 
 SPDX-License-Identifier: BSD-2-Clause
 
-Copyright (c)1999 Citrus Project,
+Copyright (c) 2017, 2018 Dell EMC
+Copyright (c) 2000, 2001, 2008, 2011, David E. O'Brien
+Copyright (c) 1998 John D. Polstra.
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
@@ -4666,11 +4668,9 @@
 
 -------------------------------------------------------------------
 
-SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+SPDX-License-Identifier: BSD-2-Clause
 
-Copyright (c) 2017, 2018 Dell EMC
-Copyright (c) 2000, 2001, 2008, 2011, David E. O'Brien
-Copyright (c) 1998 John D. Polstra.
+Copyright (c)1999 Citrus Project,
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
diff --git a/libc/arch-arm64/dynamic_function_dispatch.cpp b/libc/arch-arm64/dynamic_function_dispatch.cpp
index b9f657b..450138c 100644
--- a/libc/arch-arm64/dynamic_function_dispatch.cpp
+++ b/libc/arch-arm64/dynamic_function_dispatch.cpp
@@ -65,6 +65,16 @@
     }
 }
 
+typedef void* memrchr_func(const void*, int, size_t);  // Same signature as memrchr(3).
+DEFINE_IFUNC_FOR(memrchr) {
+    RETURN_FUNC(memrchr_func, __memrchr_aarch64);
+}
+
+typedef void* memset_func(void*, int, size_t);  // Same signature as memset(3).
+DEFINE_IFUNC_FOR(memset) {
+    RETURN_FUNC(memset_func, __memset_aarch64);
+}
+
 typedef char* stpcpy_func(char*, const char*, size_t);
 DEFINE_IFUNC_FOR(stpcpy) {
     // TODO: enable the SVE version.
diff --git a/libc/arch-arm64/static_function_dispatch.S b/libc/arch-arm64/static_function_dispatch.S
index c7557f8..18c3783 100644
--- a/libc/arch-arm64/static_function_dispatch.S
+++ b/libc/arch-arm64/static_function_dispatch.S
@@ -37,6 +37,8 @@
 FUNCTION_DELEGATE(memcmp, __memcmp_aarch64)
 FUNCTION_DELEGATE(memcpy, __memcpy_aarch64)
 FUNCTION_DELEGATE(memmove, __memmove_aarch64)
+FUNCTION_DELEGATE(memrchr, __memrchr_aarch64)
+FUNCTION_DELEGATE(memset, __memset_aarch64)
 FUNCTION_DELEGATE(stpcpy, __stpcpy_aarch64)
 FUNCTION_DELEGATE(strchr, __strchr_aarch64_mte)
 FUNCTION_DELEGATE(strchrnul, __strchrnul_aarch64_mte)
diff --git a/libc/bionic/bionic_elf_tls.cpp b/libc/bionic/bionic_elf_tls.cpp
index 077f310..a053c27 100644
--- a/libc/bionic/bionic_elf_tls.cpp
+++ b/libc/bionic/bionic_elf_tls.cpp
@@ -60,11 +60,18 @@
   for (size_t i = 0; i < phdr_count; ++i) {
     const ElfW(Phdr)& phdr = phdr_table[i];
     if (phdr.p_type == PT_TLS) {
-      *out = TlsSegment {
-        phdr.p_memsz,
-        phdr.p_align,
-        reinterpret_cast<void*>(load_bias + phdr.p_vaddr),
-        phdr.p_filesz,
+      *out = TlsSegment{
+          .aligned_size =
+              TlsAlignedSize{
+                  .size = phdr.p_memsz,
+                  .align =
+                      TlsAlign{
+                          .value = phdr.p_align ?: 1,  // 0 means "no alignment requirement"
+                          .skew = phdr.p_vaddr % MAX(1, phdr.p_align),
+                      },
+              },
+          .init_ptr = reinterpret_cast<void*>(load_bias + phdr.p_vaddr),
+          .init_size = phdr.p_filesz,
       };
       return true;
     }
@@ -72,114 +79,171 @@
   return false;
 }
 
-// Return true if the alignment of a TLS segment is a valid power-of-two. Also
-// cap the alignment if it's too high.
-bool __bionic_check_tls_alignment(size_t* alignment) {
-  // N.B. The size does not need to be a multiple of the alignment. With
-  // ld.bfd (or after using binutils' strip), the TLS segment's size isn't
-  // rounded up.
-  if (*alignment == 0 || !powerof2(*alignment)) {
-    return false;
-  }
-  // Bionic only respects TLS alignment up to one page.
-  *alignment = MIN(*alignment, page_size());
-  return true;
+// Return true if the alignment of a TLS segment is a valid power-of-two.
+bool __bionic_check_tls_align(size_t align) {
+  // Note: The size does not need to be a multiple of the alignment. With ld.bfd
+  // (or after using binutils' strip), the TLS segment's size isn't rounded up.
+  return powerof2(align);
+}
+
+static void static_tls_layout_overflow() {
+  async_safe_fatal("error: TLS segments in static TLS overflowed");
+}
+
+static size_t align_checked(size_t value, TlsAlign tls_align) {
+  const size_t align = tls_align.value;
+  const size_t skew = tls_align.skew;
+  CHECK(align != 0 && powerof2(align + 0) && skew < align);
+  const size_t result = ((value - skew + align - 1) & ~(align - 1)) + skew;
+  if (result < value) static_tls_layout_overflow();
+  return result;
 }
 
 size_t StaticTlsLayout::offset_thread_pointer() const {
   return offset_bionic_tcb_ + (-MIN_TLS_SLOT * sizeof(void*));
 }
 
-// Reserves space for the Bionic TCB and the executable's TLS segment. Returns
-// the offset of the executable's TLS segment.
-size_t StaticTlsLayout::reserve_exe_segment_and_tcb(const TlsSegment* exe_segment,
+// Allocates the Bionic TCB and the executable's TLS segment in the static TLS
+// layout, satisfying alignment requirements for both.
+//
+// For an executable's TLS accesses (using the LocalExec model), the static
+// linker bakes TLS offsets directly into the .text section, so the loader must
+// place the executable segment at the same offset relative to the TP.
+// Similarly, the Bionic TLS slots (bionic_tcb) must also be allocated at the
+// correct offset relative to the TP.
+//
+// Returns the offset of the executable's TLS segment.
+//
+// Note: This function has unit tests, but they are in bionic-unit-tests-static,
+// not bionic-unit-tests.
+size_t StaticTlsLayout::reserve_exe_segment_and_tcb(const TlsSegment* seg,
                                                     const char* progname __attribute__((unused))) {
   // Special case: if the executable has no TLS segment, then just allocate a
   // TCB and skip the minimum alignment check on ARM.
-  if (exe_segment == nullptr) {
+  if (seg == nullptr) {
     offset_bionic_tcb_ = reserve_type<bionic_tcb>();
     return 0;
   }
 
 #if defined(__arm__) || defined(__aarch64__)
+  // ARM uses a "variant 1" TLS layout. The ABI specifies that the TP points at
+  // a 2-word TCB, followed by the executable's segment. In practice, libc
+  // implementations actually allocate a larger TCB at negative offsets from the
+  // TP.
+  //
+  // Historically, Bionic allocated an 8-word TCB starting at TP+0, so to keep
+  // the executable's TLS segment from overlapping the last 6 slots, Bionic
+  // requires that executables have an 8-word PT_TLS alignment to ensure that
+  // the TCB fits in the alignment padding, which it accomplishes using
+  // crtbegin.c. Bionic uses negative offsets for new TLS slots to avoid this
+  // problem.
 
-  // First reserve enough space for the TCB before the executable segment.
-  reserve(sizeof(bionic_tcb), 1);
+  static_assert(MIN_TLS_SLOT <= 0 && MAX_TLS_SLOT >= 1);
+  static_assert(sizeof(bionic_tcb) == (MAX_TLS_SLOT - MIN_TLS_SLOT + 1) * sizeof(void*));
+  static_assert(alignof(bionic_tcb) == sizeof(void*));
+  const size_t max_align = MAX(alignof(bionic_tcb), seg->aligned_size.align.value);
 
-  // Then reserve the segment itself.
-  const size_t result = reserve(exe_segment->size, exe_segment->alignment);
+  // Allocate the TCB first. Split it into negative and non-negative slots and
+  // ensure that TP (i.e. the first non-negative slot) is aligned to max_align.
+  const size_t tcb_size_pre = -MIN_TLS_SLOT * sizeof(void*);
+  const size_t tcb_size_post = (MAX_TLS_SLOT + 1) * sizeof(void*);
+  const auto pair =
+      reserve_tp_pair(TlsAlignedSize{.size = tcb_size_pre},
+                      TlsAlignedSize{.size = tcb_size_post, .align = TlsAlign{.value = max_align}});
+  offset_bionic_tcb_ = pair.before;
+  const size_t offset_tp = pair.tp;
 
-  // The variant 1 ABI that ARM linkers follow specifies a 2-word TCB between
-  // the thread pointer and the start of the executable's TLS segment, but both
-  // the thread pointer and the TLS segment are aligned appropriately for the
-  // TLS segment. Calculate the distance between the thread pointer and the
-  // EXE's segment.
-  const size_t exe_tpoff = __BIONIC_ALIGN(sizeof(void*) * 2, exe_segment->alignment);
+  // Allocate the segment.
+  offset_exe_ = reserve(seg->aligned_size);
 
-  const size_t min_bionic_alignment = BIONIC_ROUND_UP_POWER_OF_2(MAX_TLS_SLOT) * sizeof(void*);
-  if (exe_tpoff < min_bionic_alignment) {
-    async_safe_fatal("error: \"%s\": executable's TLS segment is underaligned: "
-                     "alignment is %zu, needs to be at least %zu for %s Bionic",
-                     progname, exe_segment->alignment, min_bionic_alignment,
-                     (sizeof(void*) == 4 ? "ARM" : "ARM64"));
+  // Verify that the ABI and Bionic tpoff values are equal, which is equivalent
+  // to checking whether the segment is sufficiently aligned.
+  const size_t abi_tpoff = align_checked(2 * sizeof(void*), seg->aligned_size.align);
+  const size_t actual_tpoff = align_checked(tcb_size_post, seg->aligned_size.align);
+  CHECK(actual_tpoff == offset_exe_ - offset_tp);
+
+  if (abi_tpoff != actual_tpoff) {
+    async_safe_fatal(
+        "error: \"%s\": executable's TLS segment is underaligned: "
+        "alignment is %zu (skew %zu), needs to be at least %zu for %s Bionic",
+        progname, seg->aligned_size.align.value, seg->aligned_size.align.skew, tcb_size_post,
+        (sizeof(void*) == 4 ? "ARM" : "ARM64"));
   }
 
-  offset_bionic_tcb_ = result - exe_tpoff - (-MIN_TLS_SLOT * sizeof(void*));
-  return result;
-
 #elif defined(__i386__) || defined(__x86_64__)
 
-  // x86 uses variant 2 TLS layout. The executable's segment is located just
-  // before the TCB.
-  static_assert(MIN_TLS_SLOT == 0, "First slot of bionic_tcb must be slot #0 on x86");
-  const size_t exe_size = round_up_with_overflow_check(exe_segment->size, exe_segment->alignment);
-  reserve(exe_size, 1);
-  const size_t max_align = MAX(alignof(bionic_tcb), exe_segment->alignment);
-  offset_bionic_tcb_ = reserve(sizeof(bionic_tcb), max_align);
-  return offset_bionic_tcb_ - exe_size;
+  auto pair = reserve_tp_pair(seg->aligned_size, TlsAlignedSize::of_type<bionic_tcb>());
+  offset_exe_ = pair.before;
+  offset_bionic_tcb_ = pair.after;
 
 #elif defined(__riscv)
+  static_assert(MAX_TLS_SLOT == -1, "Last slot of bionic_tcb must be slot #(-1) on riscv");
 
-  // First reserve enough space for the TCB before the executable segment.
-  offset_bionic_tcb_ = reserve(sizeof(bionic_tcb), 1);
-
-  // Then reserve the segment itself.
-  const size_t exe_size = round_up_with_overflow_check(exe_segment->size, exe_segment->alignment);
-  return reserve(exe_size, 1);
+  auto pair = reserve_tp_pair(TlsAlignedSize::of_type<bionic_tcb>(), seg->aligned_size);
+  offset_bionic_tcb_ = pair.before;
+  offset_exe_ = pair.after;
 
 #else
 #error "Unrecognized architecture"
 #endif
+
+  return offset_exe_;
 }
 
-void StaticTlsLayout::reserve_bionic_tls() {
+size_t StaticTlsLayout::reserve_bionic_tls() {
   offset_bionic_tls_ = reserve_type<bionic_tls>();
+  return offset_bionic_tls_;
 }
 
 void StaticTlsLayout::finish_layout() {
   // Round the offset up to the alignment.
-  offset_ = round_up_with_overflow_check(offset_, alignment_);
-
-  if (overflowed_) {
-    async_safe_fatal("error: TLS segments in static TLS overflowed");
-  }
+  cursor_ = align_checked(cursor_, TlsAlign{.value = align_});
 }
 
-// The size is not required to be a multiple of the alignment. The alignment
-// must be a positive power-of-two.
-size_t StaticTlsLayout::reserve(size_t size, size_t alignment) {
-  offset_ = round_up_with_overflow_check(offset_, alignment);
-  const size_t result = offset_;
-  if (__builtin_add_overflow(offset_, size, &offset_)) overflowed_ = true;
-  alignment_ = MAX(alignment_, alignment);
+size_t StaticTlsLayout::align_cursor(TlsAlign align) {
+  cursor_ = align_checked(cursor_, align);
+  align_ = MAX(align_, align.value);
+  return cursor_;
+}
+
+size_t StaticTlsLayout::align_cursor_unskewed(size_t align) {
+  return align_cursor(TlsAlign{.value = align});
+}
+
+// Reserve the requested number of bytes at the requested alignment. The
+// requested size is not required to be a multiple of the alignment, nor is the
+// cursor aligned after the allocation.
+size_t StaticTlsLayout::reserve(TlsAlignedSize aligned_size) {
+  align_cursor(aligned_size.align);
+  const size_t result = cursor_;
+  if (__builtin_add_overflow(cursor_, aligned_size.size, &cursor_)) static_tls_layout_overflow();
   return result;
 }
 
-size_t StaticTlsLayout::round_up_with_overflow_check(size_t value, size_t alignment) {
-  const size_t old_value = value;
-  value = __BIONIC_ALIGN(value, alignment);
-  if (value < old_value) overflowed_ = true;
-  return value;
+// Calculate the TP offset and allocate something before it and something after
+// it. The TP will be aligned to:
+//
+//     MAX(before.align.value, after.align.value)
+//
+// The `before` and `after` allocations are each allocated as closely as
+// possible to the TP.
+StaticTlsLayout::TpAllocations StaticTlsLayout::reserve_tp_pair(TlsAlignedSize before,
+                                                                TlsAlignedSize after) {
+  // Tentative `before` allocation.
+  const size_t tentative_before = reserve(before);
+  const size_t tentative_before_end = align_cursor_unskewed(before.align.value);
+
+  const size_t offset_tp = align_cursor_unskewed(MAX(before.align.value, after.align.value));
+
+  const size_t offset_after = reserve(after);
+
+  // If the `after` allocation has higher alignment than `before`, then there
+  // may be alignment padding to remove between `before` and the TP. Shift
+  // `before` forward to remove this padding.
+  CHECK(((offset_tp - tentative_before_end) & (before.align.value - 1)) == 0);
+  const size_t offset_before = tentative_before + (offset_tp - tentative_before_end);
+
+  return TpAllocations{offset_before, offset_tp, offset_after};
 }
 
 // Copy each TLS module's initialization image into a newly-allocated block of
@@ -309,7 +373,11 @@
   void* mod_ptr = dtv->modules[module_idx];
   if (mod_ptr == nullptr) {
     const TlsSegment& segment = modules.module_table[module_idx].segment;
-    mod_ptr = __libc_shared_globals()->tls_allocator.memalign(segment.alignment, segment.size);
+    // TODO: Currently the aligned_size.align.skew property is ignored.
+    // That is, for a dynamic TLS block at addr A, (A % p_align) will be 0, not
+    // (p_vaddr % p_align).
+    mod_ptr = __libc_shared_globals()->tls_allocator.memalign(segment.aligned_size.align.value,
+                                                              segment.aligned_size.size);
     if (segment.init_size > 0) {
       memcpy(mod_ptr, segment.init_ptr, segment.init_size);
     }
@@ -317,8 +385,8 @@
 
     // Reports the allocation to the listener, if any.
     if (modules.on_creation_cb != nullptr) {
-      modules.on_creation_cb(mod_ptr,
-                             static_cast<void*>(static_cast<char*>(mod_ptr) + segment.size));
+      modules.on_creation_cb(
+          mod_ptr, static_cast<void*>(static_cast<char*>(mod_ptr) + segment.aligned_size.size));
     }
   }
 
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index f091ff8..d86df30 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -138,9 +138,9 @@
   static TlsModule mod;
   TlsModules& modules = __libc_shared_globals()->tls_modules;
   if (__bionic_get_tls_segment(phdr_start, phdr_ct, 0, &mod.segment)) {
-    if (!__bionic_check_tls_alignment(&mod.segment.alignment)) {
+    if (!__bionic_check_tls_align(mod.segment.aligned_size.align.value)) {
       async_safe_fatal("error: TLS segment alignment in \"%s\" is not a power of 2: %zu\n",
-                       progname, mod.segment.alignment);
+                       progname, mod.segment.aligned_size.align.value);
     }
     mod.static_offset = layout.reserve_exe_segment_and_tcb(&mod.segment, progname);
     mod.first_generation = kTlsGenerationFirst;
diff --git a/libc/include/android/crash_detail.h b/libc/include/android/crash_detail.h
index 1889f9f..946a3ab 100644
--- a/libc/include/android/crash_detail.h
+++ b/libc/include/android/crash_detail.h
@@ -69,7 +69,7 @@
  * Introduced in API 35.
  *
  * \param name identifying name for this extra data.
- *             this should generally be a human-readable debug string, but we are treating
+ *             this should generally be a human-readable UTF-8 string, but we are treating
  *             it as arbitrary bytes because it could be corrupted by the crash.
  * \param name_size number of bytes of the buffer pointed to by name
  * \param data a buffer containing the extra detail bytes, if null the crash detail
diff --git a/libc/include/bits/elf_common.h b/libc/include/bits/elf_common.h
index 0856f45..13d4fbf 100644
--- a/libc/include/bits/elf_common.h
+++ b/libc/include/bits/elf_common.h
@@ -1,5 +1,5 @@
 /*-
- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ * SPDX-License-Identifier: BSD-2-Clause
  *
  * Copyright (c) 2017, 2018 Dell EMC
  * Copyright (c) 2000, 2001, 2008, 2011, David E. O'Brien
@@ -26,8 +26,6 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- *
- * $FreeBSD$
  */
 
 #ifndef _SYS_ELF_COMMON_H_
@@ -38,6 +36,26 @@
  */
 
 /*
+ * Note header.  The ".note" section contains an array of notes.  Each
+ * begins with this header, aligned to a word boundary.  Immediately
+ * following the note header is n_namesz bytes of name, padded to the
+ * next word boundary.  Then comes n_descsz bytes of descriptor, again
+ * padded to a word boundary.  The values of n_namesz and n_descsz do
+ * not include the padding.
+ */
+
+#if 0 // android-added
+#if !defined(LOCORE) && !defined(__ASSEMBLER__)
+typedef struct {
+	u_int32_t	n_namesz;	/* Length of name. */
+	u_int32_t	n_descsz;	/* Length of descriptor. */
+	u_int32_t	n_type;		/* Type of this note. */
+} Elf_Note;
+typedef Elf_Note Elf_Nhdr;
+#endif
+#endif // android-added
+
+/*
  * Option kinds.
  */
 #define	ODK_NULL	0	/* undefined */
@@ -92,6 +110,21 @@
 #define	OGP_GROUP	0x0000ffff	/* GP group number */
 #define	OGP_SELF	0x00010000	/* GP group is self-contained */
 
+/*
+ * The header for GNU-style hash sections.
+ */
+
+#if 0 // android-added
+#if !defined(LOCORE) && !defined(__ASSEMBLER__)
+typedef struct {
+	u_int32_t	gh_nbuckets;	/* Number of hash buckets. */
+	u_int32_t	gh_symndx;	/* First visible symbol in .dynsym. */
+	u_int32_t	gh_maskwords;	/* #maskwords used in bloom filter. */
+	u_int32_t	gh_shift2;	/* Bloom filter shift count. */
+} Elf_GNU_Hash_Header;
+#endif
+#endif // android-added
+
 /* Indexes into the e_ident array.  Keep synced with
    http://www.sco.com/developers/gabi/latest/ch4.eheader.html */
 #define	EI_MAG0		0	/* Magic number, byte 0. */
@@ -153,7 +186,9 @@
 #define	ELFOSABI_ARM		97	/* ARM */
 #define	ELFOSABI_STANDALONE	255	/* Standalone (embedded) application */
 
+#define	ELFOSABI_SYSV		ELFOSABI_NONE	/* symbol used in old spec */
 #define	ELFOSABI_MONTEREY	ELFOSABI_AIX	/* Monterey */
+#define	ELFOSABI_GNU		ELFOSABI_LINUX
 
 /* e_ident */
 #define	IS_ELF(ehdr)	((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \
@@ -299,6 +334,7 @@
 #define	EF_ARM_EABI_VER3	0x03000000
 #define	EF_ARM_EABI_VER4	0x04000000
 #define	EF_ARM_EABI_VER5	0x05000000
+#define	EF_ARM_EABI_VERSION(x)	((x) & EF_ARM_EABIMASK)
 #define	EF_ARM_INTERWORK	0x00000004
 #define	EF_ARM_APCS_26		0x00000008
 #define	EF_ARM_APCS_FLOAT	0x00000010
@@ -418,12 +454,12 @@
 #define	SHT_HIOS		0x6fffffff	/* Last of OS specific semantics */
 #define	SHT_LOPROC		0x70000000	/* reserved range for processor */
 #define	SHT_X86_64_UNWIND	0x70000001	/* unwind information */
-#define	SHT_AMD64_UNWIND	SHT_X86_64_UNWIND
+#define	SHT_AMD64_UNWIND	SHT_X86_64_UNWIND 
 
 #define	SHT_ARM_EXIDX		0x70000001	/* Exception index table. */
-#define	SHT_ARM_PREEMPTMAP	0x70000002	/* BPABI DLL dynamic linking
+#define	SHT_ARM_PREEMPTMAP	0x70000002	/* BPABI DLL dynamic linking 
 						   pre-emption map. */
-#define	SHT_ARM_ATTRIBUTES	0x70000003	/* Object file compatibility
+#define	SHT_ARM_ATTRIBUTES	0x70000003	/* Object file compatibility 
 						   attributes. */
 #define	SHT_ARM_DEBUGOVERLAY	0x70000004	/* See DBGOVL for details. */
 #define	SHT_ARM_OVERLAYSECTION	0x70000005	/* See DBGOVL for details. */
@@ -499,6 +535,9 @@
 #define	PT_TLS		7	/* Thread local storage segment */
 #define	PT_LOOS		0x60000000	/* First OS-specific. */
 #define	PT_SUNW_UNWIND	0x6464e550	/* amd64 UNWIND program header */
+// android-removed: #define	PT_GNU_EH_FRAME	0x6474e550
+// android-removed: #define	PT_GNU_STACK	0x6474e551
+// android-removed: #define	PT_GNU_RELRO	0x6474e552
 #define	PT_DUMP_DELTA	0x6fb5d000	/* va->pa map for kernel dumps
 					   (currently arm). */
 #define	PT_LOSUNW	0x6ffffffa
@@ -648,11 +687,6 @@
 #define	DT_AARCH64_BTI_PLT		0x70000001
 #define	DT_AARCH64_PAC_PLT		0x70000003
 #define	DT_AARCH64_VARIANT_PCS		0x70000005
-#define DT_AARCH64_MEMTAG_MODE		0x70000009
-#define DT_AARCH64_MEMTAG_HEAP		0x7000000b
-#define DT_AARCH64_MEMTAG_STACK		0x7000000c
-#define DT_AARCH64_MEMTAG_GLOBALS	0x7000000d
-#define DT_AARCH64_MEMTAG_GLOBALSSZ	0x7000000f
 
 #define	DT_ARM_SYMTABSZ			0x70000001
 #define	DT_ARM_PREEMPTMAP		0x70000002
@@ -810,6 +844,7 @@
 
 #define	GNU_PROPERTY_AARCH64_FEATURE_1_AND	0xc0000000
 
+// android-removed: #define	GNU_PROPERTY_AARCH64_FEATURE_1_BTI	0x00000001
 #define	GNU_PROPERTY_AARCH64_FEATURE_1_PAC	0x00000002
 
 #define	GNU_PROPERTY_X86_FEATURE_1_AND		0xc0000002
@@ -918,6 +953,49 @@
 #define	ELFCOMPRESS_LOPROC	0x70000000	/* Processor-specific */
 #define	ELFCOMPRESS_HIPROC	0x7fffffff
 
+#if 0 // android-added
+/* Values for a_type. */
+#define	AT_NULL		0	/* Terminates the vector. */
+#define	AT_IGNORE	1	/* Ignored entry. */
+#define	AT_EXECFD	2	/* File descriptor of program to load. */
+#define	AT_PHDR		3	/* Program header of program already loaded. */
+#define	AT_PHENT	4	/* Size of each program header entry. */
+#define	AT_PHNUM	5	/* Number of program header entries. */
+#define	AT_PAGESZ	6	/* Page size in bytes. */
+#define	AT_BASE		7	/* Interpreter's base address. */
+#define	AT_FLAGS	8	/* Flags. */
+#define	AT_ENTRY	9	/* Where interpreter should transfer control. */
+#define	AT_NOTELF	10	/* Program is not ELF ?? */
+#define	AT_UID		11	/* Real uid. */
+#define	AT_EUID		12	/* Effective uid. */
+#define	AT_GID		13	/* Real gid. */
+#define	AT_EGID		14	/* Effective gid. */
+#define	AT_EXECPATH	15	/* Path to the executable. */
+#define	AT_CANARY	16	/* Canary for SSP. */
+#define	AT_CANARYLEN	17	/* Length of the canary. */
+#define	AT_OSRELDATE	18	/* OSRELDATE. */
+#define	AT_NCPUS	19	/* Number of CPUs. */
+#define	AT_PAGESIZES	20	/* Pagesizes. */
+#define	AT_PAGESIZESLEN	21	/* Number of pagesizes. */
+#define	AT_TIMEKEEP	22	/* Pointer to timehands. */
+#define	AT_STACKPROT	23	/* Initial stack protection. */
+#define	AT_EHDRFLAGS	24	/* e_flags field from elf hdr */
+#define	AT_HWCAP	25	/* CPU feature flags. */
+#define	AT_HWCAP2	26	/* CPU feature flags 2. */
+#define	AT_BSDFLAGS	27	/* ELF BSD Flags. */
+#define	AT_ARGC		28	/* Argument count */
+#define	AT_ARGV		29	/* Argument vector */
+#define	AT_ENVC		30	/* Environment count */
+#define	AT_ENVV		31	/* Environment vector */
+#define	AT_PS_STRINGS	32	/* struct ps_strings */
+#define	AT_FXRNG	33	/* Pointer to root RNG seed version. */
+#define	AT_KPRELOAD	34	/* Base of vdso, preloaded by rtld */
+#define	AT_USRSTACKBASE	35	/* Top of user stack */
+#define	AT_USRSTACKLIM	36	/* Grow limit of user stack */
+
+#define	AT_COUNT	37	/* Count of defined aux entry types. */
+#endif // android-added
+
 /*
  * Relocation types.
  *
@@ -1087,7 +1165,7 @@
 #define	R_IA_64_PCREL22		0x7a	/* immediate22	S + A - P */
 #define	R_IA_64_PCREL64I	0x7b	/* immediate64	S + A - P */
 #define	R_IA_64_IPLTMSB		0x80	/* function descriptor MSB special */
-#define	R_IA_64_IPLTLSB		0x81	/* function descriptor LSB speciaal */
+#define	R_IA_64_IPLTLSB		0x81	/* function descriptor LSB special */
 #define	R_IA_64_SUB		0x85	/* immediate64	A - S */
 #define	R_IA_64_LTOFF22X	0x86	/* immediate22	special */
 #define	R_IA_64_LDXMOV		0x87	/* immediate22	special */
@@ -1248,7 +1326,6 @@
 
 /*
  * RISC-V relocation types.
- * https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#relocations
  */
 
 /* Relocation types used by the dynamic linker. */
@@ -1264,7 +1341,6 @@
 #define	R_RISCV_TLS_DTPREL64	9
 #define	R_RISCV_TLS_TPREL32	10
 #define	R_RISCV_TLS_TPREL64	11
-#define	R_RISCV_TLSDESC    	12
 
 /* Relocation types not used by the dynamic linker. */
 #define	R_RISCV_BRANCH		16
@@ -1292,8 +1368,6 @@
 #define	R_RISCV_SUB16		38
 #define	R_RISCV_SUB32		39
 #define	R_RISCV_SUB64		40
-#define	R_RISCV_GNU_VTINHERIT	41
-#define	R_RISCV_GNU_VTENTRY	42
 #define	R_RISCV_ALIGN		43
 #define	R_RISCV_RVC_BRANCH	44
 #define	R_RISCV_RVC_JUMP	45
@@ -1306,13 +1380,6 @@
 #define	R_RISCV_SET32		56
 #define	R_RISCV_32_PCREL	57
 #define	R_RISCV_IRELATIVE	58
-#define	R_RISCV_PLT32		59
-#define	R_RISCV_SET_ULEB128	60
-#define	R_RISCV_SUB_ULEB128	61
-#define	R_RISCV_TLSDESC_HI20	62
-#define	R_RISCV_TLSDESC_LOAD_LO12 63
-#define	R_RISCV_TLSDESC_ADD_LO12 64
-#define	R_RISCV_TLSDESC_CALL	65
 
 #define	R_SPARC_NONE		0
 #define	R_SPARC_8		1
diff --git a/libc/include/dlfcn.h b/libc/include/dlfcn.h
index a506dc1..a90c4f8 100644
--- a/libc/include/dlfcn.h
+++ b/libc/include/dlfcn.h
@@ -99,7 +99,8 @@
 /**
  * [dlsym(3)](http://man7.org/linux/man-pages/man3/dlsym.3.html)
  * returns a pointer to the symbol with the given name in the shared
- * library represented by the given handle.
+ * library represented by the given handle. The handle may have been
+ * returned from dlopen(), or can be RTLD_DEFAULT or RTLD_NEXT.
  *
  * Returns the address of the symbol on success, and returns NULL on failure,
  * in which case dlerror() can be used to retrieve the specific error.
@@ -109,7 +110,8 @@
 /**
  * [dlvsym(3)](http://man7.org/linux/man-pages/man3/dlvsym.3.html)
  * returns a pointer to the symbol with the given name and version in the shared
- * library represented by the given handle.
+ * library represented by the given handle. The handle may have been
+ * returned from dlopen(), or can be RTLD_DEFAULT or RTLD_NEXT.
  *
  * Returns the address of the symbol on success, and returns NULL on failure,
  * in which case dlerror() can be used to retrieve the specific error.
diff --git a/libc/include/elf.h b/libc/include/elf.h
index 81a50db..1275f2e 100644
--- a/libc/include/elf.h
+++ b/libc/include/elf.h
@@ -202,17 +202,11 @@
 #define DF_1_SINGLETON  0x02000000
 #define DF_1_STUB       0x04000000
 
-/* http://www.sco.com/developers/gabi/latest/ch4.eheader.html */
-#define ELFOSABI_SYSV 0 /* Synonym for ELFOSABI_NONE used by valgrind. */
-#define ELFOSABI_GNU 3 /* Synonym for ELFOSABI_LINUX. */
-
 /* http://www.sco.com/developers/gabi/latest/ch4.reloc.html */
 #define ELF32_R_INFO(sym, type) ((((Elf32_Word)sym) << 8) | ((type) & 0xff))
 #define ELF64_R_INFO(sym, type) ((((Elf64_Xword)sym) << 32) | ((type) & 0xffffffff))
 
 /* http://www.sco.com/developers/gabi/latest/ch4.symtab.html */
-#undef ELF_ST_TYPE
-#define ELF_ST_TYPE(x) ((x) & 0xf)
 #define ELF_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf))
 #define ELF32_ST_INFO(b,t) ELF_ST_INFO(b,t)
 #define ELF64_ST_INFO(b,t) ELF_ST_INFO(b,t)
@@ -260,6 +254,13 @@
 #define DT_ANDROID_RELA 0x60000011 // DT_LOOS + 4
 #define DT_ANDROID_RELASZ 0x60000012 // DT_LOOS + 5
 
+/* TODO: upstreamed to FreeBSD as https://github.com/freebsd/freebsd-src/pull/1141/. */
+#define DT_AARCH64_MEMTAG_MODE 0x70000009
+#define DT_AARCH64_MEMTAG_HEAP 0x7000000b
+#define DT_AARCH64_MEMTAG_STACK 0x7000000c
+#define DT_AARCH64_MEMTAG_GLOBALS 0x7000000d
+#define DT_AARCH64_MEMTAG_GLOBALSSZ 0x7000000f
+
 /* Linux traditionally doesn't have the trailing 64 that BSD has on these. */
 #define R_AARCH64_TLS_DTPREL R_AARCH64_TLS_DTPREL64
 #define R_AARCH64_TLS_DTPMOD R_AARCH64_TLS_DTPMOD64
@@ -269,5 +270,24 @@
 #define R_ARM_TLS_DESC 13
 #define R_ARM_IRELATIVE 160
 
-/* BSD spells this slightly differently to Linux. */
+/* FreeBSD is missing these, found in
+ * https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#relocations
+ * so I've sent https://github.com/freebsd/freebsd-src/pull/1141 upstream.
+ */
+#define R_RISCV_TLSDESC 12
+#define R_RISCV_PLT32 59
+#define R_RISCV_SET_ULEB128 60
+#define R_RISCV_SUB_ULEB128 61
+#define R_RISCV_TLSDESC_HI20 62
+#define R_RISCV_TLSDESC_LOAD_LO12 63
+#define R_RISCV_TLSDESC_ADD_LO12 64
+#define R_RISCV_TLSDESC_CALL 65
+
+/* FreeBSD spells this slightly differently to Linux. Linux is correct according to
+ * https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-elf.adoc#file-header
+ * so I've sent https://github.com/freebsd/freebsd-src/pull/1148 upstream.
+ */
+#define EF_RISCV_FLOAT_ABI EF_RISCV_FLOAT_ABI_MASK
+
+/* FreeBSD spells this slightly differently to Linux. */
 #define R_X86_64_JUMP_SLOT R_X86_64_JMP_SLOT
diff --git a/libc/private/bionic_elf_tls.h b/libc/private/bionic_elf_tls.h
index 79ffcc4..3a7b381 100644
--- a/libc/private/bionic_elf_tls.h
+++ b/libc/private/bionic_elf_tls.h
@@ -36,9 +36,28 @@
 
 __LIBC_HIDDEN__ extern _Atomic(size_t) __libc_tls_generation_copy;
 
-struct TlsSegment {
+struct TlsAlign {
+  size_t value = 1;
+  size_t skew = 0;  // p_vaddr % p_align
+
+  template <typename T>
+  static constexpr TlsAlign of_type() {
+    return TlsAlign{.value = alignof(T)};
+  }
+};
+
+struct TlsAlignedSize {
   size_t size = 0;
-  size_t alignment = 1;
+  TlsAlign align;
+
+  template <typename T>
+  static constexpr TlsAlignedSize of_type() {
+    return TlsAlignedSize{.size = sizeof(T), .align = TlsAlign::of_type<T>()};
+  }
+};
+
+struct TlsSegment {
+  TlsAlignedSize aligned_size;
   const void* init_ptr = "";    // Field is non-null even when init_size is 0.
   size_t init_size = 0;
 };
@@ -46,44 +65,50 @@
 __LIBC_HIDDEN__ bool __bionic_get_tls_segment(const ElfW(Phdr)* phdr_table, size_t phdr_count,
                                               ElfW(Addr) load_bias, TlsSegment* out);
 
-__LIBC_HIDDEN__ bool __bionic_check_tls_alignment(size_t* alignment);
+__LIBC_HIDDEN__ bool __bionic_check_tls_align(size_t align);
 
 struct StaticTlsLayout {
   constexpr StaticTlsLayout() {}
 
-private:
-  size_t offset_ = 0;
-  size_t alignment_ = 1;
-  bool overflowed_ = false;
-
-  // Offsets to various Bionic TLS structs from the beginning of static TLS.
-  size_t offset_bionic_tcb_ = SIZE_MAX;
-  size_t offset_bionic_tls_ = SIZE_MAX;
-
 public:
   size_t offset_bionic_tcb() const { return offset_bionic_tcb_; }
   size_t offset_bionic_tls() const { return offset_bionic_tls_; }
   size_t offset_thread_pointer() const;
+  size_t offset_exe() const { return offset_exe_; }
 
-  size_t size() const { return offset_; }
-  size_t alignment() const { return alignment_; }
-  bool overflowed() const { return overflowed_; }
+  size_t size() const { return cursor_; }
 
   size_t reserve_exe_segment_and_tcb(const TlsSegment* exe_segment, const char* progname);
-  void reserve_bionic_tls();
-  size_t reserve_solib_segment(const TlsSegment& segment) {
-    return reserve(segment.size, segment.alignment);
-  }
+  size_t reserve_bionic_tls();
+  size_t reserve_solib_segment(const TlsSegment& segment) { return reserve(segment.aligned_size); }
   void finish_layout();
 
-private:
-  size_t reserve(size_t size, size_t alignment);
+#if !defined(STATIC_TLS_LAYOUT_TEST)
+ private:
+#endif
+  size_t cursor_ = 0;
+  size_t align_ = 1;
+
+  // Offsets to various Bionic TLS structs from the beginning of static TLS.
+  size_t offset_bionic_tcb_ = SIZE_MAX;
+  size_t offset_bionic_tls_ = SIZE_MAX;
+
+  size_t offset_exe_ = SIZE_MAX;
+
+  struct TpAllocations {
+    size_t before;
+    size_t tp;
+    size_t after;
+  };
+
+  size_t align_cursor(TlsAlign align);
+  size_t align_cursor_unskewed(size_t align);
+  size_t reserve(TlsAlignedSize aligned_size);
+  TpAllocations reserve_tp_pair(TlsAlignedSize before, TlsAlignedSize after);
 
   template <typename T> size_t reserve_type() {
-    return reserve(sizeof(T), alignof(T));
+    return reserve(TlsAlignedSize::of_type<T>());
   }
-
-  size_t round_up_with_overflow_check(size_t value, size_t alignment);
 };
 
 static constexpr size_t kTlsGenerationNone = 0;
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 81869b3..f813c1a 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -2869,9 +2869,9 @@
     // The loader does not (currently) support ELF TLS, so it shouldn't have
     // a TLS segment.
     CHECK(!relocating_linker && "TLS not supported in loader");
-    if (!__bionic_check_tls_alignment(&tls_segment.alignment)) {
+    if (!__bionic_check_tls_align(tls_segment.aligned_size.align.value)) {
       DL_ERR("TLS segment alignment in \"%s\" is not a power of 2: %zu", get_realpath(),
-             tls_segment.alignment);
+             tls_segment.aligned_size.align.value);
       return false;
     }
     tls_ = std::make_unique<soinfo_tls>();
diff --git a/tests/Android.bp b/tests/Android.bp
index 89d2267..528ccb8 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -578,6 +578,9 @@
     include_dirs: [
         "bionic/libc",
     ],
+    static_libs: [
+        "libbase",
+    ],
     shared: {
         enabled: false,
     },
@@ -834,8 +837,10 @@
     data_bins: [
         "cfi_test_helper",
         "cfi_test_helper2",
+        "elftls_align_test_helper",
         "elftls_dlopen_ie_error_helper",
         "elftls_dtv_resize_helper",
+        "elftls_skew_align_test_helper",
         "exec_linker_helper",
         "exec_linker_helper_lib",
         "heap_tagging_async_helper",
@@ -1189,9 +1194,9 @@
         "gtest_globals.cpp",
         "gtest_main.cpp",
 
-        // The Bionic allocator has its own C++ API. It isn't packaged into its
-        // own library, so it can only be tested when it's part of libc.a.
+        // Test internal parts of Bionic that aren't exposed via libc.so.
         "bionic_allocator_test.cpp",
+        "static_tls_layout_test.cpp",
     ],
     include_dirs: [
         "bionic/libc",
@@ -1221,6 +1226,8 @@
         never: true,
     },
     data_bins: [
+        "elftls_align_test_helper",
+        "elftls_skew_align_test_helper",
         "heap_tagging_async_helper",
         "heap_tagging_disabled_helper",
         "heap_tagging_static_async_helper",
diff --git a/tests/elftls_test.cpp b/tests/elftls_test.cpp
index 7c072b6..b3f511e 100644
--- a/tests/elftls_test.cpp
+++ b/tests/elftls_test.cpp
@@ -30,6 +30,9 @@
 
 #include <thread>
 
+#include "gtest_globals.h"
+#include "utils.h"
+
 // Specify the LE access model explicitly. This file is compiled into the
 // bionic-unit-tests executable, but the compiler sees an -fpic object file
 // output into a static library, so it defaults to dynamic TLS accesses.
@@ -87,3 +90,17 @@
     ASSERT_EQ(31, ++tlsvar_general);
   }).join();
 }
+
+TEST(elftls, align_test) {
+  std::string helper = GetTestLibRoot() + "/elftls_align_test_helper";
+  ExecTestHelper eth;
+  eth.SetArgs({helper.c_str(), nullptr});
+  eth.Run([&]() { execve(helper.c_str(), eth.GetArgs(), eth.GetEnv()); }, 0, nullptr);
+}
+
+TEST(elftls, skew_align_test) {
+  std::string helper = GetTestLibRoot() + "/elftls_skew_align_test_helper";
+  ExecTestHelper eth;
+  eth.SetArgs({helper.c_str(), nullptr});
+  eth.Run([&]() { execve(helper.c_str(), eth.GetArgs(), eth.GetEnv()); }, 0, nullptr);
+}
diff --git a/tests/libs/Android.bp b/tests/libs/Android.bp
index f640552..fc7fd40 100644
--- a/tests/libs/Android.bp
+++ b/tests/libs/Android.bp
@@ -156,6 +156,20 @@
     ],
 }
 
+cc_test {
+    name: "elftls_align_test_helper",
+    defaults: ["bionic_testlib_defaults"],
+    srcs: ["elftls_align_test_helper.cpp"],
+    stl: "none", // avoid including extra TLS variables in the executable
+}
+
+cc_test {
+    name: "elftls_skew_align_test_helper",
+    defaults: ["bionic_testlib_defaults"],
+    srcs: ["elftls_skew_align_test_helper.cpp"],
+    stl: "none", // avoid including extra TLS variables in the executable
+}
+
 // -----------------------------------------------------------------------------
 // Library to test gnu-styled hash
 // -----------------------------------------------------------------------------
diff --git a/tests/libs/elftls_align_test_helper.cpp b/tests/libs/elftls_align_test_helper.cpp
new file mode 100644
index 0000000..72e81da
--- /dev/null
+++ b/tests/libs/elftls_align_test_helper.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include "CHECK.h"
+
+struct AlignedVar {
+  int field;
+  char buffer[0x1000 - sizeof(int)];
+} __attribute__((aligned(0x400)));
+
+struct SmallVar {
+  int field;
+  char buffer[0xeee - sizeof(int)];
+};
+
+// The single .tdata section should have a size that isn't a multiple of its
+// alignment.
+__thread struct AlignedVar var1 = {13};
+__thread struct AlignedVar var2 = {17};
+__thread struct SmallVar var3 = {19};
+
+static uintptr_t var_addr(void* value) {
+  // Pass the pointer through an empty asm so the optimizer can't assume the
+  // variable has its declared alignment and constant-fold the alignment checks.
+  asm volatile("" : "+r,m"(value) : : "memory");
+  return reinterpret_cast<uintptr_t>(value);
+}
+
+int main() {
+  CHECK((var_addr(&var1) & 0x3ff) == 0);
+  CHECK((var_addr(&var2) & 0x3ff) == 0);
+  CHECK(var1.field == 13);
+  CHECK(var2.field == 17);
+  CHECK(var3.field == 19);
+  return 0;
+}
diff --git a/tests/libs/elftls_skew_align_test_helper.cpp b/tests/libs/elftls_skew_align_test_helper.cpp
new file mode 100644
index 0000000..f7f082d
--- /dev/null
+++ b/tests/libs/elftls_skew_align_test_helper.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// LLD tries not to generate a PT_TLS segment where (p_vaddr % p_align) is
+// non-zero. It can still do so if the p_align values are greater than a page.
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include "CHECK.h"
+
+struct SmallVar {
+  int field;
+  char buffer[0x100 - sizeof(int)];
+};
+
+struct AlignedVar {
+  int field;
+  char buffer[0x20000 - sizeof(int)];
+} __attribute__((aligned(0x20000)));
+
+__thread struct SmallVar var1 = {13};
+__thread struct SmallVar var2 = {17};
+__thread struct AlignedVar var3;
+__thread struct AlignedVar var4;
+
+static uintptr_t var_addr(void* value) {
+  // Pass the pointer through an empty asm so the optimizer can't assume the
+  // variable has its declared alignment and constant-fold the alignment checks.
+  asm volatile("" : "+r,m"(value) : : "memory");
+  return reinterpret_cast<uintptr_t>(value);
+}
+
+int main() {
+  // Bionic only allocates ELF TLS blocks with up to page alignment.
+  CHECK((var_addr(&var3) & (getpagesize() - 1)) == 0);
+  CHECK((var_addr(&var4) & (getpagesize() - 1)) == 0);
+
+  // TODO: These TLS accesses are broken on RISC-V with the current version of
+  // LLD. See https://github.com/llvm/llvm-project/issues/84743.
+#if !defined(__riscv)
+  CHECK(var1.field == 13);
+  CHECK(var2.field == 17);
+#endif
+
+  CHECK(var3.field == 0);
+  CHECK(var4.field == 0);
+  return 0;
+}
diff --git a/tests/static_tls_layout_test.cpp b/tests/static_tls_layout_test.cpp
new file mode 100644
index 0000000..bf508e8
--- /dev/null
+++ b/tests/static_tls_layout_test.cpp
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#define STATIC_TLS_LAYOUT_TEST
+
+#include "private/bionic_elf_tls.h"
+
+#include <string>
+#include <tuple>
+
+#include <gtest/gtest.h>
+
+#include "private/bionic_tls.h"
+
+using namespace std::string_literals;
+
+struct AlignedSizeFlat {
+  size_t size = 0;
+  size_t align = 1;
+  size_t skew = 0;
+};
+
+static TlsAlignedSize unflatten_size(AlignedSizeFlat flat) {
+  return TlsAlignedSize{.size = flat.size,
+                        .align = TlsAlign{
+                            .value = flat.align,
+                            .skew = flat.skew,
+                        }};
+}
+
+TEST(static_tls_layout, reserve_tp_pair) {
+  auto reserve_tp = [](const AlignedSizeFlat& before, const AlignedSizeFlat& after,
+                       StaticTlsLayout layout = {}) {
+    auto allocs = layout.reserve_tp_pair(unflatten_size(before), unflatten_size(after));
+    return std::make_tuple(layout, allocs);
+  };
+
+  StaticTlsLayout layout;
+  StaticTlsLayout::TpAllocations allocs;
+
+  // Simple case.
+  std::tie(layout, allocs) = reserve_tp({.size = 8, .align = 2}, {.size = 16, .align = 2});
+  EXPECT_EQ(0u, allocs.before);
+  EXPECT_EQ(8u, allocs.tp);
+  EXPECT_EQ(8u, allocs.after);
+  EXPECT_EQ(24u, layout.size());
+  EXPECT_EQ(2u, layout.align_);
+
+  // Zero-sized `before`
+  std::tie(layout, allocs) = reserve_tp({.size = 0}, {.size = 64, .align = 8});
+  EXPECT_EQ(0u, allocs.before);
+  EXPECT_EQ(0u, allocs.tp);
+  EXPECT_EQ(0u, allocs.after);
+
+  // Zero-sized `after`
+  std::tie(layout, allocs) = reserve_tp({.size = 64, .align = 8}, {.size = 0});
+  EXPECT_EQ(0u, allocs.before);
+  EXPECT_EQ(64u, allocs.tp);
+  EXPECT_EQ(64u, allocs.after);
+
+  // The `before` allocation is shifted forward to the TP.
+  std::tie(layout, allocs) = reserve_tp({.size = 1}, {.size = 64, .align = 8});
+  EXPECT_EQ(7u, allocs.before);
+  EXPECT_EQ(8u, allocs.tp);
+  EXPECT_EQ(8u, allocs.after);
+
+  // Alignment gap between `before` and TP.
+  std::tie(layout, allocs) = reserve_tp({.size = 9, .align = 4}, {.size = 1});
+  EXPECT_EQ(0u, allocs.before);
+  EXPECT_EQ(12u, allocs.tp);
+  EXPECT_EQ(12u, allocs.after);
+  EXPECT_EQ(13u, layout.size());
+  EXPECT_EQ(4u, layout.align_);
+
+  // A highly aligned `after` shifts `before` forward; a gap still precedes the TP.
+  std::tie(layout, allocs) = reserve_tp({.size = 9, .align = 4}, {.size = 128, .align = 64});
+  EXPECT_EQ(52u, allocs.before);
+  EXPECT_EQ(64u, allocs.tp);
+  EXPECT_EQ(64u, allocs.after);
+  EXPECT_EQ(192u, layout.size());
+  EXPECT_EQ(64u, layout.align_);
+
+  // Skew-aligned `before` with low alignment.
+  std::tie(layout, allocs) =
+      reserve_tp({.size = 1, .align = 4, .skew = 1}, {.size = 64, .align = 8});
+  EXPECT_EQ(5u, allocs.before);
+  EXPECT_EQ(8u, allocs.tp);
+
+  // Skew-aligned `before` with high alignment.
+  std::tie(layout, allocs) = reserve_tp({.size = 48, .align = 64, .skew = 17}, {.size = 1});
+  EXPECT_EQ(17u, allocs.before);
+  EXPECT_EQ(128u, allocs.tp);
+
+  // An unrelated byte precedes the pair in the layout. Make sure `before` is
+  // still aligned.
+  layout = {};
+  layout.reserve_type<char>();
+  std::tie(layout, allocs) = reserve_tp({.size = 12, .align = 16}, {.size = 1}, layout);
+  EXPECT_EQ(16u, allocs.before);
+  EXPECT_EQ(32u, allocs.tp);
+
+  // Skew-aligned `after`.
+  std::tie(layout, allocs) =
+      reserve_tp({.size = 32, .align = 8}, {.size = 16, .align = 4, .skew = 3});
+  EXPECT_EQ(0u, allocs.before);
+  EXPECT_EQ(32u, allocs.tp);
+  EXPECT_EQ(35u, allocs.after);
+  EXPECT_EQ(51u, layout.size());
+}
+
+// A "NUM_words" literal is the size in bytes of NUM words of memory.
+static size_t operator""_words(unsigned long long i) {
+  return i * sizeof(void*);
+}
+
+TEST(static_tls_layout, arm) {
+#if !defined(__arm__) && !defined(__aarch64__)
+  GTEST_SKIP() << "test only applies to arm32/arm64 targets";
+#endif
+
+  auto reserve_exe = [](const AlignedSizeFlat& config) {
+    StaticTlsLayout layout;
+    TlsSegment seg = {.aligned_size = unflatten_size(config)};
+    layout.reserve_exe_segment_and_tcb(&seg, "prog");
+    return layout;
+  };
+
+  auto underalign_error = [](size_t align, size_t offset) {
+    return R"(error: "prog": executable's TLS segment is underaligned: )"s
+           R"(alignment is )"s +
+           std::to_string(align) + R"( \(skew )" + std::to_string(offset) +
+           R"(\), needs to be at least (32 for ARM|64 for ARM64) Bionic)"s;
+  };
+
+  // Amount of memory needed for negative TLS slots, given a segment p_align of
+  // 8 or 16 words.
+  const size_t base8 = __BIONIC_ALIGN(-MIN_TLS_SLOT, 8) * sizeof(void*);
+  const size_t base16 = __BIONIC_ALIGN(-MIN_TLS_SLOT, 16) * sizeof(void*);
+
+  StaticTlsLayout layout;
+
+  // An executable with a single word.
+  layout = reserve_exe({.size = 1_words, .align = 8_words});
+  EXPECT_EQ(base8 + MIN_TLS_SLOT * sizeof(void*), layout.offset_bionic_tcb());
+  EXPECT_EQ(base8, layout.offset_thread_pointer());
+  EXPECT_EQ(base8 + 8_words, layout.offset_exe());
+  EXPECT_EQ(base8 + 9_words, layout.size());
+  EXPECT_EQ(8_words, layout.align_);
+
+  // Simple underalignment case.
+  EXPECT_DEATH(reserve_exe({.size = 1_words, .align = 1_words}), underalign_error(1_words, 0));
+
+  // Skewed by 1 word is OK.
+  layout = reserve_exe({.size = 1_words, .align = 8_words, .skew = 1_words});
+  EXPECT_EQ(base8, layout.offset_thread_pointer());
+  EXPECT_EQ(base8 + 9_words, layout.offset_exe());
+  EXPECT_EQ(base8 + 10_words, layout.size());
+  EXPECT_EQ(8_words, layout.align_);
+
+  // Skewed by 2 words would overlap Bionic slots, regardless of the p_align
+  // value.
+  EXPECT_DEATH(reserve_exe({.size = 1_words, .align = 8_words, .skew = 2_words}),
+               underalign_error(8_words, 2_words));
+  EXPECT_DEATH(reserve_exe({.size = 1_words, .align = 0x1000, .skew = 2_words}),
+               underalign_error(0x1000, 2_words));
+
+  // Skewed by 8 words is OK again.
+  layout = reserve_exe({.size = 1_words, .align = 16_words, .skew = 8_words});
+  EXPECT_EQ(base16, layout.offset_thread_pointer());
+  EXPECT_EQ(base16 + 8_words, layout.offset_exe());
+  EXPECT_EQ(base16 + 9_words, layout.size());
+  EXPECT_EQ(16_words, layout.align_);
+
+  // Skewed by 9 words is also OK. (The amount of skew doesn't need to be a
+  // multiple of anything.)
+  layout = reserve_exe({.size = 1_words, .align = 16_words, .skew = 9_words});
+  EXPECT_EQ(base16, layout.offset_thread_pointer());
+  EXPECT_EQ(base16 + 9_words, layout.offset_exe());
+  EXPECT_EQ(base16 + 10_words, layout.size());
+  EXPECT_EQ(16_words, layout.align_);
+
+  // Skew with large alignment.
+  layout = reserve_exe({.size = 1_words, .align = 256_words, .skew = 8_words});
+  EXPECT_EQ(256_words, layout.offset_thread_pointer());
+  EXPECT_EQ(264_words, layout.offset_exe());
+  EXPECT_EQ(265_words, layout.size());
+  EXPECT_EQ(256_words, layout.align_);
+}