Merge "Fix StaticTlsLayout for atypical alignment values" into main
diff --git a/libc/bionic/bionic_elf_tls.cpp b/libc/bionic/bionic_elf_tls.cpp
index 077f310..a053c27 100644
--- a/libc/bionic/bionic_elf_tls.cpp
+++ b/libc/bionic/bionic_elf_tls.cpp
@@ -60,11 +60,18 @@
   for (size_t i = 0; i < phdr_count; ++i) {
     const ElfW(Phdr)& phdr = phdr_table[i];
     if (phdr.p_type == PT_TLS) {
-      *out = TlsSegment {
-        phdr.p_memsz,
-        phdr.p_align,
-        reinterpret_cast<void*>(load_bias + phdr.p_vaddr),
-        phdr.p_filesz,
+      *out = TlsSegment{
+          .aligned_size =
+              TlsAlignedSize{
+                  .size = phdr.p_memsz,
+                  .align =
+                      TlsAlign{
+                          .value = phdr.p_align ?: 1,  // 0 means "no alignment requirement"
+                          .skew = phdr.p_vaddr % MAX(1, phdr.p_align),
+                      },
+              },
+          .init_ptr = reinterpret_cast<void*>(load_bias + phdr.p_vaddr),
+          .init_size = phdr.p_filesz,
       };
       return true;
     }
@@ -72,114 +79,171 @@
   return false;
 }
 
-// Return true if the alignment of a TLS segment is a valid power-of-two. Also
-// cap the alignment if it's too high.
-bool __bionic_check_tls_alignment(size_t* alignment) {
-  // N.B. The size does not need to be a multiple of the alignment. With
-  // ld.bfd (or after using binutils' strip), the TLS segment's size isn't
-  // rounded up.
-  if (*alignment == 0 || !powerof2(*alignment)) {
-    return false;
-  }
-  // Bionic only respects TLS alignment up to one page.
-  *alignment = MIN(*alignment, page_size());
-  return true;
+// Returns true if `align`, the alignment of a TLS segment, is a power of two.
+bool __bionic_check_tls_align(size_t align) {
+  // Note: The size does not need to be a multiple of the alignment. With ld.bfd
+  // (or after using binutils' strip), the TLS segment's size isn't rounded up.
+  return powerof2(align);  // NOTE(review): 0 presumably passes this check too -- confirm.
+}
+
+static void static_tls_layout_overflow() {  // Fatal-error path shared by the overflow checks.
+  async_safe_fatal("error: TLS segments in static TLS overflowed");
+}
+
+static size_t align_checked(size_t value, TlsAlign tls_align) {
+  const size_t align = tls_align.value;  // Required alignment (power of two).
+  const size_t skew = tls_align.skew;  // Required remainder: result % align == skew.
+  CHECK(align != 0 && powerof2(align + 0) && skew < align);
+  const size_t result = ((value - skew + align - 1) & ~(align - 1)) + skew;  // Round up.
+  if (result < value) static_tls_layout_overflow();  // The round-up wrapped around.
+  return result;
 }
 
 size_t StaticTlsLayout::offset_thread_pointer() const {
   return offset_bionic_tcb_ + (-MIN_TLS_SLOT * sizeof(void*));
 }
 
-// Reserves space for the Bionic TCB and the executable's TLS segment. Returns
-// the offset of the executable's TLS segment.
-size_t StaticTlsLayout::reserve_exe_segment_and_tcb(const TlsSegment* exe_segment,
+// Allocates the Bionic TCB and the executable's TLS segment in the static TLS
+// layout, satisfying alignment requirements for both.
+//
+// For an executable's TLS accesses (using the LocalExec model), the static
+// linker bakes TLS offsets directly into the .text section, so the loader must
+// place the executable segment at the same offset relative to the TP.
+// Similarly, the Bionic TLS slots (bionic_tcb) must also be allocated at the
+// correct offset relative to the TP.
+//
+// Returns the offset of the executable's TLS segment.
+//
+// Note: This function has unit tests, but they are in bionic-unit-tests-static,
+// not bionic-unit-tests.
+size_t StaticTlsLayout::reserve_exe_segment_and_tcb(const TlsSegment* seg,
                                                     const char* progname __attribute__((unused))) {
   // Special case: if the executable has no TLS segment, then just allocate a
   // TCB and skip the minimum alignment check on ARM.
-  if (exe_segment == nullptr) {
+  if (seg == nullptr) {
     offset_bionic_tcb_ = reserve_type<bionic_tcb>();
     return 0;
   }
 
 #if defined(__arm__) || defined(__aarch64__)
+  // ARM uses a "variant 1" TLS layout. The ABI specifies that the TP points at
+  // a 2-word TCB, followed by the executable's segment. In practice, libc
+  // implementations actually allocate a larger TCB at negative offsets from the
+  // TP.
+  //
+  // Historically, Bionic allocated an 8-word TCB starting at TP+0, so to keep
+  // the executable's TLS segment from overlapping the last 6 slots, Bionic
+  // requires that executables have an 8-word PT_TLS alignment to ensure that
+  // the TCB fits in the alignment padding, which it accomplishes using
+  // crtbegin.c. Bionic uses negative offsets for new TLS slots to avoid this
+  // problem.
 
-  // First reserve enough space for the TCB before the executable segment.
-  reserve(sizeof(bionic_tcb), 1);
+  static_assert(MIN_TLS_SLOT <= 0 && MAX_TLS_SLOT >= 1);
+  static_assert(sizeof(bionic_tcb) == (MAX_TLS_SLOT - MIN_TLS_SLOT + 1) * sizeof(void*));
+  static_assert(alignof(bionic_tcb) == sizeof(void*));
+  const size_t max_align = MAX(alignof(bionic_tcb), seg->aligned_size.align.value);
 
-  // Then reserve the segment itself.
-  const size_t result = reserve(exe_segment->size, exe_segment->alignment);
+  // Allocate the TCB first. Split it into negative and non-negative slots and
+  // ensure that TP (i.e. the first non-negative slot) is aligned to max_align.
+  const size_t tcb_size_pre = -MIN_TLS_SLOT * sizeof(void*);
+  const size_t tcb_size_post = (MAX_TLS_SLOT + 1) * sizeof(void*);
+  const auto pair =
+      reserve_tp_pair(TlsAlignedSize{.size = tcb_size_pre},
+                      TlsAlignedSize{.size = tcb_size_post, .align = TlsAlign{.value = max_align}});
+  offset_bionic_tcb_ = pair.before;
+  const size_t offset_tp = pair.tp;
 
-  // The variant 1 ABI that ARM linkers follow specifies a 2-word TCB between
-  // the thread pointer and the start of the executable's TLS segment, but both
-  // the thread pointer and the TLS segment are aligned appropriately for the
-  // TLS segment. Calculate the distance between the thread pointer and the
-  // EXE's segment.
-  const size_t exe_tpoff = __BIONIC_ALIGN(sizeof(void*) * 2, exe_segment->alignment);
+  // Allocate the segment.
+  offset_exe_ = reserve(seg->aligned_size);
 
-  const size_t min_bionic_alignment = BIONIC_ROUND_UP_POWER_OF_2(MAX_TLS_SLOT) * sizeof(void*);
-  if (exe_tpoff < min_bionic_alignment) {
-    async_safe_fatal("error: \"%s\": executable's TLS segment is underaligned: "
-                     "alignment is %zu, needs to be at least %zu for %s Bionic",
-                     progname, exe_segment->alignment, min_bionic_alignment,
-                     (sizeof(void*) == 4 ? "ARM" : "ARM64"));
+  // Verify that the ABI and Bionic tpoff values are equal, which is equivalent
+  // to checking whether the segment is sufficiently aligned.
+  const size_t abi_tpoff = align_checked(2 * sizeof(void*), seg->aligned_size.align);
+  const size_t actual_tpoff = align_checked(tcb_size_post, seg->aligned_size.align);
+  CHECK(actual_tpoff == offset_exe_ - offset_tp);
+
+  if (abi_tpoff != actual_tpoff) {
+    async_safe_fatal(
+        "error: \"%s\": executable's TLS segment is underaligned: "
+        "alignment is %zu (skew %zu), needs to be at least %zu for %s Bionic",
+        progname, seg->aligned_size.align.value, seg->aligned_size.align.skew, tcb_size_post,
+        (sizeof(void*) == 4 ? "ARM" : "ARM64"));
   }
 
-  offset_bionic_tcb_ = result - exe_tpoff - (-MIN_TLS_SLOT * sizeof(void*));
-  return result;
-
 #elif defined(__i386__) || defined(__x86_64__)
 
-  // x86 uses variant 2 TLS layout. The executable's segment is located just
-  // before the TCB.
-  static_assert(MIN_TLS_SLOT == 0, "First slot of bionic_tcb must be slot #0 on x86");
-  const size_t exe_size = round_up_with_overflow_check(exe_segment->size, exe_segment->alignment);
-  reserve(exe_size, 1);
-  const size_t max_align = MAX(alignof(bionic_tcb), exe_segment->alignment);
-  offset_bionic_tcb_ = reserve(sizeof(bionic_tcb), max_align);
-  return offset_bionic_tcb_ - exe_size;
+  auto pair = reserve_tp_pair(seg->aligned_size, TlsAlignedSize::of_type<bionic_tcb>());
+  offset_exe_ = pair.before;
+  offset_bionic_tcb_ = pair.after;
 
 #elif defined(__riscv)
+  static_assert(MAX_TLS_SLOT == -1, "Last slot of bionic_tcb must be slot #(-1) on riscv");
 
-  // First reserve enough space for the TCB before the executable segment.
-  offset_bionic_tcb_ = reserve(sizeof(bionic_tcb), 1);
-
-  // Then reserve the segment itself.
-  const size_t exe_size = round_up_with_overflow_check(exe_segment->size, exe_segment->alignment);
-  return reserve(exe_size, 1);
+  auto pair = reserve_tp_pair(TlsAlignedSize::of_type<bionic_tcb>(), seg->aligned_size);
+  offset_bionic_tcb_ = pair.before;
+  offset_exe_ = pair.after;
 
 #else
 #error "Unrecognized architecture"
 #endif
+
+  return offset_exe_;
 }
 
-void StaticTlsLayout::reserve_bionic_tls() {
+size_t StaticTlsLayout::reserve_bionic_tls() {
   offset_bionic_tls_ = reserve_type<bionic_tls>();
+  return offset_bionic_tls_;
 }
 
 void StaticTlsLayout::finish_layout() {
   // Round the offset up to the alignment.
-  offset_ = round_up_with_overflow_check(offset_, alignment_);
-
-  if (overflowed_) {
-    async_safe_fatal("error: TLS segments in static TLS overflowed");
-  }
+  cursor_ = align_checked(cursor_, TlsAlign{.value = align_});
 }
 
-// The size is not required to be a multiple of the alignment. The alignment
-// must be a positive power-of-two.
-size_t StaticTlsLayout::reserve(size_t size, size_t alignment) {
-  offset_ = round_up_with_overflow_check(offset_, alignment);
-  const size_t result = offset_;
-  if (__builtin_add_overflow(offset_, size, &offset_)) overflowed_ = true;
-  alignment_ = MAX(alignment_, alignment);
+size_t StaticTlsLayout::align_cursor(TlsAlign align) {
+  cursor_ = align_checked(cursor_, align);  // Advance to the next offset satisfying `align`.
+  align_ = MAX(align_, align.value);  // Track the largest alignment requested so far.
+  return cursor_;
+}
+
+size_t StaticTlsLayout::align_cursor_unskewed(size_t align) {
+  return align_cursor(TlsAlign{.value = align});  // skew keeps its default of 0
+}
+
+// Reserve `aligned_size.size` bytes at the requested (possibly skewed) alignment and return the
+// offset of the reservation. The size need not be a multiple of the alignment, and the cursor is
+// left unaligned just past the allocation. Overflow goes to static_tls_layout_overflow().
+size_t StaticTlsLayout::reserve(TlsAlignedSize aligned_size) {
+  align_cursor(aligned_size.align);
+  const size_t result = cursor_;
+  if (__builtin_add_overflow(cursor_, aligned_size.size, &cursor_)) static_tls_layout_overflow();
   return result;
 }
 
-size_t StaticTlsLayout::round_up_with_overflow_check(size_t value, size_t alignment) {
-  const size_t old_value = value;
-  value = __BIONIC_ALIGN(value, alignment);
-  if (value < old_value) overflowed_ = true;
-  return value;
+// Calculate the TP offset and allocate something before it and something after
+// it. The TP will be aligned to:
+//
+//     MAX(before.align.value, after.align.value)
+//
+// The `before` and `after` allocations are each allocated as closely as
+// possible to the TP.
+StaticTlsLayout::TpAllocations StaticTlsLayout::reserve_tp_pair(TlsAlignedSize before,
+                                                                TlsAlignedSize after) {
+  // Tentative `before` allocation; its end is rounded up to `before`'s unskewed alignment.
+  const size_t tentative_before = reserve(before);
+  const size_t tentative_before_end = align_cursor_unskewed(before.align.value);
+
+  const size_t offset_tp = align_cursor_unskewed(MAX(before.align.value, after.align.value));
+
+  const size_t offset_after = reserve(after);  // Starts past the TP if `after` is skewed.
+
+  // If the `after` allocation has higher alignment than `before`, then there
+  // may be alignment padding between `before` and the TP. Shift `before`
+  // forward by that padding; the CHECK confirms the shift keeps it aligned.
+  CHECK(((offset_tp - tentative_before_end) & (before.align.value - 1)) == 0);
+  const size_t offset_before = tentative_before + (offset_tp - tentative_before_end);
+
+  return TpAllocations{offset_before, offset_tp, offset_after};
 }
 
 // Copy each TLS module's initialization image into a newly-allocated block of
@@ -309,7 +373,11 @@
   void* mod_ptr = dtv->modules[module_idx];
   if (mod_ptr == nullptr) {
     const TlsSegment& segment = modules.module_table[module_idx].segment;
-    mod_ptr = __libc_shared_globals()->tls_allocator.memalign(segment.alignment, segment.size);
+    // TODO: Currently the aligned_size.align.skew property is ignored.
+    // That is, for a dynamic TLS block at addr A, (A % p_align) will be 0, not
+    // (p_vaddr % p_align).
+    mod_ptr = __libc_shared_globals()->tls_allocator.memalign(segment.aligned_size.align.value,
+                                                              segment.aligned_size.size);
     if (segment.init_size > 0) {
       memcpy(mod_ptr, segment.init_ptr, segment.init_size);
     }
@@ -317,8 +385,8 @@
 
     // Reports the allocation to the listener, if any.
     if (modules.on_creation_cb != nullptr) {
-      modules.on_creation_cb(mod_ptr,
-                             static_cast<void*>(static_cast<char*>(mod_ptr) + segment.size));
+      modules.on_creation_cb(
+          mod_ptr, static_cast<void*>(static_cast<char*>(mod_ptr) + segment.aligned_size.size));
     }
   }
 
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index f091ff8..d86df30 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -138,9 +138,9 @@
   static TlsModule mod;
   TlsModules& modules = __libc_shared_globals()->tls_modules;
   if (__bionic_get_tls_segment(phdr_start, phdr_ct, 0, &mod.segment)) {
-    if (!__bionic_check_tls_alignment(&mod.segment.alignment)) {
+    if (!__bionic_check_tls_align(mod.segment.aligned_size.align.value)) {
       async_safe_fatal("error: TLS segment alignment in \"%s\" is not a power of 2: %zu\n",
-                       progname, mod.segment.alignment);
+                       progname, mod.segment.aligned_size.align.value);
     }
     mod.static_offset = layout.reserve_exe_segment_and_tcb(&mod.segment, progname);
     mod.first_generation = kTlsGenerationFirst;
diff --git a/libc/private/bionic_elf_tls.h b/libc/private/bionic_elf_tls.h
index 79ffcc4..3a7b381 100644
--- a/libc/private/bionic_elf_tls.h
+++ b/libc/private/bionic_elf_tls.h
@@ -36,9 +36,28 @@
 
 __LIBC_HIDDEN__ extern _Atomic(size_t) __libc_tls_generation_copy;
 
-struct TlsSegment {
+struct TlsAlign {
+  size_t value = 1;  // Alignment in bytes; must be a nonzero power of two.
+  size_t skew = 0;  // p_vaddr % p_align; must be less than `value`.
+
+  template <typename T>
+  static constexpr TlsAlign of_type() {
+    return TlsAlign{.value = alignof(T)};  // skew keeps its default of 0
+  }
+};
+
+struct TlsAlignedSize {
   size_t size = 0;
-  size_t alignment = 1;
+  TlsAlign align;  // Alignment and skew required for the start of the region.
+
+  template <typename T>
+  static constexpr TlsAlignedSize of_type() {
+    return TlsAlignedSize{.size = sizeof(T), .align = TlsAlign::of_type<T>()};
+  }
+};
+
+struct TlsSegment {
+  TlsAlignedSize aligned_size;
   const void* init_ptr = "";    // Field is non-null even when init_size is 0.
   size_t init_size = 0;
 };
@@ -46,44 +65,50 @@
 __LIBC_HIDDEN__ bool __bionic_get_tls_segment(const ElfW(Phdr)* phdr_table, size_t phdr_count,
                                               ElfW(Addr) load_bias, TlsSegment* out);
 
-__LIBC_HIDDEN__ bool __bionic_check_tls_alignment(size_t* alignment);
+__LIBC_HIDDEN__ bool __bionic_check_tls_align(size_t align);
 
 struct StaticTlsLayout {
   constexpr StaticTlsLayout() {}
 
-private:
-  size_t offset_ = 0;
-  size_t alignment_ = 1;
-  bool overflowed_ = false;
-
-  // Offsets to various Bionic TLS structs from the beginning of static TLS.
-  size_t offset_bionic_tcb_ = SIZE_MAX;
-  size_t offset_bionic_tls_ = SIZE_MAX;
-
 public:
   size_t offset_bionic_tcb() const { return offset_bionic_tcb_; }
   size_t offset_bionic_tls() const { return offset_bionic_tls_; }
   size_t offset_thread_pointer() const;
+  size_t offset_exe() const { return offset_exe_; }
 
-  size_t size() const { return offset_; }
-  size_t alignment() const { return alignment_; }
-  bool overflowed() const { return overflowed_; }
+  size_t size() const { return cursor_; }
 
   size_t reserve_exe_segment_and_tcb(const TlsSegment* exe_segment, const char* progname);
-  void reserve_bionic_tls();
-  size_t reserve_solib_segment(const TlsSegment& segment) {
-    return reserve(segment.size, segment.alignment);
-  }
+  size_t reserve_bionic_tls();
+  size_t reserve_solib_segment(const TlsSegment& segment) { return reserve(segment.aligned_size); }
   void finish_layout();
 
-private:
-  size_t reserve(size_t size, size_t alignment);
+#if !defined(STATIC_TLS_LAYOUT_TEST)
+ private:
+#endif
+  size_t cursor_ = 0;
+  size_t align_ = 1;
+
+  // Offsets to various Bionic TLS structs from the beginning of static TLS.
+  size_t offset_bionic_tcb_ = SIZE_MAX;
+  size_t offset_bionic_tls_ = SIZE_MAX;
+
+  size_t offset_exe_ = SIZE_MAX;
+
+  struct TpAllocations {
+    size_t before;
+    size_t tp;
+    size_t after;
+  };
+
+  size_t align_cursor(TlsAlign align);
+  size_t align_cursor_unskewed(size_t align);
+  size_t reserve(TlsAlignedSize aligned_size);
+  TpAllocations reserve_tp_pair(TlsAlignedSize before, TlsAlignedSize after);
 
   template <typename T> size_t reserve_type() {
-    return reserve(sizeof(T), alignof(T));
+    return reserve(TlsAlignedSize::of_type<T>());
   }
-
-  size_t round_up_with_overflow_check(size_t value, size_t alignment);
 };
 
 static constexpr size_t kTlsGenerationNone = 0;
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 81869b3..f813c1a 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -2869,9 +2869,9 @@
     // The loader does not (currently) support ELF TLS, so it shouldn't have
     // a TLS segment.
     CHECK(!relocating_linker && "TLS not supported in loader");
-    if (!__bionic_check_tls_alignment(&tls_segment.alignment)) {
+    if (!__bionic_check_tls_align(tls_segment.aligned_size.align.value)) {
       DL_ERR("TLS segment alignment in \"%s\" is not a power of 2: %zu", get_realpath(),
-             tls_segment.alignment);
+             tls_segment.aligned_size.align.value);
       return false;
     }
     tls_ = std::make_unique<soinfo_tls>();
diff --git a/tests/Android.bp b/tests/Android.bp
index 89d2267..528ccb8 100644
--- a/tests/Android.bp
+++ b/tests/Android.bp
@@ -578,6 +578,9 @@
     include_dirs: [
         "bionic/libc",
     ],
+    static_libs: [
+        "libbase",
+    ],
     shared: {
         enabled: false,
     },
@@ -834,8 +837,10 @@
     data_bins: [
         "cfi_test_helper",
         "cfi_test_helper2",
+        "elftls_align_test_helper",
         "elftls_dlopen_ie_error_helper",
         "elftls_dtv_resize_helper",
+        "elftls_skew_align_test_helper",
         "exec_linker_helper",
         "exec_linker_helper_lib",
         "heap_tagging_async_helper",
@@ -1189,9 +1194,9 @@
         "gtest_globals.cpp",
         "gtest_main.cpp",
 
-        // The Bionic allocator has its own C++ API. It isn't packaged into its
-        // own library, so it can only be tested when it's part of libc.a.
+        // Test internal parts of Bionic that aren't exposed via libc.so.
         "bionic_allocator_test.cpp",
+        "static_tls_layout_test.cpp",
     ],
     include_dirs: [
         "bionic/libc",
@@ -1221,6 +1226,8 @@
         never: true,
     },
     data_bins: [
+        "elftls_align_test_helper",
+        "elftls_skew_align_test_helper",
         "heap_tagging_async_helper",
         "heap_tagging_disabled_helper",
         "heap_tagging_static_async_helper",
diff --git a/tests/elftls_test.cpp b/tests/elftls_test.cpp
index 7c072b6..b3f511e 100644
--- a/tests/elftls_test.cpp
+++ b/tests/elftls_test.cpp
@@ -30,6 +30,9 @@
 
 #include <thread>
 
+#include "gtest_globals.h"
+#include "utils.h"
+
 // Specify the LE access model explicitly. This file is compiled into the
 // bionic-unit-tests executable, but the compiler sees an -fpic object file
 // output into a static library, so it defaults to dynamic TLS accesses.
@@ -87,3 +90,17 @@
     ASSERT_EQ(31, ++tlsvar_general);
   }).join();
 }
+
+TEST(elftls, align_test) {  // Runs the helper binary, which CHECKs its own TLS variables.
+  std::string helper = GetTestLibRoot() + "/elftls_align_test_helper";
+  ExecTestHelper eth;
+  eth.SetArgs({helper.c_str(), nullptr});
+  eth.Run([&]() { execve(helper.c_str(), eth.GetArgs(), eth.GetEnv()); }, 0, nullptr);
+}
+
+TEST(elftls, skew_align_test) {  // Runs the helper binary, which CHECKs its own TLS variables.
+  std::string helper = GetTestLibRoot() + "/elftls_skew_align_test_helper";
+  ExecTestHelper eth;
+  eth.SetArgs({helper.c_str(), nullptr});
+  eth.Run([&]() { execve(helper.c_str(), eth.GetArgs(), eth.GetEnv()); }, 0, nullptr);
+}
diff --git a/tests/libs/Android.bp b/tests/libs/Android.bp
index f640552..fc7fd40 100644
--- a/tests/libs/Android.bp
+++ b/tests/libs/Android.bp
@@ -156,6 +156,20 @@
     ],
 }
 
+cc_test {
+    name: "elftls_align_test_helper",
+    defaults: ["bionic_testlib_defaults"],
+    srcs: ["elftls_align_test_helper.cpp"],
+    stl: "none", // avoid including extra TLS variables in the executable
+}
+
+cc_test {
+    name: "elftls_skew_align_test_helper",
+    defaults: ["bionic_testlib_defaults"],
+    srcs: ["elftls_skew_align_test_helper.cpp"],
+    stl: "none", // avoid including extra TLS variables in the executable
+}
+
 // -----------------------------------------------------------------------------
 // Library to test gnu-styled hash
 // -----------------------------------------------------------------------------
diff --git a/tests/libs/elftls_align_test_helper.cpp b/tests/libs/elftls_align_test_helper.cpp
new file mode 100644
index 0000000..72e81da
--- /dev/null
+++ b/tests/libs/elftls_align_test_helper.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include "CHECK.h"
+
+struct AlignedVar {
+  int field;
+  char buffer[0x1000 - sizeof(int)];
+} __attribute__((aligned(0x400)));
+
+struct SmallVar {
+  int field;
+  char buffer[0xeee - sizeof(int)];
+};
+
+// The single .tdata section should have a size that isn't a multiple of its
+// alignment.
+__thread struct AlignedVar var1 = {13};
+__thread struct AlignedVar var2 = {17};
+__thread struct SmallVar var3 = {19};
+
+static uintptr_t var_addr(void* value) {
+  // Pass the pointer through an empty asm so that the optimizer can't assume the
+  // variable has its declared alignment and fold away the caller's alignment checks.
+  asm volatile("" : "+r,m"(value) : : "memory");
+  return reinterpret_cast<uintptr_t>(value);
+}
+
+int main() {
+  CHECK((var_addr(&var1) & 0x3ff) == 0);
+  CHECK((var_addr(&var2) & 0x3ff) == 0);
+  CHECK(var1.field == 13);
+  CHECK(var2.field == 17);
+  CHECK(var3.field == 19);
+  return 0;
+}
diff --git a/tests/libs/elftls_skew_align_test_helper.cpp b/tests/libs/elftls_skew_align_test_helper.cpp
new file mode 100644
index 0000000..f7f082d
--- /dev/null
+++ b/tests/libs/elftls_skew_align_test_helper.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+// LLD tries not to generate a PT_TLS segment where (p_vaddr % p_align) is
+// non-zero. It can still do so if the p_align values are greater than a page.
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include "CHECK.h"
+
+struct SmallVar {
+  int field;
+  char buffer[0x100 - sizeof(int)];
+};
+
+struct AlignedVar {
+  int field;
+  char buffer[0x20000 - sizeof(int)];
+} __attribute__((aligned(0x20000)));
+
+__thread struct SmallVar var1 = {13};
+__thread struct SmallVar var2 = {17};
+__thread struct AlignedVar var3;
+__thread struct AlignedVar var4;
+
+static uintptr_t var_addr(void* value) {
+  // Pass the pointer through an empty asm so that the optimizer can't assume the
+  // variable has its declared alignment and fold away the caller's alignment checks.
+  asm volatile("" : "+r,m"(value) : : "memory");
+  return reinterpret_cast<uintptr_t>(value);
+}
+
+int main() {
+  // Bionic only allocates ELF TLS blocks with up to page alignment.
+  CHECK((var_addr(&var3) & (getpagesize() - 1)) == 0);
+  CHECK((var_addr(&var4) & (getpagesize() - 1)) == 0);
+
+  // TODO: These TLS accesses are broken with the current version of LLD. See
+  // https://github.com/llvm/llvm-project/issues/84743.
+#if !defined(__riscv)
+  CHECK(var1.field == 13);
+  CHECK(var2.field == 17);
+#endif
+
+  CHECK(var3.field == 0);
+  CHECK(var4.field == 0);
+  return 0;
+}
diff --git a/tests/static_tls_layout_test.cpp b/tests/static_tls_layout_test.cpp
new file mode 100644
index 0000000..bf508e8
--- /dev/null
+++ b/tests/static_tls_layout_test.cpp
@@ -0,0 +1,213 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#define STATIC_TLS_LAYOUT_TEST
+
+#include "private/bionic_elf_tls.h"
+
+#include <string>
+#include <tuple>
+
+#include <gtest/gtest.h>
+
+#include "private/bionic_tls.h"
+
+using namespace std::string_literals;
+
+struct AlignedSizeFlat {
+  size_t size = 0;
+  size_t align = 1;
+  size_t skew = 0;
+};
+
+static TlsAlignedSize unflatten_size(AlignedSizeFlat flat) {
+  return TlsAlignedSize{.size = flat.size,
+                        .align = TlsAlign{
+                            .value = flat.align,
+                            .skew = flat.skew,
+                        }};
+}
+
+TEST(static_tls_layout, reserve_tp_pair) {
+  auto reserve_tp = [](const AlignedSizeFlat& before, const AlignedSizeFlat& after,
+                       StaticTlsLayout layout = {}) {
+    auto allocs = layout.reserve_tp_pair(unflatten_size(before), unflatten_size(after));
+    return std::make_tuple(layout, allocs);
+  };
+
+  StaticTlsLayout layout;
+  StaticTlsLayout::TpAllocations allocs;
+
+  // Simple case.
+  std::tie(layout, allocs) = reserve_tp({.size = 8, .align = 2}, {.size = 16, .align = 2});
+  EXPECT_EQ(0u, allocs.before);
+  EXPECT_EQ(8u, allocs.tp);
+  EXPECT_EQ(8u, allocs.after);
+  EXPECT_EQ(24u, layout.size());
+  EXPECT_EQ(2u, layout.align_);
+
+  // Zero-sized `before`
+  std::tie(layout, allocs) = reserve_tp({.size = 0}, {.size = 64, .align = 8});
+  EXPECT_EQ(0u, allocs.before);
+  EXPECT_EQ(0u, allocs.tp);
+  EXPECT_EQ(0u, allocs.after);
+
+  // Zero-sized `after`
+  std::tie(layout, allocs) = reserve_tp({.size = 64, .align = 8}, {.size = 0});
+  EXPECT_EQ(0u, allocs.before);
+  EXPECT_EQ(64u, allocs.tp);
+  EXPECT_EQ(64u, allocs.after);
+
+  // The `before` allocation is shifted forward to the TP.
+  std::tie(layout, allocs) = reserve_tp({.size = 1}, {.size = 64, .align = 8});
+  EXPECT_EQ(7u, allocs.before);
+  EXPECT_EQ(8u, allocs.tp);
+  EXPECT_EQ(8u, allocs.after);
+
+  // Alignment gap between `before` and TP.
+  std::tie(layout, allocs) = reserve_tp({.size = 9, .align = 4}, {.size = 1});
+  EXPECT_EQ(0u, allocs.before);
+  EXPECT_EQ(12u, allocs.tp);
+  EXPECT_EQ(12u, allocs.after);
+  EXPECT_EQ(13u, layout.size());
+  EXPECT_EQ(4u, layout.align_);
+
+  // `before` is shifted forward, and an alignment gap remains between it and the TP.
+  std::tie(layout, allocs) = reserve_tp({.size = 9, .align = 4}, {.size = 128, .align = 64});
+  EXPECT_EQ(52u, allocs.before);
+  EXPECT_EQ(64u, allocs.tp);
+  EXPECT_EQ(64u, allocs.after);
+  EXPECT_EQ(192u, layout.size());
+  EXPECT_EQ(64u, layout.align_);
+
+  // Skew-aligned `before` with low alignment.
+  std::tie(layout, allocs) =
+      reserve_tp({.size = 1, .align = 4, .skew = 1}, {.size = 64, .align = 8});
+  EXPECT_EQ(5u, allocs.before);
+  EXPECT_EQ(8u, allocs.tp);
+
+  // Skew-aligned `before` with high alignment.
+  std::tie(layout, allocs) = reserve_tp({.size = 48, .align = 64, .skew = 17}, {.size = 1});
+  EXPECT_EQ(17u, allocs.before);
+  EXPECT_EQ(128u, allocs.tp);
+
+  // An unrelated byte precedes the pair in the layout. Make sure `before` is
+  // still aligned.
+  layout = {};
+  layout.reserve_type<char>();
+  std::tie(layout, allocs) = reserve_tp({.size = 12, .align = 16}, {.size = 1}, layout);
+  EXPECT_EQ(16u, allocs.before);
+  EXPECT_EQ(32u, allocs.tp);
+
+  // Skew-aligned `after`.
+  std::tie(layout, allocs) =
+      reserve_tp({.size = 32, .align = 8}, {.size = 16, .align = 4, .skew = 3});
+  EXPECT_EQ(0u, allocs.before);
+  EXPECT_EQ(32u, allocs.tp);
+  EXPECT_EQ(35u, allocs.after);
+  EXPECT_EQ(51u, layout.size());
+}
+
+// A "NUM_words" literal is the size in bytes of NUM words of memory.
+static size_t operator""_words(unsigned long long i) {
+  return i * sizeof(void*);
+}
+
+TEST(static_tls_layout, arm) {
+#if !defined(__arm__) && !defined(__aarch64__)
+  GTEST_SKIP() << "test only applies to arm32/arm64 targets";
+#endif
+
+  auto reserve_exe = [](const AlignedSizeFlat& config) {
+    StaticTlsLayout layout;
+    TlsSegment seg = {.aligned_size = unflatten_size(config)};
+    layout.reserve_exe_segment_and_tcb(&seg, "prog");
+    return layout;
+  };
+
+  auto underalign_error = [](size_t align, size_t offset) {
+    return R"(error: "prog": executable's TLS segment is underaligned: )"s
+           R"(alignment is )"s +
+           std::to_string(align) + R"( \(skew )" + std::to_string(offset) +
+           R"(\), needs to be at least (32 for ARM|64 for ARM64) Bionic)"s;
+  };
+
+  // Amount of memory needed for negative TLS slots, given a segment p_align of
+  // 8 or 16 words.
+  const size_t base8 = __BIONIC_ALIGN(-MIN_TLS_SLOT, 8) * sizeof(void*);
+  const size_t base16 = __BIONIC_ALIGN(-MIN_TLS_SLOT, 16) * sizeof(void*);
+
+  StaticTlsLayout layout;
+
+  // An executable with a single word.
+  layout = reserve_exe({.size = 1_words, .align = 8_words});
+  EXPECT_EQ(base8 + MIN_TLS_SLOT * sizeof(void*), layout.offset_bionic_tcb());
+  EXPECT_EQ(base8, layout.offset_thread_pointer());
+  EXPECT_EQ(base8 + 8_words, layout.offset_exe());
+  EXPECT_EQ(base8 + 9_words, layout.size());
+  EXPECT_EQ(8_words, layout.align_);
+
+  // Simple underalignment case.
+  EXPECT_DEATH(reserve_exe({.size = 1_words, .align = 1_words}), underalign_error(1_words, 0));
+
+  // Skewed by 1 word is OK.
+  layout = reserve_exe({.size = 1_words, .align = 8_words, .skew = 1_words});
+  EXPECT_EQ(base8, layout.offset_thread_pointer());
+  EXPECT_EQ(base8 + 9_words, layout.offset_exe());
+  EXPECT_EQ(base8 + 10_words, layout.size());
+  EXPECT_EQ(8_words, layout.align_);
+
+  // Skewed by 2 words would overlap Bionic slots, regardless of the p_align
+  // value.
+  EXPECT_DEATH(reserve_exe({.size = 1_words, .align = 8_words, .skew = 2_words}),
+               underalign_error(8_words, 2_words));
+  EXPECT_DEATH(reserve_exe({.size = 1_words, .align = 0x1000, .skew = 2_words}),
+               underalign_error(0x1000, 2_words));
+
+  // Skewed by 8 words is OK again.
+  layout = reserve_exe({.size = 1_words, .align = 16_words, .skew = 8_words});
+  EXPECT_EQ(base16, layout.offset_thread_pointer());
+  EXPECT_EQ(base16 + 8_words, layout.offset_exe());
+  EXPECT_EQ(base16 + 9_words, layout.size());
+  EXPECT_EQ(16_words, layout.align_);
+
+  // Skewed by 9 words is also OK. (The amount of skew doesn't need to be a
+  // multiple of anything.)
+  layout = reserve_exe({.size = 1_words, .align = 16_words, .skew = 9_words});
+  EXPECT_EQ(base16, layout.offset_thread_pointer());
+  EXPECT_EQ(base16 + 9_words, layout.offset_exe());
+  EXPECT_EQ(base16 + 10_words, layout.size());
+  EXPECT_EQ(16_words, layout.align_);
+
+  // Skew with large alignment.
+  layout = reserve_exe({.size = 1_words, .align = 256_words, .skew = 8_words});
+  EXPECT_EQ(256_words, layout.offset_thread_pointer());
+  EXPECT_EQ(264_words, layout.offset_exe());
+  EXPECT_EQ(265_words, layout.size());
+  EXPECT_EQ(256_words, layout.align_);
+}