Overalign the TLS segment using crtbegin

Android's current lld build has a hack that overaligns TLS segments, but
the hack broke glibc by producing TLS segments where (p_vaddr % p_align)
was non-zero. Move the hack into Bionic's crtbegin instead. It will emit a
0-sized, overaligned TLS segment (32-byte alignment on arm32, 64-byte on
arm64) into executables that don't use TLS, but that should be harmless.

This variant of the hack is compatible with the gold and lld linkers. The
ld.bfd linker will optimize the .tdata output section out if its size is
zero, preventing the overalignment in an executable that only has .tbss
sections. This problem could be fixed by adding a ". = .;" statement
inside .tdata in ld.bfd's linker script.

See discussion on https://reviews.llvm.org/D61824.

Bug: https://bugs.llvm.org/show_bug.cgi?id=41527
Test: bionic unit tests, boot a device
Change-Id: I34df8b5594b6518d4590e4861e3d0b74d6fa754e
diff --git a/libc/arch-common/bionic/crtbegin.c b/libc/arch-common/bionic/crtbegin.c
index c4d2a5a..3630b5e 100644
--- a/libc/arch-common/bionic/crtbegin.c
+++ b/libc/arch-common/bionic/crtbegin.c
@@ -63,6 +63,45 @@
 #undef PRE
 #undef POST
 
+// On arm32 and arm64, when targeting Q and up, overalign the TLS segment to
+// (8 * sizeof(void*)), which reserves enough space between the thread pointer
+// and the executable's TLS segment for Bionic's TLS slots. It has the side
+// effect of placing a 0-sized TLS segment into Android executables that don't
+// use TLS, but this should be harmless.
+//
+// To ensure that the .tdata input section isn't deleted, the .text input
+// section (which contains _start) has a relocation to the .tdata input section.
+//
+// TODO: This file currently uses TPREL relocations from .text to ensure that
+// --gc-sections doesn't remove the .tdata input section. The relocations are
+// resolved by the static linker. (They don't appear in the executable.) Replace
+// the TPREL relocations with R_{ARM,AARCH64}_NONE once the toolchain has been
+// updated to support them:
+//  - https://reviews.llvm.org/D61992 (Support .reloc *, R_ARM_NONE, *)
+//  - https://reviews.llvm.org/D61973 (Support .reloc *, R_AARCH64_NONE, *)
+//  - https://reviews.llvm.org/D62052 (lld -r: fix R_*_NONE to section symbols on Elf*_Rel targets)
+#if __ANDROID_API__ >= __ANDROID_API_Q__
+#if defined(__arm__)
+asm("  .section .tdata,\"awT\",%progbits\n"
+    "  .p2align 5\n"  // 2^5 = 32 bytes = 8 * sizeof(void*) on arm32
+    "__tls_align:\n"  // zero-sized marker symbol inside .tdata
+    "  .text\n"
+    "  .type __tls_align_reference,%function\n"
+    "__tls_align_reference:\n"
+    "  .long __tls_align(TPOFF)\n"  // TPREL reference keeps .tdata alive under --gc-sections
+    "  .size __tls_align_reference, .-__tls_align_reference\n");
+#elif defined(__aarch64__)
+asm("  .section .tdata,\"awT\",@progbits\n"
+    "  .p2align 6\n"  // 2^6 = 64 bytes = 8 * sizeof(void*) on arm64
+    "__tls_align:\n"  // zero-sized marker symbol inside .tdata
+    "  .text\n"
+    "  .type __tls_align_reference,%function\n"
+    "__tls_align_reference:\n"
+    "  add x0, x0, :tprel_lo12_nc:__tls_align\n"  // TPREL reference keeps .tdata alive
+    "  .size __tls_align_reference, .-__tls_align_reference\n");
+#endif  // defined(__arm__) / defined(__aarch64__)
+#endif  // __ANDROID_API__ >= __ANDROID_API_Q__
+
 #include "__dso_handle.h"
 #include "atexit.h"
 #include "pthread_atfork.h"