Reorganize static TLS memory for ELF TLS

For ELF TLS "local-exec" accesses, the static linker assumes that an
executable's TLS segment is located at a statically-known offset from the
thread pointer: after it in "variant 1" (ARM), and before it in
"variant 2" (x86). Because these layouts are incompatible, Bionic generally
needs to allocate its TLS slots differently between different architectures.

To allow per-architecture TLS slots:
 - Replace the TLS_SLOT_xxx enumerators with macros. New ARM slots are
   generally negative, while new x86 slots are generally positive.
 - Define a bionic_tcb struct that provides two things:
    - a void* raw_slots_storage[BIONIC_TLS_SLOTS] field
    - an inline accessor function: void*& tls_slot(size_t tpindex);

For ELF TLS, it's necessary to allocate a temporary TCB (i.e. TLS slots),
because the runtime linker doesn't know how large the static TLS area is
until after it has loaded all of the initial solibs.

To accommodate Golang, it's necessary to allocate the pthread keys at a
fixed, small, positive offset from the thread pointer.

This CL moves the pthread keys into bionic_tls, then allocates a single
mapping per thread that looks like so:
 - stack guard
 - stack [omitted for main thread and with pthread_attr_setstack]
 - static TLS:
    - bionic_tcb [exec TLS will either precede or follow the TCB]
    - bionic_tls [prefixed by the pthread keys]
    - [solib TLS segments will be placed here]
 - guard page

As before, if the new mapping includes a stack, the pthread_internal_t
is allocated on it.

At startup, Bionic allocates a temporary bionic_tcb object on the stack,
then allocates a temporary bionic_tls object using mmap. This mmap is
delayed because the linker can't currently call async_safe_fatal() before
relocating itself.

Later, Bionic allocates a stack-less thread mapping for the main thread,
and copies slots from the temporary TCB to the new TCB.
(See *::copy_from_bootstrap methods.)

Bug: http://b/78026329
Test: bionic unit tests
Test: verify that a Golang app still works
Test: verify that a Golang app crashes if bionic_{tls,tcb} are swapped
Merged-In: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3
Change-Id: I6543063752f4ec8ef6dc9c7f2a06ce2a18fc5af3
(cherry picked from commit 1e660b70da625fcbf1e43dfae09b7b4817fa1660)
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index b2f8bbf..68650ed 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -39,6 +39,7 @@
 #include "libc_init_common.h"
 #include "pthread_internal.h"
 
+#include "private/bionic_elf_tls.h"
 #include "private/bionic_globals.h"
 #include "private/bionic_macros.h"
 #include "private/bionic_page.h"
@@ -82,6 +83,13 @@
   }
 }
 
+static void layout_static_tls() {
+  StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
+  layout.reserve_bionic_tls();
+  layout.reserve_tcb();
+  layout.finish_layout();
+}
+
 // The program startup function __libc_init() defined here is
 // used for static executables only (i.e. those that don't depend
 // on shared libraries). It is called from arch-$ARCH/bionic/crtbegin_static.S
@@ -92,16 +100,19 @@
 __noreturn static void __real_libc_init(void *raw_args,
                                         void (*onexit)(void) __unused,
                                         int (*slingshot)(int, char**, char**),
-                                        structors_array_t const * const structors) {
+                                        structors_array_t const * const structors,
+                                        bionic_tcb* temp_tcb) {
   BIONIC_STOP_UNWIND;
 
   // Initialize TLS early so system calls and errno work.
   KernelArgumentBlock args(raw_args);
-  __libc_init_main_thread_early(args);
+  __libc_init_main_thread_early(args, temp_tcb);
   __libc_init_main_thread_late();
   __libc_init_globals();
   __libc_shared_globals()->init_progname = args.argv[0];
   __libc_init_AT_SECURE(args.envp);
+  layout_static_tls();
+  __libc_init_main_thread_final();
   __libc_init_common();
 
   apply_gnu_relro();
@@ -129,16 +140,16 @@
                             void (*onexit)(void) __unused,
                             int (*slingshot)(int, char**, char**),
                             structors_array_t const * const structors) {
+  bionic_tcb temp_tcb = {};
 #if __has_feature(hwaddress_sanitizer)
   // Install main thread TLS early. It will be initialized later in __libc_init_main_thread. For now
-  // all we need is access to TLS_SLOT_TSAN.
-  pthread_internal_t* main_thread = __get_main_thread();
-  __set_tls(main_thread->tls);
-  // Initialize HWASan. This sets up TLS_SLOT_TSAN, among other things.
+  // all we need is access to TLS_SLOT_SANITIZER.
+  __set_tls(&temp_tcb.tls_slot(0));
+  // Initialize HWASan. This sets up TLS_SLOT_SANITIZER, among other things.
   __hwasan_init();
   // We are ready to run HWASan-instrumented code, proceed with libc initialization...
 #endif
-  __real_libc_init(raw_args, onexit, slingshot, structors);
+  __real_libc_init(raw_args, onexit, slingshot, structors, &temp_tcb);
 }
 
 static int g_target_sdk_version{__ANDROID_API__};