Implement arm64 TLSDESC

Each TLSDESC relocation relocates a 2-word descriptor in the GOT that
contains:
 - the address of a TLS resolver function
 - an argument to pass (indirectly) to the resolver function

(Specifically, the address of the 2-word descriptor is passed to the
resolver.)

The loader resolves R_GENERIC_TLSDESC relocations using one of three
resolver functions that it defines:
 - tlsdesc_resolver_static
 - tlsdesc_resolver_dynamic
 - tlsdesc_resolver_unresolved_weak

The resolver functions are written in assembly because they have a
restrictive calling convention. They're only allowed to modify x0 and
(apparently) the condition codes.

For a relocation to memory in static TLS (i.e. the executable or a solib
loaded initially), the loader uses a simple resolver function,
tlsdesc_resolver_static, that returns the static offset it receives from
the loader.

For relocations to dynamic TLS memory (i.e. memory in a dlopen'ed solib),
the loader uses tlsdesc_resolver_dynamic, which allocates TLS memory on
demand. It inlines the fast path of __tls_get_addr, then falls back to
__tls_get_addr when it needs to allocate memory. The loader handles these
dynamic TLS relocations in two passes:
 - In the first pass, it allocates a table of TlsDynamicResolverArg
   objects, one per dynamic TLSDESC relocation.
 - In the second pass, once the table is finalized, it writes the
   addresses of the TlsDynamicResolverArg objects into the TLSDESC
   relocations.

tlsdesc_resolver_unresolved_weak returns a negated thread pointer so that
taking the address of an unresolved weak TLS symbol produces NULL.

The loader handles R_GENERIC_TLSDESC in a target-independent way, but
only for arm64, because Bionic has only implemented the resolver functions
for arm64.

Bug: http://b/78026329
Test: bionic unit tests
Test: check that backtrace works inside a resolver function and inside
  __tls_get_addr called from a resolver
  (gdbclient.py, b __tls_get_addr, bt)
Merged-In: I752e59ff986292449892c449dad2546e6f0ff7b6
Change-Id: I752e59ff986292449892c449dad2546e6f0ff7b6
diff --git a/linker/linker.cpp b/linker/linker.cpp
index 4dcdf7e..d0c740b 100644
--- a/linker/linker.cpp
+++ b/linker/linker.cpp
@@ -2698,6 +2698,7 @@
 bool soinfo::relocate(const VersionTracker& version_tracker, ElfRelIteratorT&& rel_iterator,
                       const soinfo_list_t& global_group, const soinfo_list_t& local_group) {
   const size_t tls_tp_base = __libc_shared_globals()->static_tls_layout.offset_thread_pointer();
+  std::vector<std::pair<TlsDescriptor*, size_t>> deferred_tlsdesc_relocs;
 
   for (size_t idx = 0; rel_iterator.has_next(); ++idx) {
     const auto rel = rel_iterator.next();
@@ -2977,6 +2978,49 @@
 #endif  // !defined(__aarch64__)
 
 #if defined(__aarch64__)
+      // Bionic currently only implements TLSDESC for arm64. This implementation should work with
+      // other architectures, as long as the resolver functions are implemented.
+      case R_GENERIC_TLSDESC:
+        count_relocation(kRelocRelative);
+        MARK(rel->r_offset);
+        {
+          TlsDescriptor* desc = reinterpret_cast<TlsDescriptor*>(reloc);
+          if (lsi == nullptr) {
+            // Unresolved weak relocation.
+            desc->func = tlsdesc_resolver_unresolved_weak;
+            desc->arg = addend;
+            TRACE_TYPE(RELO, "RELO TLSDESC %16p <- unresolved weak 0x%zx %s\n",
+                       reinterpret_cast<void*>(reloc), static_cast<size_t>(addend), sym_name);
+          } else {
+            CHECK(lsi->get_tls() != nullptr); // We rejected a missing TLS segment above.
+            size_t module_id = lsi->get_tls()->module_id;
+            const TlsModule& mod = get_tls_module(module_id);
+            if (mod.static_offset != SIZE_MAX) {
+              desc->func = tlsdesc_resolver_static;
+              desc->arg = mod.static_offset - tls_tp_base + sym_addr + addend;
+              TRACE_TYPE(RELO, "RELO TLSDESC %16p <- static (0x%zx - 0x%zx + 0x%zx + 0x%zx) %s\n",
+                         reinterpret_cast<void*>(reloc), mod.static_offset, tls_tp_base,
+                         static_cast<size_t>(sym_addr), static_cast<size_t>(addend), sym_name);
+            } else {
+              tlsdesc_args_.push_back({
+                .generation = mod.first_generation,
+                .index.module_id = module_id,
+                .index.offset = sym_addr + addend,
+              });
+              // Defer the TLSDESC relocation until the address of the TlsDynamicResolverArg object
+              // is finalized.
+              deferred_tlsdesc_relocs.push_back({ desc, tlsdesc_args_.size() - 1 });
+              const TlsDynamicResolverArg& desc_arg = tlsdesc_args_.back();
+              TRACE_TYPE(RELO, "RELO TLSDESC %16p <- dynamic (gen %zu, mod %zu, off %zu) %s",
+                         reinterpret_cast<void*>(reloc), desc_arg.generation,
+                         desc_arg.index.module_id, desc_arg.index.offset, sym_name);
+            }
+          }
+        }
+        break;
+#endif  // defined(__aarch64__)
+
+#if defined(__aarch64__)
       case R_AARCH64_ABS64:
         count_relocation(kRelocAbsolute);
         MARK(rel->r_offset);
@@ -3145,6 +3189,13 @@
         return false;
     }
   }
+
+  for (const std::pair<TlsDescriptor*, size_t>& pair : deferred_tlsdesc_relocs) {
+    TlsDescriptor* desc = pair.first;
+    desc->func = tlsdesc_resolver_dynamic;
+    desc->arg = reinterpret_cast<size_t>(&tlsdesc_args_[pair.second]);
+  }
+
   return true;
 }
 #endif  // !defined(__mips__)