[riscv][bionic] Prototype TLS Descriptor support

Add basic assembly stubs for TLS Descriptor support in the dynamic
linker, and enable several code paths related to TLSDESC for RISC-V.

Note: This patch requires an updated toolchain that supports TLSDESC
for RISC-V, and the `-mtls-dialect=` compiler option specifically.

Test: adb shell /data/nativetest64/bionic-unit-tests/bionic-unit-tests --gtest_filter=*tls*
Bug: 322984914
Change-Id: I74bd0fa216b44b4ca2c5a5a6aec37b3fc47b00d9
diff --git a/linker/Android.bp b/linker/Android.bp
index 1ede380..78109e8 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -231,6 +231,7 @@
     name: "linker_sources_riscv64",
     srcs: [
         "arch/riscv64/begin.S",
+        "arch/riscv64/tlsdesc_resolver.S",
     ],
 }
 
diff --git a/linker/arch/riscv64/tlsdesc_resolver.S b/linker/arch/riscv64/tlsdesc_resolver.S
new file mode 100644
index 0000000..fedc926
--- /dev/null
+++ b/linker/arch/riscv64/tlsdesc_resolver.S
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <platform/bionic/tls_defines.h>
+#include <private/bionic_asm.h>
+#include <private/bionic_elf_dtv_offset.h>
+
+#ifndef TLS_DTV_OFFSET
+  #error "TLS_DTV_OFFSET not defined"
+#endif
+
+.globl __tls_get_addr
+
+// Store reg into 8-byte stack slot idx; pass f as the third argument to emit fsd for FP regs.
+.macro spill reg, idx, f=
+  \f\()sd \reg, \idx*8(sp)
+  .cfi_rel_offset \reg, (\idx)*8  // unwinder: old value is saved at idx*8 from the CFA register
+.endm
+
+// Load reg back from 8-byte stack slot idx; pass f as the third argument to emit fld for FP regs.
+.macro reload reg, idx, f=
+  \f\()ld \reg, \idx*8(sp)
+  .cfi_same_value \reg  // reg holds its entry value again, so no restoration rule is needed
+.endm
+
+.macro spill_vector_regs
+  csrr a3, vlenb    // a3 = vlenb CSR (bytes per vector register); a3 is clobbered as scratch
+  slli a3, a3, 3    // a3 = 8 * vlenb, the size of one 8-register group
+  sub sp, sp, a3    // NOTE(review): sp moves from here on without any CFI update
+  vs8r.v v0, (sp)   // whole-register store of v0-v7
+  sub sp, sp, a3
+  vs8r.v v8, (sp)   // v8-v15
+  sub sp, sp, a3
+  vs8r.v v16, (sp)  // v16-v23
+  sub sp, sp, a3
+  vs8r.v v24, (sp)  // v24-v31; sp is left pointing at this lowest group
+.endm
+
+.macro reload_vector_regs
+  csrr a3, vlenb    // a3 = vlenb CSR (bytes per vector register); a3 is clobbered as scratch
+  slli a3, a3, 3    // a3 = 8 * vlenb, the size of one 8-register group
+  vl8r.v v24, (sp)  // groups are popped in the reverse of the spill order: v24-v31 first
+  add sp, sp, a3
+  vl8r.v v16, (sp)  // v16-v23
+  add sp, sp, a3
+  vl8r.v v8, (sp)   // v8-v15
+  add sp, sp, a3
+  vl8r.v v0, (sp)   // v0-v7
+  add sp, sp, a3    // fourth add: undoes the last of spill_vector_regs' four subtractions
+.endm
+
+// Invokes op for each of the 35 saved registers and its stack slot (1-35); max is unused.
+.macro for_each_saved_reg op max
+  \op ra, 1     // a0 is not in the list: the resolvers return their result in it
+  \op a1, 2
+  \op a2, 3
+  \op a3, 4
+  \op a4, 5
+  \op a5, 6
+  \op a6, 7
+  \op a7, 8
+  \op t0, 9     // t0 is the register the resolvers return through (jr t0)
+  \op t1, 10
+  \op t2, 11
+  \op t3, 12
+  \op t4, 13
+  \op t5, 14
+  \op t6, 15
+  // floating point regs: the extra "f" argument selects the fsd/fld forms in spill/reload
+  \op ft0, 16, f
+  \op ft1, 17, f
+  \op ft2, 18, f
+  \op ft3, 19, f
+  \op ft4, 20, f
+  \op ft5, 21, f
+  \op ft6, 22, f
+  \op ft7, 23, f
+  \op ft8, 24, f
+  \op ft9, 25, f
+  \op ft10, 26, f
+  \op ft11, 27, f
+  \op fa0, 28, f
+  \op fa1, 29, f
+  \op fa2, 30, f
+  \op fa3, 31, f
+  \op fa4, 32, f
+  \op fa5, 33, f
+  \op fa6, 34, f
+  \op fa7, 35, f
+.endm
+
+// These resolver functions must preserve every register except a0. They set a0
+// to the offset of the TLS symbol relative to the thread pointer.
+
+ENTRY_PRIVATE(tlsdesc_resolver_static)
+  ld a0, 8(a0)  // a0 = second 8-byte word of the descriptor a0 pointed at; returned unchanged
+  jr t0         // return through t0; no other register has been touched
+END(tlsdesc_resolver_static)
+
+// Fast path: compute the offset from the DTV, or tail-jump to the slow path when it is stale.
+ENTRY_PRIVATE(tlsdesc_resolver_dynamic)
+  // We only need 3 stack slots, but still require a 4th slot for alignment
+  addi sp, sp, -4*8
+  .cfi_def_cfa_offset 4*8
+  spill a1, 1
+  spill a2, 2
+  spill a3, 3
+
+  ld a2, (TLS_SLOT_DTV * 8)(tp) // a2 = &DTV
+  ld a1, (a2)                   // a1 = TlsDtv::generation (DTV[0])
+  ld a0, 8(a0)                  // a0 = TlsDynamicResolverArg*
+  ld a3, (a0)                   // a3 = TlsDynamicResolverArg::generation
+
+  // Fallback if TlsDtv::generation < TlsDynamicResolverArg::generation
+  // since we need to call __tls_get_addr
+  blt a1, a3, L(fallback)
+
+  // a0 must keep pointing at the TlsDynamicResolverArg until no fallback is
+  // possible: tlsdesc_resolver_dynamic_slow_path reaches the TlsIndex (the
+  // argument's second field) through it.
+  ld a1, 8(a0)                  // a1 = TlsIndex::module_id
+  slli a1, a1, 3                // a1 = module_id*8 -- scale the idx
+  add a1, a2, a1                // a1 = &TlsDtv::modules[module_id]
+  ld a1, (a1)                   // a1 = TlsDtv::modules[module_id]
+  beqz a1, L(fallback)
+  ld a3, 16(a0)                 // a3 = TlsIndex::offset
+  add a0, a1, a3                // a0 = TlsDtv::modules[module_id] + offset
+  sub a0, a0, tp                // a0 = TlsDtv::modules[module_id] + offset - tp
+
+  .cfi_remember_state           // snapshot the "frame still live" CFI for L(fallback)
+  reload a3, 3
+  reload a2, 2
+  reload a1, 1
+  addi sp, sp, 4*8
+  .cfi_adjust_cfa_offset -4*8
+  jr t0
+
+L(fallback):
+  .cfi_restore_state            // frame is still live here: undo the fast-path epilogue's CFI
+  reload a3, 3
+  reload a2, 2
+  reload a1, 1
+  addi sp, sp, 4*8
+  .cfi_adjust_cfa_offset -4*8
+  j tlsdesc_resolver_dynamic_slow_path
+END(tlsdesc_resolver_dynamic)
+
+// On entry, a0 is the address of a TlsDynamicResolverArg object rather than
+// the TlsDescriptor address passed to the original resolver function.
+ENTRY_PRIVATE(tlsdesc_resolver_dynamic_slow_path)
+  // 35 registers are saved in slots 1-35; slot 0 is padding so the frame (36*8 bytes)
+  // stays a multiple of 16 for the vector spills below.
+  addi sp, sp, (-8*36)
+  .cfi_def_cfa_offset (8 * 36)
+  for_each_saved_reg spill, 36
+  spill_vector_regs                   // scratches a3, which was saved on the line above
+
+  add a0, a0, 8                       // a0 = &TlsIndex, the argument's second field
+  call __tls_get_addr
+  addi a0, a0, (-1 * TLS_DTV_OFFSET)  // Correct the address by TLS_DTV_OFFSET
+  sub a0, a0, tp                      // a0 = corrected address - tp (offset from thread pointer)
+
+  reload_vector_regs                  // scratches a3, which is reloaded on the next line
+  for_each_saved_reg reload, 36
+  addi sp, sp, 8*36
+  .cfi_def_cfa_offset 0
+  jr t0                               // t0 was reloaded above; return through it
+END(tlsdesc_resolver_dynamic_slow_path)
+
+// The address of an unresolved weak TLS symbol evaluates to NULL with TLSDESC.
+// The value returned by this function is added to the thread pointer, so return
+// a negated thread pointer to cancel it out.
+ENTRY_PRIVATE(tlsdesc_resolver_unresolved_weak)
+  neg a0, tp  // a0 = -tp, so the caller's "tp + a0" comes out as 0 (NULL)
+  jr t0       // hand control back through t0, as the other resolvers do
+END(tlsdesc_resolver_unresolved_weak)
diff --git a/linker/linker_relocate.cpp b/linker/linker_relocate.cpp
index 85f7b3a..3e36114 100644
--- a/linker/linker_relocate.cpp
+++ b/linker/linker_relocate.cpp
@@ -438,9 +438,9 @@
       }
       break;
 
-#if defined(__aarch64__)
-    // Bionic currently only implements TLSDESC for arm64. This implementation should work with
-    // other architectures, as long as the resolver functions are implemented.
+#if defined(__aarch64__) || defined(__riscv)
+    // Bionic currently implements TLSDESC for arm64 and riscv64. This implementation should work
+    // with other architectures, as long as the resolver functions are implemented.
     case R_GENERIC_TLSDESC:
       count_relocation_if<IsGeneral>(kRelocRelative);
       {
@@ -482,7 +482,7 @@
         }
       }
       break;
-#endif  // defined(__aarch64__)
+#endif  // defined(__aarch64__) || defined(__riscv)
 
 #if defined(__x86_64__)
     case R_X86_64_32:
@@ -672,14 +672,14 @@
 
   // Once the tlsdesc_args_ vector's size is finalized, we can write the addresses of its elements
   // into the TLSDESC relocations.
-#if defined(__aarch64__)
-  // Bionic currently only implements TLSDESC for arm64.
+#if defined(__aarch64__) || defined(__riscv)
+  // Bionic currently only implements TLSDESC for arm64 and riscv64.
   for (const std::pair<TlsDescriptor*, size_t>& pair : relocator.deferred_tlsdesc_relocs) {
     TlsDescriptor* desc = pair.first;
     desc->func = tlsdesc_resolver_dynamic;
     desc->arg = reinterpret_cast<size_t>(&tlsdesc_args_[pair.second]);
   }
-#endif
+#endif // defined(__aarch64__) || defined(__riscv)
 
   return true;
 }