/*
| 2 | * Copyright (C) 2024 The Android Open Source Project |
| 3 | * All rights reserved. |
| 4 | * |
| 5 | * Redistribution and use in source and binary forms, with or without |
| 6 | * modification, are permitted provided that the following conditions |
| 7 | * are met: |
| 8 | * * Redistributions of source code must retain the above copyright |
| 9 | * notice, this list of conditions and the following disclaimer. |
| 10 | * * Redistributions in binary form must reproduce the above copyright |
| 11 | * notice, this list of conditions and the following disclaimer in |
| 12 | * the documentation and/or other materials provided with the |
| 13 | * distribution. |
| 14 | * |
| 15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 16 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 17 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS |
| 18 | * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE |
| 19 | * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 20 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, |
| 21 | * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS |
| 22 | * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED |
| 23 | * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 24 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
| 25 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 26 | * SUCH DAMAGE. |
| 27 | */ |
| 28 | |
| 29 | #include <platform/bionic/tls_defines.h> |
| 30 | #include <private/bionic_asm.h> |
| 31 | #include <private/bionic_elf_dtv_offset.h> |
| 32 | |
| 33 | #ifndef TLS_DTV_OFFSET |
| 34 | #error "TLS_DTV_OFFSET not defined" |
| 35 | #endif |
| 36 | |
| 37 | .globl __tls_get_addr |
| 38 | |
// spill reg, idx[, f]
//   Store \reg into the 8-byte stack slot number \idx (addressed from sp) and
//   record the save location for the unwinder with .cfi_rel_offset.
//   Pass `f` as the third argument to emit fsd (floating-point register);
//   with the default empty prefix the macro emits sd.
.macro spill reg, idx, f=
  \f\()sd \reg, \idx*8(sp)
  .cfi_rel_offset \reg, (\idx)*8
.endm
| 44 | |
// reload reg, idx[, f]
//   Load \reg back from the 8-byte stack slot number \idx (addressed from sp)
//   and tell the unwinder the register holds its caller's value again
//   (.cfi_same_value).
//   Pass `f` as the third argument to emit fld (floating-point register);
//   with the default empty prefix the macro emits ld.
.macro reload reg, idx, f=
  \f\()ld \reg, \idx*8(sp)
  .cfi_same_value \reg
.endm
| 50 | |
// spill_vector_regs
//   Push all 32 vector registers below the current sp as four groups of
//   eight (v0-v7, v8-v15, v16-v23, v24-v31), using whole-register stores.
//
//   Clobbers: a3 (left holding vlenb * 8, the byte size of one group).
//   Stack:    sp is lowered by 4 * (vlenb * 8) bytes in total.
//   CFI:      no .cfi_* directive is emitted for these sp adjustments.
.macro spill_vector_regs
  csrr a3, vlenb              // a3 = bytes per vector register
  slli a3, a3, 3              // a3 = bytes per group of 8 registers

  sub sp, sp, a3
  vs8r.v v0, (sp)             // v0  .. v7

  sub sp, sp, a3
  vs8r.v v8, (sp)             // v8  .. v15

  sub sp, sp, a3
  vs8r.v v16, (sp)            // v16 .. v23

  sub sp, sp, a3
  vs8r.v v24, (sp)            // v24 .. v31 (lowest address)
.endm
| 63 | |
// reload_vector_regs
//   Pop the four groups of eight vector registers in the reverse of the order
//   spill_vector_regs pushed them: v24 first (at the current sp), v0 last.
//
//   Clobbers: a3 (recomputed as vlenb * 8, the byte size of one group).
//   Stack:    sp is raised by 4 * (vlenb * 8) bytes in total.
//   CFI:      no .cfi_* directive is emitted for these sp adjustments.
.macro reload_vector_regs
  csrr a3, vlenb              // a3 = bytes per vector register
  slli a3, a3, 3              // a3 = bytes per group of 8 registers

  vl8r.v v24, (sp)            // v24 .. v31
  add sp, sp, a3

  vl8r.v v16, (sp)            // v16 .. v23
  add sp, sp, a3

  vl8r.v v8, (sp)             // v8  .. v15
  add sp, sp, a3

  vl8r.v v0, (sp)             // v0  .. v7
  add sp, sp, a3
.endm
| 76 | |
// for_each_saved_reg op max
//   Invoke \op once for each of the 35 saved registers, passing the register
//   name and its stack slot index (1 through 35). Floating-point registers
//   additionally receive `f` as a third argument. \op is expected to be
//   `spill` or `reload`.
//
//   Slot 0 is never used and a0 is not in the list. The `max` parameter is
//   accepted but not referenced by the macro body.
.macro for_each_saved_reg op max
  // Return address: slot 1.
  \op ra, 1

  // Integer argument registers a1-a7: slots 2-8.
  \op a1, 2
  \op a2, 3
  \op a3, 4
  \op a4, 5
  \op a5, 6
  \op a6, 7
  \op a7, 8

  // Integer temporaries t0-t6: slots 9-15.
  \op t0, 9
  \op t1, 10
  \op t2, 11
  \op t3, 12
  \op t4, 13
  \op t5, 14
  \op t6, 15

  // Floating-point temporaries ft0-ft11: slots 16-27.
  \op ft0, 16, f
  \op ft1, 17, f
  \op ft2, 18, f
  \op ft3, 19, f
  \op ft4, 20, f
  \op ft5, 21, f
  \op ft6, 22, f
  \op ft7, 23, f
  \op ft8, 24, f
  \op ft9, 25, f
  \op ft10, 26, f
  \op ft11, 27, f

  // Floating-point argument registers fa0-fa7: slots 28-35.
  \op fa0, 28, f
  \op fa1, 29, f
  \op fa2, 30, f
  \op fa3, 31, f
  \op fa4, 32, f
  \op fa5, 33, f
  \op fa6, 34, f
  \op fa7, 35, f
.endm
| 116 | |
| 117 | // These resolver functions must preserve every register except a0. They set a0 |
| 118 | // to the offset of the TLS symbol relative to the thread pointer. |
| 119 | |
// tlsdesc_resolver_static
//   In:   a0 = address of the TLS descriptor
//         t0 = address to return to
//   Out:  a0 = the 64-bit word stored at offset 8 of the descriptor
//   No register other than a0 is written and the stack is untouched.
ENTRY_PRIVATE(tlsdesc_resolver_static)
  ld a0, 8(a0)                  // a0 = second 8-byte word of the descriptor
  jr t0                         // return through t0, not ra
END(tlsdesc_resolver_static)
| 124 | |
// tlsdesc_resolver_dynamic
//   In:   a0 = address of the TLS descriptor; its second 8-byte word holds a
//              TlsDynamicResolverArg*
//         t0 = address to return to
//   Out:  a0 = TlsDtv::modules[module_id] + offset - tp   (fast path)
//   Every register other than a0 is preserved; a1-a3 are used as scratch and
//   restored from the stack before leaving.
//
//   If the thread's DTV generation is older than the one recorded in the
//   TlsDynamicResolverArg, or the module's DTV entry is null, the scratch
//   registers are restored and control tail-jumps to
//   tlsdesc_resolver_dynamic_slow_path with a0 = TlsDynamicResolverArg*.
ENTRY_PRIVATE(tlsdesc_resolver_dynamic)
  // We only need 3 stack slots, but still require a 4th slot for alignment
  addi sp, sp, -4*8
  .cfi_def_cfa_offset 4*8
  spill a1, 1
  spill a2, 2
  spill a3, 3

  ld a2, (TLS_SLOT_DTV * 8)(tp) // a2 = &DTV
  ld a1, (a2)                   // a1 = TlsDtv::generation (DTV[0])

  ld a0, 8(a0)                  // a0 = TlsDynamicResolverArg*
  ld a3, (a0)                   // a3 = TlsDynamicResolverArg::generation

  // Fallback if TlsDtv::generation < TlsDynamicResolverArg::generation
  // since we need to call __tls_get_addr
  // NOTE(review): blt is a signed comparison; if the generation counters are
  // unsigned, bltu would be the exact match -- confirm against the C++ types.
  blt a1, a3, L(fallback)

  // We can't modify a0 yet, since tlsdesc_resolver_dynamic_slow_path requires
  // a pointer to the TlsIndex, which is the second field of the
  // TlsDynamicResolverArg. As a result, we can't modify a0 until we will no
  // longer fallback.
  ld a1, 8(a0)                  // a1 = TlsIndex::module_id
  slli a1, a1, 3                // a1 = module_id*8 -- scale the idx
  add a1, a2, a1                // a1 = &TlsDtv::modules[module_id]
  ld a1, (a1)                   // a1 = TlsDtv::modules[module_id]
  beqz a1, L(fallback)
  ld a3, 16(a0)                 // a3 = TlsIndex::offset
  add a0, a1, a3                // a0 = TlsDtv::modules[module_id] + offset
  sub a0, a0, tp                // a0 = TlsDtv::modules[module_id] + offset - tp

  // Save the CFI state that describes the live frame (CFA = sp + 32 with
  // a1-a3 on the stack) so the fallback path can reinstate it below.
  .cfi_remember_state
  reload a3, 3
  reload a2, 2
  reload a1, 1
  addi sp, sp, 4*8
  .cfi_adjust_cfa_offset -4*8
  jr t0

L(fallback):
  // Both branches to this label are taken while the 32-byte frame is still
  // allocated and a1-a3 are still saved in it. Reinstate the remembered CFI
  // state; otherwise this code would inherit the post-epilogue state from the
  // fast path above and the adjustment below would leave a CFA offset of -32.
  .cfi_restore_state
  reload a3, 3
  reload a2, 2
  reload a1, 1
  addi sp, sp, 4*8
  .cfi_adjust_cfa_offset -4*8
  // Tail-jump: a0 still holds the TlsDynamicResolverArg* and t0 is untouched.
  j tlsdesc_resolver_dynamic_slow_path
END(tlsdesc_resolver_dynamic)
| 172 | |
// tlsdesc_resolver_dynamic_slow_path
//   In:   a0 = address of a TlsDynamicResolverArg object (not the
//              TlsDescriptor address the original resolver was given)
//         t0 = address to return to
//   Out:  a0 = (__tls_get_addr(a0 + 8) - TLS_DTV_OFFSET) - tp
//
//   Everything listed in for_each_saved_reg plus all 32 vector registers is
//   saved around the call to __tls_get_addr and restored afterwards.
ENTRY_PRIVATE(tlsdesc_resolver_dynamic_slow_path)
  // 35 registers are saved in slots 1-35; the frame is sized at 36 slots
  // because the vector spills that follow require 16-byte alignment.
  addi sp, sp, (-8*36)
  .cfi_def_cfa_offset (8 * 36)
  for_each_saved_reg spill, 36

  // a3 is already in its slot, so the macro is free to use it as scratch.
  // NOTE(review): the macro moves sp without emitting CFI, so the CFA offset
  // recorded above does not track sp from here until reload_vector_regs has
  // finished.
  spill_vector_regs

  addi a0, a0, 8                      // step past the first 8-byte field
  call __tls_get_addr
  addi a0, a0, (-1 * TLS_DTV_OFFSET)  // Correct the address by TLS_DTV_OFFSET
  sub a0, a0, tp                      // make the result relative to tp

  // Uses a3 as scratch again; a3 gets its saved value back right after.
  reload_vector_regs
  for_each_saved_reg reload, 36       // also restores t0 for the return
  addi sp, sp, 8*36
  .cfi_def_cfa_offset 0
  jr t0
END(tlsdesc_resolver_dynamic_slow_path)
| 194 | |
// tlsdesc_resolver_unresolved_weak
//   With TLSDESC, the address of an unresolved weak TLS symbol must evaluate
//   to NULL. The caller adds this function's result to the thread pointer, so
//   returning -tp makes that sum come out as zero.
//
//   In:   t0 = address to return to
//   Out:  a0 = 0 - tp
ENTRY_PRIVATE(tlsdesc_resolver_unresolved_weak)
  neg a0, tp                    // a0 = -tp (pseudo-instruction for sub a0, zero, tp)
  jr t0
END(tlsdesc_resolver_unresolved_weak)