/*
 * Copyright (C) 2019 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "private/bionic_elf_tls.h"

#include <async_safe/CHECK.h>
#include <async_safe/log.h>
#include <string.h>
#include <sys/param.h>
#include <unistd.h>

#include "platform/bionic/macros.h"
#include "platform/bionic/page.h"
#include "private/ScopedRWLock.h"
#include "private/ScopedSignalBlocker.h"
#include "private/bionic_globals.h"
#include "private/bionic_tls.h"
#include "pthread_internal.h"

// Every call to __tls_get_addr needs to check the generation counter, so
// accesses to the counter need to be as fast as possible. Keep a copy of it in
// a hidden variable, which can be accessed without using the GOT. The linker
// will update this variable when it updates its counter.
//
// To allow the linker to update this variable, libc.so's constructor passes its
// address to the linker. To accommodate a possible __tls_get_addr call before
// libc.so's constructor, this local copy is initialized to SIZE_MAX, forcing
// __tls_get_addr to initially use the slow path.
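//
// (The generation counter changes whenever the set of loaded TLS modules
// changes, e.g. on dlopen/dlclose of a library with thread-local variables, so
// a mismatch tells __tls_get_addr that this thread's DTV may be stale.)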
__LIBC_HIDDEN__ _Atomic(size_t) __libc_tls_generation_copy = SIZE_MAX;

// Search for a TLS segment in the given phdr table. Returns true if it has a
// TLS segment and false otherwise.
bool __bionic_get_tls_segment(const ElfW(Phdr)* phdr_table, size_t phdr_count,
                              ElfW(Addr) load_bias, TlsSegment* out) {
  for (size_t i = 0; i < phdr_count; ++i) {
    const ElfW(Phdr)& phdr = phdr_table[i];
    if (phdr.p_type == PT_TLS) {
      *out = TlsSegment {
        phdr.p_memsz,
        phdr.p_align,
        reinterpret_cast<void*>(load_bias + phdr.p_vaddr),
        phdr.p_filesz,
      };
      return true;
    }
  }
  return false;
}

// Return true if the alignment of a TLS segment is a valid power-of-two. Also
// cap the alignment if it's too high.
bool __bionic_check_tls_alignment(size_t* alignment) {
  // N.B. The size does not need to be a multiple of the alignment. With
  // ld.bfd (or after using binutils' strip), the TLS segment's size isn't
  // rounded up.
  if (*alignment == 0 || !powerof2(*alignment)) {
    return false;
  }
  // Bionic only respects TLS alignment up to one page.
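  // (For example, a segment requesting 64KiB alignment is capped to
  // page_size(), typically 4KiB or 16KiB depending on the device's page-size
  // configuration.)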
  *alignment = MIN(*alignment, page_size());
  return true;
}

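// The bionic_tcb spans TLS slots MIN_TLS_SLOT through MAX_TLS_SLOT, and the
// thread pointer points at slot 0, so the thread pointer lies -MIN_TLS_SLOT
// words past the start of the TCB.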
size_t StaticTlsLayout::offset_thread_pointer() const {
  return offset_bionic_tcb_ + (-MIN_TLS_SLOT * sizeof(void*));
}

// Reserves space for the Bionic TCB and the executable's TLS segment. Returns
// the offset of the executable's TLS segment.
size_t StaticTlsLayout::reserve_exe_segment_and_tcb(const TlsSegment* exe_segment,
                                                    const char* progname __attribute__((unused))) {
  // Special case: if the executable has no TLS segment, then just allocate a
  // TCB and skip the minimum alignment check on ARM.
  if (exe_segment == nullptr) {
    offset_bionic_tcb_ = reserve_type<bionic_tcb>();
    return 0;
  }

#if defined(__arm__) || defined(__aarch64__)

  // First reserve enough space for the TCB before the executable segment.
  reserve(sizeof(bionic_tcb), 1);

  // Then reserve the segment itself.
  const size_t result = reserve(exe_segment->size, exe_segment->alignment);

  // The variant 1 ABI that ARM linkers follow specifies a 2-word TCB between
  // the thread pointer and the start of the executable's TLS segment, but both
  // the thread pointer and the TLS segment are aligned appropriately for the
  // TLS segment. Calculate the distance between the thread pointer and the
  // EXE's segment.
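  // (For example, with 8-byte pointers and a 64-byte-aligned executable
  // segment, that distance is __BIONIC_ALIGN(16, 64) == 64 bytes.)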
  const size_t exe_tpoff = __BIONIC_ALIGN(sizeof(void*) * 2, exe_segment->alignment);

  const size_t min_bionic_alignment = BIONIC_ROUND_UP_POWER_OF_2(MAX_TLS_SLOT) * sizeof(void*);
  if (exe_tpoff < min_bionic_alignment) {
    async_safe_fatal("error: \"%s\": executable's TLS segment is underaligned: "
                     "alignment is %zu, needs to be at least %zu for %s Bionic",
                     progname, exe_segment->alignment, min_bionic_alignment,
                     (sizeof(void*) == 4 ? "ARM" : "ARM64"));
  }

  offset_bionic_tcb_ = result - exe_tpoff - (-MIN_TLS_SLOT * sizeof(void*));
  return result;

#elif defined(__i386__) || defined(__x86_64__)

  // x86 uses variant 2 TLS layout. The executable's segment is located just
  // before the TCB.
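  // (With variant 2, the thread pointer points at the TCB, and the
  // executable's TLS variables are addressed at negative offsets from it.)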
  static_assert(MIN_TLS_SLOT == 0, "First slot of bionic_tcb must be slot #0 on x86");
  const size_t exe_size = round_up_with_overflow_check(exe_segment->size, exe_segment->alignment);
  reserve(exe_size, 1);
  const size_t max_align = MAX(alignof(bionic_tcb), exe_segment->alignment);
  offset_bionic_tcb_ = reserve(sizeof(bionic_tcb), max_align);
  return offset_bionic_tcb_ - exe_size;

#elif defined(__riscv)

  // First reserve enough space for the TCB before the executable segment.
  offset_bionic_tcb_ = reserve(sizeof(bionic_tcb), 1);

  // Then reserve the segment itself.
  const size_t exe_size = round_up_with_overflow_check(exe_segment->size, exe_segment->alignment);
  return reserve(exe_size, 1);

#else
#error "Unrecognized architecture"
#endif
}

void StaticTlsLayout::reserve_bionic_tls() {
  offset_bionic_tls_ = reserve_type<bionic_tls>();
}

void StaticTlsLayout::finish_layout() {
  // Round the offset up to the alignment.
  offset_ = round_up_with_overflow_check(offset_, alignment_);

  if (overflowed_) {
    async_safe_fatal("error: TLS segments in static TLS overflowed");
  }
}

// The size is not required to be a multiple of the alignment. The alignment
// must be a positive power-of-two.
size_t StaticTlsLayout::reserve(size_t size, size_t alignment) {
  offset_ = round_up_with_overflow_check(offset_, alignment);
  const size_t result = offset_;
  if (__builtin_add_overflow(offset_, size, &offset_)) overflowed_ = true;
  alignment_ = MAX(alignment_, alignment);
  return result;
}

size_t StaticTlsLayout::round_up_with_overflow_check(size_t value, size_t alignment) {
  const size_t old_value = value;
  value = __BIONIC_ALIGN(value, alignment);
  if (value < old_value) overflowed_ = true;
  return value;
}

// Copy each TLS module's initialization image into a newly-allocated block of
// static TLS memory. To reduce dirty pages, this function only writes to pages
// within the static TLS that need initialization. The memory should already be
// zero-initialized on entry.
void __init_static_tls(void* static_tls) {
  // The part of the table we care about (i.e. static TLS modules) never changes
  // after startup, but we still need the mutex because the table could grow,
  // moving the initial part. If this locking is too slow, we can duplicate the
  // static part of the table.
  TlsModules& modules = __libc_shared_globals()->tls_modules;
  ScopedSignalBlocker ssb;
  ScopedReadLock locker(&modules.rwlock);

  for (size_t i = 0; i < modules.module_count; ++i) {
    TlsModule& module = modules.module_table[i];
    if (module.static_offset == SIZE_MAX) {
      // All of the static modules come before all of the dynamic modules, so
      // once we see the first dynamic module, we're done.
      break;
    }
    if (module.segment.init_size == 0) {
      // Skip the memcpy call for TLS segments with no initializer, which is
      // common.
      continue;
    }
    memcpy(static_cast<char*>(static_tls) + module.static_offset,
           module.segment.init_ptr,
           module.segment.init_size);
  }
}

static inline size_t dtv_size_in_bytes(size_t module_count) {
  return sizeof(TlsDtv) + module_count * sizeof(void*);
}

// Calculates the number of module slots to allocate in a new DTV. For small
// objects (up to 1KiB), the TLS allocator allocates memory in power-of-2 sizes,
// so for better space usage, ensure that the DTV size (header + slots) is a
// power of 2.
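//
// For example, assuming a 64-bit target where sizeof(TlsDtv) is 24 bytes (an
// illustrative figure), a request for 6 modules needs 24 + 6*8 = 72 bytes,
// which rounds up to 128, yielding (128 - 24) / 8 = 13 slots.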
224//
225// The lock on TlsModules must be held.
226static size_t calculate_new_dtv_count() {
227 size_t loaded_cnt = __libc_shared_globals()->tls_modules.module_count;
228 size_t bytes = dtv_size_in_bytes(MAX(1, loaded_cnt));
229 if (!powerof2(bytes)) {
230 bytes = BIONIC_ROUND_UP_POWER_OF_2(bytes);
231 }
232 return (bytes - sizeof(TlsDtv)) / sizeof(void*);
233}
234
235// This function must be called with signals blocked and a write lock on
236// TlsModules held.
237static void update_tls_dtv(bionic_tcb* tcb) {
238 const TlsModules& modules = __libc_shared_globals()->tls_modules;
239 BionicAllocator& allocator = __libc_shared_globals()->tls_allocator;
240
241 // Use the generation counter from the shared globals instead of the local
242 // copy, which won't be initialized yet if __tls_get_addr is called before
243 // libc.so's constructor.
244 if (__get_tcb_dtv(tcb)->generation == atomic_load(&modules.generation)) {
245 return;
246 }
247
248 const size_t old_cnt = __get_tcb_dtv(tcb)->count;
249
250 // If the DTV isn't large enough, allocate a larger one. Because a signal
251 // handler could interrupt the fast path of __tls_get_addr, we don't free the
252 // old DTV. Instead, we add the old DTV to a list, then free all of a thread's
253 // DTVs at thread-exit. Each time the DTV is reallocated, its size at least
254 // doubles.
255 if (modules.module_count > old_cnt) {
256 size_t new_cnt = calculate_new_dtv_count();
257 TlsDtv* const old_dtv = __get_tcb_dtv(tcb);
258 TlsDtv* const new_dtv = static_cast<TlsDtv*>(allocator.alloc(dtv_size_in_bytes(new_cnt)));
259 memcpy(new_dtv, old_dtv, dtv_size_in_bytes(old_cnt));
260 new_dtv->count = new_cnt;
261 new_dtv->next = old_dtv;
262 __set_tcb_dtv(tcb, new_dtv);
263 }
264
265 TlsDtv* const dtv = __get_tcb_dtv(tcb);
266
267 const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
268 char* static_tls = reinterpret_cast<char*>(tcb) - layout.offset_bionic_tcb();
269
270 // Initialize static TLS modules and free unloaded modules.
271 for (size_t i = 0; i < dtv->count; ++i) {
272 if (i < modules.module_count) {
273 const TlsModule& mod = modules.module_table[i];
274 if (mod.static_offset != SIZE_MAX) {
275 dtv->modules[i] = static_tls + mod.static_offset;
276 continue;
277 }
278 if (mod.first_generation != kTlsGenerationNone &&
279 mod.first_generation <= dtv->generation) {
280 continue;
281 }
282 }
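    // Otherwise this slot is stale: the module that occupied it has been
    // unloaded (or replaced) since this DTV's generation, so free the block
    // allocated for it, if any.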
    if (modules.on_destruction_cb != nullptr) {
      void* dtls_begin = dtv->modules[i];
      void* dtls_end =
          static_cast<void*>(static_cast<char*>(dtls_begin) + allocator.get_chunk_size(dtls_begin));
      modules.on_destruction_cb(dtls_begin, dtls_end);
    }
    allocator.free(dtv->modules[i]);
    dtv->modules[i] = nullptr;
  }

  dtv->generation = atomic_load(&modules.generation);
}

__attribute__((noinline)) static void* tls_get_addr_slow_path(const TlsIndex* ti) {
  TlsModules& modules = __libc_shared_globals()->tls_modules;
  bionic_tcb* tcb = __get_bionic_tcb();

  // Block signals and lock TlsModules. We may need the allocator, so take
  // a write lock.
  ScopedSignalBlocker ssb;
  ScopedWriteLock locker(&modules.rwlock);

  update_tls_dtv(tcb);

  TlsDtv* dtv = __get_tcb_dtv(tcb);
  const size_t module_idx = __tls_module_id_to_idx(ti->module_id);
  void* mod_ptr = dtv->modules[module_idx];
  if (mod_ptr == nullptr) {
    const TlsSegment& segment = modules.module_table[module_idx].segment;
    mod_ptr = __libc_shared_globals()->tls_allocator.memalign(segment.alignment, segment.size);
    if (segment.init_size > 0) {
      memcpy(mod_ptr, segment.init_ptr, segment.init_size);
    }
    dtv->modules[module_idx] = mod_ptr;

    // Reports the allocation to the listener, if any.
    if (modules.on_creation_cb != nullptr) {
      modules.on_creation_cb(mod_ptr,
                             static_cast<void*>(static_cast<char*>(mod_ptr) + segment.size));
    }
  }

  return static_cast<char*>(mod_ptr) + ti->offset + TLS_DTV_OFFSET;
}

// Returns the address of a thread's TLS memory given a module ID and an offset
// into that module's TLS segment. This function is called on every access to a
// dynamic TLS variable on targets that don't use TLSDESC. arm64 uses TLSDESC,
// so it only calls this function on a thread's first access to a module's TLS
// segment.
//
// On most targets, this accessor function is __tls_get_addr and
// TLS_GET_ADDR_CCONV is unset. 32-bit x86 uses ___tls_get_addr instead and a
// regparm() calling convention.
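//
// A rough sketch of how a general-dynamic access reaches this function
// (illustrative only; the real TlsIndex lives in the GOT and is filled in by
// relocations):
//
//   extern __thread int tls_var;   // defined in a dlopen()'ed library
//   int* p = &tls_var;             // emitted as roughly:
//                                  //   p = (int*)TLS_GET_ADDR(&got_entry_for_tls_var);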
extern "C" void* TLS_GET_ADDR(const TlsIndex* ti) TLS_GET_ADDR_CCONV {
  TlsDtv* dtv = __get_tcb_dtv(__get_bionic_tcb());

  // TODO: See if we can use a relaxed memory ordering here instead.
  size_t generation = atomic_load(&__libc_tls_generation_copy);
  if (__predict_true(generation == dtv->generation)) {
    void* mod_ptr = dtv->modules[__tls_module_id_to_idx(ti->module_id)];
    if (__predict_true(mod_ptr != nullptr)) {
      return static_cast<char*>(mod_ptr) + ti->offset + TLS_DTV_OFFSET;
    }
  }

  return tls_get_addr_slow_path(ti);
}

// This function frees:
//  - TLS modules referenced by the current DTV.
//  - The list of DTV objects associated with the current thread.
//
// The caller must have already blocked signals.
void __free_dynamic_tls(bionic_tcb* tcb) {
  TlsModules& modules = __libc_shared_globals()->tls_modules;
  BionicAllocator& allocator = __libc_shared_globals()->tls_allocator;

  // If we didn't allocate any dynamic memory, skip out early without taking
  // the lock.
  TlsDtv* dtv = __get_tcb_dtv(tcb);
  if (dtv->generation == kTlsGenerationNone) {
    return;
  }

  // We need the write lock to use the allocator.
  ScopedWriteLock locker(&modules.rwlock);

  // First free everything in the current DTV.
  for (size_t i = 0; i < dtv->count; ++i) {
    if (i < modules.module_count && modules.module_table[i].static_offset != SIZE_MAX) {
      // This module's TLS memory is allocated statically, so don't free it here.
      continue;
    }

    if (modules.on_destruction_cb != nullptr) {
      void* dtls_begin = dtv->modules[i];
      void* dtls_end =
          static_cast<void*>(static_cast<char*>(dtls_begin) + allocator.get_chunk_size(dtls_begin));
      modules.on_destruction_cb(dtls_begin, dtls_end);
    }

    allocator.free(dtv->modules[i]);
  }

  // Now free the thread's list of DTVs.
  while (dtv->generation != kTlsGenerationNone) {
    TlsDtv* next = dtv->next;
    allocator.free(dtv);
    dtv = next;
  }

  // Clear the DTV slot. The DTV must not be used again with this thread.
  tcb->tls_slot(TLS_SLOT_DTV) = nullptr;
}

// Invokes all the registered thread_exit callbacks, if any.
void __notify_thread_exit_callbacks() {
  TlsModules& modules = __libc_shared_globals()->tls_modules;
  if (modules.first_thread_exit_callback == nullptr) {
    // If there is no first_thread_exit_callback, there shouldn't be a tail.
    CHECK(modules.thread_exit_callback_tail_node == nullptr);
    return;
  }

  // Callbacks are supposed to be invoked in the reverse order
  // in which they were registered.
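  // (The first callback is stored separately from the list nodes, so it runs
  // last, after the chain of later registrations has been walked.)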
  CallbackHolder* node = modules.thread_exit_callback_tail_node;
  while (node != nullptr) {
    node->cb();
    node = node->prev;
  }
  modules.first_thread_exit_callback();
}