blob: f6d7fa8421a6af3ccf1ddc886c08a3e409678b39 [file] [log] [blame]
/*
 * Copyright (C) 2008 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
28
29#include <pthread.h>
30
31#include <errno.h>
Elliott Hughes05fc1d72015-01-28 18:02:33 -080032#include <string.h>
Peter Collingbournedcbacd62021-04-22 12:13:40 -070033#include <sys/auxv.h>
Elliott Hughes4b4a8822013-02-12 17:15:59 -080034#include <sys/mman.h>
Elliott Hughes99d54652018-08-22 10:36:23 -070035#include <sys/prctl.h>
Peter Collingbourneda772e22018-09-06 22:20:44 -070036#include <sys/random.h>
Elliott Hughes7086ad62014-06-19 16:39:01 -070037#include <unistd.h>
Elliott Hughes4b4a8822013-02-12 17:15:59 -080038
39#include "pthread_internal.h"
40
Christopher Ferris7a3681e2017-04-24 17:48:32 -070041#include <async_safe/log.h>
42
Evgenii Stepanovf9fa32a2022-05-12 15:54:38 -070043#include "platform/bionic/macros.h"
44#include "platform/bionic/mte.h"
Peter Collingbournebb11ee62022-05-02 12:26:16 -070045#include "platform/bionic/page.h"
Evgenii Stepanovf9fa32a2022-05-12 15:54:38 -070046#include "private/ErrnoRestorer.h"
Peter Collingbourne5d3aa862020-09-11 15:05:17 -070047#include "private/ScopedRWLock.h"
Peter Collingbourne734beec2018-11-14 12:41:41 -080048#include "private/bionic_constants.h"
dimitryfa432522017-10-25 13:07:45 +020049#include "private/bionic_defs.h"
Ryan Prichard45d13492019-01-03 02:51:30 -080050#include "private/bionic_globals.h"
Elliott Hughes4b4a8822013-02-12 17:15:59 -080051#include "private/bionic_ssp.h"
Philip Cuadra77d0f902019-01-25 10:39:25 -080052#include "private/bionic_systrace.h"
Elliott Hughes4b4a8822013-02-12 17:15:59 -080053#include "private/bionic_tls.h"
Elliott Hughes4b4a8822013-02-12 17:15:59 -080054
Elliott Hughes0d236aa2014-05-09 14:42:16 -070055// x86 uses segment descriptors rather than a direct pointer to TLS.
Josh Gaocb728e62016-09-15 13:56:37 -070056#if defined(__i386__)
Elliott Hughes0d236aa2014-05-09 14:42:16 -070057#include <asm/ldt.h>
Elliott Hughes01b85d52016-02-09 22:44:16 -080058void __init_user_desc(struct user_desc*, bool, void*);
Elliott Hughes0d236aa2014-05-09 14:42:16 -070059#endif
60
Ryan Prichard9cfca862018-11-22 02:44:09 -080061__attribute__((no_stack_protector))
Ryan Prichard45d13492019-01-03 02:51:30 -080062void __init_tcb_stack_guard(bionic_tcb* tcb) {
Ryan Prichard9cfca862018-11-22 02:44:09 -080063 // GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
Ryan Prichard45d13492019-01-03 02:51:30 -080064 tcb->tls_slot(TLS_SLOT_STACK_GUARD) = reinterpret_cast<void*>(__stack_chk_guard);
Ryan Prichard9cfca862018-11-22 02:44:09 -080065}
66
Ryan Prichard45d13492019-01-03 02:51:30 -080067void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
Florian Mayerc0aa70a2024-06-24 15:49:20 -070068 tcb->thread()->bionic_tcb = tcb;
Ryan Prichard45d13492019-01-03 02:51:30 -080069 tcb->thread()->bionic_tls = tls;
70 tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;
71}
72
Vilas Bhat836370e2025-04-25 01:16:36 +000073void __init_libgen_buffers_ptr(bionic_tls* tls, libgen_buffers* lb) {
74 tls->libgen_buffers_ptr = lb;
75}
76
77static inline size_t get_temp_bionic_tls_size() {
Elliott Hughes193b0bc2025-05-14 06:35:50 -070078 return __builtin_align_up(sizeof(bionic_tls) + sizeof(libgen_buffers), page_size());
Vilas Bhat836370e2025-04-25 01:16:36 +000079}
80
Ryan Prichard45d13492019-01-03 02:51:30 -080081// Allocate a temporary bionic_tls that the dynamic linker's main thread can
82// use while it's loading the initial set of ELF modules.
83bionic_tls* __allocate_temp_bionic_tls() {
Vilas Bhat836370e2025-04-25 01:16:36 +000084 void* allocation = mmap(nullptr, get_temp_bionic_tls_size(), PROT_READ | PROT_WRITE,
85 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
Josh Gao5e2285d2017-02-22 12:19:05 -080086 if (allocation == MAP_FAILED) {
Elliott Hughes2557f732023-07-12 21:15:23 +000087 async_safe_fatal("failed to allocate bionic_tls: %m");
Josh Gao5e2285d2017-02-22 12:19:05 -080088 }
Vilas Bhat836370e2025-04-25 01:16:36 +000089 bionic_tls* tls = static_cast<bionic_tls*>(allocation);
90 tls->libgen_buffers_ptr =
91 reinterpret_cast<libgen_buffers*>(static_cast<char*>(allocation) + sizeof(bionic_tls));
92 return tls;
Ryan Prichard45d13492019-01-03 02:51:30 -080093}
Elliott Hughes53dc9dd2017-09-19 14:02:50 -070094
Ryan Prichard45d13492019-01-03 02:51:30 -080095void __free_temp_bionic_tls(bionic_tls* tls) {
Vilas Bhat836370e2025-04-25 01:16:36 +000096 munmap(tls, get_temp_bionic_tls_size());
Elliott Hughes70b24b12013-11-15 11:51:07 -080097}
Elliott Hughes4b4a8822013-02-12 17:15:59 -080098
Peter Collingbourneda772e22018-09-06 22:20:44 -070099static void __init_alternate_signal_stack(pthread_internal_t* thread) {
Elliott Hughes84114c82013-07-17 13:33:19 -0700100 // Create and set an alternate signal stack.
Evgenii Stepanovf9fa32a2022-05-12 15:54:38 -0700101 int prot = PROT_READ | PROT_WRITE;
102#ifdef __aarch64__
Florian Mayer73750dc2024-03-08 14:10:48 -0800103 if (atomic_load(&__libc_memtag_stack)) {
Evgenii Stepanovf9fa32a2022-05-12 15:54:38 -0700104 prot |= PROT_MTE;
105 }
106#endif
107 void* stack_base = mmap(nullptr, SIGNAL_STACK_SIZE, prot, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
Yabin Cuief115002015-03-30 20:03:57 -0700108 if (stack_base != MAP_FAILED) {
Elliott Hughesd6c678c2017-06-27 17:01:57 -0700109 // Create a guard to catch stack overflows in signal handlers.
110 if (mprotect(stack_base, PTHREAD_GUARD_SIZE, PROT_NONE) == -1) {
Yabin Cuief115002015-03-30 20:03:57 -0700111 munmap(stack_base, SIGNAL_STACK_SIZE);
112 return;
113 }
114 stack_t ss;
Elliott Hughesd6c678c2017-06-27 17:01:57 -0700115 ss.ss_sp = reinterpret_cast<uint8_t*>(stack_base) + PTHREAD_GUARD_SIZE;
116 ss.ss_size = SIGNAL_STACK_SIZE - PTHREAD_GUARD_SIZE;
Elliott Hughes84114c82013-07-17 13:33:19 -0700117 ss.ss_flags = 0;
Yi Kong32bc0fc2018-08-02 17:31:13 -0700118 sigaltstack(&ss, nullptr);
Yabin Cuief115002015-03-30 20:03:57 -0700119 thread->alternate_signal_stack = stack_base;
Yabin Cui8cf1b302014-12-03 21:36:24 -0800120
121 // We can only use const static allocated string for mapped region name, as Android kernel
122 // uses the string pointer directly when dumping /proc/pid/maps.
Elliott Hughesa3125fd2015-03-31 02:42:39 +0000123 prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ss.ss_sp, ss.ss_size, "thread signal stack");
Elliott Hughes84114c82013-07-17 13:33:19 -0700124 }
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800125}
126
Peter Collingbourneda772e22018-09-06 22:20:44 -0700127static void __init_shadow_call_stack(pthread_internal_t* thread __unused) {
Elliott Hughes9a7155d2023-02-10 02:00:03 +0000128#if defined(__aarch64__) || defined(__riscv)
Elliott Hughes1b362912024-07-29 16:49:01 +0000129 // Allocate the shadow call stack and its guard region.
Peter Collingbourne734beec2018-11-14 12:41:41 -0800130 char* scs_guard_region = reinterpret_cast<char*>(
Elliott Hughes1b362912024-07-29 16:49:01 +0000131 mmap(nullptr, SCS_GUARD_REGION_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0));
132 if (scs_guard_region == MAP_FAILED) {
133 async_safe_fatal("failed to allocate shadow stack: %m");
134 }
Peter Collingbourne734beec2018-11-14 12:41:41 -0800135 thread->shadow_call_stack_guard_region = scs_guard_region;
136
Elliott Hughes1b362912024-07-29 16:49:01 +0000137 // Align the address to SCS_SIZE so that we only need to store the lower log2(SCS_SIZE) bits
Elliott Hughes9a7155d2023-02-10 02:00:03 +0000138 // in jmp_buf. See the SCS commentary in pthread_internal.h for more detail.
Peter Collingbournef1ed31f2019-01-31 14:26:43 -0800139 char* scs_aligned_guard_region =
Elliott Hughesfed0ce92024-12-11 09:53:34 -0800140 reinterpret_cast<char*>(__builtin_align_up(reinterpret_cast<uintptr_t>(scs_guard_region), SCS_SIZE));
Peter Collingbournef1ed31f2019-01-31 14:26:43 -0800141
142 // We need to ensure that [scs_offset,scs_offset+SCS_SIZE) is in the guard region and that there
143 // is at least one unmapped page after the shadow call stack (to catch stack overflows). We can't
144 // use arc4random_uniform in init because /dev/urandom might not have been created yet.
145 size_t scs_offset =
146 (getpid() == 1) ? 0 : (arc4random_uniform(SCS_GUARD_REGION_SIZE / SCS_SIZE - 1) * SCS_SIZE);
147
Elliott Hughes7dd38962023-04-06 14:50:31 -0700148 // Make the stack read-write, and store its address in the register we're using as the shadow
149 // stack pointer. This is deliberately the only place where the address is stored.
Elliott Hughes9a7155d2023-02-10 02:00:03 +0000150 char* scs = scs_aligned_guard_region + scs_offset;
Kelvin Zhangfd93b602023-07-11 13:09:58 -0700151 if (mprotect(scs, SCS_SIZE, PROT_READ | PROT_WRITE) == -1) {
Elliott Hughes2557f732023-07-12 21:15:23 +0000152 async_safe_fatal("shadow stack read-write mprotect(%p, %d) failed: %m", scs, SCS_SIZE);
Kelvin Zhangfd93b602023-07-11 13:09:58 -0700153 }
Elliott Hughes9a7155d2023-02-10 02:00:03 +0000154#if defined(__aarch64__)
Peter Collingbourneda772e22018-09-06 22:20:44 -0700155 __asm__ __volatile__("mov x18, %0" ::"r"(scs));
Elliott Hughes9a7155d2023-02-10 02:00:03 +0000156#elif defined(__riscv)
Elliott Hughesc35a0dc2023-05-16 16:09:30 -0700157 __asm__ __volatile__("mv x3, %0" ::"r"(scs));
Elliott Hughes9a7155d2023-02-10 02:00:03 +0000158#endif
Peter Collingbourneda772e22018-09-06 22:20:44 -0700159#endif
160}
161
162void __init_additional_stacks(pthread_internal_t* thread) {
163 __init_alternate_signal_stack(thread);
164 __init_shadow_call_stack(thread);
165}
166
Yabin Cui673b15e2015-03-19 14:19:19 -0700167int __init_thread(pthread_internal_t* thread) {
Elliott Hughes8aecba72017-10-17 15:34:41 -0700168 thread->cleanup_stack = nullptr;
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800169
Nick Desaulniers2e65afe2024-11-19 09:27:06 -0800170 ThreadJoinState state = THREAD_NOT_JOINED;
171 if (__predict_false((thread->attr.flags & PTHREAD_ATTR_FLAG_DETACHED) != 0)) {
172 state = THREAD_DETACHED;
Yabin Cui58cf31b2015-03-06 17:23:53 -0800173 }
Nick Desaulniers2e65afe2024-11-19 09:27:06 -0800174 atomic_store_explicit(&thread->join_state, state, memory_order_relaxed);
Yabin Cui58cf31b2015-03-06 17:23:53 -0800175
Elliott Hughes8aecba72017-10-17 15:34:41 -0700176 // Set the scheduling policy/priority of the thread if necessary.
177 bool need_set = true;
178 int policy;
179 sched_param param;
Elliott Hughes38f01e02017-10-27 15:28:54 -0700180 if ((thread->attr.flags & PTHREAD_ATTR_FLAG_INHERIT) != 0) {
Elliott Hughes8aecba72017-10-17 15:34:41 -0700181 // Unless the parent has SCHED_RESET_ON_FORK set, we've already inherited from the parent.
182 policy = sched_getscheduler(0);
183 need_set = ((policy & SCHED_RESET_ON_FORK) != 0);
184 if (need_set) {
185 if (policy == -1) {
186 async_safe_format_log(ANDROID_LOG_WARN, "libc",
Elliott Hughes2557f732023-07-12 21:15:23 +0000187 "pthread_create sched_getscheduler failed: %m");
Elliott Hughes8aecba72017-10-17 15:34:41 -0700188 return errno;
189 }
190 if (sched_getparam(0, &param) == -1) {
Elliott Hughes2557f732023-07-12 21:15:23 +0000191 async_safe_format_log(ANDROID_LOG_WARN, "libc", "pthread_create sched_getparam failed: %m");
Elliott Hughes8aecba72017-10-17 15:34:41 -0700192 return errno;
193 }
194 }
195 } else {
196 policy = thread->attr.sched_policy;
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800197 param.sched_priority = thread->attr.sched_priority;
Elliott Hughes8aecba72017-10-17 15:34:41 -0700198 }
Elliott Hughes38f01e02017-10-27 15:28:54 -0700199 // Backwards compatibility: before P, Android didn't have pthread_attr_setinheritsched,
200 // and our behavior was neither of the POSIX behaviors.
201 if ((thread->attr.flags & (PTHREAD_ATTR_FLAG_INHERIT|PTHREAD_ATTR_FLAG_EXPLICIT)) == 0) {
202 need_set = (thread->attr.sched_policy != SCHED_NORMAL);
203 }
Elliott Hughes8aecba72017-10-17 15:34:41 -0700204 if (need_set) {
205 if (sched_setscheduler(thread->tid, policy, &param) == -1) {
206 async_safe_format_log(ANDROID_LOG_WARN, "libc",
Elliott Hughes2557f732023-07-12 21:15:23 +0000207 "pthread_create sched_setscheduler(%d, {%d}) call failed: %m", policy,
208 param.sched_priority);
Josh Gaob36efa42016-09-15 13:55:41 -0700209#if defined(__LP64__)
Elliott Hughes98624c32013-10-15 16:51:17 -0700210 // For backwards compatibility reasons, we only report failures on 64-bit devices.
Elliott Hughes8aecba72017-10-17 15:34:41 -0700211 return errno;
Elliott Hughes98624c32013-10-15 16:51:17 -0700212#endif
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800213 }
214 }
215
Elliott Hughes8aecba72017-10-17 15:34:41 -0700216 return 0;
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800217}
218
Ryan Prichard45d13492019-01-03 02:51:30 -0800219// Allocate a thread's primary mapping. This mapping includes static TLS and
220// optionally a stack. Static TLS includes ELF TLS segments and the bionic_tls
221// struct.
222//
Peter Collingbournebb11ee62022-05-02 12:26:16 -0700223// The stack_guard_size must be a multiple of the page_size().
Ryan Prichard45d13492019-01-03 02:51:30 -0800224ThreadMapping __allocate_thread_mapping(size_t stack_size, size_t stack_guard_size) {
225 const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
226
Vilas Bhat836370e2025-04-25 01:16:36 +0000227 // Allocate in order: stack guard, stack, static TLS, libgen buffers, guard page.
Ryan Prichard45d13492019-01-03 02:51:30 -0800228 size_t mmap_size;
229 if (__builtin_add_overflow(stack_size, stack_guard_size, &mmap_size)) return {};
230 if (__builtin_add_overflow(mmap_size, layout.size(), &mmap_size)) return {};
231 if (__builtin_add_overflow(mmap_size, PTHREAD_GUARD_SIZE, &mmap_size)) return {};
Vilas Bhat836370e2025-04-25 01:16:36 +0000232 // Add space for the dedicated libgen buffers page(s).
Elliott Hughes193b0bc2025-05-14 06:35:50 -0700233 size_t libgen_buffers_padded_size = __builtin_align_up(sizeof(libgen_buffers), page_size());
Vilas Bhat836370e2025-04-25 01:16:36 +0000234 if (__builtin_add_overflow(mmap_size, libgen_buffers_padded_size, &mmap_size)) return {};
Ryan Prichard45d13492019-01-03 02:51:30 -0800235
236 // Align the result to a page size.
237 const size_t unaligned_size = mmap_size;
Elliott Hughes193b0bc2025-05-14 06:35:50 -0700238 mmap_size = __builtin_align_up(mmap_size, page_size());
Ryan Prichard45d13492019-01-03 02:51:30 -0800239 if (mmap_size < unaligned_size) return {};
240
241 // Create a new private anonymous map. Make the entire mapping PROT_NONE, then carve out a
242 // read+write area in the middle.
243 const int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
244 char* const space = static_cast<char*>(mmap(nullptr, mmap_size, PROT_NONE, flags, -1, 0));
Yabin Cuiba8dfc22015-01-06 09:31:00 -0800245 if (space == MAP_FAILED) {
Elliott Hughes2557f732023-07-12 21:15:23 +0000246 async_safe_format_log(ANDROID_LOG_WARN, "libc",
247 "pthread_create failed: couldn't allocate %zu-bytes mapped space: %m",
248 mmap_size);
Ryan Prichard45d13492019-01-03 02:51:30 -0800249 return {};
250 }
251 const size_t writable_size = mmap_size - stack_guard_size - PTHREAD_GUARD_SIZE;
Evgenii Stepanovf9fa32a2022-05-12 15:54:38 -0700252 int prot = PROT_READ | PROT_WRITE;
253 const char* prot_str = "R+W";
254#ifdef __aarch64__
Florian Mayer73750dc2024-03-08 14:10:48 -0800255 if (atomic_load(&__libc_memtag_stack)) {
Evgenii Stepanovf9fa32a2022-05-12 15:54:38 -0700256 prot |= PROT_MTE;
257 prot_str = "R+W+MTE";
258 }
259#endif
260 if (mprotect(space + stack_guard_size, writable_size, prot) != 0) {
261 async_safe_format_log(
262 ANDROID_LOG_WARN, "libc",
Elliott Hughes2557f732023-07-12 21:15:23 +0000263 "pthread_create failed: couldn't mprotect %s %zu-byte thread mapping region: %m", prot_str,
264 writable_size);
Ryan Prichard45d13492019-01-03 02:51:30 -0800265 munmap(space, mmap_size);
266 return {};
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800267 }
268
Vilas Bhat836370e2025-04-25 01:16:36 +0000269 // Layout from the end of the mmap-ed region (before the top PTHREAD_GUARD_SIZE):
270 //
271 // [ PTHREAD_GUARD_SIZE ]
272 // [ libgen_buffers_padded_size (for dedicated page(s) for libgen buffers) ]
273 // [ layout.size() (for static TLS) ]
274 // [ stack_size ]
275 // [ stack_guard_size ]
276
Ryan Prichard45d13492019-01-03 02:51:30 -0800277 ThreadMapping result = {};
278 result.mmap_base = space;
279 result.mmap_size = mmap_size;
Ryan Prichard03cef382019-06-17 17:57:19 -0700280 result.mmap_base_unguarded = space + stack_guard_size;
281 result.mmap_size_unguarded = mmap_size - stack_guard_size - PTHREAD_GUARD_SIZE;
Vilas Bhat836370e2025-04-25 01:16:36 +0000282 result.libgen_buffers = space + mmap_size - PTHREAD_GUARD_SIZE - libgen_buffers_padded_size;
283 result.static_tls = result.libgen_buffers - layout.size();
Ryan Prichard45d13492019-01-03 02:51:30 -0800284 result.stack_base = space;
285 result.stack_top = result.static_tls;
286 return result;
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800287}
288
Ryan Prichard45d13492019-01-03 02:51:30 -0800289static int __allocate_thread(pthread_attr_t* attr, bionic_tcb** tcbp, void** child_stack) {
290 ThreadMapping mapping;
291 char* stack_top;
292 bool stack_clean = false;
Yabin Cui6a7aaf42014-12-22 19:17:33 -0800293
Yi Kong32bc0fc2018-08-02 17:31:13 -0700294 if (attr->stack_base == nullptr) {
Yabin Cui8cf1b302014-12-03 21:36:24 -0800295 // The caller didn't provide a stack, so allocate one.
Ryan Prichard45d13492019-01-03 02:51:30 -0800296
Peter Collingbournebb11ee62022-05-02 12:26:16 -0700297 // Make sure the guard size is a multiple of page_size().
Ryan Prichard45d13492019-01-03 02:51:30 -0800298 const size_t unaligned_guard_size = attr->guard_size;
Elliott Hughes193b0bc2025-05-14 06:35:50 -0700299 attr->guard_size = __builtin_align_up(attr->guard_size, page_size());
Ryan Prichard45d13492019-01-03 02:51:30 -0800300 if (attr->guard_size < unaligned_guard_size) return EAGAIN;
301
302 mapping = __allocate_thread_mapping(attr->stack_size, attr->guard_size);
303 if (mapping.mmap_base == nullptr) return EAGAIN;
304
305 stack_top = mapping.stack_top;
306 attr->stack_base = mapping.stack_base;
307 stack_clean = true;
Yabin Cui8cf1b302014-12-03 21:36:24 -0800308 } else {
Ryan Prichard45d13492019-01-03 02:51:30 -0800309 mapping = __allocate_thread_mapping(0, PTHREAD_GUARD_SIZE);
310 if (mapping.mmap_base == nullptr) return EAGAIN;
311
312 stack_top = static_cast<char*>(attr->stack_base) + attr->stack_size;
Yabin Cui8cf1b302014-12-03 21:36:24 -0800313 }
314
Ryan Prichard45d13492019-01-03 02:51:30 -0800315 // Carve out space from the stack for the thread's pthread_internal_t. This
316 // memory isn't counted in pthread_attr_getstacksize.
Yabin Cuia2db50d2015-03-20 10:58:04 -0700317
318 // To safely access the pthread_internal_t and thread stack, we need to find a 16-byte aligned boundary.
Elliott Hughesfed0ce92024-12-11 09:53:34 -0800319 stack_top = __builtin_align_down(stack_top - sizeof(pthread_internal_t), 16);
Yabin Cuia2db50d2015-03-20 10:58:04 -0700320
Yabin Cui8cf1b302014-12-03 21:36:24 -0800321 pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(stack_top);
Ryan Prichard45d13492019-01-03 02:51:30 -0800322 if (!stack_clean) {
Yabin Cui304348a2015-12-03 13:01:42 -0800323 // If thread was not allocated by mmap(), it may not have been cleared to zero.
324 // So assume the worst and zero it.
325 memset(thread, 0, sizeof(pthread_internal_t));
326 }
Yabin Cui8cf1b302014-12-03 21:36:24 -0800327
Ryan Prichard45d13492019-01-03 02:51:30 -0800328 // Locate static TLS structures within the mapped region.
329 const StaticTlsLayout& layout = __libc_shared_globals()->static_tls_layout;
330 auto tcb = reinterpret_cast<bionic_tcb*>(mapping.static_tls + layout.offset_bionic_tcb());
331 auto tls = reinterpret_cast<bionic_tls*>(mapping.static_tls + layout.offset_bionic_tls());
Vilas Bhat836370e2025-04-25 01:16:36 +0000332 auto lb = reinterpret_cast<libgen_buffers*>(mapping.libgen_buffers);
Ryan Prichard45d13492019-01-03 02:51:30 -0800333
Ryan Prichard361c1b42019-01-15 13:45:27 -0800334 // Initialize TLS memory.
335 __init_static_tls(mapping.static_tls);
Ryan Prichard45d13492019-01-03 02:51:30 -0800336 __init_tcb(tcb, thread);
Ryan Prichard16455b52019-01-18 01:00:59 -0800337 __init_tcb_dtv(tcb);
Ryan Prichard45d13492019-01-03 02:51:30 -0800338 __init_tcb_stack_guard(tcb);
339 __init_bionic_tls_ptrs(tcb, tls);
Vilas Bhat836370e2025-04-25 01:16:36 +0000340 __init_libgen_buffers_ptr(tls, lb);
Ryan Prichard45d13492019-01-03 02:51:30 -0800341
342 attr->stack_size = stack_top - static_cast<char*>(attr->stack_base);
Yabin Cui8cf1b302014-12-03 21:36:24 -0800343 thread->attr = *attr;
Ryan Prichard45d13492019-01-03 02:51:30 -0800344 thread->mmap_base = mapping.mmap_base;
345 thread->mmap_size = mapping.mmap_size;
Ryan Prichard03cef382019-06-17 17:57:19 -0700346 thread->mmap_base_unguarded = mapping.mmap_base_unguarded;
347 thread->mmap_size_unguarded = mapping.mmap_size_unguarded;
Peter Collingbourne5f45c182020-01-14 17:59:41 -0800348 thread->stack_top = reinterpret_cast<uintptr_t>(stack_top);
Florian Mayera3809542024-12-12 05:43:28 -0800349 thread->stack_bottom = reinterpret_cast<uintptr_t>(attr->stack_base);
Ryan Prichard9cfca862018-11-22 02:44:09 -0800350
Ryan Prichard45d13492019-01-03 02:51:30 -0800351 *tcbp = tcb;
Yabin Cui8cf1b302014-12-03 21:36:24 -0800352 *child_stack = stack_top;
353 return 0;
354}
355
Ryan Prichard03cef382019-06-17 17:57:19 -0700356void __set_stack_and_tls_vma_name(bool is_main_thread) {
357 // Name the thread's stack-and-tls area to help with debugging. This mapped area also includes
358 // static TLS data, which is typically a few pages (e.g. bionic_tls).
359 pthread_internal_t* thread = __get_thread();
360 const char* name;
361 if (is_main_thread) {
362 name = "stack_and_tls:main";
363 } else {
364 // The kernel doesn't copy the name string, but this variable will last at least as long as the
365 // mapped area. The mapped area's VMAs are unmapped with a single call to munmap.
366 auto& name_buffer = thread->vma_name_buffer;
367 static_assert(arraysize(name_buffer) >= arraysize("stack_and_tls:") + 11 + 1);
368 async_safe_format_buffer(name_buffer, arraysize(name_buffer), "stack_and_tls:%d", thread->tid);
369 name = name_buffer;
370 }
371 prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, thread->mmap_base_unguarded, thread->mmap_size_unguarded,
372 name);
373}
374
Evgenii Stepanovb92d1c32019-10-02 16:26:43 -0700375extern "C" int __rt_sigprocmask(int, const sigset64_t*, sigset64_t*, size_t);
376
Florian Mayereac5f732024-11-11 12:56:37 -0800377__attribute__((no_sanitize("hwaddress", "memtag")))
Elliott Hughesa7637a82024-07-29 18:18:30 +0000378#if defined(__aarch64__)
Peter Collingbourne26d83ba2021-06-04 14:35:13 -0700379// This function doesn't return, but it does appear in stack traces. Avoid using return PAC in this
380// function because we may end up resetting IA, which may confuse unwinders due to mismatching keys.
Elliott Hughes73091772022-03-10 18:01:04 +0000381__attribute__((target("branch-protection=bti")))
382#endif
Florian Mayereac5f732024-11-11 12:56:37 -0800383static int
384__pthread_start(void* arg) {
Elliott Hughese48b6852013-11-15 14:57:45 -0800385 pthread_internal_t* thread = reinterpret_cast<pthread_internal_t*>(arg);
Florian Mayereac5f732024-11-11 12:56:37 -0800386#if defined(__aarch64__)
387 if (thread->should_allocate_stack_mte_ringbuffer) {
388 thread->bionic_tcb->tls_slot(TLS_SLOT_STACK_MTE) = __allocate_stack_mte_ringbuffer(0, thread);
389 }
390#endif
Evgenii Stepanov13e8dcb2018-09-19 16:29:12 -0700391 __hwasan_thread_enter();
392
Elliott Hughese48b6852013-11-15 14:57:45 -0800393 // Wait for our creating thread to release us. This lets it have time to
394 // notify gdb about this thread before we start doing anything.
395 // This also provides the memory barrier needed to ensure that all memory
396 // accesses previously made by the creating thread are visible to us.
Yabin Cuid26e7802015-10-22 20:07:56 -0700397 thread->startup_handshake_lock.lock();
Elliott Hughese48b6852013-11-15 14:57:45 -0800398
Ryan Prichard03cef382019-06-17 17:57:19 -0700399 __set_stack_and_tls_vma_name(false);
Peter Collingbourneda772e22018-09-06 22:20:44 -0700400 __init_additional_stacks(thread);
Evgenii Stepanovb92d1c32019-10-02 16:26:43 -0700401 __rt_sigprocmask(SIG_SETMASK, &thread->start_mask, nullptr, sizeof(thread->start_mask));
Elliott Hughesa7637a82024-07-29 18:18:30 +0000402#if defined(__aarch64__)
Peter Collingbourne811d1802021-03-25 11:46:44 -0700403 // Chrome's sandbox prevents this prctl, so only reset IA if the target SDK level is high enough.
Peter Collingbournedcbacd62021-04-22 12:13:40 -0700404 // Furthermore, processes loaded from vendor partitions may have their own sandboxes that would
Elliott Hughesa7637a82024-07-29 18:18:30 +0000405 // reject the prctl. Because no devices launched with PAC enabled before API level 31, we can
406 // avoid issues on upgrading devices by checking for PAC support before issuing the prctl.
Peter Collingbournedcbacd62021-04-22 12:13:40 -0700407 static const bool pac_supported = getauxval(AT_HWCAP) & HWCAP_PACA;
Elliott Hughesa7637a82024-07-29 18:18:30 +0000408 if (pac_supported && android_get_application_target_sdk_version() >= 31) {
Peter Collingbourne811d1802021-03-25 11:46:44 -0700409 prctl(PR_PAC_RESET_KEYS, PR_PAC_APIAKEY, 0, 0, 0);
410 }
411#endif
Elliott Hughese48b6852013-11-15 14:57:45 -0800412
Elliott Hughese48b6852013-11-15 14:57:45 -0800413 void* result = thread->start_routine(thread->start_routine_arg);
414 pthread_exit(result);
415
416 return 0;
417}
418
// A no-op start routine for pthread_create failures where we've created a thread but aren't
// going to run user code on it. We swap out the user's start routine for this and take advantage
// of the regular thread teardown to free up resources.
//
// Ignores its argument and always returns a null pointer.
static void* __do_nothing(void* /* unused */) {
  void* const no_result = nullptr;
  return no_result;
}
425
Peter Collingbourne5d3aa862020-09-11 15:05:17 -0700426pthread_rwlock_t g_thread_creation_lock = PTHREAD_RWLOCK_INITIALIZER;
dimitryfa432522017-10-25 13:07:45 +0200427
428__BIONIC_WEAK_FOR_NATIVE_BRIDGE
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800429int pthread_create(pthread_t* thread_out, pthread_attr_t const* attr,
430 void* (*start_routine)(void*), void* arg) {
431 ErrnoRestorer errno_restorer;
432
Yabin Cui8cf1b302014-12-03 21:36:24 -0800433 pthread_attr_t thread_attr;
Philip Cuadra77d0f902019-01-25 10:39:25 -0800434 ScopedTrace trace("pthread_create");
Yi Kong32bc0fc2018-08-02 17:31:13 -0700435 if (attr == nullptr) {
Yabin Cui8cf1b302014-12-03 21:36:24 -0800436 pthread_attr_init(&thread_attr);
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800437 } else {
Yabin Cui8cf1b302014-12-03 21:36:24 -0800438 thread_attr = *attr;
Yi Kong32bc0fc2018-08-02 17:31:13 -0700439 attr = nullptr; // Prevent misuse below.
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800440 }
441
Ryan Prichard45d13492019-01-03 02:51:30 -0800442 bionic_tcb* tcb = nullptr;
Yi Kong32bc0fc2018-08-02 17:31:13 -0700443 void* child_stack = nullptr;
Ryan Prichard45d13492019-01-03 02:51:30 -0800444 int result = __allocate_thread(&thread_attr, &tcb, &child_stack);
Yabin Cui8cf1b302014-12-03 21:36:24 -0800445 if (result != 0) {
446 return result;
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800447 }
448
Ryan Prichard45d13492019-01-03 02:51:30 -0800449 pthread_internal_t* thread = tcb->thread();
450
Yabin Cuid26e7802015-10-22 20:07:56 -0700451 // Create a lock for the thread to wait on once it starts so we can keep
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800452 // it from doing anything until after we notify the debugger about it
453 //
454 // This also provides the memory barrier we need to ensure that all
455 // memory accesses previously performed by this thread are visible to
456 // the new thread.
Yabin Cuid26e7802015-10-22 20:07:56 -0700457 thread->startup_handshake_lock.init(false);
458 thread->startup_handshake_lock.lock();
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800459
Elliott Hughes70b24b12013-11-15 11:51:07 -0800460 thread->start_routine = start_routine;
461 thread->start_routine_arg = arg;
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800462
Elliott Hughes7086ad62014-06-19 16:39:01 -0700463 thread->set_cached_pid(getpid());
464
Elliott Hughes877ec6d2013-11-15 17:40:18 -0800465 int flags = CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM |
466 CLONE_SETTLS | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID;
Ryan Prichard45d13492019-01-03 02:51:30 -0800467 void* tls = &tcb->tls_slot(0);
Elliott Hughes80906142013-11-26 13:57:21 -0800468#if defined(__i386__)
469 // On x86 (but not x86-64), CLONE_SETTLS takes a pointer to a struct user_desc rather than
Elliott Hughes0d236aa2014-05-09 14:42:16 -0700470 // a pointer to the TLS itself.
471 user_desc tls_descriptor;
472 __init_user_desc(&tls_descriptor, false, tls);
473 tls = &tls_descriptor;
Elliott Hughes80906142013-11-26 13:57:21 -0800474#endif
Evgenii Stepanovb92d1c32019-10-02 16:26:43 -0700475
Peter Collingbourne5d3aa862020-09-11 15:05:17 -0700476 ScopedReadLock locker(&g_thread_creation_lock);
477
Florian Mayerc0aa70a2024-06-24 15:49:20 -0700478// This has to be done under g_thread_creation_lock or g_thread_list_lock to avoid racing with
479// __pthread_internal_remap_stack_with_mte.
480#ifdef __aarch64__
Florian Mayereac5f732024-11-11 12:56:37 -0800481 thread->should_allocate_stack_mte_ringbuffer = __libc_memtag_stack_abi;
482#else
483 thread->should_allocate_stack_mte_ringbuffer = false;
Florian Mayerc0aa70a2024-06-24 15:49:20 -0700484#endif
485
Evgenii Stepanovb92d1c32019-10-02 16:26:43 -0700486 sigset64_t block_all_mask;
487 sigfillset64(&block_all_mask);
488 __rt_sigprocmask(SIG_SETMASK, &block_all_mask, &thread->start_mask, sizeof(thread->start_mask));
Elliott Hughes0d236aa2014-05-09 14:42:16 -0700489 int rc = clone(__pthread_start, child_stack, flags, thread, &(thread->tid), tls, &(thread->tid));
Evgenii Stepanovb92d1c32019-10-02 16:26:43 -0700490 __rt_sigprocmask(SIG_SETMASK, &thread->start_mask, nullptr, sizeof(thread->start_mask));
Elliott Hughes877ec6d2013-11-15 17:40:18 -0800491 if (rc == -1) {
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800492 int clone_errno = errno;
Elliott Hughes877ec6d2013-11-15 17:40:18 -0800493 // We don't have to unlock the mutex at all because clone(2) failed so there's no child waiting to
494 // be unblocked, but we're about to unmap the memory the mutex is stored in, so this serves as a
495 // reminder that you can't rewrite this function to use a ScopedPthreadMutexLocker.
Yabin Cuid26e7802015-10-22 20:07:56 -0700496 thread->startup_handshake_lock.unlock();
Elliott Hughes7484c212017-02-02 02:41:38 +0000497 if (thread->mmap_size != 0) {
Ryan Prichard45d13492019-01-03 02:51:30 -0800498 munmap(thread->mmap_base, thread->mmap_size);
Elliott Hughes7484c212017-02-02 02:41:38 +0000499 }
Elliott Hughes2557f732023-07-12 21:15:23 +0000500 async_safe_format_log(ANDROID_LOG_WARN, "libc", "pthread_create failed: clone failed: %m");
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800501 return clone_errno;
502 }
503
Yabin Cui673b15e2015-03-19 14:19:19 -0700504 int init_errno = __init_thread(thread);
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800505 if (init_errno != 0) {
Elliott Hughescef3fae2013-11-19 16:52:24 -0800506 // Mark the thread detached and replace its start_routine with a no-op.
507 // Letting the thread run is the easiest way to clean up its resources.
Yabin Cui58cf31b2015-03-06 17:23:53 -0800508 atomic_store(&thread->join_state, THREAD_DETACHED);
Elliott Hughes7484c212017-02-02 02:41:38 +0000509 __pthread_internal_add(thread);
Elliott Hughescef3fae2013-11-19 16:52:24 -0800510 thread->start_routine = __do_nothing;
Yabin Cuid26e7802015-10-22 20:07:56 -0700511 thread->startup_handshake_lock.unlock();
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800512 return init_errno;
513 }
514
Elliott Hughes877ec6d2013-11-15 17:40:18 -0800515 // Publish the pthread_t and unlock the mutex to let the new thread start running.
Elliott Hughes7484c212017-02-02 02:41:38 +0000516 *thread_out = __pthread_internal_add(thread);
Yabin Cuid26e7802015-10-22 20:07:56 -0700517 thread->startup_handshake_lock.unlock();
Elliott Hughes4b4a8822013-02-12 17:15:59 -0800518
519 return 0;
520}