Use ifuncs in the linker
Using ifuncs allows the linker to select faster versions of libc functions
like strcmp, making linking faster.
The linker continues to first initialize TLS, then call the ifunc
resolvers. There are small amounts of code in Bionic that need to avoid
calling functions selected using ifuncs (generally string.h APIs). I've
tried to compile those pieces with -ffreestanding. Maybe it's unnecessary,
but maybe it could help avoid compiler-inserted memset calls, and maybe
it will be useful later on.
The ifuncs are called in a special early pass using special
__rel[a]_iplt_start / __rel[a]_iplt_end symbols. The linker will encounter
the ifuncs again as R_*_IRELATIVE dynamic relocations, so they're skipped
on the second pass.
Break linker_main.cpp into its own liblinker_main library so it can be
compiled with -ffreestanding.
On walleye, this change fixes a recent 2.3% linker64 start-up time
regression (156.6ms -> 160.2ms), but it also helps the 32-bit time by
about 1.9% on the same benchmark. I'm measuring the run-time using a
synthetic benchmark based on loading libandroid_servers.so.
Test: bionic unit tests, manual benchmarking
Bug: none
Merged-In: Ieb9446c2df13a66fc0d377596756becad0af6995
Change-Id: Ieb9446c2df13a66fc0d377596756becad0af6995
(cherry picked from commit 772bcbb0c2f7a87b18021849528240ef0c617d94)
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 6e1b0de..94cf1f8 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -57,7 +57,9 @@
//
// This is in a file by itself because it needs to be built with
// -fno-stack-protector because it's responsible for setting up the main
-// thread's TLS (which stack protector relies on).
+// thread's TLS (which stack protector relies on). It's also built with
+// -ffreestanding because the early init function runs in the linker before
+// ifunc resolvers have run.
// Do enough setup to:
// - Let the dynamic linker invoke system calls (and access errno)
@@ -65,7 +67,8 @@
// - Allow the stack protector to work (with a zero cookie)
// Avoid doing much more because, when this code is called within the dynamic
// linker, the linker binary hasn't been relocated yet, so certain kinds of code
-// are hazardous, such as accessing non-hidden global variables.
+// are hazardous, such as accessing non-hidden global variables or calling
+// string.h functions.
__BIONIC_WEAK_FOR_NATIVE_BRIDGE
extern "C" void __libc_init_main_thread_early(const KernelArgumentBlock& args,
bionic_tcb* temp_tcb) {
@@ -80,6 +83,23 @@
main_thread.set_cached_pid(main_thread.tid);
}
+// This code is used both by each new pthread and the code that initializes the main thread.
+void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread) {
+#ifdef TLS_SLOT_SELF
+ // On x86, slot 0 must point to itself so code can read the thread pointer by
+ // loading %fs:0 or %gs:0.
+ tcb->tls_slot(TLS_SLOT_SELF) = &tcb->tls_slot(TLS_SLOT_SELF);
+#endif
+ tcb->tls_slot(TLS_SLOT_THREAD_ID) = thread;
+}
+
+void __init_tcb_dtv(bionic_tcb* tcb) {
+ // Initialize the DTV slot to a statically-allocated empty DTV. The first
+ // access to a dynamic TLS variable allocates a new DTV.
+ static const TlsDtv zero_dtv = {};
+ __set_tcb_dtv(tcb, const_cast<TlsDtv*>(&zero_dtv));
+}
+
// Finish initializing the main thread.
__BIONIC_WEAK_FOR_NATIVE_BRIDGE
extern "C" void __libc_init_main_thread_late() {
diff --git a/libc/bionic/bionic_call_ifunc_resolver.cpp b/libc/bionic/bionic_call_ifunc_resolver.cpp
index 8522835..437de78 100644
--- a/libc/bionic/bionic_call_ifunc_resolver.cpp
+++ b/libc/bionic/bionic_call_ifunc_resolver.cpp
@@ -30,14 +30,32 @@
#include <sys/auxv.h>
#include <sys/ifunc.h>
+#include "private/bionic_auxv.h"
+
+// This code is called in the linker before it has been relocated, so minimize calls into other
+// parts of Bionic. In particular, we won't ever have two ifunc resolvers called concurrently, so
+// initializing the ifunc resolver argument doesn't need to be thread-safe.
+
ElfW(Addr) __bionic_call_ifunc_resolver(ElfW(Addr) resolver_addr) {
#if defined(__aarch64__)
typedef ElfW(Addr) (*ifunc_resolver_t)(uint64_t, __ifunc_arg_t*);
- static __ifunc_arg_t arg = { sizeof(__ifunc_arg_t), getauxval(AT_HWCAP), getauxval(AT_HWCAP2) };
+ static __ifunc_arg_t arg;
+ static bool initialized = false;
+ if (!initialized) {
+ initialized = true;
+ arg._size = sizeof(__ifunc_arg_t);
+ arg._hwcap = getauxval(AT_HWCAP);
+ arg._hwcap2 = getauxval(AT_HWCAP2);
+ }
return reinterpret_cast<ifunc_resolver_t>(resolver_addr)(arg._hwcap | _IFUNC_ARG_HWCAP, &arg);
#elif defined(__arm__)
typedef ElfW(Addr) (*ifunc_resolver_t)(unsigned long);
- static unsigned long hwcap = getauxval(AT_HWCAP);
+ static unsigned long hwcap;
+ static bool initialized = false;
+ if (!initialized) {
+ initialized = true;
+ hwcap = getauxval(AT_HWCAP);
+ }
return reinterpret_cast<ifunc_resolver_t>(resolver_addr)(hwcap);
#else
typedef ElfW(Addr) (*ifunc_resolver_t)(void);
diff --git a/libc/bionic/getauxval.cpp b/libc/bionic/getauxval.cpp
index c8f867b..f865f97 100644
--- a/libc/bionic/getauxval.cpp
+++ b/libc/bionic/getauxval.cpp
@@ -36,7 +36,6 @@
// This function needs to be safe to call before TLS is set up, so it can't
// access errno or the stack protector.
-__attribute__((no_stack_protector))
__LIBC_HIDDEN__ unsigned long __bionic_getauxval(unsigned long type, bool& exists) {
for (ElfW(auxv_t)* v = __libc_shared_globals()->auxv; v->a_type != AT_NULL; ++v) {
if (v->a_type == type) {
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index 0b74023..28c0b0c 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -231,6 +231,9 @@
g_target_sdk_version = target;
}
+// This function is called in the dynamic linker before ifunc resolvers have run, so this file is
+// compiled with -ffreestanding to avoid implicit string.h function calls. (It shouldn't strictly
+// be necessary, though.)
__LIBC_HIDDEN__ libc_shared_globals* __libc_shared_globals() {
static libc_shared_globals globals;
return &globals;
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 1dc1066..03af2d9 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -54,31 +54,12 @@
void __init_user_desc(struct user_desc*, bool, void*);
#endif
-// This code is used both by each new pthread and the code that initializes the main thread.
-__attribute__((no_stack_protector))
-void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread) {
-#ifdef TLS_SLOT_SELF
- // On x86, slot 0 must point to itself so code can read the thread pointer by
- // loading %fs:0 or %gs:0.
- tcb->tls_slot(TLS_SLOT_SELF) = &tcb->tls_slot(TLS_SLOT_SELF);
-#endif
- tcb->tls_slot(TLS_SLOT_THREAD_ID) = thread;
-}
-
__attribute__((no_stack_protector))
void __init_tcb_stack_guard(bionic_tcb* tcb) {
// GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
tcb->tls_slot(TLS_SLOT_STACK_GUARD) = reinterpret_cast<void*>(__stack_chk_guard);
}
-__attribute__((no_stack_protector))
-void __init_tcb_dtv(bionic_tcb* tcb) {
- // Initialize the DTV slot to a statically-allocated empty DTV. The first
- // access to a dynamic TLS variable allocates a new DTV.
- static const TlsDtv zero_dtv = {};
- __set_tcb_dtv(tcb, const_cast<TlsDtv*>(&zero_dtv));
-}
-
void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
tcb->thread()->bionic_tls = tls;
tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;