Use ifuncs in the linker
Using ifuncs allows the linker to select faster versions of libc functions
like strcmp, making linking faster.
The linker continues to initialize TLS first and then call the ifunc
resolvers. There are small amounts of code in Bionic that run before the
resolvers and so need to avoid calling functions selected using ifuncs
(generally string.h APIs). I've tried to compile those pieces with
-ffreestanding. It may be unnecessary, but it could help avoid
compiler-inserted memset calls, and it may be useful later on.
The ifunc resolvers are called in a special early pass that uses the
__rel[a]_iplt_start / __rel[a]_iplt_end symbols. The linker will encounter
the same ifuncs again as R_*_IRELATIVE dynamic relocations, so they're
skipped on the second pass.
Break linker_main.cpp into its own liblinker_main library so it can be
compiled with -ffreestanding.
On walleye, this change fixes a recent 2.3% linker64 start-up time
regression (156.6ms -> 160.2ms), and it also improves the 32-bit start-up
time by about 1.9% on the same benchmark. I'm measuring the run-time using
a synthetic benchmark based on loading libandroid_servers.so.
Test: bionic unit tests, manual benchmarking
Bug: none
Merged-In: Ieb9446c2df13a66fc0d377596756becad0af6995
Change-Id: Ieb9446c2df13a66fc0d377596756becad0af6995
(cherry picked from commit 772bcbb0c2f7a87b18021849528240ef0c617d94)
diff --git a/libc/Android.bp b/libc/Android.bp
index 53a26a6..c5ea4c5 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -142,19 +142,21 @@
}
// ========================================================
-// libc_stack_protector.a - stack protector code
+// libc_bootstrap.a - -fno-stack-protector and -ffreestanding
// ========================================================
//
-// Code that implements the stack protector (or that runs
-// before TLS has been set up) needs to be compiled with
-// -fno-stack-protector, since it accesses the stack canary
-// TLS slot.
+// Code that implements the stack protector (or that runs before TLS has been set up) needs to be
+// compiled with -fno-stack-protector, since it accesses the stack canary TLS slot. In the linker,
+// some of this code runs before ifunc resolvers have made string.h functions work, so compile with
+// -ffreestanding.
cc_library_static {
srcs: [
"bionic/__libc_init_main_thread.cpp",
"bionic/__stack_chk_fail.cpp",
+ "bionic/bionic_call_ifunc_resolver.cpp",
+ "bionic/getauxval.cpp",
],
arch: {
arm64: {
@@ -172,20 +174,25 @@
},
defaults: ["libc_defaults"],
- cflags: ["-fno-stack-protector"],
- name: "libc_stack_protector",
+ cflags: ["-fno-stack-protector", "-ffreestanding"],
+ name: "libc_bootstrap",
}
-// libc_init_static.cpp also needs to be built without stack protector,
-// because it's responsible for setting up TLS for static executables.
-// This isn't the case for dynamic executables because the dynamic linker
-// has already set up the main thread's TLS.
+// libc_init_static.cpp and libc_init_dynamic.cpp need to be built without stack protector.
+// libc_init_static.cpp sets up TLS for static executables, and libc_init_dynamic.cpp initializes
+// the stack protector global variable.
cc_library_static {
name: "libc_init_static",
defaults: ["libc_defaults"],
srcs: ["bionic/libc_init_static.cpp"],
- cflags: ["-fno-stack-protector"],
+ cflags: [
+ "-fno-stack-protector",
+
+ // Compile libc_init_static.cpp with -ffreestanding, because some of its code is called
+ // from the linker before ifunc resolvers have made string.h functions available.
+ "-ffreestanding",
+ ],
}
cc_library_static {
@@ -784,12 +791,6 @@
cc_library_static {
defaults: ["libc_defaults"],
srcs: [
- // The data that backs getauxval is initialized in the libc init
- // functions which are invoked by the linker. If this file is included
- // in libc_ndk.a, only one of the copies of the global data will be
- // initialized, resulting in nullptr dereferences.
- "bionic/getauxval.cpp",
-
// These require getauxval, which isn't available on older platforms.
"bionic/sysconf.cpp",
"bionic/vdso.cpp",
@@ -1084,7 +1085,6 @@
"bionic/atof.cpp",
"bionic/bionic_allocator.cpp",
"bionic/bionic_arc4random.cpp",
- "bionic/bionic_call_ifunc_resolver.cpp",
"bionic/bionic_futex.cpp",
"bionic/bionic_netlink.cpp",
"bionic/bionic_systrace.cpp",
@@ -1427,6 +1427,7 @@
whole_static_libs: [
"libc_bionic_ndk",
+ "libc_bootstrap",
"libc_fortify",
"libc_freebsd",
"libc_freebsd_large_stack",
@@ -1434,7 +1435,6 @@
"libc_netbsd",
"libc_openbsd_large_stack",
"libc_openbsd_ndk",
- "libc_stack_protector",
"libc_syscalls",
"libc_tzcode",
"libm",
@@ -1458,6 +1458,7 @@
whole_static_libs: [
"libc_bionic",
"libc_bionic_ndk",
+ "libc_bootstrap",
"libc_dns",
"libc_fortify",
"libc_freebsd",
@@ -1467,7 +1468,6 @@
"libc_openbsd",
"libc_openbsd_large_stack",
"libc_openbsd_ndk",
- "libc_stack_protector",
"libc_syscalls",
"libc_tzcode",
"libstdc++",
@@ -1495,11 +1495,11 @@
}
// ========================================================
-// libc_common_static.a For static binaries.
+// libc_static_dispatch.a
// ========================================================
cc_library_static {
defaults: ["libc_defaults"],
- name: "libc_common_static",
+ name: "libc_static_dispatch",
arch: {
x86: {
@@ -1512,18 +1512,14 @@
srcs: ["arch-arm64/static_function_dispatch.S"],
},
},
-
- whole_static_libs: [
- "libc_common",
- ],
}
// ========================================================
-// libc_common_shared.a For shared libraries.
+// libc_dynamic_dispatch.a
// ========================================================
cc_library_static {
defaults: ["libc_defaults"],
- name: "libc_common_shared",
+ name: "libc_dynamic_dispatch",
cflags: [
"-ffreestanding",
@@ -1541,9 +1537,31 @@
srcs: ["arch-arm64/dynamic_function_dispatch.cpp"],
},
},
+}
+
+// ========================================================
+// libc_common_static.a For static binaries.
+// ========================================================
+cc_library_static {
+ defaults: ["libc_defaults"],
+ name: "libc_common_static",
whole_static_libs: [
"libc_common",
+ "libc_static_dispatch",
+ ],
+}
+
+// ========================================================
+// libc_common_shared.a For shared libraries.
+// ========================================================
+cc_library_static {
+ defaults: ["libc_defaults"],
+ name: "libc_common_shared",
+
+ whole_static_libs: [
+ "libc_common",
+ "libc_dynamic_dispatch",
],
}
@@ -1567,19 +1585,16 @@
// libc_nomalloc.a
// ========================================================
//
-// This is a version of the static C library that does not
-// include malloc. It's useful in situations when the user wants
-// to provide their own malloc implementation, or wants to
-// explicitly disallow the use of malloc, such as in the
-// dynamic linker.
+// This is a version of the static C library used by the dynamic linker that excludes malloc. It
+// also excludes functions selected using ifuncs (e.g. for string.h). Link in either
+// libc_static_dispatch or libc_dynamic_dispatch to provide those functions.
cc_library_static {
name: "libc_nomalloc",
defaults: ["libc_defaults"],
- cflags: ["-DLIBC_STATIC"],
whole_static_libs: [
- "libc_common_static",
+ "libc_common",
"libc_init_static",
"libc_unwind_static",
],
diff --git a/libc/bionic/__libc_init_main_thread.cpp b/libc/bionic/__libc_init_main_thread.cpp
index 6e1b0de..94cf1f8 100644
--- a/libc/bionic/__libc_init_main_thread.cpp
+++ b/libc/bionic/__libc_init_main_thread.cpp
@@ -57,7 +57,9 @@
//
// This is in a file by itself because it needs to be built with
// -fno-stack-protector because it's responsible for setting up the main
-// thread's TLS (which stack protector relies on).
+// thread's TLS (which stack protector relies on). It's also built with
+// -ffreestanding because the early init function runs in the linker before
+// ifunc resolvers have run.
// Do enough setup to:
// - Let the dynamic linker invoke system calls (and access errno)
@@ -65,7 +67,8 @@
// - Allow the stack protector to work (with a zero cookie)
// Avoid doing much more because, when this code is called within the dynamic
// linker, the linker binary hasn't been relocated yet, so certain kinds of code
-// are hazardous, such as accessing non-hidden global variables.
+// are hazardous, such as accessing non-hidden global variables or calling
+// string.h functions.
__BIONIC_WEAK_FOR_NATIVE_BRIDGE
extern "C" void __libc_init_main_thread_early(const KernelArgumentBlock& args,
bionic_tcb* temp_tcb) {
@@ -80,6 +83,23 @@
main_thread.set_cached_pid(main_thread.tid);
}
+// This code is used both by each new pthread and the code that initializes the main thread.
+void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread) {
+#ifdef TLS_SLOT_SELF
+ // On x86, slot 0 must point to itself so code can read the thread pointer by
+ // loading %fs:0 or %gs:0.
+ tcb->tls_slot(TLS_SLOT_SELF) = &tcb->tls_slot(TLS_SLOT_SELF);
+#endif
+ tcb->tls_slot(TLS_SLOT_THREAD_ID) = thread;
+}
+
+void __init_tcb_dtv(bionic_tcb* tcb) {
+ // Initialize the DTV slot to a statically-allocated empty DTV. The first
+ // access to a dynamic TLS variable allocates a new DTV.
+ static const TlsDtv zero_dtv = {};
+ __set_tcb_dtv(tcb, const_cast<TlsDtv*>(&zero_dtv));
+}
+
// Finish initializing the main thread.
__BIONIC_WEAK_FOR_NATIVE_BRIDGE
extern "C" void __libc_init_main_thread_late() {
diff --git a/libc/bionic/bionic_call_ifunc_resolver.cpp b/libc/bionic/bionic_call_ifunc_resolver.cpp
index 8522835..437de78 100644
--- a/libc/bionic/bionic_call_ifunc_resolver.cpp
+++ b/libc/bionic/bionic_call_ifunc_resolver.cpp
@@ -30,14 +30,32 @@
#include <sys/auxv.h>
#include <sys/ifunc.h>
+#include "private/bionic_auxv.h"
+
+// This code is called in the linker before it has been relocated, so minimize calls into other
+// parts of Bionic. In particular, we won't ever have two ifunc resolvers called concurrently, so
+// initializing the ifunc resolver argument doesn't need to be thread-safe.
+
ElfW(Addr) __bionic_call_ifunc_resolver(ElfW(Addr) resolver_addr) {
#if defined(__aarch64__)
typedef ElfW(Addr) (*ifunc_resolver_t)(uint64_t, __ifunc_arg_t*);
- static __ifunc_arg_t arg = { sizeof(__ifunc_arg_t), getauxval(AT_HWCAP), getauxval(AT_HWCAP2) };
+ static __ifunc_arg_t arg;
+ static bool initialized = false;
+ if (!initialized) {
+ initialized = true;
+ arg._size = sizeof(__ifunc_arg_t);
+ arg._hwcap = getauxval(AT_HWCAP);
+ arg._hwcap2 = getauxval(AT_HWCAP2);
+ }
return reinterpret_cast<ifunc_resolver_t>(resolver_addr)(arg._hwcap | _IFUNC_ARG_HWCAP, &arg);
#elif defined(__arm__)
typedef ElfW(Addr) (*ifunc_resolver_t)(unsigned long);
- static unsigned long hwcap = getauxval(AT_HWCAP);
+ static unsigned long hwcap;
+ static bool initialized = false;
+ if (!initialized) {
+ initialized = true;
+ hwcap = getauxval(AT_HWCAP);
+ }
return reinterpret_cast<ifunc_resolver_t>(resolver_addr)(hwcap);
#else
typedef ElfW(Addr) (*ifunc_resolver_t)(void);
diff --git a/libc/bionic/getauxval.cpp b/libc/bionic/getauxval.cpp
index c8f867b..f865f97 100644
--- a/libc/bionic/getauxval.cpp
+++ b/libc/bionic/getauxval.cpp
@@ -36,7 +36,6 @@
// This function needs to be safe to call before TLS is set up, so it can't
// access errno or the stack protector.
-__attribute__((no_stack_protector))
__LIBC_HIDDEN__ unsigned long __bionic_getauxval(unsigned long type, bool& exists) {
for (ElfW(auxv_t)* v = __libc_shared_globals()->auxv; v->a_type != AT_NULL; ++v) {
if (v->a_type == type) {
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index 0b74023..28c0b0c 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -231,6 +231,9 @@
g_target_sdk_version = target;
}
+// This function is called in the dynamic linker before ifunc resolvers have run, so this file is
+// compiled with -ffreestanding to avoid implicit string.h function calls. (It shouldn't strictly
+// be necessary, though.)
__LIBC_HIDDEN__ libc_shared_globals* __libc_shared_globals() {
static libc_shared_globals globals;
return &globals;
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index 1dc1066..03af2d9 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -54,31 +54,12 @@
void __init_user_desc(struct user_desc*, bool, void*);
#endif
-// This code is used both by each new pthread and the code that initializes the main thread.
-__attribute__((no_stack_protector))
-void __init_tcb(bionic_tcb* tcb, pthread_internal_t* thread) {
-#ifdef TLS_SLOT_SELF
- // On x86, slot 0 must point to itself so code can read the thread pointer by
- // loading %fs:0 or %gs:0.
- tcb->tls_slot(TLS_SLOT_SELF) = &tcb->tls_slot(TLS_SLOT_SELF);
-#endif
- tcb->tls_slot(TLS_SLOT_THREAD_ID) = thread;
-}
-
__attribute__((no_stack_protector))
void __init_tcb_stack_guard(bionic_tcb* tcb) {
// GCC looks in the TLS for the stack guard on x86, so copy it there from our global.
tcb->tls_slot(TLS_SLOT_STACK_GUARD) = reinterpret_cast<void*>(__stack_chk_guard);
}
-__attribute__((no_stack_protector))
-void __init_tcb_dtv(bionic_tcb* tcb) {
- // Initialize the DTV slot to a statically-allocated empty DTV. The first
- // access to a dynamic TLS variable allocates a new DTV.
- static const TlsDtv zero_dtv = {};
- __set_tcb_dtv(tcb, const_cast<TlsDtv*>(&zero_dtv));
-}
-
void __init_bionic_tls_ptrs(bionic_tcb* tcb, bionic_tls* tls) {
tcb->thread()->bionic_tls = tls;
tcb->tls_slot(TLS_SLOT_BIONIC_TLS) = tls;