Merge "Move [vdso] after exe/linker in _r_debug"
diff --git a/benchmarks/string_benchmark.cpp b/benchmarks/string_benchmark.cpp
index 38122f2..d176675 100644
--- a/benchmarks/string_benchmark.cpp
+++ b/benchmarks/string_benchmark.cpp
@@ -248,6 +248,25 @@
}
BIONIC_BENCHMARK_WITH_ARG(BM_string_strcmp, "AT_ALIGNED_TWOBUF");
+static void BM_string_strncmp(benchmark::State& state) {
+ const size_t nbytes = state.range(0);
+ const size_t s1_alignment = state.range(1);
+ const size_t s2_alignment = state.range(2);
+
+ std::vector<char> s1;
+ std::vector<char> s2;
+ char* s1_aligned = GetAlignedPtrFilled(&s1, s1_alignment, nbytes, 'x');
+ char* s2_aligned = GetAlignedPtrFilled(&s2, s2_alignment, nbytes, 'x');
+
+ volatile int c __attribute__((unused));
+ for (auto _ : state) {
+ c = strncmp(s1_aligned, s2_aligned, nbytes);
+ }
+
+ state.SetBytesProcessed(uint64_t(state.iterations()) * uint64_t(nbytes));
+}
+BIONIC_BENCHMARK_WITH_ARG(BM_string_strncmp, "AT_ALIGNED_TWOBUF");
+
static void BM_string_strstr(benchmark::State& state) {
const size_t nbytes = state.range(0);
const size_t haystack_alignment = state.range(1);
diff --git a/libc/Android.bp b/libc/Android.bp
index 7356c64..61d00cd 100644
--- a/libc/Android.bp
+++ b/libc/Android.bp
@@ -71,7 +71,8 @@
stl: "none",
system_shared_libs: [],
sanitize: {
- never: true,
+ address: false,
+ integer_overflow: false,
},
native_coverage: false,
recovery_available: true,
diff --git a/libc/arch-arm64/generic/bionic/memcmp.S b/libc/arch-arm64/generic/bionic/memcmp.S
index 3a138bf..bff54ae 100644
--- a/libc/arch-arm64/generic/bionic/memcmp.S
+++ b/libc/arch-arm64/generic/bionic/memcmp.S
@@ -33,6 +33,8 @@
#include <private/bionic_asm.h>
+#define L(l) .L ## l
+
/* Parameters and result. */
#define src1 x0
#define src2 x1
@@ -42,88 +44,124 @@
/* Internal variables. */
#define data1 x3
#define data1w w3
-#define data2 x4
-#define data2w w4
-#define tmp1 x5
+#define data1h x4
+#define data2 x5
+#define data2w w5
+#define data2h x6
+#define tmp1 x7
+#define tmp2 x8
/* Small inputs of less than 8 bytes are handled separately. This allows the
- main code to be sped up using unaligned loads since there are now at least
+ main code to be sped up using unaligned loads since there are now at least
8 bytes to be compared. If the first 8 bytes are equal, align src1.
This ensures each iteration does at most one unaligned access even if both
src1 and src2 are unaligned, and mutually aligned inputs behave as if
- aligned. After the main loop, process the last 8 bytes using unaligned
+ aligned. After the main loop, process the last 16 bytes using unaligned
accesses. */
-.p2align 6
ENTRY(memcmp)
+.p2align 6
subs limit, limit, 8
- b.lo .Lless8
+ b.lo L(less8)
/* Limit >= 8, so check first 8 bytes using unaligned loads. */
ldr data1, [src1], 8
ldr data2, [src2], 8
- and tmp1, src1, 7
- add limit, limit, tmp1
cmp data1, data2
- bne .Lreturn
+ b.ne L(return)
+
+ subs limit, limit, 8
+ b.gt L(more16)
+
+ ldr data1, [src1, limit]
+ ldr data2, [src2, limit]
+ b L(return)
+
+L(more16):
+ ldr data1, [src1], 8
+ ldr data2, [src2], 8
+ cmp data1, data2
+ bne L(return)
+
+ /* Jump directly to comparing the last 16 bytes for 32 byte (or less)
+ strings. */
+ subs limit, limit, 16
+ b.ls L(last_bytes)
+
+ /* We overlap loads between 0-32 bytes at either side of SRC1 when we
+ try to align, so limit it only to strings larger than 128 bytes. */
+ cmp limit, 96
+ b.ls L(loop16)
/* Align src1 and adjust src2 with bytes not yet done. */
+ and tmp1, src1, 15
+ add limit, limit, tmp1
sub src1, src1, tmp1
sub src2, src2, tmp1
- subs limit, limit, 8
- b.ls .Llast_bytes
-
- /* Loop performing 8 bytes per iteration using aligned src1.
- Limit is pre-decremented by 8 and must be larger than zero.
- Exit if <= 8 bytes left to do or if the data is not equal. */
+ /* Loop performing 16 bytes per iteration using aligned src1.
+ Limit is pre-decremented by 16 and must be larger than zero.
+ Exit if <= 16 bytes left to do or if the data is not equal. */
.p2align 4
-.Lloop8:
- ldr data1, [src1], 8
- ldr data2, [src2], 8
- subs limit, limit, 8
- ccmp data1, data2, 0, hi /* NZCV = 0b0000. */
- b.eq .Lloop8
+L(loop16):
+ ldp data1, data1h, [src1], 16
+ ldp data2, data2h, [src2], 16
+ subs limit, limit, 16
+ ccmp data1, data2, 0, hi
+ ccmp data1h, data2h, 0, eq
+ b.eq L(loop16)
cmp data1, data2
- bne .Lreturn
+ bne L(return)
+ mov data1, data1h
+ mov data2, data2h
+ cmp data1, data2
+ bne L(return)
- /* Compare last 1-8 bytes using unaligned access. */
-.Llast_bytes:
- ldr data1, [src1, limit]
- ldr data2, [src2, limit]
+ /* Compare last 1-16 bytes using unaligned access. */
+L(last_bytes):
+ add src1, src1, limit
+ add src2, src2, limit
+ ldp data1, data1h, [src1]
+ ldp data2, data2h, [src2]
+ cmp data1, data2
+ bne L(return)
+ mov data1, data1h
+ mov data2, data2h
+ cmp data1, data2
/* Compare data bytes and set return value to 0, -1 or 1. */
-.Lreturn:
+L(return):
#ifndef __AARCH64EB__
rev data1, data1
rev data2, data2
#endif
cmp data1, data2
-.Lret_eq:
+L(ret_eq):
cset result, ne
cneg result, result, lo
- ret
+ ret
.p2align 4
/* Compare up to 8 bytes. Limit is [-8..-1]. */
-.Lless8:
+L(less8):
adds limit, limit, 4
- b.lo .Lless4
+ b.lo L(less4)
ldr data1w, [src1], 4
ldr data2w, [src2], 4
cmp data1w, data2w
- b.ne .Lreturn
+ b.ne L(return)
sub limit, limit, 4
-.Lless4:
+L(less4):
adds limit, limit, 4
- beq .Lret_eq
-.Lbyte_loop:
+ beq L(ret_eq)
+L(byte_loop):
ldrb data1w, [src1], 1
ldrb data2w, [src2], 1
subs limit, limit, 1
ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
- b.eq .Lbyte_loop
+ b.eq L(byte_loop)
sub result, data1w, data2w
ret
+
END(memcmp)
diff --git a/libc/arch-arm64/generic/bionic/strcmp.S b/libc/arch-arm64/generic/bionic/strcmp.S
index 271452d..fbc215e 100644
--- a/libc/arch-arm64/generic/bionic/strcmp.S
+++ b/libc/arch-arm64/generic/bionic/strcmp.S
@@ -32,6 +32,8 @@
#include <private/bionic_asm.h>
+#define L(label) .L ## label
+
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080
@@ -61,24 +63,25 @@
eor tmp1, src1, src2
mov zeroones, #REP8_01
tst tmp1, #7
- b.ne .Lmisaligned8
+ b.ne L(misaligned8)
ands tmp1, src1, #7
- b.ne .Lmutual_align
+ b.ne L(mutual_align)
/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
(=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
can be done in parallel across the entire word. */
-.Lloop_aligned:
+L(loop_aligned):
ldr data1, [src1], #8
ldr data2, [src2], #8
-.Lstart_realigned:
+L(start_realigned):
sub tmp1, data1, zeroones
orr tmp2, data1, #REP8_7f
eor diff, data1, data2 /* Non-zero if differences found. */
bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
orr syndrome, diff, has_nul
- cbz syndrome, .Lloop_aligned
+ cbz syndrome, L(loop_aligned)
/* End of performance-critical section -- one 64B cache line. */
+L(end):
#ifndef __AARCH64EB__
rev syndrome, syndrome
rev data1, data1
@@ -129,7 +132,7 @@
ret
#endif
-.Lmutual_align:
+L(mutual_align):
/* Sources are mutually aligned, but are not currently at an
alignment boundary. Round down the addresses and then mask off
the bytes that preceed the start point. */
@@ -149,15 +152,41 @@
#endif
orr data1, data1, tmp2
orr data2, data2, tmp2
- b .Lstart_realigned
+ b L(start_realigned)
-.Lmisaligned8:
- /* We can do better than this. */
+L(misaligned8):
+ /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
+ checking to make sure that we don't access beyond page boundary in
+ SRC2. */
+ tst src1, #7
+ b.eq L(loop_misaligned)
+L(do_misaligned):
ldrb data1w, [src1], #1
ldrb data2w, [src2], #1
cmp data1w, #1
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq .Lmisaligned8
+ b.ne L(done)
+ tst src1, #7
+ b.ne L(do_misaligned)
+
+L(loop_misaligned):
+ /* Test if we are within the last dword of the end of a 4K page. If
+ yes then jump back to the misaligned loop to compare a byte at a time. */
+ and tmp1, src2, #0xff8
+ eor tmp1, tmp1, #0xff8
+ cbz tmp1, L(do_misaligned)
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ orr syndrome, diff, has_nul
+ cbz syndrome, L(loop_misaligned)
+ b L(end)
+
+L(done):
sub result, data1, data2
ret
END(strcmp)
diff --git a/libc/arch-arm64/generic/bionic/strncmp.S b/libc/arch-arm64/generic/bionic/strncmp.S
index 267f663..b81f43a 100644
--- a/libc/arch-arm64/generic/bionic/strncmp.S
+++ b/libc/arch-arm64/generic/bionic/strncmp.S
@@ -58,6 +58,7 @@
#define limit_wd x13
#define mask x14
#define endloop x15
+#define count mask
.text
.p2align 6
@@ -69,9 +70,9 @@
eor tmp1, src1, src2
mov zeroones, #REP8_01
tst tmp1, #7
+ and count, src1, #7
b.ne .Lmisaligned8
- ands tmp1, src1, #7
- b.ne .Lmutual_align
+ cbnz count, .Lmutual_align
/* Calculate the number of full and partial words -1. */
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */
@@ -176,42 +177,104 @@
bic src1, src1, #7
bic src2, src2, #7
ldr data1, [src1], #8
- neg tmp3, tmp1, lsl #3 /* 64 - bits(bytes beyond align). */
+ neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */
ldr data2, [src2], #8
mov tmp2, #~0
sub limit_wd, limit, #1 /* limit != 0, so no underflow. */
#ifdef __AARCH64EB__
/* Big-endian. Early bytes are at MSB. */
- lsl tmp2, tmp2, tmp3 /* Shift (tmp1 & 63). */
+ lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */
#else
/* Little-endian. Early bytes are at LSB. */
- lsr tmp2, tmp2, tmp3 /* Shift (tmp1 & 63). */
+ lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */
#endif
and tmp3, limit_wd, #7
lsr limit_wd, limit_wd, #3
/* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */
- add limit, limit, tmp1
- add tmp3, tmp3, tmp1
+ add limit, limit, count
+ add tmp3, tmp3, count
orr data1, data1, tmp2
orr data2, data2, tmp2
add limit_wd, limit_wd, tmp3, lsr #3
b .Lstart_realigned
-.Lret0:
- mov result, #0
- ret
-
.p2align 6
+ /* Don't bother with dwords for up to 16 bytes. */
.Lmisaligned8:
- sub limit, limit, #1
-1:
+ cmp limit, #16
+ b.hs .Ltry_misaligned_words
+
+.Lbyte_loop:
/* Perhaps we can do better than this. */
ldrb data1w, [src1], #1
ldrb data2w, [src2], #1
subs limit, limit, #1
- ccmp data1w, #1, #0, cs /* NZCV = 0b0000. */
+ ccmp data1w, #1, #0, hi /* NZCV = 0b0000. */
ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
- b.eq 1b
+ b.eq .Lbyte_loop
+.Ldone:
sub result, data1, data2
ret
+ /* Align the SRC1 to a dword by doing a bytewise compare and then do
+ the dword loop. */
+.Ltry_misaligned_words:
+ lsr limit_wd, limit, #3
+ cbz count, .Ldo_misaligned
+
+ neg count, count
+ and count, count, #7
+ sub limit, limit, count
+ lsr limit_wd, limit, #3
+
+.Lpage_end_loop:
+ ldrb data1w, [src1], #1
+ ldrb data2w, [src2], #1
+ cmp data1w, #1
+ ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */
+ b.ne .Ldone
+ subs count, count, #1
+ b.hi .Lpage_end_loop
+
+.Ldo_misaligned:
+ /* Prepare ourselves for the next page crossing. Unlike the aligned
+ loop, we fetch 1 less dword because we risk crossing bounds on
+ SRC2. */
+ mov count, #8
+ subs limit_wd, limit_wd, #1
+ b.lo .Ldone_loop
+.Lloop_misaligned:
+ and tmp2, src2, #0xff8
+ eor tmp2, tmp2, #0xff8
+ cbz tmp2, .Lpage_end_loop
+
+ ldr data1, [src1], #8
+ ldr data2, [src2], #8
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ ccmp diff, #0, #0, eq
+ b.ne .Lnot_limit
+ subs limit_wd, limit_wd, #1
+ b.pl .Lloop_misaligned
+
+.Ldone_loop:
+ /* The dword count ran out without finding a difference or a NUL. */
+ and limit, limit, #7
+ cbz limit, .Lnot_limit
+ /* Read the last word. */
+ sub src1, src1, 8
+ sub src2, src2, 8
+ ldr data1, [src1, limit]
+ ldr data2, [src2, limit]
+ sub tmp1, data1, zeroones
+ orr tmp2, data1, #REP8_7f
+ eor diff, data1, data2 /* Non-zero if differences found. */
+ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */
+ ccmp diff, #0, #0, eq
+ b.ne .Lnot_limit
+
+.Lret0:
+ mov result, #0
+ ret
END(strncmp)
diff --git a/libc/bionic/libc_init_static.cpp b/libc/bionic/libc_init_static.cpp
index 9eb574a..55506a3 100644
--- a/libc/bionic/libc_init_static.cpp
+++ b/libc/bionic/libc_init_static.cpp
@@ -45,6 +45,10 @@
#include "private/bionic_tls.h"
#include "private/KernelArgumentBlock.h"
+#if __has_feature(hwaddress_sanitizer)
+#include <sanitizer/hwasan_interface.h>
+#endif
+
// Leave the variable uninitialized for the sake of the dynamic loader, which
// links in this file. The loader will initialize this variable before
// relocating itself.
@@ -85,11 +89,10 @@
//
// The 'structors' parameter contains pointers to various initializer
// arrays that must be run before the program's 'main' routine is launched.
-
-__noreturn void __libc_init(void* raw_args,
- void (*onexit)(void) __unused,
- int (*slingshot)(int, char**, char**),
- structors_array_t const * const structors) {
+__noreturn static void __real_libc_init(void *raw_args,
+ void (*onexit)(void) __unused,
+ int (*slingshot)(int, char**, char**),
+ structors_array_t const * const structors) {
BIONIC_STOP_UNWIND;
KernelArgumentBlock args(raw_args);
@@ -124,6 +127,20 @@
exit(slingshot(args.argc, args.argv, args.envp));
}
+#if __has_feature(hwaddress_sanitizer)
+__attribute__((no_sanitize("hwaddress")))
+#endif
+__noreturn void __libc_init(void* raw_args,
+ void (*onexit)(void) __unused,
+ int (*slingshot)(int, char**, char**),
+ structors_array_t const * const structors) {
+#if __has_feature(hwaddress_sanitizer)
+ __hwasan_shadow_init();
+#endif
+ __real_libc_init(raw_args, onexit, slingshot, structors);
+}
+
+
static uint32_t g_target_sdk_version{__ANDROID_API__};
extern "C" uint32_t android_get_application_target_sdk_version() {
diff --git a/libc/bionic/malloc_common.cpp b/libc/bionic/malloc_common.cpp
index 40a0023..5a5ec76 100644
--- a/libc/bionic/malloc_common.cpp
+++ b/libc/bionic/malloc_common.cpp
@@ -47,8 +47,26 @@
#include <private/bionic_globals.h>
#include <private/bionic_malloc_dispatch.h>
+#if __has_feature(hwaddress_sanitizer)
+// FIXME: implement these in HWASan allocator.
+extern "C" int __sanitizer_iterate(uintptr_t base __unused, size_t size __unused,
+ void (*callback)(uintptr_t base, size_t size, void* arg) __unused,
+ void* arg __unused) {
+ return 0;
+}
+
+extern "C" void __sanitizer_malloc_disable() {
+}
+
+extern "C" void __sanitizer_malloc_enable() {
+}
+#include <sanitizer/hwasan_interface.h>
+#define Malloc(function) __sanitizer_ ## function
+
+#else // __has_feature(hwaddress_sanitizer)
#include "jemalloc.h"
#define Malloc(function) je_ ## function
+#endif
static constexpr MallocDispatch __libc_malloc_default_dispatch
__attribute__((unused)) = {
diff --git a/libc/bionic/pthread_create.cpp b/libc/bionic/pthread_create.cpp
index c95d400..98d1726 100644
--- a/libc/bionic/pthread_create.cpp
+++ b/libc/bionic/pthread_create.cpp
@@ -249,6 +249,8 @@
// accesses previously made by the creating thread are visible to us.
thread->startup_handshake_lock.lock();
+ __hwasan_thread_enter();
+
__init_alternate_signal_stack(thread);
void* result = thread->start_routine(thread->start_routine_arg);
diff --git a/libc/bionic/pthread_exit.cpp b/libc/bionic/pthread_exit.cpp
index ac5d429..220f7a0 100644
--- a/libc/bionic/pthread_exit.cpp
+++ b/libc/bionic/pthread_exit.cpp
@@ -126,6 +126,7 @@
// That's one last thing we can do before dropping to assembler.
ScopedSignalBlocker ssb;
__pthread_unmap_tls(thread);
+ __hwasan_thread_exit();
_exit_with_stack_teardown(thread->attr.stack_base, thread->mmap_size);
}
}
@@ -133,5 +134,6 @@
// No need to free mapped space. Either there was no space mapped, or it is left for
// the pthread_join caller to clean up.
__pthread_unmap_tls(thread);
+ __hwasan_thread_exit();
__exit(0);
}
diff --git a/libc/bionic/pthread_internal.h b/libc/bionic/pthread_internal.h
index 18f5aee..1ec201b 100644
--- a/libc/bionic/pthread_internal.h
+++ b/libc/bionic/pthread_internal.h
@@ -31,6 +31,13 @@
#include <pthread.h>
#include <stdatomic.h>
+#if __has_feature(hwaddress_sanitizer)
+#include <sanitizer/hwasan_interface.h>
+#else
+#define __hwasan_thread_enter()
+#define __hwasan_thread_exit()
+#endif
+
#include "private/bionic_lock.h"
#include "private/bionic_tls.h"
diff --git a/libm/Android.bp b/libm/Android.bp
index da13ab1..3b88fa3 100644
--- a/libm/Android.bp
+++ b/libm/Android.bp
@@ -493,6 +493,7 @@
"-D_BSD_SOURCE",
"-DFLT_EVAL_METHOD=0",
"-include freebsd-compat.h",
+ "-fno-math-errno",
"-Wall",
"-Werror",
"-Wno-missing-braces",
@@ -503,6 +504,10 @@
"-Wno-unused-variable",
],
+ ldflags: [
+ "-Wl,--Bsymbolic-functions",
+ ],
+
include_dirs: ["bionic/libc"],
system_shared_libs: ["libc"],
diff --git a/linker/Android.bp b/linker/Android.bp
index fb6aa7d..b809f76 100644
--- a/linker/Android.bp
+++ b/linker/Android.bp
@@ -228,6 +228,10 @@
// Insert an extra objcopy step to add prefix to symbols. This is needed to prevent gdb
// looking up symbols in the linker by mistake.
prefix_symbols: "__dl_",
+
+ sanitize: {
+ hwaddress: false,
+ },
}
cc_library {