Add recoverable GWP-ASan.
Recoverable GWP-ASan is a mode landed upstream in
https://reviews.llvm.org/D140173. For more information about why/what it
is, see
https://android-review.git.corp.google.com/c/platform/bionic/+/2394588.
This patch makes debuggerd call the required libc callbacks for GWP-ASan
to recover from the memory corruption. It also adds the functionality
that libart/sigchain eventually ends up calling, which dumps a GWP-ASan
report for the first error encountered.
Test: Build the platform, run sanitizer-status in recoverable mode,
asserting that it doesn't crash but we get a debuggerd report.
Bug: 247012630
Change-Id: I27212f7250844c20a8fd1e961417cdb4e5bd3626
diff --git a/debuggerd/Android.bp b/debuggerd/Android.bp
index 15b5813..5da1b40 100644
--- a/debuggerd/Android.bp
+++ b/debuggerd/Android.bp
@@ -98,6 +98,7 @@
"libbase_headers",
"libdebuggerd_common_headers",
"bionic_libc_platform_headers",
+ "gwp_asan_headers",
],
whole_static_libs: [
diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp
index 9c1b136..f904d48 100644
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp
@@ -64,6 +64,7 @@
#include "crash_test.h"
#include "debuggerd/handler.h"
+#include "gtest/gtest.h"
#include "libdebuggerd/utility.h"
#include "protocol.h"
#include "tombstoned/tombstoned.h"
@@ -110,19 +111,6 @@
ASSERT_MATCH(result, \
R"(#\d\d pc [0-9a-f]+\s+ \S+ (\(offset 0x[0-9a-f]+\) )?\()" frame_name R"(\+)");
-// Enable GWP-ASan at the start of this process. GWP-ASan is enabled using
-// process sampling, so we need to ensure we force GWP-ASan on.
-__attribute__((constructor)) static void enable_gwp_asan() {
- android_mallopt_gwp_asan_options_t opts;
- // No, we're not an app, but let's turn ourselves on without sampling.
- // Technically, if someone's using the *.default_app sysprops, they'll adjust
- // our settings, but I don't think this will be common on a device that's
- // running debuggerd_tests.
- opts.desire = android_mallopt_gwp_asan_options_t::Action::TURN_ON_FOR_APP;
- opts.program_name = "";
- android_mallopt(M_INITIALIZE_GWP_ASAN, &opts, sizeof(android_mallopt_gwp_asan_options_t));
-}
-
static void tombstoned_intercept(pid_t target_pid, unique_fd* intercept_fd, unique_fd* output_fd,
InterceptStatus* status, DebuggerdDumpType intercept_type) {
intercept_fd->reset(socket_local_client(kTombstonedInterceptSocketName,
@@ -468,76 +456,6 @@
}
#endif
-// Number of iterations required to reliably guarantee a GWP-ASan crash.
-// GWP-ASan's sample rate is not truly nondeterministic, it initialises a
-// thread-local counter at 2*SampleRate, and decrements on each malloc(). Once
-// the counter reaches zero, we provide a sampled allocation. Then, double that
-// figure to allow for left/right allocation alignment, as this is done randomly
-// without bias.
-#define GWP_ASAN_ITERATIONS_TO_ENSURE_CRASH (0x20000)
-
-struct GwpAsanTestParameters {
- size_t alloc_size;
- bool free_before_access;
- int access_offset;
- std::string cause_needle; // Needle to be found in the "Cause: [GWP-ASan]" line.
-};
-
-struct GwpAsanCrasherTest : CrasherTest, testing::WithParamInterface<GwpAsanTestParameters> {};
-
-GwpAsanTestParameters gwp_asan_tests[] = {
- {/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 0, "Use After Free, 0 bytes into a 7-byte allocation"},
- {/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 1, "Use After Free, 1 byte into a 7-byte allocation"},
- {/* alloc_size */ 7, /* free_before_access */ false, /* access_offset */ 16, "Buffer Overflow, 9 bytes right of a 7-byte allocation"},
- {/* alloc_size */ 16, /* free_before_access */ false, /* access_offset */ -1, "Buffer Underflow, 1 byte left of a 16-byte allocation"},
-};
-
-INSTANTIATE_TEST_SUITE_P(GwpAsanTests, GwpAsanCrasherTest, testing::ValuesIn(gwp_asan_tests));
-
-TEST_P(GwpAsanCrasherTest, gwp_asan_uaf) {
- if (mte_supported()) {
- // Skip this test on MTE hardware, as MTE will reliably catch these errors
- // instead of GWP-ASan.
- GTEST_SKIP() << "Skipped on MTE.";
- }
- // Skip this test on HWASan, which will reliably catch test errors as well.
- SKIP_WITH_HWASAN;
-
- GwpAsanTestParameters params = GetParam();
- LogcatCollector logcat_collector;
-
- int intercept_result;
- unique_fd output_fd;
- StartProcess([¶ms]() {
- for (unsigned i = 0; i < GWP_ASAN_ITERATIONS_TO_ENSURE_CRASH; ++i) {
- volatile char* p = reinterpret_cast<volatile char*>(malloc(params.alloc_size));
- if (params.free_before_access) free(static_cast<void*>(const_cast<char*>(p)));
- p[params.access_offset] = 42;
- if (!params.free_before_access) free(static_cast<void*>(const_cast<char*>(p)));
- }
- });
-
- StartIntercept(&output_fd);
- FinishCrasher();
- AssertDeath(SIGSEGV);
- FinishIntercept(&intercept_result);
-
- ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
-
- std::vector<std::string> log_sources(2);
- ConsumeFd(std::move(output_fd), &log_sources[0]);
- logcat_collector.Collect(&log_sources[1]);
-
- for (const auto& result : log_sources) {
- ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\), code 2 \(SEGV_ACCERR\))");
- ASSERT_MATCH(result, R"(Cause: \[GWP-ASan\]: )" + params.cause_needle);
- if (params.free_before_access) {
- ASSERT_MATCH(result, R"(deallocated by thread .*\n.*#00 pc)");
- }
- ASSERT_MATCH(result, R"((^|\s)allocated by thread .*\n.*#00 pc)");
- }
-}
-
struct SizeParamCrasherTest : CrasherTest, testing::WithParamInterface<size_t> {};
INSTANTIATE_TEST_SUITE_P(Sizes, SizeParamCrasherTest, testing::Values(0, 16, 131072));
@@ -1278,7 +1196,11 @@
static const char* const kDebuggerdSeccompPolicy =
"/system/etc/seccomp_policy/crash_dump." ABI_STRING ".policy";
-static pid_t seccomp_fork_impl(void (*prejail)()) {
+static void setup_jail(minijail* jail) {
+ if (!jail) {
+ LOG(FATAL) << "failed to create minijail";
+ }
+
std::string policy;
if (!android::base::ReadFileToString(kDebuggerdSeccompPolicy, &policy)) {
PLOG(FATAL) << "failed to read policy file";
@@ -1305,15 +1227,15 @@
PLOG(FATAL) << "failed to seek tmp_fd";
}
- ScopedMinijail jail{minijail_new()};
- if (!jail) {
- LOG(FATAL) << "failed to create minijail";
- }
+ minijail_no_new_privs(jail);
+ minijail_log_seccomp_filter_failures(jail);
+ minijail_use_seccomp_filter(jail);
+ minijail_parse_seccomp_filters_from_fd(jail, tmp_fd.release());
+}
- minijail_no_new_privs(jail.get());
- minijail_log_seccomp_filter_failures(jail.get());
- minijail_use_seccomp_filter(jail.get());
- minijail_parse_seccomp_filters_from_fd(jail.get(), tmp_fd.release());
+static pid_t seccomp_fork_impl(void (*prejail)()) {
+ ScopedMinijail jail{minijail_new()};
+ setup_jail(jail.get());
pid_t result = fork();
if (result == -1) {
@@ -1627,6 +1549,138 @@
AssertDeath(SIGABRT);
}
+struct GwpAsanTestParameters {
+ size_t alloc_size;
+ bool free_before_access;
+ int access_offset;
+ std::string cause_needle; // Needle to be found in the "Cause: [GWP-ASan]" line.
+};
+
+struct GwpAsanCrasherTest
+ : CrasherTest,
+ testing::WithParamInterface<
+ std::tuple<GwpAsanTestParameters, /* recoverable */ bool, /* seccomp */ bool>> {};
+
+GwpAsanTestParameters gwp_asan_tests[] = {
+ {/* alloc_size */ 7, /* free_before_access */ true, /* access_offset */ 0,
+ "Use After Free, 0 bytes into a 7-byte allocation"},
+ {/* alloc_size */ 15, /* free_before_access */ true, /* access_offset */ 1,
+ "Use After Free, 1 byte into a 15-byte allocation"},
+ {/* alloc_size */ 4096, /* free_before_access */ false, /* access_offset */ 4098,
+ "Buffer Overflow, 2 bytes right of a 4096-byte allocation"},
+ {/* alloc_size */ 4096, /* free_before_access */ false, /* access_offset */ -1,
+ "Buffer Underflow, 1 byte left of a 4096-byte allocation"},
+};
+
+INSTANTIATE_TEST_SUITE_P(
+ GwpAsanTests, GwpAsanCrasherTest,
+ testing::Combine(testing::ValuesIn(gwp_asan_tests),
+ /* recoverable */ testing::Bool(),
+ /* seccomp */ testing::Bool()),
+ [](const testing::TestParamInfo<
+ std::tuple<GwpAsanTestParameters, /* recoverable */ bool, /* seccomp */ bool>>& info) {
+ const GwpAsanTestParameters& params = std::get<0>(info.param);
+ std::string name = params.free_before_access ? "UseAfterFree" : "Overflow";
+ name += testing::PrintToString(params.alloc_size);
+ name += "Alloc";
+ if (params.access_offset < 0) {
+ name += "Left";
+ name += testing::PrintToString(params.access_offset * -1);
+ } else {
+ name += "Right";
+ name += testing::PrintToString(params.access_offset);
+ }
+ name += "Bytes";
+ if (std::get<1>(info.param)) name += "Recoverable";
+ if (std::get<2>(info.param)) name += "Seccomp";
+ return name;
+ });
+
+TEST_P(GwpAsanCrasherTest, run_gwp_asan_test) {
+ if (mte_supported()) {
+ // Skip this test on MTE hardware, as MTE will reliably catch these errors
+ // instead of GWP-ASan.
+ GTEST_SKIP() << "Skipped on MTE.";
+ }
+ // Skip this test on HWASan, which will reliably catch test errors as well.
+ SKIP_WITH_HWASAN;
+
+ GwpAsanTestParameters params = std::get<0>(GetParam());
+ bool recoverable = std::get<1>(GetParam());
+ LogcatCollector logcat_collector;
+
+ int intercept_result;
+ unique_fd output_fd;
+ StartProcess([&recoverable]() {
+ const char* env[] = {"GWP_ASAN_SAMPLE_RATE=1", "GWP_ASAN_PROCESS_SAMPLING=1",
+ "GWP_ASAN_MAX_ALLOCS=40000", nullptr, nullptr};
+ if (recoverable) {
+ env[3] = "GWP_ASAN_RECOVERABLE=true";
+ }
+ std::string test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
+ test_name = std::regex_replace(test_name, std::regex("run_gwp_asan_test"),
+ "DISABLED_run_gwp_asan_test");
+ std::string test_filter = "--gtest_filter=*";
+ test_filter += test_name;
+ std::string this_binary = android::base::GetExecutablePath();
+ const char* args[] = {this_binary.c_str(), "--gtest_also_run_disabled_tests",
+ test_filter.c_str(), nullptr};
+ // We check the crash report from a debuggerd handler and from logcat. The
+ // echo from stdout/stderr of the subprocess trips up atest, because it
+ // doesn't like that two tests started in a row without the first one
+ // finishing (even though the second one is in a subprocess).
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+ execve(this_binary.c_str(), const_cast<char**>(args), const_cast<char**>(env));
+ });
+
+ StartIntercept(&output_fd);
+ FinishCrasher();
+ if (recoverable) {
+ AssertDeath(0);
+ } else {
+ AssertDeath(SIGSEGV);
+ }
+ FinishIntercept(&intercept_result);
+
+ ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+ std::vector<std::string> log_sources(2);
+ ConsumeFd(std::move(output_fd), &log_sources[0]);
+ logcat_collector.Collect(&log_sources[1]);
+
+ // seccomp forces the fallback handler, which doesn't print GWP-ASan debugging
+ // information. Make sure the recovery still works, but the report won't be
+ // hugely useful, it looks like a regular SEGV.
+ bool seccomp = std::get<2>(GetParam());
+ if (!seccomp) {
+ for (const auto& result : log_sources) {
+ ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\), code 2 \(SEGV_ACCERR\))");
+ ASSERT_MATCH(result, R"(Cause: \[GWP-ASan\]: )" + params.cause_needle);
+ if (params.free_before_access) {
+ ASSERT_MATCH(result, R"(deallocated by thread .*\n.*#00 pc)");
+ }
+ ASSERT_MATCH(result, R"((^|\s)allocated by thread .*\n.*#00 pc)");
+ }
+ }
+}
+
+TEST_P(GwpAsanCrasherTest, DISABLED_run_gwp_asan_test) {
+ GwpAsanTestParameters params = std::get<0>(GetParam());
+ bool seccomp = std::get<2>(GetParam());
+ if (seccomp) {
+ ScopedMinijail jail{minijail_new()};
+ setup_jail(jail.get());
+ minijail_enter(jail.get());
+ }
+
+ // Use 'volatile' to prevent a very clever compiler eliminating the store.
+ char* volatile p = reinterpret_cast<char* volatile>(malloc(params.alloc_size));
+ if (params.free_before_access) free(static_cast<void*>(const_cast<char*>(p)));
+ p[params.access_offset] = 42;
+ if (!params.free_before_access) free(static_cast<void*>(const_cast<char*>(p)));
+}
+
TEST_F(CrasherTest, fdsan_warning_abort_message) {
int intercept_result;
unique_fd output_fd;
diff --git a/debuggerd/handler/debuggerd_handler.cpp b/debuggerd/handler/debuggerd_handler.cpp
index 7120d73..d2bf0d7 100644
--- a/debuggerd/handler/debuggerd_handler.cpp
+++ b/debuggerd/handler/debuggerd_handler.cpp
@@ -565,17 +565,38 @@
process_info = g_callbacks.get_process_info();
}
+ // GWP-ASan catches use-after-free and heap-buffer-overflow by using PROT_NONE
+ // guard pages, which lead to SEGV. Normally, debuggerd prints a bug report
+ // and the process terminates, but in some cases, we actually want to print
+ // the bug report and let the signal handler return, and restart the process.
+ // In order to do that, we need to disable GWP-ASan's guard pages. The
+ // following callbacks handle this case.
+ gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks();
+ bool gwp_asan_recoverable = false;
+ if (signal_number == SIGSEGV && signal_has_si_addr(info) &&
+ gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery &&
+ gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report &&
+ gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report &&
+ gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) {
+ gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr);
+ gwp_asan_recoverable = true;
+ }
+
// If sival_int is ~0, it means that the fallback handler has been called
// once before and this function is being called again to dump the stack
// of a specific thread. It is possible that the prctl call might return 1,
// then return 0 in subsequent calls, so check the sival_int to determine if
// the fallback handler should be called first.
- if (si_val == kDebuggerdFallbackSivalUintptrRequestDump ||
- prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1) {
+ bool no_new_privs = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 1;
+ if (si_val == kDebuggerdFallbackSivalUintptrRequestDump || no_new_privs) {
// This check might be racy if another thread sets NO_NEW_PRIVS, but this should be unlikely,
// you can only set NO_NEW_PRIVS to 1, and the effect should be at worst a single missing
// ANR trace.
debuggerd_fallback_handler(info, ucontext, process_info.abort_msg);
+ if (no_new_privs && gwp_asan_recoverable) {
+ gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
+ return;
+ }
resend_signal(info);
return;
}
@@ -649,6 +670,9 @@
// If the signal is fatal, don't unlock the mutex to prevent other crashing threads from
// starting to dump right before our death.
pthread_mutex_unlock(&crash_mutex);
+ } else if (gwp_asan_recoverable) {
+ gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
+ pthread_mutex_unlock(&crash_mutex);
}
#ifdef __aarch64__
else if (info->si_signo == SIGSEGV &&
@@ -727,3 +751,52 @@
debuggerd_register_handlers(&action);
}
+
+// When debuggerd's signal handler is the first handler called, it's great at
+// handling the recoverable GWP-ASan mode. For apps, sigchain (from libart) is
+// always the first signal handler, and so the following function is what
+// sigchain must call before processing the signal. This allows for processing
+// of a potentially recoverable GWP-ASan crash. If the signal requires GWP-ASan
+// recovery, then dump a report (via the regular debuggerd hanndler), and patch
+// up the allocator, and allow the process to continue (indicated by returning
+// 'true'). If the crash has nothing to do with GWP-ASan, or recovery isn't
+// possible, return 'false'.
+bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context) {
+ if (signal_number != SIGSEGV || !signal_has_si_addr(info)) return false;
+
+ gwp_asan_callbacks_t gwp_asan_callbacks = g_callbacks.get_gwp_asan_callbacks();
+ if (gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery == nullptr ||
+ gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report == nullptr ||
+ gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report == nullptr ||
+ !gwp_asan_callbacks.debuggerd_needs_gwp_asan_recovery(info->si_addr)) {
+ return false;
+ }
+
+ // Only dump a crash report for the first GWP-ASan crash. ActivityManager
+ // doesn't like it when an app crashes multiple times, and is even more strict
+ // about an app crashing multiple times in a short time period. While the app
+ // won't crash fully when we do GWP-ASan recovery, ActivityManager still gets
+ // the information about the crash through the DropBoxManager service. If an
+ // app has multiple back-to-back GWP-ASan crashes, this would lead to the app
+ // being killed, which defeats the purpose of having the recoverable mode. To
+ // mitigate against this, only generate a debuggerd crash report for the first
+ // GWP-ASan crash encountered. We still need to do the patching up of the
+ // allocator though, so do that.
+ static pthread_mutex_t first_crash_mutex = PTHREAD_MUTEX_INITIALIZER;
+ pthread_mutex_lock(&first_crash_mutex);
+ static bool first_crash = true;
+
+ if (first_crash) {
+ // `debuggerd_signal_handler` will call
+ // `debuggerd_gwp_asan_(pre|post)_crash_report`, so no need to manually call
+ // them here.
+ debuggerd_signal_handler(signal_number, info, context);
+ first_crash = false;
+ } else {
+ gwp_asan_callbacks.debuggerd_gwp_asan_pre_crash_report(info->si_addr);
+ gwp_asan_callbacks.debuggerd_gwp_asan_post_crash_report(info->si_addr);
+ }
+
+ pthread_mutex_unlock(&first_crash_mutex);
+ return true;
+}
diff --git a/debuggerd/include/debuggerd/handler.h b/debuggerd/include/debuggerd/handler.h
index 1f9f4e2..de88be5 100644
--- a/debuggerd/include/debuggerd/handler.h
+++ b/debuggerd/include/debuggerd/handler.h
@@ -46,14 +46,25 @@
size_t scudo_ring_buffer_size;
};
+// GWP-ASan calbacks to support the recoverable mode. Separate from the
+// debuggerd_callbacks_t because these values aren't available at debuggerd_init
+// time, and have to be synthesized on request.
+typedef struct {
+ bool (*debuggerd_needs_gwp_asan_recovery)(void* fault_addr);
+ void (*debuggerd_gwp_asan_pre_crash_report)(void* fault_addr);
+ void (*debuggerd_gwp_asan_post_crash_report)(void* fault_addr);
+} gwp_asan_callbacks_t;
+
// These callbacks are called in a signal handler, and thus must be async signal safe.
// If null, the callbacks will not be called.
typedef struct {
debugger_process_info (*get_process_info)();
+ gwp_asan_callbacks_t (*get_gwp_asan_callbacks)();
void (*post_dump)();
} debuggerd_callbacks_t;
void debuggerd_init(debuggerd_callbacks_t* callbacks);
+bool debuggerd_handle_signal(int signal_number, siginfo_t* info, void* context);
// DEBUGGER_ACTION_DUMP_TOMBSTONE and DEBUGGER_ACTION_DUMP_BACKTRACE are both
// triggered via BIONIC_SIGNAL_DEBUGGER. The debugger_action_t is sent via si_value