fdtrack: emit the most common stack in the abort message.
Bug: http://b/181875562
Test: manual
Change-Id: I2826b148d55788e932fa04dcef33c78fad8be227
diff --git a/libfdtrack/fdtrack.cpp b/libfdtrack/fdtrack.cpp
index 898bc43..fd56274 100644
--- a/libfdtrack/fdtrack.cpp
+++ b/libfdtrack/fdtrack.cpp
@@ -31,9 +31,12 @@
#include <array>
#include <mutex>
+#include <thread>
+#include <utility>
#include <vector>
#include <android/fdsan.h>
+#include <android/set_abort_message.h>
#include <bionic/fdtrack.h>
#include <android-base/no_destructor.h>
@@ -48,6 +51,7 @@
};
extern "C" void fdtrack_dump();
+extern "C" void fdtrack_dump_fatal();
using fdtrack_callback_t = bool (*)(int fd, const char* const* function_names,
const uint64_t* function_offsets, size_t count, void* arg);
@@ -74,7 +78,17 @@
entry.backtrace.reserve(kStackDepth);
}
- signal(BIONIC_SIGNAL_FDTRACK, [](int) { fdtrack_dump(); });
+ struct sigaction sa = {};
+ sa.sa_sigaction = [](int, siginfo_t* siginfo, void*) {
+ if (siginfo->si_code == SI_QUEUE && siginfo->si_int == 1) {
+ fdtrack_dump_fatal();
+ } else {
+ fdtrack_dump();
+ }
+ };
+ sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+ sigaction(BIONIC_SIGNAL_FDTRACK, &sa, nullptr);
+
if (Unwinder().Init()) {
android_fdtrack_hook_t expected = nullptr;
installed = android_fdtrack_compare_exchange_hook(&expected, &fd_hook);
@@ -156,16 +170,47 @@
android_fdtrack_set_enabled(prev);
}
-void fdtrack_dump() {
+// Computes an order-sensitive hash over a stack's (function name, offset) frames so that
+// identical stacks can be grouped without allocating.
+//
+// function_names and function_offsets must each hold at least stack_depth entries, and every
+// name must be a non-null, NUL-terminated string. An empty stack hashes to 0.
+static size_t hash_stack(const char* const* function_names, const uint64_t* function_offsets,
+                         size_t stack_depth) {
+  // Boost-style hash_combine, spelled out by hand instead of calling libc++'s
+  // std::__hash_combine: that name is a reserved implementation detail that may change or
+  // disappear. The values only have to be consistent within one process, so any reasonable
+  // mixer is sufficient here.
+  auto combine = [](size_t seed, size_t value) {
+    return seed ^ (value + static_cast<size_t>(0x9e3779b9) + (seed << 6) + (seed >> 2));
+  };
+
+  size_t hash = 0;
+  for (size_t i = 0; i < stack_depth; ++i) {
+    hash = combine(hash, std::hash<std::string_view>()(function_names[i]));
+    hash = combine(hash, std::hash<uint64_t>()(function_offsets[i]));
+  }
+  return hash;
+}
+
+static void fdtrack_dump_impl(bool fatal) {
if (!installed) {
async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fdtrack not installed");
} else {
async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fdtrack dumping...");
}
+ // If we're aborting, identify the most common stack in the hopes that it's the culprit,
+ // and emit that in the abort message so crash reporting can separate different fd leaks out.
+ // This is horrible and quadratic, but we need to avoid allocation since this can happen in
+ // response to a signal generated asynchronously. We're only going to dump 1k fds by default,
+ // and we're about to blow up the entire system, so this isn't too expensive.
+ struct StackInfo {
+ size_t hash = 0;
+ size_t count = 0;
+
+ size_t stack_depth = 0;
+ const char* function_names[kStackDepth - kStackFrameSkip];
+ uint64_t function_offsets[kStackDepth - kStackFrameSkip];
+ };
+ struct StackList {
+ size_t count = 0;
+ std::array<StackInfo, 128> data;
+ };
+ static StackList stacks;
+
fdtrack_iterate(
- [](int fd, const char* const* function_names, const uint64_t* function_offsets, size_t count,
- void*) {
+ [](int fd, const char* const* function_names, const uint64_t* function_offsets,
+ size_t stack_depth, void* stacks_ptr) {
+ auto stacks = static_cast<StackList*>(stacks_ptr);
uint64_t fdsan_owner = android_fdsan_get_owner_tag(fd);
if (fdsan_owner != 0) {
async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fd %d: (owner = 0x%" PRIx64 ")", fd,
@@ -174,12 +219,81 @@
async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", "fd %d: (unowned)", fd);
}
- for (size_t i = 0; i < count; ++i) {
+ for (size_t i = 0; i < stack_depth; ++i) {
async_safe_format_log(ANDROID_LOG_INFO, "fdtrack", " %zu: %s+%" PRIu64, i,
function_names[i], function_offsets[i]);
}
+ if (stacks) {
+ size_t hash = hash_stack(function_names, function_offsets, stack_depth);
+ bool found_stack = false;
+ for (size_t i = 0; i < stacks->count; ++i) {
+ if (stacks->data[i].hash == hash) {
+ ++stacks->data[i].count;
+ found_stack = true;
+ break;
+ }
+ }
+
+ if (!found_stack) {
+ if (stacks->count < stacks->data.size()) {
+ auto& stack = stacks->data[stacks->count++];
+ stack.hash = hash;
+ stack.count = 1;
+ stack.stack_depth = stack_depth;
+ for (size_t i = 0; i < stack_depth; ++i) {
+ stack.function_names[i] = function_names[i];
+ stack.function_offsets[i] = function_offsets[i];
+ }
+ }
+ }
+ }
+
return true;
},
- nullptr);
+ fatal ? &stacks : nullptr);
+
+ if (fatal) {
+ // Find the most common stack.
+ size_t max = 0;
+ StackInfo* stack = nullptr;
+ for (size_t i = 0; i < stacks.count; ++i) {
+ if (stacks.data[i].count > max) {
+ stack = &stacks.data[i];
+ max = stack->count;
+ }
+ }
+
+ static char buf[1024];
+
+ if (!stack) {
+ async_safe_format_buffer(buf, sizeof(buf),
+ "aborting due to fd leak: failed to find most common stack");
+ } else {
+ char* p = buf;
+ p += async_safe_format_buffer(buf, sizeof(buf),
+ "aborting due to fd leak: most common stack =\n");
+
+ for (size_t i = 0; i < stack->stack_depth; ++i) {
+ ssize_t bytes_left = buf + sizeof(buf) - p;
+ if (bytes_left > 0) {
+ p += async_safe_format_buffer(p, buf + sizeof(buf) - p, " %zu: %s+%" PRIu64 "\n", i,
+ stack->function_names[i], stack->function_offsets[i]);
+ }
+ }
+ }
+
+ android_set_abort_message(buf);
+
+ // Abort on a different thread to avoid ART dumping runtime stacks.
+ std::thread([]() { abort(); }).join();
+ }
+}
+
+// Non-fatal entry point: runs fdtrack_dump_impl with fatal disabled, so the tracked fds and
+// their stacks are logged and the process keeps running.
+void fdtrack_dump() {
+  fdtrack_dump_impl(/*fatal=*/false);
+}
+
+// Fatal entry point: runs fdtrack_dump_impl with fatal enabled, which logs the tracked fds,
+// records the most common stack in the abort message, and then aborts the process.
+void fdtrack_dump_fatal() {
+  fdtrack_dump_impl(/*fatal=*/true);
}
diff --git a/libfdtrack/libfdtrack.map.txt b/libfdtrack/libfdtrack.map.txt
index 7a23954..6c4015c 100644
--- a/libfdtrack/libfdtrack.map.txt
+++ b/libfdtrack/libfdtrack.map.txt
@@ -1,6 +1,7 @@
LIBFDTRACK {
global:
fdtrack_dump;
+ fdtrack_dump_fatal;
fdtrack_iterate;
local:
*;