Make stack dumping errors display better.
Currently, the code first checks if a process is cached
and then checks if the process should be backtraced. For
example, the code should not backtrace the zygote process.
This CL moves the cached process check until after the
check to see if the process should be backtraced. Before
this change, the zygote was always in the list of processes,
and is always cached so it shows up as this anonymous
process that can't be backtraced. After this change, since
the zygote is not supposed to be backtraced, it is not present.
For any pids that would have been dumped but are bypassed or have
an error, make the header include the timestamp and the command line.
Improve the message about why a cached process is skipped.
For an unwind that fails:
- If it's a java process, try a native unwind.
- Display the time it took for the failed unwind.
- Check if the process is now cached and display that error message.
Bug: 339473422
Test: Trigger bugreport and verify everything continues to work.
Test: Fake an unwind failure and verify the output.
Test: Fake an unwind failure and fake a cached process and verify the output.
Test: Fake an unwind failure for a java thread, and do a native unwind.
Test: Ran the bugreports from above through the bugreport parser.
Change-Id: I3082f2ac619e179621ad3ff39f2e839d000b2576
diff --git a/cmds/dumpstate/dumpstate.cpp b/cmds/dumpstate/dumpstate.cpp
index 9b45382..5b817c1 100644
--- a/cmds/dumpstate/dumpstate.cpp
+++ b/cmds/dumpstate/dumpstate.cpp
@@ -2186,6 +2186,74 @@
ds.AddDir(LOGPERSIST_DATA_DIR, false);
}
+static std::string GetTimestamp(const timespec& ts) {
+ tm tm;
+ localtime_r(&ts.tv_sec, &tm);
+
+ // Reserve enough space for the entire time string, includes the space
+ // for the '\0' to make the calculations below easier by using size for
+ // the total string size.
+ std::string str(sizeof("1970-01-01 00:00:00.123456789+0830"), '\0');
+ size_t n = strftime(str.data(), str.size(), "%F %H:%M", &tm);
+ if (n == 0) {
+ return "TIMESTAMP FAILURE";
+ }
+ int num_chars = snprintf(&str[n], str.size() - n, ":%02d.%09ld", tm.tm_sec, ts.tv_nsec);
+ if (num_chars > str.size() - n) {
+ return "TIMESTAMP FAILURE";
+ }
+ n += static_cast<size_t>(num_chars);
+ if (strftime(&str[n], str.size() - n, "%z", &tm) == 0) {
+ return "TIMESTAMP FAILURE";
+ }
+ return str;
+}
+
+static std::string GetCmdline(pid_t pid) {
+ std::string cmdline;
+ if (!android::base::ReadFileToString(android::base::StringPrintf("/proc/%d/cmdline", pid),
+ &cmdline)) {
+ return "UNKNOWN";
+ }
+ // There are '\0' terminators between arguments, convert them to spaces.
+ // But start by skipping all trailing '\0' values.
+ size_t cur = cmdline.size() - 1;
+ while (cur != 0 && cmdline[cur] == '\0') {
+ cur--;
+ }
+ if (cur == 0) {
+ return "UNKNOWN";
+ }
+ while ((cur = cmdline.rfind('\0', cur)) != std::string::npos) {
+ cmdline[cur] = ' ';
+ }
+ return cmdline;
+}
+
+static void DumpPidHeader(int fd, pid_t pid, const timespec& ts) {
+ // For consistency, the header to this message matches the one
+ // dumped by debuggerd.
+ dprintf(fd, "\n----- pid %d at %s -----\n", pid, GetTimestamp(ts).c_str());
+ dprintf(fd, "Cmd line: %s\n", GetCmdline(pid).c_str());
+}
+
+static void DumpPidFooter(int fd, pid_t pid) {
+ // For consistency, the footer to this message matches the one
+ // dumped by debuggerd.
+ dprintf(fd, "----- end %d -----\n", pid);
+}
+
+static bool DumpBacktrace(int fd, pid_t pid, bool is_java_process) {
+ int ret = dump_backtrace_to_file_timeout(
+ pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace, 3, fd);
+ if (ret == -1 && is_java_process) {
+ // Tried to unwind as a java process, try a native unwind.
+ dprintf(fd, "Java unwind failed for pid %d, trying a native unwind.\n", pid);
+ ret = dump_backtrace_to_file_timeout(pid, kDebuggerdNativeBacktrace, 3, fd);
+ }
+ return ret != -1;
+}
+
Dumpstate::RunStatus Dumpstate::DumpTraces(const char** path) {
const std::string temp_file_pattern = ds.bugreport_internal_dir_ + "/dumptrace_XXXXXX";
const size_t buf_size = temp_file_pattern.length() + 1;
@@ -2234,16 +2302,6 @@
continue;
}
- // Skip cached processes.
- if (IsCached(pid)) {
- // For consistency, the header and footer to this message match those
- // dumped by debuggerd in the success case.
- dprintf(fd, "\n---- pid %d at [unknown] ----\n", pid);
- dprintf(fd, "Dump skipped for cached process.\n");
- dprintf(fd, "---- end %d ----", pid);
- continue;
- }
-
const std::string link_name = android::base::StringPrintf("/proc/%d/exe", pid);
std::string exe;
if (!android::base::Readlink(link_name, &exe)) {
@@ -2272,16 +2330,31 @@
break;
}
- const uint64_t start = Nanotime();
- const int ret = dump_backtrace_to_file_timeout(
- pid, is_java_process ? kDebuggerdJavaBacktrace : kDebuggerdNativeBacktrace, 3, fd);
+ timespec start_timespec;
+ clock_gettime(CLOCK_REALTIME, &start_timespec);
+ if (IsCached(pid)) {
+ DumpPidHeader(fd, pid, start_timespec);
+ dprintf(fd, "Process is cached, skipping backtrace due to high chance of timeout.\n");
+ DumpPidFooter(fd, pid);
+ continue;
+ }
- if (ret == -1) {
- // For consistency, the header and footer to this message match those
- // dumped by debuggerd in the success case.
- dprintf(fd, "\n---- pid %d at [unknown] ----\n", pid);
- dprintf(fd, "Dump failed, likely due to a timeout.\n");
- dprintf(fd, "---- end %d ----", pid);
+ const uint64_t start = Nanotime();
+ if (!DumpBacktrace(fd, pid, is_java_process)) {
+ if (IsCached(pid)) {
+ DumpPidHeader(fd, pid, start_timespec);
+ dprintf(fd, "Backtrace failed, but process has become cached.\n");
+ DumpPidFooter(fd, pid);
+ continue;
+ }
+
+ DumpPidHeader(fd, pid, start_timespec);
+ dprintf(fd, "Backtrace gathering failed, likely due to a timeout.\n");
+ DumpPidFooter(fd, pid);
+
+ dprintf(fd, "\n[dump %s stack %d: %.3fs elapsed]\n",
+ is_java_process ? "dalvik" : "native", pid,
+ (float)(Nanotime() - start) / NANOS_PER_SEC);
timeout_failures++;
continue;
}