Josh Gao | cbe70cb | 2016-10-18 18:17:52 -0700 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2016, The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include <arpa/inet.h> |
| 18 | #include <dirent.h> |
| 19 | #include <fcntl.h> |
| 20 | #include <stdlib.h> |
| 21 | #include <syscall.h> |
| 22 | #include <sys/ptrace.h> |
| 23 | #include <sys/types.h> |
| 24 | #include <sys/un.h> |
| 25 | #include <unistd.h> |
| 26 | |
| 27 | #include <limits> |
| 28 | #include <memory> |
| 29 | #include <set> |
| 30 | #include <vector> |
| 31 | |
| 32 | #include <android-base/file.h> |
| 33 | #include <android-base/logging.h> |
| 34 | #include <android-base/parseint.h> |
| 35 | #include <android-base/properties.h> |
| 36 | #include <android-base/stringprintf.h> |
| 37 | #include <android-base/unique_fd.h> |
| 38 | #include <cutils/sockets.h> |
| 39 | #include <log/logger.h> |
| 40 | #include <procinfo/process.h> |
| 41 | #include <selinux/selinux.h> |
| 42 | |
| 43 | #include "backtrace.h" |
| 44 | #include "tombstone.h" |
| 45 | #include "utility.h" |
| 46 | |
| 47 | #include "debuggerd/handler.h" |
| 48 | #include "debuggerd/protocol.h" |
| 49 | #include "debuggerd/util.h" |
| 50 | |
| 51 | using android::base::unique_fd; |
| 52 | using android::base::StringPrintf; |
| 53 | |
| 54 | static bool pid_contains_tid(pid_t pid, pid_t tid) { |
| 55 | std::string task_path = StringPrintf("/proc/%d/task/%d", pid, tid); |
| 56 | return access(task_path.c_str(), F_OK) == 0; |
| 57 | } |
| 58 | |
| 59 | // Attach to a thread, and verify that it's still a member of the given process |
| 60 | static bool ptrace_attach_thread(pid_t pid, pid_t tid) { |
| 61 | if (ptrace(PTRACE_ATTACH, tid, 0, 0) != 0) { |
| 62 | return false; |
| 63 | } |
| 64 | |
| 65 | // Make sure that the task we attached to is actually part of the pid we're dumping. |
| 66 | if (!pid_contains_tid(pid, tid)) { |
| 67 | if (ptrace(PTRACE_DETACH, tid, 0, 0) != 0) { |
| 68 | PLOG(FATAL) << "failed to detach from thread " << tid; |
| 69 | } |
| 70 | errno = ECHILD; |
| 71 | return false; |
| 72 | } |
| 73 | return true; |
| 74 | } |
| 75 | |
| 76 | static bool activity_manager_notify(int pid, int signal, const std::string& amfd_data) { |
| 77 | android::base::unique_fd amfd(socket_local_client("/data/system/ndebugsocket", ANDROID_SOCKET_NAMESPACE_FILESYSTEM, SOCK_STREAM)); |
| 78 | if (amfd.get() == -1) { |
| 79 | PLOG(ERROR) << "unable to connect to activity manager"; |
| 80 | return false; |
| 81 | } |
| 82 | |
| 83 | struct timeval tv = { |
| 84 | .tv_sec = 1, |
| 85 | .tv_usec = 0, |
| 86 | }; |
| 87 | if (setsockopt(amfd.get(), SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof(tv)) == -1) { |
| 88 | PLOG(ERROR) << "failed to set send timeout on activity manager socket"; |
| 89 | return false; |
| 90 | } |
| 91 | tv.tv_sec = 3; // 3 seconds on handshake read |
| 92 | if (setsockopt(amfd.get(), SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)) == -1) { |
| 93 | PLOG(ERROR) << "failed to set receive timeout on activity manager socket"; |
| 94 | return false; |
| 95 | } |
| 96 | |
| 97 | // Activity Manager protocol: binary 32-bit network-byte-order ints for the |
| 98 | // pid and signal number, followed by the raw text of the dump, culminating |
| 99 | // in a zero byte that marks end-of-data. |
| 100 | uint32_t datum = htonl(pid); |
| 101 | if (!android::base::WriteFully(amfd, &datum, 4)) { |
| 102 | PLOG(ERROR) << "AM pid write failed"; |
| 103 | return false; |
| 104 | } |
| 105 | datum = htonl(signal); |
| 106 | if (!android::base::WriteFully(amfd, &datum, 4)) { |
| 107 | PLOG(ERROR) << "AM signal write failed"; |
| 108 | return false; |
| 109 | } |
| 110 | if (!android::base::WriteFully(amfd, amfd_data.c_str(), amfd_data.size() + 1)) { |
| 111 | PLOG(ERROR) << "AM data write failed"; |
| 112 | return false; |
| 113 | } |
| 114 | |
| 115 | // 3 sec timeout reading the ack; we're fine if the read fails. |
| 116 | char ack; |
| 117 | android::base::ReadFully(amfd, &ack, 1); |
| 118 | return true; |
| 119 | } |
| 120 | |
| 121 | static bool tombstoned_connect(pid_t pid, unique_fd* tombstoned_socket, unique_fd* output_fd) { |
| 122 | unique_fd sockfd(socket_local_client(kTombstonedCrashSocketName, |
| 123 | ANDROID_SOCKET_NAMESPACE_RESERVED, SOCK_SEQPACKET)); |
| 124 | if (sockfd == -1) { |
| 125 | PLOG(ERROR) << "failed to connect to tombstoned"; |
| 126 | return false; |
| 127 | } |
| 128 | |
| 129 | TombstonedCrashPacket packet = {}; |
| 130 | packet.packet_type = CrashPacketType::kDumpRequest; |
| 131 | packet.packet.dump_request.pid = pid; |
| 132 | if (TEMP_FAILURE_RETRY(write(sockfd, &packet, sizeof(packet))) != sizeof(packet)) { |
| 133 | PLOG(ERROR) << "failed to write DumpRequest packet"; |
| 134 | return false; |
| 135 | } |
| 136 | |
| 137 | unique_fd tmp_output_fd; |
| 138 | ssize_t rc = recv_fd(sockfd, &packet, sizeof(packet), &tmp_output_fd); |
| 139 | if (rc == -1) { |
| 140 | PLOG(ERROR) << "failed to read response to DumpRequest packet"; |
| 141 | return false; |
| 142 | } else if (rc != sizeof(packet)) { |
| 143 | LOG(ERROR) << "read DumpRequest response packet of incorrect length (expected " |
| 144 | << sizeof(packet) << ", got " << rc << ")"; |
| 145 | return false; |
| 146 | } |
| 147 | |
| 148 | *tombstoned_socket = std::move(sockfd); |
| 149 | *output_fd = std::move(tmp_output_fd); |
| 150 | return true; |
| 151 | } |
| 152 | |
| 153 | static bool tombstoned_notify_completion(int tombstoned_socket) { |
| 154 | TombstonedCrashPacket packet = {}; |
| 155 | packet.packet_type = CrashPacketType::kCompletedDump; |
| 156 | if (TEMP_FAILURE_RETRY(write(tombstoned_socket, &packet, sizeof(packet))) != sizeof(packet)) { |
| 157 | return false; |
| 158 | } |
| 159 | return true; |
| 160 | } |
| 161 | |
| 162 | static void abort_handler(pid_t target, const bool& tombstoned_connected, |
| 163 | unique_fd& tombstoned_socket, unique_fd& output_fd, |
| 164 | const char* abort_msg) { |
| 165 | LOG(ERROR) << abort_msg; |
| 166 | |
| 167 | // If we abort before we get an output fd, contact tombstoned to let any |
| 168 | // potential listeners know that we failed. |
| 169 | if (!tombstoned_connected) { |
| 170 | if (!tombstoned_connect(target, &tombstoned_socket, &output_fd)) { |
| 171 | // We failed to connect, not much we can do. |
| 172 | LOG(ERROR) << "failed to connected to tombstoned to report failure"; |
| 173 | _exit(1); |
| 174 | } |
| 175 | } |
| 176 | |
| 177 | dprintf(output_fd.get(), "crash_dump failed to dump process %d: %s\n", target, abort_msg); |
| 178 | |
| 179 | // Don't dump ourselves. |
| 180 | _exit(1); |
| 181 | } |
| 182 | |
| 183 | static void check_process(int proc_fd, pid_t expected_pid) { |
| 184 | android::procinfo::ProcessInfo proc_info; |
| 185 | if (!android::procinfo::GetProcessInfoFromProcPidFd(proc_fd, &proc_info)) { |
| 186 | LOG(FATAL) << "failed to fetch process info"; |
| 187 | } |
| 188 | |
| 189 | if (proc_info.pid != expected_pid) { |
| 190 | LOG(FATAL) << "pid mismatch: expected " << expected_pid << ", actual " << proc_info.ppid; |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | int main(int argc, char** argv) { |
| 195 | pid_t target = getppid(); |
| 196 | bool tombstoned_connected = false; |
| 197 | unique_fd tombstoned_socket; |
| 198 | unique_fd output_fd; |
| 199 | |
| 200 | android::base::InitLogging(argv); |
| 201 | android::base::SetAborter([&](const char* abort_msg) { |
| 202 | abort_handler(target, tombstoned_connected, tombstoned_socket, output_fd, abort_msg); |
| 203 | }); |
| 204 | |
| 205 | if (argc != 2) { |
| 206 | return 1; |
| 207 | } |
| 208 | |
| 209 | pid_t main_tid; |
| 210 | |
| 211 | if (target == 1) { |
| 212 | LOG(FATAL) << "target died before we could attach"; |
| 213 | } |
| 214 | |
| 215 | if (!android::base::ParseInt(argv[1], &main_tid, 1, std::numeric_limits<pid_t>::max())) { |
| 216 | LOG(FATAL) << "invalid main tid: " << argv[1]; |
| 217 | } |
| 218 | |
| 219 | android::procinfo::ProcessInfo target_info; |
| 220 | if (!android::procinfo::GetProcessInfo(main_tid, &target_info)) { |
| 221 | LOG(FATAL) << "failed to fetch process info for target " << main_tid; |
| 222 | } |
| 223 | |
| 224 | if (main_tid != target_info.tid || target != target_info.pid) { |
| 225 | LOG(FATAL) << "target info mismatch, expected pid " << target << ", tid " << main_tid |
| 226 | << ", received pid " << target_info.pid << ", tid " << target_info.tid; |
| 227 | } |
| 228 | |
| 229 | // Open /proc/`getppid()` in the original process, and pass it down to the forked child. |
| 230 | std::string target_proc_path = "/proc/" + std::to_string(target); |
| 231 | int target_proc_fd = open(target_proc_path.c_str(), O_DIRECTORY | O_RDONLY); |
| 232 | if (target_proc_fd == -1) { |
| 233 | PLOG(FATAL) << "failed to open " << target_proc_path; |
| 234 | } |
| 235 | |
| 236 | // Reparent ourselves to init, so that the signal handler can waitpid on the |
| 237 | // original process to avoid leaving a zombie for non-fatal dumps. |
| 238 | pid_t forkpid = fork(); |
| 239 | if (forkpid == -1) { |
| 240 | PLOG(FATAL) << "fork failed"; |
| 241 | } else if (forkpid != 0) { |
| 242 | exit(0); |
| 243 | } |
| 244 | |
| 245 | check_process(target_proc_fd, target); |
| 246 | |
| 247 | int attach_error = 0; |
| 248 | if (!ptrace_attach_thread(target, main_tid)) { |
| 249 | PLOG(FATAL) << "failed to attach to thread " << main_tid << " in process " << target; |
| 250 | } |
| 251 | |
| 252 | check_process(target_proc_fd, target); |
| 253 | |
| 254 | LOG(INFO) << "obtaining output fd from tombstoned"; |
| 255 | tombstoned_connected = tombstoned_connect(target, &tombstoned_socket, &output_fd); |
| 256 | |
| 257 | // Write a '\1' to stdout to tell the crashing process to resume. |
| 258 | if (TEMP_FAILURE_RETRY(write(STDOUT_FILENO, "\1", 1)) == -1) { |
| 259 | PLOG(ERROR) << "failed to communicate to target process"; |
| 260 | } |
| 261 | |
| 262 | if (tombstoned_connected) { |
| 263 | if (TEMP_FAILURE_RETRY(dup2(output_fd.get(), STDOUT_FILENO)) == -1) { |
| 264 | PLOG(ERROR) << "failed to dup2 output fd (" << output_fd.get() << ") to STDOUT_FILENO"; |
| 265 | } |
| 266 | } else { |
| 267 | unique_fd devnull(TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR))); |
| 268 | TEMP_FAILURE_RETRY(dup2(devnull.get(), STDOUT_FILENO)); |
| 269 | } |
| 270 | |
| 271 | if (attach_error != 0) { |
| 272 | PLOG(FATAL) << "failed to attach to thread " << main_tid << " in process " << target; |
| 273 | } |
| 274 | |
| 275 | LOG(INFO) << "performing dump of process " << target << " (target tid = " << main_tid << ")"; |
| 276 | |
| 277 | // At this point, the thread that made the request has been PTRACE_ATTACHed |
| 278 | // and has the signal that triggered things queued. Send PTRACE_CONT, and |
| 279 | // then wait for the signal. |
| 280 | if (ptrace(PTRACE_CONT, main_tid, 0, 0) != 0) { |
| 281 | PLOG(ERROR) << "PTRACE_CONT(" << main_tid << ") failed"; |
| 282 | exit(1); |
| 283 | } |
| 284 | |
| 285 | siginfo_t siginfo = {}; |
| 286 | if (!wait_for_signal(main_tid, &siginfo)) { |
| 287 | printf("failed to wait for signal in tid %d: %s\n", main_tid, strerror(errno)); |
| 288 | exit(1); |
| 289 | } |
| 290 | |
| 291 | int signo = siginfo.si_signo; |
| 292 | bool backtrace = false; |
| 293 | uintptr_t abort_address = 0; |
| 294 | |
| 295 | // si_value can represent three things: |
| 296 | // 0: dump tombstone |
| 297 | // 1: dump backtrace |
| 298 | // everything else: abort message address (implies dump tombstone) |
| 299 | if (siginfo.si_value.sival_int == 1) { |
| 300 | backtrace = true; |
| 301 | } else if (siginfo.si_value.sival_ptr != nullptr) { |
| 302 | abort_address = reinterpret_cast<uintptr_t>(siginfo.si_value.sival_ptr); |
| 303 | } |
| 304 | |
| 305 | // Now that we have the signal that kicked things off, attach all of the |
| 306 | // sibling threads, and then proceed. |
| 307 | bool fatal_signal = signo != DEBUGGER_SIGNAL; |
| 308 | int resume_signal = fatal_signal ? signo : 0; |
| 309 | std::set<pid_t> siblings; |
| 310 | if (resume_signal == 0) { |
| 311 | if (!android::procinfo::GetProcessTids(target, &siblings)) { |
| 312 | PLOG(FATAL) << "failed to get process siblings"; |
| 313 | } |
| 314 | siblings.erase(main_tid); |
| 315 | |
| 316 | for (pid_t sibling_tid : siblings) { |
| 317 | if (!ptrace_attach_thread(target, sibling_tid)) { |
| 318 | PLOG(FATAL) << "failed to attach to thread " << main_tid << " in process " << target; |
| 319 | } |
| 320 | } |
| 321 | } |
| 322 | |
| 323 | check_process(target_proc_fd, target); |
| 324 | |
| 325 | // TODO: Use seccomp to lock ourselves down. |
| 326 | |
| 327 | std::unique_ptr<BacktraceMap> backtrace_map(BacktraceMap::Create(main_tid)); |
| 328 | std::string amfd_data; |
| 329 | |
| 330 | if (backtrace) { |
| 331 | dump_backtrace(output_fd.get(), backtrace_map.get(), target, main_tid, siblings, 0); |
| 332 | } else { |
| 333 | // Collect the list of open files. |
| 334 | OpenFilesList open_files; |
| 335 | populate_open_files_list(target, &open_files); |
| 336 | |
| 337 | engrave_tombstone(output_fd.get(), backtrace_map.get(), open_files, target, main_tid, siblings, |
| 338 | abort_address, fatal_signal ? &amfd_data : nullptr); |
| 339 | } |
| 340 | |
| 341 | bool wait_for_gdb = android::base::GetBoolProperty("debug.debuggerd.wait_for_gdb", false); |
| 342 | if (wait_for_gdb) { |
| 343 | // Don't wait_for_gdb when the process didn't actually crash. |
| 344 | if (!fatal_signal) { |
| 345 | wait_for_gdb = false; |
| 346 | } else { |
| 347 | // Use ALOGI to line up with output from engrave_tombstone. |
| 348 | ALOGI( |
| 349 | "***********************************************************\n" |
| 350 | "* Process %d has been suspended while crashing.\n" |
| 351 | "* To attach gdbserver and start gdb, run this on the host:\n" |
| 352 | "*\n" |
| 353 | "* gdbclient.py -p %d\n" |
| 354 | "*\n" |
| 355 | "***********************************************************", |
| 356 | target, main_tid); |
| 357 | } |
| 358 | } |
| 359 | |
| 360 | for (pid_t tid : siblings) { |
| 361 | // Don't send the signal to sibling threads. |
| 362 | if (ptrace(PTRACE_DETACH, tid, 0, wait_for_gdb ? SIGSTOP : 0) != 0) { |
| 363 | PLOG(ERROR) << "ptrace detach from " << tid << " failed"; |
| 364 | } |
| 365 | } |
| 366 | |
| 367 | if (ptrace(PTRACE_DETACH, main_tid, 0, wait_for_gdb ? SIGSTOP : resume_signal)) { |
| 368 | PLOG(ERROR) << "ptrace detach from main thread " << main_tid << " failed"; |
| 369 | } |
| 370 | |
| 371 | if (wait_for_gdb) { |
| 372 | if (tgkill(target, main_tid, resume_signal) != 0) { |
| 373 | PLOG(ERROR) << "failed to resend signal to process " << target; |
| 374 | } |
| 375 | } |
| 376 | |
| 377 | if (fatal_signal) { |
| 378 | activity_manager_notify(target, signo, amfd_data); |
| 379 | } |
| 380 | |
| 381 | // Close stdout before we notify tombstoned of completion. |
| 382 | close(STDOUT_FILENO); |
| 383 | if (!tombstoned_notify_completion(tombstoned_socket.get())) { |
| 384 | LOG(ERROR) << "failed to notify tombstoned of completion"; |
| 385 | } |
| 386 | |
| 387 | return 0; |
| 388 | } |