debuggerd: advance our amazing bet.
Remove debuggerd in favor of a helper process that gets execed by
crashing processes.
Bug: http://b/30705528
Test: debuggerd_test
Change-Id: I9906c69473989cbf7fe5ea6cccf9a9c563d75906
diff --git a/debuggerd/tombstoned/intercept_manager.cpp b/debuggerd/tombstoned/intercept_manager.cpp
new file mode 100644
index 0000000..789260d
--- /dev/null
+++ b/debuggerd/tombstoned/intercept_manager.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intercept_manager.h"
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include <unordered_map>
+
+#include <event2/event.h>
+#include <event2/listener.h>
+
+#include <android-base/logging.h>
+#include <android-base/unique_fd.h>
+#include <cutils/sockets.h>
+
+#include "debuggerd/protocol.h"
+#include "debuggerd/util.h"
+
+using android::base::unique_fd;
+
+// libevent callback installed on an intercept's socket once the intercept has been
+// registered. It fires when the socket becomes readable (unexpected data, or the peer
+// closed its end) or when the event's timeout expires; either way the intercept is
+// torn down.
+//
+//   sockfd: the intercept connection; must equal intercept->sockfd.
+//   event:  libevent flags describing why the callback fired.
+//   arg:    the Intercept this event belongs to.
+static void intercept_close_cb(evutil_socket_t sockfd, short event, void* arg) {
+  auto* closing = reinterpret_cast<Intercept*>(arg);
+  InterceptManager* manager = closing->intercept_manager;
+
+  CHECK_EQ(sockfd, closing->sockfd.get());
+
+  // An intercept that never reached the manager's table is owned by its event, so
+  // it is ours to free.
+  if (!closing->registered) {
+    delete closing;
+    return;
+  }
+
+  // A registered intercept is owned by the table; verify that the table agrees
+  // before dropping the entry.
+  auto entry = manager->intercepts.find(closing->intercept_pid);
+  if (entry == manager->intercepts.end()) {
+    LOG(FATAL) << "intercept close callback called after intercept was already removed?";
+  }
+  if (entry->second.get() != closing) {
+    LOG(FATAL) << "intercept close callback has different Intercept from InterceptManager?";
+  }
+
+  const bool timed_out = (event & EV_TIMEOUT) != 0;
+  const char* reason = timed_out ? "due to timeout" : "due to input";
+  LOG(INFO) << "intercept for pid " << closing->intercept_pid << " terminated " << reason;
+
+  // The table holds a unique_ptr, so erasing the entry destroys the Intercept.
+  manager->intercepts.erase(entry);
+}
+
+// libevent callback for a freshly accepted intercept connection. Reads one
+// InterceptRequest (together with the output fd passed alongside it), validates the
+// requested pid, and registers the intercept with the InterceptManager. On any
+// failure the Intercept is destroyed.
+//
+//   sockfd: the intercept connection; must equal intercept->sockfd.
+//   ev:     libevent flags describing why the callback fired.
+//   arg:    the Intercept allocated by intercept_accept_cb.
+static void intercept_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
+  auto intercept = reinterpret_cast<Intercept*>(arg);
+  InterceptManager* intercept_manager = intercept->intercept_manager;
+
+  CHECK_EQ(sockfd, intercept->sockfd.get());
+
+  // Until the intercept is handed to the manager it belongs to this callback; every
+  // early return below therefore frees it.
+  std::unique_ptr<Intercept> owner(intercept);
+
+  if ((ev & EV_TIMEOUT) != 0) {
+    LOG(WARNING) << "tombstoned didn't receive InterceptRequest before timeout";
+    return;
+  }
+  if ((ev & EV_READ) == 0) {
+    LOG(WARNING) << "tombstoned received unexpected event on intercept socket";
+    return;
+  }
+
+  unique_fd rcv_fd;
+  InterceptRequest intercept_request;
+  ssize_t result = recv_fd(sockfd, &intercept_request, sizeof(intercept_request), &rcv_fd);
+
+  if (result == -1) {
+    PLOG(WARNING) << "failed to read from intercept socket";
+    return;
+  }
+  if (result != sizeof(intercept_request)) {
+    LOG(WARNING) << "intercept socket received short read of length " << result << " (expected "
+                 << sizeof(intercept_request) << ")";
+    return;
+  }
+
+  // Move the received FD to the upper half, in order to more easily notice FD leaks.
+  // F_DUPFD_CLOEXEC (rather than F_DUPFD) keeps the duplicate close-on-exec.
+  int moved_fd = fcntl(rcv_fd.get(), F_DUPFD_CLOEXEC, 512);
+  if (moved_fd == -1) {
+    // PLOG so that the errno from fcntl ends up in the log.
+    PLOG(WARNING) << "failed to move received fd (" << rcv_fd.get() << ")";
+    return;
+  }
+  rcv_fd.reset(moved_fd);
+
+  // We trust the other side, so only do minimal validity checking.
+  if (intercept_request.pid <= 0 || intercept_request.pid > std::numeric_limits<pid_t>::max()) {
+    InterceptResponse response = {};
+    snprintf(response.error_message, sizeof(response.error_message), "invalid pid %" PRId32,
+             intercept_request.pid);
+    // Best effort: the requester may already be gone.
+    TEMP_FAILURE_RETRY(write(sockfd, &response, sizeof(response)));
+    LOG(WARNING) << response.error_message;
+    return;
+  }
+
+  // Only one intercept per pid may be outstanding.
+  if (intercept_manager->intercepts.count(intercept_request.pid) > 0) {
+    InterceptResponse response = {};
+    snprintf(response.error_message, sizeof(response.error_message),
+             "pid %" PRId32 " already intercepted", intercept_request.pid);
+    TEMP_FAILURE_RETRY(write(sockfd, &response, sizeof(response)));
+    LOG(WARNING) << response.error_message;
+    return;
+  }
+
+  // Register the intercept with the InterceptManager, which owns it from here on.
+  intercept->intercept_pid = intercept_request.pid;
+  intercept->output_fd = std::move(rcv_fd);
+  intercept->registered = true;
+  intercept_manager->intercepts[intercept_request.pid] = std::move(owner);
+
+  LOG(INFO) << "tombstoned registered intercept for pid " << intercept_request.pid;
+
+  // Register a different read event on the socket so that we can remove intercepts if the socket
+  // closes (e.g. if a user CTRL-C's the process that requested the intercept).
+  event_assign(intercept->intercept_event, intercept_manager->base, sockfd, EV_READ | EV_TIMEOUT,
+               intercept_close_cb, arg);
+
+  // The intercept is dropped by intercept_close_cb if it is not claimed within this window.
+  struct timeval timeout = { .tv_sec = 10, .tv_usec = 0 };
+  event_add(intercept->intercept_event, &timeout);
+}
+
+// evconnlistener callback: wraps each newly accepted intercept connection in an
+// Intercept and waits up to one second for the client's request to arrive.
+//
+//   listener: the listener that accepted the connection; supplies the event_base.
+//   sockfd:   the accepted connection; the new Intercept takes ownership of it.
+//   arg:      the InterceptManager passed to evconnlistener_new.
+static void intercept_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
+                                void* arg) {
+  auto* pending = new Intercept();
+  pending->intercept_manager = static_cast<InterceptManager*>(arg);
+  pending->sockfd.reset(sockfd);
+
+  // The event carries the Intercept as its argument; intercept_request_cb either
+  // registers it with the manager or frees it.
+  pending->intercept_event = event_new(evconnlistener_get_base(listener), sockfd,
+                                       EV_TIMEOUT | EV_READ, intercept_request_cb, pending);
+
+  struct timeval request_deadline = { 1, 0 };
+  event_add(pending->intercept_event, &request_deadline);
+}
+
+// Starts listening for intercept requests on intercept_socket. Accepted connections
+// are dispatched to intercept_accept_cb on the given event_base, with this manager as
+// the callback argument. The listener is created with LEV_OPT_CLOSE_ON_FREE.
+// Aborts if the listener cannot be created.
+InterceptManager::InterceptManager(event_base* base, int intercept_socket) : base(base) {
+  this->listener = evconnlistener_new(base, intercept_accept_cb, this, -1, LEV_OPT_CLOSE_ON_FREE,
+                                      intercept_socket);
+  // Without a listener no intercept could ever be registered; fail loudly instead of
+  // silently running with intercepts disabled.
+  if (this->listener == nullptr) {
+    LOG(FATAL) << "failed to create evconnlistener for intercept socket";
+  }
+}
+
+// Claims the pending intercept for `pid`, if there is one.
+//
+// When an intercept is registered it is removed from the table, its requester is sent
+// a success response, its output fd is moved into *out_fd, and true is returned.
+// Otherwise returns false and leaves *out_fd untouched.
+bool InterceptManager::GetIntercept(pid_t pid, android::base::unique_fd* out_fd) {
+  auto entry = this->intercepts.find(pid);
+  if (entry == this->intercepts.end()) {
+    return false;
+  }
+
+  // Take the Intercept out of the table; it is destroyed when `claimed` goes out of
+  // scope at the end of this function.
+  std::unique_ptr<Intercept> claimed = std::move(entry->second);
+  this->intercepts.erase(entry);
+
+  LOG(INFO) << "found intercept fd " << claimed->output_fd.get() << " for pid " << pid;
+
+  // Tell the requester that its intercept matched; the result of the write is ignored.
+  InterceptResponse response = {};
+  response.success = 1;
+  TEMP_FAILURE_RETRY(write(claimed->sockfd.get(), &response, sizeof(response)));
+
+  *out_fd = std::move(claimed->output_fd);
+  return true;
+}
diff --git a/debuggerd/tombstoned/intercept_manager.h b/debuggerd/tombstoned/intercept_manager.h
new file mode 100644
index 0000000..cb5db62
--- /dev/null
+++ b/debuggerd/tombstoned/intercept_manager.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <sys/types.h>
+
+#include <unordered_map>
+
+#include <event2/event.h>
+#include <event2/listener.h>
+
+#include <android-base/unique_fd.h>
+
+struct InterceptManager;
+
+// State for a single intercept connection: the socket to the requester, the libevent
+// event watching that socket, and (once the request has been read) the pid to
+// intercept and the fd that the dump output should be written to.
+struct Intercept {
+  ~Intercept() {
+    // intercept_event is still null if event_new() failed or was never called, and
+    // libevent does not document event_free() as accepting a null event.
+    if (intercept_event != nullptr) {
+      event_free(intercept_event);
+    }
+  }
+
+  // Manager that accepted this connection.
+  InterceptManager* intercept_manager = nullptr;
+  // Event watching sockfd; freed together with the Intercept.
+  event* intercept_event = nullptr;
+  // Connection to the process that asked for the intercept.
+  android::base::unique_fd sockfd;
+
+  // Pid to intercept; -1 until a valid request has been read.
+  pid_t intercept_pid = -1;
+  // Fd received from the requester; handed out by InterceptManager::GetIntercept.
+  android::base::unique_fd output_fd;
+  // True once the Intercept is stored in (and owned by) InterceptManager::intercepts.
+  bool registered = false;
+};
+
+// Accepts intercept requests on a listening socket and keeps the registered
+// intercepts, keyed by the pid they apply to.
+//
+// The manager hands its own address to libevent as a callback argument, so it is
+// neither copyable nor movable.
+struct InterceptManager {
+  // Event loop that the listener and all intercept events are attached to.
+  event_base* base;
+  // Registered intercepts; the map owns them.
+  std::unordered_map<pid_t, std::unique_ptr<Intercept>> intercepts;
+  // Listener for incoming intercept connections.
+  evconnlistener* listener = nullptr;
+
+  InterceptManager(event_base* _Nonnull base, int intercept_socket);
+  InterceptManager(const InterceptManager& copy) = delete;
+  InterceptManager(InterceptManager&& move) = delete;
+  InterceptManager& operator=(const InterceptManager& copy) = delete;
+  InterceptManager& operator=(InterceptManager&& move) = delete;
+
+  // If an intercept is registered for `pid`, consumes it, moves its output fd into
+  // *out_fd and returns true; otherwise returns false.
+  bool GetIntercept(pid_t pid, android::base::unique_fd* out_fd);
+};
diff --git a/debuggerd/tombstoned/tombstoned.cpp b/debuggerd/tombstoned/tombstoned.cpp
new file mode 100644
index 0000000..3c1dcaf
--- /dev/null
+++ b/debuggerd/tombstoned/tombstoned.cpp
@@ -0,0 +1,283 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <array>
+#include <deque>
+#include <limits>
+#include <memory>
+#include <string>
+#include <unordered_map>
+
+#include <event2/event.h>
+#include <event2/listener.h>
+#include <event2/thread.h>
+
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+#include <android-base/unique_fd.h>
+#include <cutils/sockets.h>
+
+#include "debuggerd/protocol.h"
+#include "debuggerd/util.h"
+
+#include "intercept_manager.h"
+
+using android::base::StringPrintf;
+using android::base::unique_fd;
+
+static InterceptManager* intercept_manager;  // Created in main(); queried by dequeue_request().
+
+enum CrashStatus {  // NOTE(review): nothing in the visible code references this enum; confirm use.
+ kCrashStatusRunning,
+ kCrashStatusQueued,
+};
+
+// State for one connection from a crashing process.
+//
+// Ownership of Crash is a bit messy.
+// It's either owned by an active event that must have a timeout, or owned by
+// queued_requests, in the case that multiple crashes come in at the same time.
+struct Crash {
+  ~Crash() {
+    // crash_event is still null if event_new() failed or was never called, and
+    // libevent does not document event_free() as accepting a null event.
+    if (crash_event != nullptr) {
+      event_free(crash_event);
+    }
+  }
+
+  // Connection to the crashing process.
+  unique_fd crash_fd;
+  // Pid reported in the dump request; -1 until the request has been read.
+  pid_t crash_pid = -1;
+  // Event watching crash_fd; freed together with the Crash.
+  event* crash_event = nullptr;
+};
+
+static constexpr char kTombstoneDirectory[] = "/data/tombstones/";  // Keeps its trailing slash.
+static constexpr size_t kTombstoneCount = 10;  // Slots are tombstone_00 .. tombstone_09, reused.
+static int tombstone_directory_fd = -1;  // Opened in main(); base for unlinkat()/openat().
+static int next_tombstone = 0;  // Index of the slot the next tombstone is written to.
+
+static constexpr size_t kMaxConcurrentDumps = 1;  // Requests beyond this limit are queued.
+static size_t num_concurrent_dumps = 0;  // Dumps handed out and not yet completed.
+
+static std::deque<Crash*> queued_requests;  // Owns the queued Crash objects; served FIFO.
+
+// Forward declare the libevent callbacks so that they can be defined below in the order a crash passes through them.
+static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
+static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
+static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
+
+// Picks the slot that the next tombstone will be written to and stores it in
+// next_tombstone: the first slot whose file does not exist yet, or, if all
+// kTombstoneCount slots are in use, the one with the oldest modification time.
+// Slots that cannot be examined are logged and skipped.
+static void find_oldest_tombstone() {
+  size_t oldest_tombstone = 0;
+  time_t oldest_time = std::numeric_limits<time_t>::max();
+
+  for (size_t i = 0; i < kTombstoneCount; ++i) {
+    std::string path = android::base::StringPrintf("%stombstone_%02zu", kTombstoneDirectory, i);
+    struct stat st;
+    if (stat(path.c_str(), &st) != 0) {
+      if (errno == ENOENT) {
+        // A slot that has never been written is free; use it rather than overwrite
+        // an existing tombstone.
+        oldest_tombstone = i;
+        break;
+      }
+
+      // `st` holds no valid data after a failed stat(); do not compare against it.
+      PLOG(ERROR) << "failed to stat " << path;
+      continue;
+    }
+
+    if (st.st_mtime < oldest_time) {
+      oldest_tombstone = i;
+      oldest_time = st.st_mtime;
+    }
+  }
+
+  next_tombstone = oldest_tombstone;
+}
+
+// Creates the tombstone file for slot next_tombstone inside the tombstone directory,
+// advances next_tombstone to the following slot (wrapping at kTombstoneCount), and
+// returns a write-only, append-mode fd for the new file. Aborts if the old file
+// cannot be removed or the new one cannot be created.
+static unique_fd get_tombstone_fd() {
+  // If kMaxConcurrentDumps is greater than 1, then theoretically the same
+  // filename could be handed out to multiple processes. Unlink and create the
+  // file, instead of using O_TRUNC, to avoid two processes interleaving their
+  // output.
+  char buf[PATH_MAX];
+  snprintf(buf, sizeof(buf), "tombstone_%02d", next_tombstone);
+  if (unlinkat(tombstone_directory_fd, buf, 0) != 0 && errno != ENOENT) {
+    PLOG(FATAL) << "failed to unlink tombstone at " << kTombstoneDirectory << buf;
+  }
+
+  // O_CLOEXEC is an open flag and must be OR-ed into the flags argument. Passed as a
+  // separate argument it is taken as the file mode, and the real mode is ignored.
+  unique_fd result(openat(tombstone_directory_fd, buf,
+                          O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC, 0700));
+  if (result.get() == -1) {
+    PLOG(FATAL) << "failed to create tombstone at " << kTombstoneDirectory << buf;
+  }
+
+  next_tombstone = (next_tombstone + 1) % kTombstoneCount;
+  return result;
+}
+
+// Starts the dump for `crash`: picks the output fd (a registered intercept for the
+// crashing pid if there is one, otherwise a new tombstone file), sends it to the
+// crashing process in a kPerformDump packet, and arms crash_completed_cb with a ten
+// second timeout.
+//
+// Takes ownership of `crash`. If the response cannot be delivered, the crash is
+// destroyed and the next queued request, if any, is tried instead.
+static void dequeue_request(Crash* crash) {
+  while (crash != nullptr) {
+    unique_fd output_fd;
+    if (!intercept_manager->GetIntercept(crash->crash_pid, &output_fd)) {
+      output_fd = get_tombstone_fd();
+    }
+
+    TombstonedCrashPacket response = {
+      .packet_type = CrashPacketType::kPerformDump
+    };
+    ssize_t rc = send_fd(crash->crash_fd, &response, sizeof(response), std::move(output_fd));
+    if (rc == static_cast<ssize_t>(sizeof(response))) {
+      // TODO: Make this configurable by the interceptor?
+      struct timeval timeout = { 10, 0 };
+
+      event_base* base = event_get_base(crash->crash_event);
+      event_assign(crash->crash_event, base, crash->crash_fd, EV_TIMEOUT | EV_READ,
+                   crash_completed_cb, crash);
+      event_add(crash->crash_event, &timeout);
+
+      // Count the dump only once it is really in flight: crash_completed_cb is the only
+      // place that decrements the counter, and it never runs for a request that failed
+      // above, so counting earlier would leak a slot and stall every later crash.
+      ++num_concurrent_dumps;
+      return;
+    }
+
+    if (rc == -1) {
+      PLOG(WARNING) << "failed to send response to CrashRequest";
+    } else {
+      // errno is not meaningful for a short write, hence LOG rather than PLOG.
+      LOG(WARNING) << "crash socket write returned short";
+    }
+    delete crash;
+
+    // The slot is still free, so let the next waiting request have it.
+    crash = nullptr;
+    if (!queued_requests.empty()) {
+      crash = queued_requests.front();
+      queued_requests.pop_front();
+    }
+  }
+}
+
+// evconnlistener callback: wraps each newly accepted crash connection in a Crash and
+// waits up to one second for the dump request to arrive.
+//
+//   listener: the listener that accepted the connection; supplies the event_base.
+//   sockfd:   the accepted connection; the new Crash takes ownership of it.
+static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
+                            void*) {
+  auto* incoming = new Crash();
+  incoming->crash_fd.reset(sockfd);
+
+  // The event carries the Crash as its argument; crash_request_cb takes over from here.
+  incoming->crash_event = event_new(evconnlistener_get_base(listener), sockfd,
+                                    EV_TIMEOUT | EV_READ, crash_request_cb, incoming);
+
+  struct timeval request_deadline = { 1, 0 };
+  event_add(incoming->crash_event, &request_deadline);
+}
+
+// libevent callback for a freshly accepted crash connection. Reads one
+// TombstonedCrashPacket, which must be a kDumpRequest, records the crashing pid, and
+// then either starts the dump right away or queues the request when
+// kMaxConcurrentDumps dumps are already in progress. On any failure the Crash is
+// destroyed.
+//
+//   sockfd: the crash connection.
+//   ev:     libevent flags describing why the callback fired.
+//   arg:    the Crash allocated by crash_accept_cb.
+static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
+  // The Crash belongs to this callback until it is handed to the queue or to
+  // dequeue_request(); every early return below therefore frees it.
+  std::unique_ptr<Crash> owner(static_cast<Crash*>(arg));
+  Crash* crash = owner.get();
+
+  if ((ev & EV_TIMEOUT) != 0) {
+    LOG(WARNING) << "crash request timed out";
+    return;
+  }
+  if ((ev & EV_READ) == 0) {
+    LOG(WARNING) << "tombstoned received unexpected event from crash socket";
+    return;
+  }
+
+  TombstonedCrashPacket request = {};
+  ssize_t rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
+  if (rc == -1) {
+    PLOG(WARNING) << "failed to read from crash socket";
+    return;
+  }
+  if (rc != sizeof(request)) {
+    LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
+                 << sizeof(request) << ")";
+    return;
+  }
+
+  if (request.packet_type != CrashPacketType::kDumpRequest) {
+    // Convert to a plain integer before handing the value to printf-style varargs;
+    // an enum value must not be passed through "..." directly.
+    LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received "
+                 << StringPrintf("%#2hhX", static_cast<unsigned char>(request.packet_type));
+    return;
+  }
+
+  crash->crash_pid = request.packet.dump_request.pid;
+  LOG(INFO) << "received crash request for pid " << crash->crash_pid;
+
+  if (num_concurrent_dumps == kMaxConcurrentDumps) {
+    LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
+    queued_requests.push_back(owner.release());
+  } else {
+    dequeue_request(owner.release());
+  }
+}
+
+// libevent callback armed by dequeue_request(): runs when the crashing process reports
+// back on its socket or when the dump timeout expires. Whatever happened, the dump
+// slot is released, the Crash is destroyed, and the next queued request (if any) is
+// started.
+//
+//   sockfd: the crash connection.
+//   ev:     libevent flags describing why the callback fired.
+//   arg:    the Crash whose dump has finished or timed out.
+static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
+  Crash* finished = static_cast<Crash*>(arg);
+
+  --num_concurrent_dumps;
+
+  // The completion packet is read only to report problems; none of the outcomes
+  // below changes what happens afterwards.
+  if ((ev & EV_READ) != 0) {
+    TombstonedCrashPacket packet = {};
+    ssize_t bytes = TEMP_FAILURE_RETRY(read(sockfd, &packet, sizeof(packet)));
+    if (bytes == -1) {
+      PLOG(WARNING) << "failed to read from crash socket";
+    } else if (bytes != sizeof(packet)) {
+      LOG(WARNING) << "crash socket received short read of length " << bytes << " (expected "
+                   << sizeof(packet) << ")";
+    } else if (packet.packet_type != CrashPacketType::kCompletedDump) {
+      LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
+                   << uint32_t(packet.packet_type);
+    }
+  }
+
+  delete finished;
+
+  // If there's something queued up, let them proceed.
+  if (queued_requests.empty()) {
+    return;
+  }
+  Crash* next_crash = queued_requests.front();
+  queued_requests.pop_front();
+  dequeue_request(next_crash);
+}
+
+// Entry point of tombstoned: opens the tombstone directory, picks the first tombstone
+// slot to use, fetches the intercept and crash control sockets, attaches a listener
+// for each to a fresh event_base, and runs the event loop. Every setup failure is
+// fatal.
+int main(int, char* []) {
+  tombstone_directory_fd = open(kTombstoneDirectory, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+  if (tombstone_directory_fd == -1) {
+    PLOG(FATAL) << "failed to open tombstone directory";
+  }
+
+  find_oldest_tombstone();
+
+  // Both sockets are looked up by name through android_get_control_socket().
+  const int intercept_listen_fd = android_get_control_socket(kTombstonedInterceptSocketName);
+  const int crash_listen_fd = android_get_control_socket(kTombstonedCrashSocketName);
+  if (intercept_listen_fd == -1 || crash_listen_fd == -1) {
+    PLOG(FATAL) << "failed to get socket from init";
+  }
+
+  evutil_make_socket_nonblocking(intercept_listen_fd);
+  evutil_make_socket_nonblocking(crash_listen_fd);
+
+  event_base* loop = event_base_new();
+  if (loop == nullptr) {
+    LOG(FATAL) << "failed to create event_base";
+  }
+
+  intercept_manager = new InterceptManager(loop, intercept_listen_fd);
+
+  evconnlistener* crash_listener = evconnlistener_new(loop, crash_accept_cb, nullptr, -1,
+                                                      LEV_OPT_CLOSE_ON_FREE, crash_listen_fd);
+  if (crash_listener == nullptr) {
+    LOG(FATAL) << "failed to create evconnlistener";
+  }
+
+  LOG(INFO) << "tombstoned successfully initialized";
+  event_base_dispatch(loop);
+  return 0;
+}
diff --git a/debuggerd/tombstoned/tombstoned.rc b/debuggerd/tombstoned/tombstoned.rc
new file mode 100644
index 0000000..3aacf33
--- /dev/null
+++ b/debuggerd/tombstoned/tombstoned.rc
@@ -0,0 +1,9 @@
+service tombstoned /system/bin/tombstoned
+ class core
+
+ user tombstoned
+ group system
+
+ socket tombstoned_crash seqpacket 0666 system system
+ socket tombstoned_intercept seqpacket 0666 system system
+ writepid /dev/cpuset/system-background/tasks