debuggerd: advance our amazing bet.

Remove debuggerd in favor of a helper process that gets execed by
crashing processes.

Bug: http://b/30705528
Test: debuggerd_test
Change-Id: I9906c69473989cbf7fe5ea6cccf9a9c563d75906
diff --git a/debuggerd/tombstoned/intercept_manager.cpp b/debuggerd/tombstoned/intercept_manager.cpp
new file mode 100644
index 0000000..789260d
--- /dev/null
+++ b/debuggerd/tombstoned/intercept_manager.cpp
@@ -0,0 +1,182 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intercept_manager.h"
+
+#include <inttypes.h>
+#include <sys/types.h>
+
+#include <unordered_map>
+
+#include <event2/event.h>
+#include <event2/listener.h>
+
+#include <android-base/logging.h>
+#include <android-base/unique_fd.h>
+#include <cutils/sockets.h>
+
+#include "debuggerd/protocol.h"
+#include "debuggerd/util.h"
+
+using android::base::unique_fd;
+
+// libevent callback armed once an intercept has been registered. It fires when the requester's
+// socket becomes readable (unexpected data, or the peer closed its end) or when the timeout
+// expires; either way the intercept is torn down.
+static void intercept_close_cb(evutil_socket_t sockfd, short event, void* arg) {
+  Intercept* closing = reinterpret_cast<Intercept*>(arg);
+  InterceptManager* manager = closing->intercept_manager;
+
+  CHECK_EQ(sockfd, closing->sockfd.get());
+
+  // Who owns the Intercept depends on whether it was registered with InterceptManager. If it was
+  // not, it is ours to free.
+  if (!closing->registered) {
+    delete closing;
+    return;
+  }
+
+  // Otherwise the manager's map owns it, and erasing the map entry is what destroys it.
+  auto& registry = manager->intercepts;
+  const auto entry = registry.find(closing->intercept_pid);
+  if (entry == registry.end()) {
+    LOG(FATAL) << "intercept close callback called after intercept was already removed?";
+  }
+  if (entry->second.get() != closing) {
+    LOG(FATAL) << "intercept close callback has different Intercept from InterceptManager?";
+  }
+
+  const bool timed_out = (event & EV_TIMEOUT) != 0;
+  const char* reason = timed_out ? "due to timeout" : "due to input";
+
+  LOG(INFO) << "intercept for pid " << closing->intercept_pid << " terminated " << reason;
+  registry.erase(entry);
+}
+
+// libevent callback for a newly accepted intercept connection. Reads one InterceptRequest and
+// its output fd, validates the pid and registers the intercept; frees the Intercept on failure.
+static void intercept_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
+  auto intercept = reinterpret_cast<Intercept*>(arg);
+  InterceptManager* intercept_manager = intercept->intercept_manager;
+  CHECK_EQ(sockfd, intercept->sockfd.get());
+
+  if ((ev & EV_TIMEOUT) != 0) {
+    LOG(WARNING) << "tombstoned didn't receive InterceptRequest before timeout";
+    goto fail;
+  } else if ((ev & EV_READ) == 0) {
+    LOG(WARNING) << "tombstoned received unexpected event on intercept socket";
+    goto fail;
+  }
+
+  {
+    unique_fd rcv_fd;
+    InterceptRequest intercept_request;
+    ssize_t result = recv_fd(sockfd, &intercept_request, sizeof(intercept_request), &rcv_fd);
+    if (result == -1) {
+      PLOG(WARNING) << "failed to read from intercept socket";
+      goto fail;
+    } else if (result != sizeof(intercept_request)) {
+      LOG(WARNING) << "intercept socket received short read of length " << result << " (expected "
+                   << sizeof(intercept_request) << ")";
+      goto fail;
+    }
+
+    // Move the received FD to the upper half, in order to more easily notice FD leaks. The
+    // duplicate is made close-on-exec so that it can't leak across an exec either.
+    int moved_fd = fcntl(rcv_fd.get(), F_DUPFD_CLOEXEC, 512);
+    if (moved_fd == -1) {
+      // PLOG rather than LOG: errno is the only clue to why the duplication failed.
+      PLOG(WARNING) << "failed to move received fd (" << rcv_fd.get() << ")";
+      goto fail;
+    }
+    rcv_fd.reset(moved_fd);
+
+    // We trust the other side, so only do minimal validity checking.
+    if (intercept_request.pid <= 0 || intercept_request.pid > std::numeric_limits<pid_t>::max()) {
+      InterceptResponse response = {};
+      snprintf(response.error_message, sizeof(response.error_message), "invalid pid %" PRId32,
+               intercept_request.pid);
+      TEMP_FAILURE_RETRY(write(sockfd, &response, sizeof(response)));
+      goto fail;
+    }
+
+    intercept->intercept_pid = intercept_request.pid;
+
+    // Register the intercept with the InterceptManager.
+    if (intercept_manager->intercepts.count(intercept_request.pid) > 0) {
+      InterceptResponse response = {};
+      snprintf(response.error_message, sizeof(response.error_message),
+               "pid %" PRId32 " already intercepted", intercept_request.pid);
+      TEMP_FAILURE_RETRY(write(sockfd, &response, sizeof(response)));
+      LOG(WARNING) << response.error_message;
+      goto fail;
+    }
+
+    intercept->output_fd = std::move(rcv_fd);
+    intercept_manager->intercepts[intercept_request.pid] = std::unique_ptr<Intercept>(intercept);
+    intercept->registered = true;
+
+    LOG(INFO) << "tombstoned registered intercept for pid " << intercept_request.pid;
+
+    // Register a different read event on the socket so that we can remove intercepts if the socket
+    // closes (e.g. if a user CTRL-C's the process that requested the intercept).
+    event_assign(intercept->intercept_event, intercept_manager->base, sockfd, EV_READ | EV_TIMEOUT,
+                 intercept_close_cb, arg);
+    struct timeval timeout = { .tv_sec = 10, .tv_usec = 0 };
+    event_add(intercept->intercept_event, &timeout);
+  }
+  return;
+
+fail:
+  delete intercept;
+}
+
+// Accept callback for the intercept socket: wraps the new connection in an Intercept and gives
+// the peer one second to send its InterceptRequest, which intercept_request_cb handles.
+static void intercept_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
+                                void* arg) {
+  auto* pending = new Intercept();
+  pending->sockfd.reset(sockfd);
+  pending->intercept_manager = static_cast<InterceptManager*>(arg);
+  pending->intercept_event = event_new(evconnlistener_get_base(listener), sockfd,
+                                       EV_TIMEOUT | EV_READ, intercept_request_cb, pending);
+
+  struct timeval request_deadline = { 1, 0 };
+  event_add(pending->intercept_event, &request_deadline);
+}
+
+// Starts accepting intercept connections on intercept_socket via intercept_accept_cb.
+InterceptManager::InterceptManager(event_base* base, int intercept_socket)
+    : base(base), listener(evconnlistener_new(base, intercept_accept_cb, this, -1,
+                                              LEV_OPT_CLOSE_ON_FREE, intercept_socket)) {}
+
+// Claims the intercept registered for pid, if any: tells the requester it succeeded and moves
+// the intercept's output fd into *out_fd. Returns false if no intercept is registered for pid.
+bool InterceptManager::GetIntercept(pid_t pid, android::base::unique_fd* out_fd) {
+  const auto found = intercepts.find(pid);
+  if (found == intercepts.end()) return false;
+
+  // Pull the Intercept out of the map; it is destroyed when `claimed` goes out of scope.
+  auto claimed = std::move(found->second);
+  intercepts.erase(found);
+  LOG(INFO) << "found intercept fd " << claimed->output_fd.get() << " for pid " << pid;
+
+  InterceptResponse response = {};
+  response.success = 1;
+  TEMP_FAILURE_RETRY(write(claimed->sockfd.get(), &response, sizeof(response)));
+  *out_fd = std::move(claimed->output_fd);
+  return true;
+}
diff --git a/debuggerd/tombstoned/intercept_manager.h b/debuggerd/tombstoned/intercept_manager.h
new file mode 100644
index 0000000..cb5db62
--- /dev/null
+++ b/debuggerd/tombstoned/intercept_manager.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <sys/types.h>
+
+#include <unordered_map>
+
+#include <event2/event.h>
+#include <event2/listener.h>
+
+#include <android-base/unique_fd.h>
+
+struct InterceptManager;
+
+// One pending or registered intercept request, together with the resources it owns.
+struct Intercept {
+  // intercept_event starts out as nullptr, which event_free() is not documented to accept.
+  ~Intercept() { if (intercept_event != nullptr) event_free(intercept_event); }
+
+  InterceptManager* intercept_manager = nullptr;  // Manager this intercept reports to; not owned.
+  event* intercept_event = nullptr;               // Event on sockfd; freed by the destructor.
+  android::base::unique_fd sockfd;                // Connection to whoever asked for the intercept.
+
+  pid_t intercept_pid = -1;            // Pid to intercept; -1 until a request has been read.
+  android::base::unique_fd output_fd;  // Fd received with the request; handed out by GetIntercept.
+  bool registered = false;             // True once InterceptManager::intercepts owns this object.
+};
+
+struct InterceptManager {  // Owns the intercepts registered through the intercept socket.
+  event_base* base;  // Event loop that intercept events are registered on.
+  std::unordered_map<pid_t, std::unique_ptr<Intercept>> intercepts;  // Registered, keyed by pid.
+  evconnlistener* listener = nullptr;  // Created by the constructor on intercept_socket.
+  // Copying and moving are disabled.
+  InterceptManager(event_base* _Nonnull base, int intercept_socket);
+  InterceptManager(InterceptManager& copy) = delete;
+  InterceptManager(InterceptManager&& move) = delete;
+  // Looks up the intercept registered for pid; out_fd receives its output fd when one is found.
+  bool GetIntercept(pid_t pid, android::base::unique_fd* out_fd);
+};
diff --git a/debuggerd/tombstoned/tombstoned.cpp b/debuggerd/tombstoned/tombstoned.cpp
new file mode 100644
index 0000000..3c1dcaf
--- /dev/null
+++ b/debuggerd/tombstoned/tombstoned.cpp
@@ -0,0 +1,283 @@
+/*
+ * Copyright 2016, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <array>
+#include <deque>
+#include <unordered_map>
+
+#include <event2/event.h>
+#include <event2/listener.h>
+#include <event2/thread.h>
+
+#include <android-base/logging.h>
+#include <android-base/stringprintf.h>
+#include <android-base/unique_fd.h>
+#include <cutils/sockets.h>
+
+#include "debuggerd/protocol.h"
+#include "debuggerd/util.h"
+
+#include "intercept_manager.h"
+
+using android::base::StringPrintf;
+using android::base::unique_fd;
+
+static InterceptManager* intercept_manager;
+
+enum CrashStatus {  // NOTE(review): not referenced anywhere in the visible source.
+  kCrashStatusRunning,  // presumably: the crash's dump is in progress -- TODO confirm
+  kCrashStatusQueued,   // presumably: the crash is waiting in queued_requests -- TODO confirm
+};
+
+// Ownership of Crash is a bit messy.
+// It's either owned by an active event that must have a timeout, or owned by
+// queued_requests, in the case that multiple crashes come in at the same time.
+struct Crash {
+  // crash_event starts out as nullptr, which event_free() is not documented to accept.
+  ~Crash() {
+    if (crash_event != nullptr) event_free(crash_event);
+  }
+  unique_fd crash_fd;            // Connection the crash request arrives on.
+  pid_t crash_pid = -1;          // Pid named in the dump request; -1 until one has been read.
+  event* crash_event = nullptr;  // Event watching crash_fd; freed by the destructor.
+};
+
+static constexpr char kTombstoneDirectory[] = "/data/tombstones/";
+static constexpr size_t kTombstoneCount = 10;  // Slots are named tombstone_00 .. tombstone_09.
+static int tombstone_directory_fd = -1;  // Directory fd for kTombstoneDirectory, opened in main().
+static int next_tombstone = 0;  // Index of the slot get_tombstone_fd() will hand out next.
+// Dumps allowed to run at once, and how many are running right now.
+static constexpr size_t kMaxConcurrentDumps = 1;
+static size_t num_concurrent_dumps = 0;
+// Crash requests waiting for a free dump slot, oldest first.
+static std::deque<Crash*> queued_requests;
+
+// Forward declare the callbacks so they can be placed in a sensible order.
+static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
+static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
+static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
+// Sets next_tombstone to the first slot with no file, or else the least recently modified one.
+static void find_oldest_tombstone() {
+  time_t oldest_time = std::numeric_limits<time_t>::max();
+  next_tombstone = 0;
+  for (size_t i = 0; i < kTombstoneCount; ++i) {
+    std::string path = android::base::StringPrintf("%stombstone_%02zu", kTombstoneDirectory, i);
+    struct stat st;  // Only valid when stat() succeeds; it is never read after a failure.
+    if (stat(path.c_str(), &st) == 0) {
+      if (st.st_mtime < oldest_time) {
+        next_tombstone = i;
+        oldest_time = st.st_mtime;
+      }
+    } else if (errno == ENOENT) {
+      next_tombstone = i;  // An unused slot is preferred over overwriting any existing file.
+      break;
+    } else {
+      PLOG(ERROR) << "failed to stat " << path;
+    }
+  }
+}
+
+// Creates a fresh file for slot next_tombstone, advances next_tombstone to the following slot
+// (wrapping at kTombstoneCount) and returns the new file's fd. Any failure is fatal.
+static unique_fd get_tombstone_fd() {
+  // If kMaxConcurrentDumps is greater than 1, then theoretically the same filename could be
+  // handed out to multiple processes. Unlink and create the file, instead of using O_TRUNC, to
+  // avoid two processes interleaving their output.
+  char buf[PATH_MAX];
+  snprintf(buf, sizeof(buf), "tombstone_%02d", next_tombstone);
+  if (unlinkat(tombstone_directory_fd, buf, 0) != 0 && errno != ENOENT) {
+    PLOG(FATAL) << "failed to unlink tombstone at " << kTombstoneDirectory << buf;
+  }
+
+  // O_CLOEXEC is an open flag, so it goes in the flags argument; 0700 is the creation mode.
+  unique_fd result(openat(tombstone_directory_fd, buf,
+                          O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC, 0700));
+  if (result == -1) {
+    PLOG(FATAL) << "failed to create tombstone at " << kTombstoneDirectory << buf;
+  }
+  next_tombstone = (next_tombstone + 1) % kTombstoneCount;
+  return result;
+}
+
+// Starts the dump for `crash`: replies kPerformDump with the fd to write to (a registered
+// intercept's fd, else a fresh tombstone) and arms crash_completed_cb. Frees `crash` on failure.
+static void dequeue_request(Crash* crash) {
+  unique_fd output_fd;
+  if (!intercept_manager->GetIntercept(crash->crash_pid, &output_fd)) {
+    output_fd = get_tombstone_fd();
+  }
+  TombstonedCrashPacket response = {
+    .packet_type = CrashPacketType::kPerformDump
+  };
+  ssize_t rc = send_fd(crash->crash_fd, &response, sizeof(response), std::move(output_fd));
+  if (rc == -1) {
+    PLOG(WARNING) << "failed to send response to CrashRequest";
+    goto fail;
+  } else if (rc != sizeof(response)) {
+    LOG(WARNING) << "crash socket write returned short";  // No errno to report: LOG, not PLOG.
+    goto fail;
+  } else {
+    // TODO: Make this configurable by the interceptor?
+    struct timeval timeout = { 10, 0 };
+    event_base* base = event_get_base(crash->crash_event);
+    event_assign(crash->crash_event, base, crash->crash_fd, EV_TIMEOUT | EV_READ,
+                 crash_completed_cb, crash);
+    event_add(crash->crash_event, &timeout);
+    // Count the dump only now that crash_completed_cb, the one place that decrements the
+    // counter, has been armed. Counting a failed send would stall every later request.
+    ++num_concurrent_dumps;
+  }
+  return;
+fail:
+  delete crash;
+}
+
+// Accept callback for the crash socket: wraps the connection in a Crash and gives the peer one
+// second to send its dump request, which crash_request_cb handles.
+static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
+                            void*) {
+  auto* incoming = new Crash();
+  incoming->crash_fd.reset(sockfd);
+  incoming->crash_event = event_new(evconnlistener_get_base(listener), sockfd,
+                                    EV_TIMEOUT | EV_READ, crash_request_cb, incoming);
+  struct timeval request_deadline = { 1, 0 };
+  event_add(incoming->crash_event, &request_deadline);
+}
+
+// Fires when a newly accepted crash connection becomes readable or its timeout expires. Reads the
+// kDumpRequest packet, then starts the dump or queues it if kMaxConcurrentDumps are running.
+static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
+  ssize_t rc;
+  Crash* crash = static_cast<Crash*>(arg);
+  TombstonedCrashPacket request = {};
+  if ((ev & EV_TIMEOUT) != 0) {
+    LOG(WARNING) << "crash request timed out";
+    goto fail;
+  } else if ((ev & EV_READ) == 0) {
+    LOG(WARNING) << "tombstoned received unexpected event from crash socket";
+    goto fail;
+  }
+
+  rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
+  if (rc == -1) {
+    PLOG(WARNING) << "failed to read from crash socket";
+    goto fail;
+  } else if (rc != sizeof(request)) {
+    LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
+                 << sizeof(request) << ")";
+    goto fail;
+  }
+
+  if (request.packet_type != CrashPacketType::kDumpRequest) {
+    // Convert explicitly: a scoped enum should not go through printf-style varargs as-is.
+    LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received "
+                 << StringPrintf("%#2hhX", static_cast<int>(request.packet_type));
+    goto fail;
+  }
+
+  crash->crash_pid = request.packet.dump_request.pid;
+  LOG(INFO) << "received crash request for pid " << crash->crash_pid;
+  if (num_concurrent_dumps == kMaxConcurrentDumps) {
+    LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
+    queued_requests.push_back(crash);
+  } else {
+    dequeue_request(crash);
+  }
+  return;
+
+fail:
+  delete crash;
+}
+
+// Fires when a running dump reports back on its crash socket, or when the timeout armed by
+// dequeue_request() expires. In every case the dump slot is released, the Crash is freed and
+// the next queued request (if any) is started; a bad or missing reply is at most logged.
+static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
+  Crash* finished = static_cast<Crash*>(arg);
+
+  // This dump no longer counts as running, whatever the outcome below.
+  --num_concurrent_dumps;
+
+  // Without EV_READ there is nothing to read, so skip straight to the cleanup.
+  if ((ev & EV_READ) != 0) {
+    TombstonedCrashPacket reply = {};
+    const ssize_t bytes = TEMP_FAILURE_RETRY(read(sockfd, &reply, sizeof(reply)));
+
+    if (bytes == -1) {
+      PLOG(WARNING) << "failed to read from crash socket";
+    } else if (bytes != sizeof(reply)) {
+      LOG(WARNING) << "crash socket received short read of length " << bytes << " (expected "
+                   << sizeof(reply) << ")";
+    } else if (reply.packet_type != CrashPacketType::kCompletedDump) {
+      LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
+                   << uint32_t(reply.packet_type);
+    }
+  }
+
+  // The reply was only inspected for logging; the Crash is done with either way.
+  delete finished;
+
+  // If there's something queued up, let them proceed.
+  if (queued_requests.empty()) {
+    return;
+  }
+
+  Crash* next_crash = queued_requests.front();
+  queued_requests.pop_front();
+  dequeue_request(next_crash);
+}
+
+// Entry point: opens the tombstone directory, picks the first slot to write, takes the crash and
+// intercept sockets from init, wires both into a libevent loop and then runs that loop.
+int main(int, char* []) {
+  tombstone_directory_fd = open(kTombstoneDirectory, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
+  if (tombstone_directory_fd == -1) {
+    PLOG(FATAL) << "failed to open tombstone directory";
+  }
+  find_oldest_tombstone();
+
+  const int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
+  const int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);
+  if (!(intercept_socket != -1 && crash_socket != -1)) {
+    PLOG(FATAL) << "failed to get socket from init";
+  }
+
+  evutil_make_socket_nonblocking(intercept_socket);
+  evutil_make_socket_nonblocking(crash_socket);
+
+  event_base* const loop = event_base_new();
+  if (loop == nullptr) {
+    LOG(FATAL) << "failed to create event_base";
+  }
+
+  // The intercept manager sets up its own listener; the crash socket gets one here.
+  intercept_manager = new InterceptManager(loop, intercept_socket);
+  evconnlistener* const crash_listener =
+    evconnlistener_new(loop, crash_accept_cb, nullptr, -1, LEV_OPT_CLOSE_ON_FREE, crash_socket);
+  if (crash_listener == nullptr) {
+    LOG(FATAL) << "failed to create evconnlistener";
+  }
+
+  LOG(INFO) << "tombstoned successfully initialized";
+  event_base_dispatch(loop);
+}
diff --git a/debuggerd/tombstoned/tombstoned.rc b/debuggerd/tombstoned/tombstoned.rc
new file mode 100644
index 0000000..3aacf33
--- /dev/null
+++ b/debuggerd/tombstoned/tombstoned.rc
@@ -0,0 +1,9 @@
+service tombstoned /system/bin/tombstoned
+    class core
+    # Runs as user "tombstoned" in group "system".
+    user tombstoned
+    group system
+    # Control sockets fetched by tombstoned's main(): crash requests and intercept requests.
+    socket tombstoned_crash seqpacket 0666 system system
+    socket tombstoned_intercept seqpacket 0666 system system
+    writepid /dev/cpuset/system-background/tasks