Josh Gao | cbe70cb | 2016-10-18 18:17:52 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright 2016, The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include <fcntl.h> |
| 18 | #include <stdio.h> |
| 19 | #include <stdlib.h> |
| 20 | #include <sys/stat.h> |
| 21 | #include <sys/types.h> |
| 22 | #include <unistd.h> |
| 23 | |
| 24 | #include <array> |
| 25 | #include <deque> |
| 26 | #include <unordered_map> |
| 27 | |
| 28 | #include <event2/event.h> |
| 29 | #include <event2/listener.h> |
| 30 | #include <event2/thread.h> |
| 31 | |
| 32 | #include <android-base/logging.h> |
| 33 | #include <android-base/stringprintf.h> |
| 34 | #include <android-base/unique_fd.h> |
| 35 | #include <cutils/sockets.h> |
| 36 | |
Josh Gao | 55f79a5 | 2017-03-06 12:24:07 -0800 | [diff] [blame^] | 37 | #include "debuggerd/handler.h" |
Josh Gao | cbe70cb | 2016-10-18 18:17:52 -0700 | [diff] [blame] | 38 | #include "debuggerd/protocol.h" |
| 39 | #include "debuggerd/util.h" |
| 40 | |
| 41 | #include "intercept_manager.h" |
| 42 | |
| 43 | using android::base::StringPrintf; |
| 44 | using android::base::unique_fd; |
| 45 | |
| 46 | static InterceptManager* intercept_manager; |
| 47 | |
// Status values for a crash request: either being dumped or waiting its turn.
// NOTE(review): nothing in the visible part of this file references this enum.
enum CrashStatus {
  kCrashStatusRunning = 0,
  kCrashStatusQueued = 1,
};
| 52 | |
| 53 | // Ownership of Crash is a bit messy. |
| 54 | // It's either owned by an active event that must have a timeout, or owned by |
| 55 | // queued_requests, in the case that multiple crashes come in at the same time. |
| 56 | struct Crash { |
| 57 | ~Crash() { |
| 58 | event_free(crash_event); |
| 59 | } |
| 60 | |
| 61 | unique_fd crash_fd; |
| 62 | pid_t crash_pid; |
| 63 | event* crash_event = nullptr; |
| 64 | }; |
| 65 | |
| 66 | static constexpr char kTombstoneDirectory[] = "/data/tombstones/"; |
| 67 | static constexpr size_t kTombstoneCount = 10; |
| 68 | static int tombstone_directory_fd = -1; |
| 69 | static int next_tombstone = 0; |
| 70 | |
| 71 | static constexpr size_t kMaxConcurrentDumps = 1; |
| 72 | static size_t num_concurrent_dumps = 0; |
| 73 | |
| 74 | static std::deque<Crash*> queued_requests; |
| 75 | |
| 76 | // Forward declare the callbacks so they can be placed in a sensible order. |
| 77 | static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*); |
| 78 | static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg); |
| 79 | static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg); |
| 80 | |
| 81 | static void find_oldest_tombstone() { |
| 82 | size_t oldest_tombstone = 0; |
| 83 | time_t oldest_time = std::numeric_limits<time_t>::max(); |
| 84 | |
| 85 | for (size_t i = 0; i < kTombstoneCount; ++i) { |
| 86 | std::string path = android::base::StringPrintf("%stombstone_%02zu", kTombstoneDirectory, i); |
| 87 | struct stat st; |
| 88 | if (stat(path.c_str(), &st) != 0) { |
Josh Gao | 8498016 | 2017-01-23 15:56:35 -0800 | [diff] [blame] | 89 | if (errno == ENOENT) { |
| 90 | oldest_tombstone = i; |
| 91 | break; |
| 92 | } else { |
| 93 | PLOG(ERROR) << "failed to stat " << path; |
| 94 | continue; |
| 95 | } |
Josh Gao | cbe70cb | 2016-10-18 18:17:52 -0700 | [diff] [blame] | 96 | } |
| 97 | |
| 98 | if (st.st_mtime < oldest_time) { |
| 99 | oldest_tombstone = i; |
| 100 | oldest_time = st.st_mtime; |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | next_tombstone = oldest_tombstone; |
| 105 | } |
| 106 | |
| 107 | static unique_fd get_tombstone_fd() { |
| 108 | // If kMaxConcurrentDumps is greater than 1, then theoretically the same |
| 109 | // filename could be handed out to multiple processes. Unlink and create the |
| 110 | // file, instead of using O_TRUNC, to avoid two processes interleaving their |
| 111 | // output. |
| 112 | unique_fd result; |
| 113 | char buf[PATH_MAX]; |
| 114 | snprintf(buf, sizeof(buf), "tombstone_%02d", next_tombstone); |
| 115 | if (unlinkat(tombstone_directory_fd, buf, 0) != 0 && errno != ENOENT) { |
| 116 | PLOG(FATAL) << "failed to unlink tombstone at " << kTombstoneDirectory << buf; |
| 117 | } |
| 118 | |
| 119 | result.reset( |
Josh Gao | 8830c95 | 2017-03-06 12:23:55 -0800 | [diff] [blame] | 120 | openat(tombstone_directory_fd, buf, O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC, 0640)); |
Josh Gao | cbe70cb | 2016-10-18 18:17:52 -0700 | [diff] [blame] | 121 | if (result == -1) { |
| 122 | PLOG(FATAL) << "failed to create tombstone at " << kTombstoneDirectory << buf; |
| 123 | } |
| 124 | |
| 125 | next_tombstone = (next_tombstone + 1) % kTombstoneCount; |
| 126 | return result; |
| 127 | } |
| 128 | |
| 129 | static void dequeue_request(Crash* crash) { |
| 130 | ++num_concurrent_dumps; |
| 131 | |
| 132 | unique_fd output_fd; |
| 133 | if (!intercept_manager->GetIntercept(crash->crash_pid, &output_fd)) { |
| 134 | output_fd = get_tombstone_fd(); |
| 135 | } |
| 136 | |
| 137 | TombstonedCrashPacket response = { |
| 138 | .packet_type = CrashPacketType::kPerformDump |
| 139 | }; |
| 140 | ssize_t rc = send_fd(crash->crash_fd, &response, sizeof(response), std::move(output_fd)); |
| 141 | if (rc == -1) { |
| 142 | PLOG(WARNING) << "failed to send response to CrashRequest"; |
| 143 | goto fail; |
| 144 | } else if (rc != sizeof(response)) { |
| 145 | PLOG(WARNING) << "crash socket write returned short"; |
| 146 | goto fail; |
| 147 | } else { |
| 148 | // TODO: Make this configurable by the interceptor? |
| 149 | struct timeval timeout = { 10, 0 }; |
| 150 | |
| 151 | event_base* base = event_get_base(crash->crash_event); |
| 152 | event_assign(crash->crash_event, base, crash->crash_fd, EV_TIMEOUT | EV_READ, |
| 153 | crash_completed_cb, crash); |
| 154 | event_add(crash->crash_event, &timeout); |
| 155 | } |
| 156 | return; |
| 157 | |
| 158 | fail: |
| 159 | delete crash; |
| 160 | } |
| 161 | |
| 162 | static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, |
| 163 | void*) { |
| 164 | event_base* base = evconnlistener_get_base(listener); |
| 165 | Crash* crash = new Crash(); |
| 166 | |
| 167 | struct timeval timeout = { 1, 0 }; |
| 168 | event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash); |
| 169 | crash->crash_fd.reset(sockfd); |
| 170 | crash->crash_event = crash_event; |
| 171 | event_add(crash_event, &timeout); |
| 172 | } |
| 173 | |
| 174 | static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) { |
| 175 | ssize_t rc; |
| 176 | Crash* crash = static_cast<Crash*>(arg); |
| 177 | TombstonedCrashPacket request = {}; |
| 178 | |
| 179 | if ((ev & EV_TIMEOUT) != 0) { |
| 180 | LOG(WARNING) << "crash request timed out"; |
| 181 | goto fail; |
| 182 | } else if ((ev & EV_READ) == 0) { |
| 183 | LOG(WARNING) << "tombstoned received unexpected event from crash socket"; |
| 184 | goto fail; |
| 185 | } |
| 186 | |
| 187 | rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request))); |
| 188 | if (rc == -1) { |
| 189 | PLOG(WARNING) << "failed to read from crash socket"; |
| 190 | goto fail; |
| 191 | } else if (rc != sizeof(request)) { |
| 192 | LOG(WARNING) << "crash socket received short read of length " << rc << " (expected " |
| 193 | << sizeof(request) << ")"; |
| 194 | goto fail; |
| 195 | } |
| 196 | |
| 197 | if (request.packet_type != CrashPacketType::kDumpRequest) { |
| 198 | LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received " |
| 199 | << StringPrintf("%#2hhX", request.packet_type); |
| 200 | goto fail; |
| 201 | } |
| 202 | |
| 203 | crash->crash_pid = request.packet.dump_request.pid; |
| 204 | LOG(INFO) << "received crash request for pid " << crash->crash_pid; |
| 205 | |
| 206 | if (num_concurrent_dumps == kMaxConcurrentDumps) { |
| 207 | LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid; |
| 208 | queued_requests.push_back(crash); |
| 209 | } else { |
| 210 | dequeue_request(crash); |
| 211 | } |
| 212 | |
| 213 | return; |
| 214 | |
| 215 | fail: |
| 216 | delete crash; |
| 217 | } |
| 218 | |
| 219 | static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) { |
| 220 | ssize_t rc; |
| 221 | Crash* crash = static_cast<Crash*>(arg); |
| 222 | TombstonedCrashPacket request = {}; |
| 223 | |
| 224 | --num_concurrent_dumps; |
| 225 | |
| 226 | if ((ev & EV_READ) == 0) { |
| 227 | goto fail; |
| 228 | } |
| 229 | |
| 230 | rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request))); |
| 231 | if (rc == -1) { |
| 232 | PLOG(WARNING) << "failed to read from crash socket"; |
| 233 | goto fail; |
| 234 | } else if (rc != sizeof(request)) { |
| 235 | LOG(WARNING) << "crash socket received short read of length " << rc << " (expected " |
| 236 | << sizeof(request) << ")"; |
| 237 | goto fail; |
| 238 | } |
| 239 | |
| 240 | if (request.packet_type != CrashPacketType::kCompletedDump) { |
| 241 | LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received " |
| 242 | << uint32_t(request.packet_type); |
| 243 | goto fail; |
| 244 | } |
| 245 | |
| 246 | fail: |
| 247 | delete crash; |
| 248 | |
| 249 | // If there's something queued up, let them proceed. |
| 250 | if (!queued_requests.empty()) { |
| 251 | Crash* next_crash = queued_requests.front(); |
| 252 | queued_requests.pop_front(); |
| 253 | dequeue_request(next_crash); |
| 254 | } |
| 255 | } |
| 256 | |
| 257 | int main(int, char* []) { |
Josh Gao | 8830c95 | 2017-03-06 12:23:55 -0800 | [diff] [blame] | 258 | umask(0137); |
| 259 | |
Josh Gao | 55f79a5 | 2017-03-06 12:24:07 -0800 | [diff] [blame^] | 260 | // Don't try to connect to ourselves if we crash. |
| 261 | struct sigaction action = {}; |
| 262 | action.sa_handler = [](int signal) { |
| 263 | LOG(ERROR) << "received fatal signal " << signal; |
| 264 | _exit(1); |
| 265 | }; |
| 266 | debuggerd_register_handlers(&action); |
| 267 | |
Josh Gao | cbe70cb | 2016-10-18 18:17:52 -0700 | [diff] [blame] | 268 | tombstone_directory_fd = open(kTombstoneDirectory, O_DIRECTORY | O_RDONLY | O_CLOEXEC); |
| 269 | if (tombstone_directory_fd == -1) { |
| 270 | PLOG(FATAL) << "failed to open tombstone directory"; |
| 271 | } |
| 272 | |
| 273 | find_oldest_tombstone(); |
| 274 | |
| 275 | int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName); |
| 276 | int crash_socket = android_get_control_socket(kTombstonedCrashSocketName); |
| 277 | |
| 278 | if (intercept_socket == -1 || crash_socket == -1) { |
| 279 | PLOG(FATAL) << "failed to get socket from init"; |
| 280 | } |
| 281 | |
| 282 | evutil_make_socket_nonblocking(intercept_socket); |
| 283 | evutil_make_socket_nonblocking(crash_socket); |
| 284 | |
| 285 | event_base* base = event_base_new(); |
| 286 | if (!base) { |
| 287 | LOG(FATAL) << "failed to create event_base"; |
| 288 | } |
| 289 | |
| 290 | intercept_manager = new InterceptManager(base, intercept_socket); |
| 291 | |
| 292 | evconnlistener* listener = |
| 293 | evconnlistener_new(base, crash_accept_cb, nullptr, -1, LEV_OPT_CLOSE_ON_FREE, crash_socket); |
| 294 | if (!listener) { |
| 295 | LOG(FATAL) << "failed to create evconnlistener"; |
| 296 | } |
| 297 | |
| 298 | LOG(INFO) << "tombstoned successfully initialized"; |
| 299 | event_base_dispatch(base); |
| 300 | } |