/*
 * Copyright 2016, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <array>
#include <deque>
#include <limits>
#include <string>
#include <unordered_map>
#include <utility>

#include <event2/event.h>
#include <event2/listener.h>
#include <event2/thread.h>

#include <android-base/logging.h>
#include <android-base/properties.h>
#include <android-base/stringprintf.h>
#include <android-base/unique_fd.h>
#include <cutils/sockets.h>

#include "debuggerd/handler.h"
#include "dump_type.h"
#include "protocol.h"
#include "util.h"

#include "intercept_manager.h"

using android::base::GetIntProperty;
using android::base::StringPrintf;
using android::base::unique_fd;

static InterceptManager* intercept_manager;

enum CrashStatus {
  kCrashStatusRunning,
  kCrashStatusQueued,
};

// Ownership of Crash is a bit messy.
// It's either owned by an active event that must have a timeout, or owned by
// queued_requests, in the case that multiple crashes come in at the same time.
struct Crash {
  ~Crash() { event_free(crash_event); }

  unique_fd crash_tombstone_fd;
  unique_fd crash_socket_fd;
  pid_t crash_pid;
  event* crash_event = nullptr;

  DebuggerdDumpType crash_type;
};

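// A CrashQueue manages the artifacts for one class of dump: it hands out
// output file descriptors, rotates the on-disk artifact names, limits how
// many dumps may run concurrently, and queues the overflow. Two instances
// exist, one for native tombstones and one for java (ANR) traces.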
class CrashQueue {
 public:
  CrashQueue(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts,
             size_t max_concurrent_dumps)
      : file_name_prefix_(file_name_prefix),
        dir_path_(dir_path),
        dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)),
        max_artifacts_(max_artifacts),
        next_artifact_(0),
        max_concurrent_dumps_(max_concurrent_dumps),
        num_concurrent_dumps_(0) {
    if (dir_fd_ == -1) {
      PLOG(FATAL) << "failed to open directory: " << dir_path;
    }

    // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the
    // same filename could be handed out to multiple processes.
    CHECK(max_artifacts_ > max_concurrent_dumps_);

    find_oldest_artifact();
  }

  static CrashQueue* for_crash(const Crash* crash) {
    return (crash->crash_type == kDebuggerdJavaBacktrace) ? for_anrs() : for_tombstones();
  }

  static CrashQueue* for_tombstones() {
    static CrashQueue queue("/data/tombstones", "tombstone_" /* file_name_prefix */,
                            GetIntProperty("tombstoned.max_tombstone_count", 10),
                            1 /* max_concurrent_dumps */);
    return &queue;
  }

  static CrashQueue* for_anrs() {
    static CrashQueue queue("/data/anr", "trace_" /* file_name_prefix */,
                            GetIntProperty("tombstoned.max_anr_count", 64),
                            4 /* max_concurrent_dumps */);
    return &queue;
  }

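  // Hand out an anonymous file in the output directory. O_TMPFILE gives us a
  // file with no name, so a dump that never completes leaves nothing behind;
  // where O_TMPFILE isn't supported, we fall back to creating and immediately
  // unlinking a ".temporary" file, which behaves the same way. The file only
  // gets its final name via linkat() once the dump completes.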
  unique_fd get_output() {
    unique_fd result(openat(dir_fd_, ".", O_WRONLY | O_APPEND | O_TMPFILE | O_CLOEXEC, 0640));
    if (result == -1) {
      // We might not have O_TMPFILE. Try creating and unlinking instead.
      result.reset(
          openat(dir_fd_, ".temporary", O_WRONLY | O_APPEND | O_CREAT | O_TRUNC | O_CLOEXEC, 0640));
      if (result == -1) {
        PLOG(FATAL) << "failed to create temporary tombstone in " << dir_path_;
      }
      if (unlinkat(dir_fd_, ".temporary", 0) != 0) {
        PLOG(FATAL) << "failed to unlink temporary tombstone";
      }
    }
    return result;
  }

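  // Artifact names are handed out round-robin (e.g. tombstone_00 through
  // tombstone_09), so at most max_artifacts_ files are kept and the oldest
  // slot is replaced next.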
  std::string get_next_artifact_path() {
    std::string file_name =
        StringPrintf("%s/%s%02d", dir_path_.c_str(), file_name_prefix_.c_str(), next_artifact_);
    next_artifact_ = (next_artifact_ + 1) % max_artifacts_;
    return file_name;
  }

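  // If the maximum number of concurrent dumps is already running, park the
  // crash on the queue instead of servicing it now. Returns true if the crash
  // was enqueued; queued crashes are drained later by maybe_dequeue_crashes.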
  bool maybe_enqueue_crash(Crash* crash) {
    if (num_concurrent_dumps_ == max_concurrent_dumps_) {
      queued_requests_.push_back(crash);
      return true;
    }

    return false;
  }

  void maybe_dequeue_crashes(void (*handler)(Crash* crash)) {
    while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) {
      Crash* next_crash = queued_requests_.front();
      queued_requests_.pop_front();
      handler(next_crash);
    }
  }

  void on_crash_started() { ++num_concurrent_dumps_; }

  void on_crash_completed() { --num_concurrent_dumps_; }

 private:
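  // Pick up the rotation where it should continue: the first missing slot if
  // there is one, otherwise the slot with the oldest modification time.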
  void find_oldest_artifact() {
    size_t oldest_tombstone = 0;
    time_t oldest_time = std::numeric_limits<time_t>::max();

    for (size_t i = 0; i < max_artifacts_; ++i) {
      std::string path = StringPrintf("%s/%s%02zu", dir_path_.c_str(), file_name_prefix_.c_str(), i);
      struct stat st;
      if (stat(path.c_str(), &st) != 0) {
        if (errno == ENOENT) {
          oldest_tombstone = i;
          break;
        } else {
          PLOG(ERROR) << "failed to stat " << path;
          continue;
        }
      }

      if (st.st_mtime < oldest_time) {
        oldest_tombstone = i;
        oldest_time = st.st_mtime;
      }
    }

    next_artifact_ = oldest_tombstone;
  }

  const std::string file_name_prefix_;

  const std::string dir_path_;
  const int dir_fd_;

  const size_t max_artifacts_;
  int next_artifact_;

  const size_t max_concurrent_dumps_;
  size_t num_concurrent_dumps_;

  std::deque<Crash*> queued_requests_;

  DISALLOW_COPY_AND_ASSIGN(CrashQueue);
};

// Whether java trace dumps are produced via tombstoned.
static constexpr bool kJavaTraceDumpsEnabled = true;

// Forward declare the callbacks so they can be placed in a sensible order.
static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);

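// Service a crash: pick an output file descriptor (from an interceptor if one
// is registered for this pid and dump type, otherwise a fresh anonymous file
// from the appropriate CrashQueue), send it back over the crash socket along
// with a kPerformDump packet, and re-arm the event to wait (with a 10 second
// timeout) for the kCompletedDump packet handled by crash_completed_cb.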
static void perform_request(Crash* crash) {
  unique_fd output_fd;
  bool intercepted =
      intercept_manager->GetIntercept(crash->crash_pid, crash->crash_type, &output_fd);
  if (!intercepted) {
    output_fd = CrashQueue::for_crash(crash)->get_output();
    crash->crash_tombstone_fd.reset(dup(output_fd.get()));
  }

  TombstonedCrashPacket response = {
    .packet_type = CrashPacketType::kPerformDump
  };
  ssize_t rc = send_fd(crash->crash_socket_fd, &response, sizeof(response), std::move(output_fd));
  if (rc == -1) {
    PLOG(WARNING) << "failed to send response to CrashRequest";
    goto fail;
  } else if (rc != sizeof(response)) {
    PLOG(WARNING) << "crash socket write returned short";
    goto fail;
  } else {
    // TODO: Make this configurable by the interceptor?
    struct timeval timeout = { 10, 0 };

    event_base* base = event_get_base(crash->crash_event);
    event_assign(crash->crash_event, base, crash->crash_socket_fd, EV_TIMEOUT | EV_READ,
                 crash_completed_cb, crash);
    event_add(crash->crash_event, &timeout);
  }

  CrashQueue::for_crash(crash)->on_crash_started();
  return;

fail:
  delete crash;
}

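// A new connection on one of the crash sockets: allocate a Crash to track it
// and give the client one second to send its dump request.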
static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
                            void*) {
  event_base* base = evconnlistener_get_base(listener);
  Crash* crash = new Crash();

  // TODO: Make sure that only java crashes come in on the java socket
  // and only native crashes on the native socket.
  struct timeval timeout = { 1, 0 };
  event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
  crash->crash_socket_fd.reset(sockfd);
  crash->crash_event = crash_event;
  event_add(crash_event, &timeout);
}

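// Read and validate the dump request. For native crashes the pid in the
// packet is trusted; for java trace requests it is not, and the peer's pid is
// taken from SO_PEERCRED instead. The crash is then either serviced
// immediately or queued behind the currently running dumps.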
static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
  ssize_t rc;
  Crash* crash = static_cast<Crash*>(arg);

  TombstonedCrashPacket request = {};

  if ((ev & EV_TIMEOUT) != 0) {
    LOG(WARNING) << "crash request timed out";
    goto fail;
  } else if ((ev & EV_READ) == 0) {
    LOG(WARNING) << "tombstoned received unexpected event from crash socket";
    goto fail;
  }

  rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
  if (rc == -1) {
    PLOG(WARNING) << "failed to read from crash socket";
    goto fail;
  } else if (rc != sizeof(request)) {
    LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
                 << sizeof(request) << ")";
    goto fail;
  }

  if (request.packet_type != CrashPacketType::kDumpRequest) {
    LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received "
                 << StringPrintf("%#2hhX", request.packet_type);
    goto fail;
  }

  crash->crash_type = request.packet.dump_request.dump_type;
  if (crash->crash_type < 0 || crash->crash_type > kDebuggerdAnyIntercept) {
    LOG(WARNING) << "unexpected crash dump type: " << crash->crash_type;
    goto fail;
  }

  if (crash->crash_type != kDebuggerdJavaBacktrace) {
    crash->crash_pid = request.packet.dump_request.pid;
  } else {
    // Requests for java traces are sent from untrusted processes, so we
    // must not trust the PID sent down with the request. Instead, we ask the
    // kernel.
    ucred cr = {};
    socklen_t len = sizeof(cr);
    int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
    if (ret != 0) {
      PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)";
      goto fail;
    }

    crash->crash_pid = cr.pid;
  }

  LOG(INFO) << "received crash request for pid " << crash->crash_pid;

  if (CrashQueue::for_crash(crash)->maybe_enqueue_crash(crash)) {
    LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
  } else {
    perform_request(crash);
  }

  return;

fail:
  delete crash;
}

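// The dump has finished (or timed out). If the output went to one of our
// anonymous files rather than an interceptor, give it its final name by
// linking /proc/self/fd/N to the next artifact path, then free the crash and
// let any queued requests proceed.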
static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
  ssize_t rc;
  Crash* crash = static_cast<Crash*>(arg);
  TombstonedCrashPacket request = {};

  CrashQueue::for_crash(crash)->on_crash_completed();

  if ((ev & EV_READ) == 0) {
    goto fail;
  }

  rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
  if (rc == -1) {
    PLOG(WARNING) << "failed to read from crash socket";
    goto fail;
  } else if (rc != sizeof(request)) {
    LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
                 << sizeof(request) << ")";
    goto fail;
  }

  if (request.packet_type != CrashPacketType::kCompletedDump) {
    LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
                 << uint32_t(request.packet_type);
    goto fail;
  }

  if (crash->crash_tombstone_fd != -1) {
    std::string fd_path = StringPrintf("/proc/self/fd/%d", crash->crash_tombstone_fd.get());
    std::string tombstone_path = CrashQueue::for_crash(crash)->get_next_artifact_path();
    // The artifact slot may not have been used yet; only a failure to remove
    // an existing file is an error.
    int rc = unlink(tombstone_path.c_str());
    if (rc != 0 && errno != ENOENT) {
      PLOG(ERROR) << "failed to unlink tombstone at " << tombstone_path;
      goto fail;
    }

    rc = linkat(AT_FDCWD, fd_path.c_str(), AT_FDCWD, tombstone_path.c_str(), AT_SYMLINK_FOLLOW);
    if (rc != 0) {
      PLOG(ERROR) << "failed to link tombstone";
    } else {
      if (crash->crash_type == kDebuggerdJavaBacktrace) {
        LOG(ERROR) << "Traces for pid " << crash->crash_pid << " written to: " << tombstone_path;
      } else {
        // NOTE: Several tools parse this log message to figure out where the
        // tombstone associated with a given native crash was written. Any changes
        // to this message must be carefully considered.
        LOG(ERROR) << "Tombstone written to: " << tombstone_path;
      }
    }
  }

fail:
  CrashQueue* queue = CrashQueue::for_crash(crash);
  delete crash;

  // If there's something queued up, let them proceed.
  queue->maybe_dequeue_crashes(perform_request);
}

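// tombstoned receives its control sockets from init: one for interceptors,
// one for native crash dumps, and (when enabled) one for java trace dumps.
// All of them are wired into a single libevent loop.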
int main(int, char* []) {
  umask(0137);

  // Don't try to connect to ourselves if we crash.
  struct sigaction action = {};
  action.sa_handler = [](int signal) {
    LOG(ERROR) << "received fatal signal " << signal;
    _exit(1);
  };
  debuggerd_register_handlers(&action);

  int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
  int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);

  if (intercept_socket == -1 || crash_socket == -1) {
    PLOG(FATAL) << "failed to get socket from init";
  }

  evutil_make_socket_nonblocking(intercept_socket);
  evutil_make_socket_nonblocking(crash_socket);

  event_base* base = event_base_new();
  if (!base) {
    LOG(FATAL) << "failed to create event_base";
  }

  intercept_manager = new InterceptManager(base, intercept_socket);

  evconnlistener* tombstone_listener =
      evconnlistener_new(base, crash_accept_cb, CrashQueue::for_tombstones(), LEV_OPT_CLOSE_ON_FREE,
                         -1 /* backlog */, crash_socket);
  if (!tombstone_listener) {
    LOG(FATAL) << "failed to create evconnlistener for tombstones.";
  }

  if (kJavaTraceDumpsEnabled) {
    const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName);
    if (java_trace_socket == -1) {
      PLOG(FATAL) << "failed to get socket from init";
    }

    evutil_make_socket_nonblocking(java_trace_socket);
    evconnlistener* java_trace_listener =
        evconnlistener_new(base, crash_accept_cb, CrashQueue::for_anrs(), LEV_OPT_CLOSE_ON_FREE,
                           -1 /* backlog */, java_trace_socket);
    if (!java_trace_listener) {
      LOG(FATAL) << "failed to create evconnlistener for java traces.";
    }
  }

  LOG(INFO) << "tombstoned successfully initialized";
  event_base_dispatch(base);
}