blob: 355898d04c0514baf1f912aa749482a9313a6bbe [file] [log] [blame]
Josh Gaocbe70cb2016-10-18 18:17:52 -07001/*
2 * Copyright 2016, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include <fcntl.h>
18#include <stdio.h>
19#include <stdlib.h>
20#include <sys/stat.h>
21#include <sys/types.h>
22#include <unistd.h>
23
24#include <array>
25#include <deque>
Josh Gaocb68a032017-06-02 13:02:10 -070026#include <string>
Josh Gaocbe70cb2016-10-18 18:17:52 -070027#include <unordered_map>
Josh Gaocb68a032017-06-02 13:02:10 -070028#include <utility>
Josh Gaocbe70cb2016-10-18 18:17:52 -070029
30#include <event2/event.h>
31#include <event2/listener.h>
32#include <event2/thread.h>
33
34#include <android-base/logging.h>
35#include <android-base/stringprintf.h>
36#include <android-base/unique_fd.h>
37#include <cutils/sockets.h>
38
Josh Gao55f79a52017-03-06 12:24:07 -080039#include "debuggerd/handler.h"
Narayan Kamatha73df602017-05-24 15:07:25 +010040#include "dump_type.h"
Narayan Kamath2d377cd2017-05-10 10:58:59 +010041#include "protocol.h"
42#include "util.h"
Josh Gaocbe70cb2016-10-18 18:17:52 -070043
44#include "intercept_manager.h"
45
46using android::base::StringPrintf;
47using android::base::unique_fd;
48
// Process-wide intercept manager. Assigned once in main() and consulted by
// perform_request() to see whether a dump's output should be redirected.
static InterceptManager* intercept_manager;

// NOTE(review): presumably distinguishes a dump that is in progress from one
// that is waiting in a queue; neither enumerator is referenced elsewhere in
// this file -- confirm whether this enum is still needed.
enum CrashStatus {
  kCrashStatusRunning,
  kCrashStatusQueued,
};
55
Narayan Kamath922f6b22017-05-15 15:59:30 +010056struct Crash;
57
Narayan Kamatha73df602017-05-24 15:07:25 +010058class CrashQueue {
Narayan Kamath922f6b22017-05-15 15:59:30 +010059 public:
Narayan Kamatha73df602017-05-24 15:07:25 +010060 CrashQueue(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts,
61 size_t max_concurrent_dumps)
Narayan Kamath922f6b22017-05-15 15:59:30 +010062 : file_name_prefix_(file_name_prefix),
63 dir_path_(dir_path),
64 dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)),
65 max_artifacts_(max_artifacts),
66 next_artifact_(0),
67 max_concurrent_dumps_(max_concurrent_dumps),
68 num_concurrent_dumps_(0) {
69 if (dir_fd_ == -1) {
70 PLOG(FATAL) << "failed to open directory: " << dir_path;
71 }
72
73 // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the
74 // same filename could be handed out to multiple processes.
75 CHECK(max_artifacts_ > max_concurrent_dumps_);
76
77 find_oldest_artifact();
78 }
79
Josh Gaocb68a032017-06-02 13:02:10 -070080 std::pair<unique_fd, std::string> get_output() {
Narayan Kamath922f6b22017-05-15 15:59:30 +010081 unique_fd result;
Josh Gaocb68a032017-06-02 13:02:10 -070082 std::string file_name = StringPrintf("%s%02d", file_name_prefix_.c_str(), next_artifact_);
83
Narayan Kamath922f6b22017-05-15 15:59:30 +010084 // Unlink and create the file, instead of using O_TRUNC, to avoid two processes
85 // interleaving their output in case we ever get into that situation.
Josh Gaocb68a032017-06-02 13:02:10 -070086 if (unlinkat(dir_fd_, file_name.c_str(), 0) != 0 && errno != ENOENT) {
87 PLOG(FATAL) << "failed to unlink tombstone at " << dir_path_ << "/" << file_name;
Narayan Kamath922f6b22017-05-15 15:59:30 +010088 }
89
Josh Gaocb68a032017-06-02 13:02:10 -070090 result.reset(openat(dir_fd_, file_name.c_str(),
91 O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC, 0640));
Narayan Kamath922f6b22017-05-15 15:59:30 +010092 if (result == -1) {
Josh Gaocb68a032017-06-02 13:02:10 -070093 PLOG(FATAL) << "failed to create tombstone at " << dir_path_ << "/" << file_name;
Narayan Kamath922f6b22017-05-15 15:59:30 +010094 }
95
96 next_artifact_ = (next_artifact_ + 1) % max_artifacts_;
Josh Gaocb68a032017-06-02 13:02:10 -070097 return {std::move(result), dir_path_ + "/" + file_name};
Narayan Kamath922f6b22017-05-15 15:59:30 +010098 }
99
100 bool maybe_enqueue_crash(Crash* crash) {
101 if (num_concurrent_dumps_ == max_concurrent_dumps_) {
102 queued_requests_.push_back(crash);
103 return true;
104 }
105
106 return false;
107 }
108
109 void maybe_dequeue_crashes(void (*handler)(Crash* crash)) {
110 while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) {
111 Crash* next_crash = queued_requests_.front();
112 queued_requests_.pop_front();
113 handler(next_crash);
114 }
115 }
116
117 void on_crash_started() { ++num_concurrent_dumps_; }
118
119 void on_crash_completed() { --num_concurrent_dumps_; }
120
Narayan Kamatha73df602017-05-24 15:07:25 +0100121 static CrashQueue* const tombstone;
122 static CrashQueue* const java_trace;
Narayan Kamath922f6b22017-05-15 15:59:30 +0100123
124 private:
125 void find_oldest_artifact() {
126 size_t oldest_tombstone = 0;
127 time_t oldest_time = std::numeric_limits<time_t>::max();
128
129 for (size_t i = 0; i < max_artifacts_; ++i) {
Josh Gaocb68a032017-06-02 13:02:10 -0700130 std::string path = StringPrintf("%s/%s%02zu", dir_path_.c_str(), file_name_prefix_.c_str(), i);
Narayan Kamath922f6b22017-05-15 15:59:30 +0100131 struct stat st;
132 if (stat(path.c_str(), &st) != 0) {
133 if (errno == ENOENT) {
134 oldest_tombstone = i;
135 break;
136 } else {
137 PLOG(ERROR) << "failed to stat " << path;
138 continue;
139 }
140 }
141
142 if (st.st_mtime < oldest_time) {
143 oldest_tombstone = i;
144 oldest_time = st.st_mtime;
145 }
146 }
147
148 next_artifact_ = oldest_tombstone;
149 }
150
151 const std::string file_name_prefix_;
152
153 const std::string dir_path_;
154 const int dir_fd_;
155
156 const size_t max_artifacts_;
157 int next_artifact_;
158
159 const size_t max_concurrent_dumps_;
160 size_t num_concurrent_dumps_;
161
162 std::deque<Crash*> queued_requests_;
163
Narayan Kamatha73df602017-05-24 15:07:25 +0100164 DISALLOW_COPY_AND_ASSIGN(CrashQueue);
Narayan Kamath922f6b22017-05-15 15:59:30 +0100165};
166
// Whether java trace dumps are produced via tombstoned.
static constexpr bool kJavaTraceDumpsEnabled = true;

// Queue used for every dump type other than java backtraces (see
// get_crash_queue()). Writes /data/tombstones/tombstone_NN, keeps 10 slots and
// runs one dump at a time. Allocated during static initialization; nothing in
// this file deletes it.
/* static */ CrashQueue* const CrashQueue::tombstone =
    new CrashQueue("/data/tombstones", "tombstone_" /* file_name_prefix */, 10 /* max_artifacts */,
                   1 /* max_concurrent_dumps */);

// Queue used for java backtraces. Writes /data/anr/anr_NN, keeps 64 slots and
// allows 4 concurrent dumps. Null when kJavaTraceDumpsEnabled is false.
/* static */ CrashQueue* const CrashQueue::java_trace =
    (kJavaTraceDumpsEnabled ? new CrashQueue("/data/anr", "anr_" /* file_name_prefix */,
                                             64 /* max_artifacts */, 4 /* max_concurrent_dumps */)
                            : nullptr);
178
Josh Gaocbe70cb2016-10-18 18:17:52 -0700179// Ownership of Crash is a bit messy.
180// It's either owned by an active event that must have a timeout, or owned by
181// queued_requests, in the case that multiple crashes come in at the same time.
182struct Crash {
Narayan Kamath922f6b22017-05-15 15:59:30 +0100183 ~Crash() { event_free(crash_event); }
Josh Gaocbe70cb2016-10-18 18:17:52 -0700184
185 unique_fd crash_fd;
186 pid_t crash_pid;
187 event* crash_event = nullptr;
Josh Gaocb68a032017-06-02 13:02:10 -0700188 std::string crash_path;
Narayan Kamath922f6b22017-05-15 15:59:30 +0100189
Narayan Kamatha73df602017-05-24 15:07:25 +0100190 DebuggerdDumpType crash_type;
Josh Gaocbe70cb2016-10-18 18:17:52 -0700191};
192
Narayan Kamatha73df602017-05-24 15:07:25 +0100193static CrashQueue* get_crash_queue(const Crash* crash) {
194 if (crash->crash_type == kDebuggerdJavaBacktrace) {
195 return CrashQueue::java_trace;
196 }
197
198 return CrashQueue::tombstone;
199}
200
// Forward declare the callbacks so they can be placed in a sensible order.
// Listener callback: wraps a newly accepted connection in a Crash.
static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
// Event callback: reads and validates the dump request sent on a connection.
static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
// Event callback: runs when a started dump reports back or its timeout fires.
static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
205
Josh Gao807a4582017-03-30 14:51:55 -0700206static void perform_request(Crash* crash) {
Josh Gaocbe70cb2016-10-18 18:17:52 -0700207 unique_fd output_fd;
Narayan Kamatha73df602017-05-24 15:07:25 +0100208 if (!intercept_manager->GetIntercept(crash->crash_pid, crash->crash_type, &output_fd)) {
Josh Gaocb68a032017-06-02 13:02:10 -0700209 std::tie(output_fd, crash->crash_path) = get_crash_queue(crash)->get_output();
Josh Gaocbe70cb2016-10-18 18:17:52 -0700210 }
211
212 TombstonedCrashPacket response = {
213 .packet_type = CrashPacketType::kPerformDump
214 };
215 ssize_t rc = send_fd(crash->crash_fd, &response, sizeof(response), std::move(output_fd));
216 if (rc == -1) {
217 PLOG(WARNING) << "failed to send response to CrashRequest";
218 goto fail;
219 } else if (rc != sizeof(response)) {
220 PLOG(WARNING) << "crash socket write returned short";
221 goto fail;
222 } else {
223 // TODO: Make this configurable by the interceptor?
224 struct timeval timeout = { 10, 0 };
225
226 event_base* base = event_get_base(crash->crash_event);
227 event_assign(crash->crash_event, base, crash->crash_fd, EV_TIMEOUT | EV_READ,
228 crash_completed_cb, crash);
229 event_add(crash->crash_event, &timeout);
230 }
Josh Gao13078242017-03-30 14:42:46 -0700231
Narayan Kamatha73df602017-05-24 15:07:25 +0100232 get_crash_queue(crash)->on_crash_started();
Josh Gaocbe70cb2016-10-18 18:17:52 -0700233 return;
234
235fail:
236 delete crash;
237}
238
239static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
Narayan Kamatha73df602017-05-24 15:07:25 +0100240 void*) {
Josh Gaocbe70cb2016-10-18 18:17:52 -0700241 event_base* base = evconnlistener_get_base(listener);
242 Crash* crash = new Crash();
243
Narayan Kamatha73df602017-05-24 15:07:25 +0100244 // TODO: Make sure that only java crashes come in on the java socket
245 // and only native crashes on the native socket.
Josh Gaocbe70cb2016-10-18 18:17:52 -0700246 struct timeval timeout = { 1, 0 };
247 event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
248 crash->crash_fd.reset(sockfd);
249 crash->crash_event = crash_event;
250 event_add(crash_event, &timeout);
251}
252
253static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
254 ssize_t rc;
255 Crash* crash = static_cast<Crash*>(arg);
Narayan Kamath922f6b22017-05-15 15:59:30 +0100256
Josh Gaocbe70cb2016-10-18 18:17:52 -0700257 TombstonedCrashPacket request = {};
258
259 if ((ev & EV_TIMEOUT) != 0) {
260 LOG(WARNING) << "crash request timed out";
261 goto fail;
262 } else if ((ev & EV_READ) == 0) {
263 LOG(WARNING) << "tombstoned received unexpected event from crash socket";
264 goto fail;
265 }
266
267 rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
268 if (rc == -1) {
269 PLOG(WARNING) << "failed to read from crash socket";
270 goto fail;
271 } else if (rc != sizeof(request)) {
272 LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
273 << sizeof(request) << ")";
274 goto fail;
275 }
276
277 if (request.packet_type != CrashPacketType::kDumpRequest) {
278 LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received "
279 << StringPrintf("%#2hhX", request.packet_type);
280 goto fail;
281 }
282
Narayan Kamatha73df602017-05-24 15:07:25 +0100283 crash->crash_type = request.packet.dump_request.dump_type;
284 if (crash->crash_type < 0 || crash->crash_type > kDebuggerdAnyIntercept) {
285 LOG(WARNING) << "unexpected crash dump type: " << crash->crash_type;
286 goto fail;
287 }
288
289 if (crash->crash_type != kDebuggerdJavaBacktrace) {
Narayan Kamath922f6b22017-05-15 15:59:30 +0100290 crash->crash_pid = request.packet.dump_request.pid;
291 } else {
292 // Requests for java traces are sent from untrusted processes, so we
293 // must not trust the PID sent down with the request. Instead, we ask the
294 // kernel.
295 ucred cr = {};
296 socklen_t len = sizeof(cr);
297 int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
298 if (ret != 0) {
299 PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)";
300 goto fail;
301 }
302
303 crash->crash_pid = cr.pid;
304 }
305
Josh Gaocbe70cb2016-10-18 18:17:52 -0700306 LOG(INFO) << "received crash request for pid " << crash->crash_pid;
307
Narayan Kamatha73df602017-05-24 15:07:25 +0100308 if (get_crash_queue(crash)->maybe_enqueue_crash(crash)) {
Josh Gaocbe70cb2016-10-18 18:17:52 -0700309 LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
Josh Gaocbe70cb2016-10-18 18:17:52 -0700310 } else {
Josh Gao807a4582017-03-30 14:51:55 -0700311 perform_request(crash);
Josh Gaocbe70cb2016-10-18 18:17:52 -0700312 }
313
314 return;
315
316fail:
317 delete crash;
318}
319
320static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
321 ssize_t rc;
322 Crash* crash = static_cast<Crash*>(arg);
323 TombstonedCrashPacket request = {};
324
Narayan Kamatha73df602017-05-24 15:07:25 +0100325 get_crash_queue(crash)->on_crash_completed();
Josh Gaocbe70cb2016-10-18 18:17:52 -0700326
327 if ((ev & EV_READ) == 0) {
328 goto fail;
329 }
330
331 rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
332 if (rc == -1) {
333 PLOG(WARNING) << "failed to read from crash socket";
334 goto fail;
335 } else if (rc != sizeof(request)) {
336 LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
337 << sizeof(request) << ")";
338 goto fail;
339 }
340
341 if (request.packet_type != CrashPacketType::kCompletedDump) {
342 LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
343 << uint32_t(request.packet_type);
344 goto fail;
345 }
346
Josh Gaocb68a032017-06-02 13:02:10 -0700347 if (!crash->crash_path.empty()) {
Narayan Kamath79dd1432017-06-21 19:42:00 +0100348 if (crash->crash_type == kDebuggerdJavaBacktrace) {
349 LOG(ERROR) << "Traces for pid " << crash->crash_pid << " written to: " << crash->crash_path;
350 } else {
351 // NOTE: Several tools parse this log message to figure out where the
352 // tombstone associated with a given native crash was written. Any changes
353 // to this message must be carefully considered.
354 LOG(ERROR) << "Tombstone written to: " << crash->crash_path;
355 }
Josh Gaocb68a032017-06-02 13:02:10 -0700356 }
357
Josh Gaocbe70cb2016-10-18 18:17:52 -0700358fail:
Narayan Kamatha73df602017-05-24 15:07:25 +0100359 CrashQueue* queue = get_crash_queue(crash);
Josh Gaocbe70cb2016-10-18 18:17:52 -0700360 delete crash;
361
362 // If there's something queued up, let them proceed.
Narayan Kamatha73df602017-05-24 15:07:25 +0100363 queue->maybe_dequeue_crashes(perform_request);
Josh Gaocbe70cb2016-10-18 18:17:52 -0700364}
365
366int main(int, char* []) {
Josh Gao8830c952017-03-06 12:23:55 -0800367 umask(0137);
368
Josh Gao55f79a52017-03-06 12:24:07 -0800369 // Don't try to connect to ourselves if we crash.
370 struct sigaction action = {};
371 action.sa_handler = [](int signal) {
372 LOG(ERROR) << "received fatal signal " << signal;
373 _exit(1);
374 };
375 debuggerd_register_handlers(&action);
376
Josh Gaocbe70cb2016-10-18 18:17:52 -0700377 int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
378 int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);
379
380 if (intercept_socket == -1 || crash_socket == -1) {
381 PLOG(FATAL) << "failed to get socket from init";
382 }
383
384 evutil_make_socket_nonblocking(intercept_socket);
385 evutil_make_socket_nonblocking(crash_socket);
386
387 event_base* base = event_base_new();
388 if (!base) {
389 LOG(FATAL) << "failed to create event_base";
390 }
391
392 intercept_manager = new InterceptManager(base, intercept_socket);
393
Narayan Kamath922f6b22017-05-15 15:59:30 +0100394 evconnlistener* tombstone_listener = evconnlistener_new(
Narayan Kamatha73df602017-05-24 15:07:25 +0100395 base, crash_accept_cb, CrashQueue::tombstone, -1, LEV_OPT_CLOSE_ON_FREE, crash_socket);
Narayan Kamath922f6b22017-05-15 15:59:30 +0100396 if (!tombstone_listener) {
397 LOG(FATAL) << "failed to create evconnlistener for tombstones.";
398 }
399
400 if (kJavaTraceDumpsEnabled) {
401 const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName);
402 if (java_trace_socket == -1) {
403 PLOG(FATAL) << "failed to get socket from init";
404 }
405
406 evutil_make_socket_nonblocking(java_trace_socket);
407 evconnlistener* java_trace_listener = evconnlistener_new(
Narayan Kamatha73df602017-05-24 15:07:25 +0100408 base, crash_accept_cb, CrashQueue::java_trace, -1, LEV_OPT_CLOSE_ON_FREE, java_trace_socket);
Narayan Kamath922f6b22017-05-15 15:59:30 +0100409 if (!java_trace_listener) {
410 LOG(FATAL) << "failed to create evconnlistener for java traces.";
411 }
Josh Gaocbe70cb2016-10-18 18:17:52 -0700412 }
413
414 LOG(INFO) << "tombstoned successfully initialized";
415 event_base_dispatch(base);
416}