/*
 * Copyright 2016, The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <array>
#include <deque>
#include <limits>
#include <string>
#include <unordered_map>
#include <utility>

#include <event2/event.h>
#include <event2/listener.h>
#include <event2/thread.h>

#include <android-base/logging.h>
#include <android-base/stringprintf.h>
#include <android-base/unique_fd.h>
#include <cutils/sockets.h>

#include "debuggerd/handler.h"
#include "dump_type.h"
#include "protocol.h"
#include "util.h"

#include "intercept_manager.h"
43
44using android::base::StringPrintf;
45using android::base::unique_fd;
46
47static InterceptManager* intercept_manager;
48
// State of a crash request. Presumably distinguishes a dump that is in progress from one that
// is waiting in a queue; nothing else in this file refers to it.
enum CrashStatus {
  kCrashStatusRunning = 0,
  kCrashStatusQueued = 1,
};
53
Narayan Kamath922f6b22017-05-15 15:59:30 +010054struct Crash;
55
Narayan Kamatha73df602017-05-24 15:07:25 +010056class CrashQueue {
Narayan Kamath922f6b22017-05-15 15:59:30 +010057 public:
Narayan Kamatha73df602017-05-24 15:07:25 +010058 CrashQueue(const std::string& dir_path, const std::string& file_name_prefix, size_t max_artifacts,
59 size_t max_concurrent_dumps)
Narayan Kamath922f6b22017-05-15 15:59:30 +010060 : file_name_prefix_(file_name_prefix),
61 dir_path_(dir_path),
62 dir_fd_(open(dir_path.c_str(), O_DIRECTORY | O_RDONLY | O_CLOEXEC)),
63 max_artifacts_(max_artifacts),
64 next_artifact_(0),
65 max_concurrent_dumps_(max_concurrent_dumps),
66 num_concurrent_dumps_(0) {
67 if (dir_fd_ == -1) {
68 PLOG(FATAL) << "failed to open directory: " << dir_path;
69 }
70
71 // NOTE: If max_artifacts_ <= max_concurrent_dumps_, then theoretically the
72 // same filename could be handed out to multiple processes.
73 CHECK(max_artifacts_ > max_concurrent_dumps_);
74
75 find_oldest_artifact();
76 }
77
78 unique_fd get_output_fd() {
79 unique_fd result;
80 char buf[PATH_MAX];
81 snprintf(buf, sizeof(buf), "%s%02d", file_name_prefix_.c_str(), next_artifact_);
82 // Unlink and create the file, instead of using O_TRUNC, to avoid two processes
83 // interleaving their output in case we ever get into that situation.
84 if (unlinkat(dir_fd_, buf, 0) != 0 && errno != ENOENT) {
85 PLOG(FATAL) << "failed to unlink tombstone at " << dir_path_ << buf;
86 }
87
88 result.reset(openat(dir_fd_, buf, O_CREAT | O_EXCL | O_WRONLY | O_APPEND | O_CLOEXEC, 0640));
89 if (result == -1) {
90 PLOG(FATAL) << "failed to create tombstone at " << dir_path_ << buf;
91 }
92
93 next_artifact_ = (next_artifact_ + 1) % max_artifacts_;
94 return result;
95 }
96
97 bool maybe_enqueue_crash(Crash* crash) {
98 if (num_concurrent_dumps_ == max_concurrent_dumps_) {
99 queued_requests_.push_back(crash);
100 return true;
101 }
102
103 return false;
104 }
105
106 void maybe_dequeue_crashes(void (*handler)(Crash* crash)) {
107 while (!queued_requests_.empty() && num_concurrent_dumps_ < max_concurrent_dumps_) {
108 Crash* next_crash = queued_requests_.front();
109 queued_requests_.pop_front();
110 handler(next_crash);
111 }
112 }
113
114 void on_crash_started() { ++num_concurrent_dumps_; }
115
116 void on_crash_completed() { --num_concurrent_dumps_; }
117
Narayan Kamatha73df602017-05-24 15:07:25 +0100118 static CrashQueue* const tombstone;
119 static CrashQueue* const java_trace;
Narayan Kamath922f6b22017-05-15 15:59:30 +0100120
121 private:
122 void find_oldest_artifact() {
123 size_t oldest_tombstone = 0;
124 time_t oldest_time = std::numeric_limits<time_t>::max();
125
126 for (size_t i = 0; i < max_artifacts_; ++i) {
127 std::string path = android::base::StringPrintf("%s/%s%02zu", dir_path_.c_str(),
128 file_name_prefix_.c_str(), i);
129 struct stat st;
130 if (stat(path.c_str(), &st) != 0) {
131 if (errno == ENOENT) {
132 oldest_tombstone = i;
133 break;
134 } else {
135 PLOG(ERROR) << "failed to stat " << path;
136 continue;
137 }
138 }
139
140 if (st.st_mtime < oldest_time) {
141 oldest_tombstone = i;
142 oldest_time = st.st_mtime;
143 }
144 }
145
146 next_artifact_ = oldest_tombstone;
147 }
148
149 const std::string file_name_prefix_;
150
151 const std::string dir_path_;
152 const int dir_fd_;
153
154 const size_t max_artifacts_;
155 int next_artifact_;
156
157 const size_t max_concurrent_dumps_;
158 size_t num_concurrent_dumps_;
159
160 std::deque<Crash*> queued_requests_;
161
Narayan Kamatha73df602017-05-24 15:07:25 +0100162 DISALLOW_COPY_AND_ASSIGN(CrashQueue);
Narayan Kamath922f6b22017-05-15 15:59:30 +0100163};
164
165// Whether java trace dumps are produced via tombstoned.
Narayan Kamathca5e9082017-06-02 15:42:06 +0100166static constexpr bool kJavaTraceDumpsEnabled = true;
Narayan Kamath922f6b22017-05-15 15:59:30 +0100167
Narayan Kamatha73df602017-05-24 15:07:25 +0100168/* static */ CrashQueue* const CrashQueue::tombstone =
169 new CrashQueue("/data/tombstones", "tombstone_" /* file_name_prefix */, 10 /* max_artifacts */,
170 1 /* max_concurrent_dumps */);
Narayan Kamath922f6b22017-05-15 15:59:30 +0100171
Narayan Kamatha73df602017-05-24 15:07:25 +0100172/* static */ CrashQueue* const CrashQueue::java_trace =
173 (kJavaTraceDumpsEnabled ? new CrashQueue("/data/anr", "anr_" /* file_name_prefix */,
174 64 /* max_artifacts */, 4 /* max_concurrent_dumps */)
Narayan Kamath922f6b22017-05-15 15:59:30 +0100175 : nullptr);
176
Josh Gaocbe70cb2016-10-18 18:17:52 -0700177// Ownership of Crash is a bit messy.
178// It's either owned by an active event that must have a timeout, or owned by
179// queued_requests, in the case that multiple crashes come in at the same time.
180struct Crash {
Narayan Kamath922f6b22017-05-15 15:59:30 +0100181 ~Crash() { event_free(crash_event); }
Josh Gaocbe70cb2016-10-18 18:17:52 -0700182
183 unique_fd crash_fd;
184 pid_t crash_pid;
185 event* crash_event = nullptr;
Narayan Kamath922f6b22017-05-15 15:59:30 +0100186
Narayan Kamatha73df602017-05-24 15:07:25 +0100187 DebuggerdDumpType crash_type;
Josh Gaocbe70cb2016-10-18 18:17:52 -0700188};
189
Narayan Kamatha73df602017-05-24 15:07:25 +0100190static CrashQueue* get_crash_queue(const Crash* crash) {
191 if (crash->crash_type == kDebuggerdJavaBacktrace) {
192 return CrashQueue::java_trace;
193 }
194
195 return CrashQueue::tombstone;
196}
197
Josh Gaocbe70cb2016-10-18 18:17:52 -0700198// Forward declare the callbacks so they can be placed in a sensible order.
199static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int, void*);
200static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg);
201static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg);
202
Josh Gao807a4582017-03-30 14:51:55 -0700203static void perform_request(Crash* crash) {
Josh Gaocbe70cb2016-10-18 18:17:52 -0700204 unique_fd output_fd;
Narayan Kamatha73df602017-05-24 15:07:25 +0100205 if (!intercept_manager->GetIntercept(crash->crash_pid, crash->crash_type, &output_fd)) {
206 output_fd = get_crash_queue(crash)->get_output_fd();
Josh Gaocbe70cb2016-10-18 18:17:52 -0700207 }
208
209 TombstonedCrashPacket response = {
210 .packet_type = CrashPacketType::kPerformDump
211 };
212 ssize_t rc = send_fd(crash->crash_fd, &response, sizeof(response), std::move(output_fd));
213 if (rc == -1) {
214 PLOG(WARNING) << "failed to send response to CrashRequest";
215 goto fail;
216 } else if (rc != sizeof(response)) {
217 PLOG(WARNING) << "crash socket write returned short";
218 goto fail;
219 } else {
220 // TODO: Make this configurable by the interceptor?
221 struct timeval timeout = { 10, 0 };
222
223 event_base* base = event_get_base(crash->crash_event);
224 event_assign(crash->crash_event, base, crash->crash_fd, EV_TIMEOUT | EV_READ,
225 crash_completed_cb, crash);
226 event_add(crash->crash_event, &timeout);
227 }
Josh Gao13078242017-03-30 14:42:46 -0700228
Narayan Kamatha73df602017-05-24 15:07:25 +0100229 get_crash_queue(crash)->on_crash_started();
Josh Gaocbe70cb2016-10-18 18:17:52 -0700230 return;
231
232fail:
233 delete crash;
234}
235
236static void crash_accept_cb(evconnlistener* listener, evutil_socket_t sockfd, sockaddr*, int,
Narayan Kamatha73df602017-05-24 15:07:25 +0100237 void*) {
Josh Gaocbe70cb2016-10-18 18:17:52 -0700238 event_base* base = evconnlistener_get_base(listener);
239 Crash* crash = new Crash();
240
Narayan Kamatha73df602017-05-24 15:07:25 +0100241 // TODO: Make sure that only java crashes come in on the java socket
242 // and only native crashes on the native socket.
Josh Gaocbe70cb2016-10-18 18:17:52 -0700243 struct timeval timeout = { 1, 0 };
244 event* crash_event = event_new(base, sockfd, EV_TIMEOUT | EV_READ, crash_request_cb, crash);
245 crash->crash_fd.reset(sockfd);
246 crash->crash_event = crash_event;
247 event_add(crash_event, &timeout);
248}
249
250static void crash_request_cb(evutil_socket_t sockfd, short ev, void* arg) {
251 ssize_t rc;
252 Crash* crash = static_cast<Crash*>(arg);
Narayan Kamath922f6b22017-05-15 15:59:30 +0100253
Josh Gaocbe70cb2016-10-18 18:17:52 -0700254 TombstonedCrashPacket request = {};
255
256 if ((ev & EV_TIMEOUT) != 0) {
257 LOG(WARNING) << "crash request timed out";
258 goto fail;
259 } else if ((ev & EV_READ) == 0) {
260 LOG(WARNING) << "tombstoned received unexpected event from crash socket";
261 goto fail;
262 }
263
264 rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
265 if (rc == -1) {
266 PLOG(WARNING) << "failed to read from crash socket";
267 goto fail;
268 } else if (rc != sizeof(request)) {
269 LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
270 << sizeof(request) << ")";
271 goto fail;
272 }
273
274 if (request.packet_type != CrashPacketType::kDumpRequest) {
275 LOG(WARNING) << "unexpected crash packet type, expected kDumpRequest, received "
276 << StringPrintf("%#2hhX", request.packet_type);
277 goto fail;
278 }
279
Narayan Kamatha73df602017-05-24 15:07:25 +0100280 crash->crash_type = request.packet.dump_request.dump_type;
281 if (crash->crash_type < 0 || crash->crash_type > kDebuggerdAnyIntercept) {
282 LOG(WARNING) << "unexpected crash dump type: " << crash->crash_type;
283 goto fail;
284 }
285
286 if (crash->crash_type != kDebuggerdJavaBacktrace) {
Narayan Kamath922f6b22017-05-15 15:59:30 +0100287 crash->crash_pid = request.packet.dump_request.pid;
288 } else {
289 // Requests for java traces are sent from untrusted processes, so we
290 // must not trust the PID sent down with the request. Instead, we ask the
291 // kernel.
292 ucred cr = {};
293 socklen_t len = sizeof(cr);
294 int ret = getsockopt(sockfd, SOL_SOCKET, SO_PEERCRED, &cr, &len);
295 if (ret != 0) {
296 PLOG(ERROR) << "Failed to getsockopt(..SO_PEERCRED)";
297 goto fail;
298 }
299
300 crash->crash_pid = cr.pid;
301 }
302
Josh Gaocbe70cb2016-10-18 18:17:52 -0700303 LOG(INFO) << "received crash request for pid " << crash->crash_pid;
304
Narayan Kamatha73df602017-05-24 15:07:25 +0100305 if (get_crash_queue(crash)->maybe_enqueue_crash(crash)) {
Josh Gaocbe70cb2016-10-18 18:17:52 -0700306 LOG(INFO) << "enqueueing crash request for pid " << crash->crash_pid;
Josh Gaocbe70cb2016-10-18 18:17:52 -0700307 } else {
Josh Gao807a4582017-03-30 14:51:55 -0700308 perform_request(crash);
Josh Gaocbe70cb2016-10-18 18:17:52 -0700309 }
310
311 return;
312
313fail:
314 delete crash;
315}
316
317static void crash_completed_cb(evutil_socket_t sockfd, short ev, void* arg) {
318 ssize_t rc;
319 Crash* crash = static_cast<Crash*>(arg);
320 TombstonedCrashPacket request = {};
321
Narayan Kamatha73df602017-05-24 15:07:25 +0100322 get_crash_queue(crash)->on_crash_completed();
Josh Gaocbe70cb2016-10-18 18:17:52 -0700323
324 if ((ev & EV_READ) == 0) {
325 goto fail;
326 }
327
328 rc = TEMP_FAILURE_RETRY(read(sockfd, &request, sizeof(request)));
329 if (rc == -1) {
330 PLOG(WARNING) << "failed to read from crash socket";
331 goto fail;
332 } else if (rc != sizeof(request)) {
333 LOG(WARNING) << "crash socket received short read of length " << rc << " (expected "
334 << sizeof(request) << ")";
335 goto fail;
336 }
337
338 if (request.packet_type != CrashPacketType::kCompletedDump) {
339 LOG(WARNING) << "unexpected crash packet type, expected kCompletedDump, received "
340 << uint32_t(request.packet_type);
341 goto fail;
342 }
343
344fail:
Narayan Kamatha73df602017-05-24 15:07:25 +0100345 CrashQueue* queue = get_crash_queue(crash);
Josh Gaocbe70cb2016-10-18 18:17:52 -0700346 delete crash;
347
348 // If there's something queued up, let them proceed.
Narayan Kamatha73df602017-05-24 15:07:25 +0100349 queue->maybe_dequeue_crashes(perform_request);
Josh Gaocbe70cb2016-10-18 18:17:52 -0700350}
351
352int main(int, char* []) {
Josh Gao8830c952017-03-06 12:23:55 -0800353 umask(0137);
354
Josh Gao55f79a52017-03-06 12:24:07 -0800355 // Don't try to connect to ourselves if we crash.
356 struct sigaction action = {};
357 action.sa_handler = [](int signal) {
358 LOG(ERROR) << "received fatal signal " << signal;
359 _exit(1);
360 };
361 debuggerd_register_handlers(&action);
362
Josh Gaocbe70cb2016-10-18 18:17:52 -0700363 int intercept_socket = android_get_control_socket(kTombstonedInterceptSocketName);
364 int crash_socket = android_get_control_socket(kTombstonedCrashSocketName);
365
366 if (intercept_socket == -1 || crash_socket == -1) {
367 PLOG(FATAL) << "failed to get socket from init";
368 }
369
370 evutil_make_socket_nonblocking(intercept_socket);
371 evutil_make_socket_nonblocking(crash_socket);
372
373 event_base* base = event_base_new();
374 if (!base) {
375 LOG(FATAL) << "failed to create event_base";
376 }
377
378 intercept_manager = new InterceptManager(base, intercept_socket);
379
Narayan Kamath922f6b22017-05-15 15:59:30 +0100380 evconnlistener* tombstone_listener = evconnlistener_new(
Narayan Kamatha73df602017-05-24 15:07:25 +0100381 base, crash_accept_cb, CrashQueue::tombstone, -1, LEV_OPT_CLOSE_ON_FREE, crash_socket);
Narayan Kamath922f6b22017-05-15 15:59:30 +0100382 if (!tombstone_listener) {
383 LOG(FATAL) << "failed to create evconnlistener for tombstones.";
384 }
385
386 if (kJavaTraceDumpsEnabled) {
387 const int java_trace_socket = android_get_control_socket(kTombstonedJavaTraceSocketName);
388 if (java_trace_socket == -1) {
389 PLOG(FATAL) << "failed to get socket from init";
390 }
391
392 evutil_make_socket_nonblocking(java_trace_socket);
393 evconnlistener* java_trace_listener = evconnlistener_new(
Narayan Kamatha73df602017-05-24 15:07:25 +0100394 base, crash_accept_cb, CrashQueue::java_trace, -1, LEV_OPT_CLOSE_ON_FREE, java_trace_socket);
Narayan Kamath922f6b22017-05-15 15:59:30 +0100395 if (!java_trace_listener) {
396 LOG(FATAL) << "failed to create evconnlistener for java traces.";
397 }
Josh Gaocbe70cb2016-10-18 18:17:52 -0700398 }
399
400 LOG(INFO) << "tombstoned successfully initialized";
401 event_base_dispatch(base);
402}