blob: 5fca1ee5d170e35d94a58745b2e5ac3cf1e9dfb4 [file] [log] [blame]
Mark Salyzynf089e142018-02-20 10:47:40 -08001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "llkd.h"
18
19#include <ctype.h>
20#include <dirent.h> // opendir() and readdir()
21#include <errno.h>
22#include <fcntl.h>
23#include <pthread.h>
24#include <pwd.h> // getpwuid()
25#include <signal.h>
26#include <stdint.h>
27#include <sys/cdefs.h> // ___STRING, __predict_true() and _predict_false()
28#include <sys/mman.h> // mlockall()
29#include <sys/prctl.h>
30#include <sys/stat.h> // lstat()
31#include <sys/syscall.h> // __NR_getdents64
32#include <sys/sysinfo.h> // get_nprocs_conf()
33#include <sys/types.h>
34#include <time.h>
35#include <unistd.h>
36
37#include <chrono>
38#include <ios>
39#include <sstream>
40#include <string>
41#include <unordered_map>
42#include <unordered_set>
43
44#include <android-base/file.h>
45#include <android-base/logging.h>
46#include <android-base/parseint.h>
47#include <android-base/properties.h>
48#include <android-base/strings.h>
49#include <cutils/android_get_control_file.h>
50#include <log/log_main.h>
51
52#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
53
54#define TASK_COMM_LEN 16 // internal kernel, not uapi, from .../linux/include/linux/sched.h
55
56using namespace std::chrono_literals;
57using namespace std::chrono;
Mark Salyzyn52e54a62018-08-07 08:13:13 -070058using namespace std::literals;
Mark Salyzynf089e142018-02-20 10:47:40 -080059
60namespace {
61
// Well-known process ids referenced by the skip lists and the panic path.
constexpr pid_t kernelPid{0};
constexpr pid_t initPid{1};
constexpr pid_t kthreaddPid{2};

// procfs root, including the trailing '/', so an id can be appended directly.
constexpr char procdir[]{"/proc/"};
67
68// Configuration
69milliseconds llkUpdate; // last check ms signature
70milliseconds llkCycle; // ms to next thread check
71bool llkEnable = LLK_ENABLE_DEFAULT; // llk daemon enabled
72bool llkRunning = false; // thread is running
73bool llkMlockall = LLK_MLOCKALL_DEFAULT; // run mlocked
Mark Salyzynafd66f22018-03-19 15:16:29 -070074bool llkTestWithKill = LLK_KILLTEST_DEFAULT; // issue test kills
Mark Salyzynf089e142018-02-20 10:47:40 -080075milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT; // default timeout
Mark Salyzyn96505fa2018-08-07 08:13:13 -070076enum { // enum of state indexes
77 llkStateD, // Persistent 'D' state
78 llkStateZ, // Persistent 'Z' state
79#ifdef __PTRACE_ENABLED__ // Extra privileged states
80 llkStateStack, // stack signature
81#endif // End of extra privilege
82 llkNumStates, // Maxumum number of states
83}; // state indexes
Mark Salyzynf089e142018-02-20 10:47:40 -080084milliseconds llkStateTimeoutMs[llkNumStates]; // timeout override for each detection state
85milliseconds llkCheckMs; // checking interval to inspect any
86 // persistent live-locked states
87bool llkLowRam; // ro.config.low_ram
Mark Salyzynbd7c8562018-10-31 10:02:08 -070088bool llkEnableSysrqT = LLK_ENABLE_SYSRQ_T_DEFAULT; // sysrq stack trace dump
Mark Salyzynf089e142018-02-20 10:47:40 -080089bool khtEnable = LLK_ENABLE_DEFAULT; // [khungtaskd] panic
90// [khungtaskd] should have a timeout beyond the granularity of llkTimeoutMs.
91// Provides a wide angle of margin b/c khtTimeout is also its granularity.
92seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
93 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
Mark Salyzyn96505fa2018-08-07 08:13:13 -070094#ifdef __PTRACE_ENABLED__
95// list of stack symbols to search for persistence.
96std::unordered_set<std::string> llkCheckStackSymbols;
97#endif
Mark Salyzynf089e142018-02-20 10:47:40 -080098
// Blacklist variables, initialized with comma separated lists of high false
// positive and/or dangerous references, e.g. without self restart, for pid,
// ppid, name and uid:

// Pids, tids or names to skip: kernel pid (0), init pid (1), [kthreadd]
// pid (2), ourselves, "init", "[kthreadd]", "lmkd", "llkd" or combinations
// of watchdogd in kernel and user space.
std::unordered_set<std::string> llkBlacklistProcess;

// Parent pids, comm or cmdline names to skip. Default: kernel pid (0),
// [kthreadd] (2), or ourselves, enforced and implied.
std::unordered_set<std::string> llkBlacklistParent;

// Uids, and uid names, to skip. Default: nothing.
std::unordered_set<std::string> llkBlacklistUid;

#ifdef __PTRACE_ENABLED__
// Names to skip stack checking: "init", "lmkd", "llkd", "keystore" or
// "logd" (if not userdebug).
std::unordered_set<std::string> llkBlacklistStack;
#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800117
118class dir {
119 public:
120 enum level { proc, task, numLevels };
121
122 private:
123 int fd;
124 size_t available_bytes;
125 dirent* next;
126 // each directory level picked to be just north of 4K in size
127 static constexpr size_t buffEntries = 15;
128 static dirent buff[numLevels][buffEntries];
129
130 bool fill(enum level index) {
131 if (index >= numLevels) return false;
132 if (available_bytes != 0) return true;
133 if (__predict_false(fd < 0)) return false;
134 // getdents64 has no libc wrapper
135 auto rc = TEMP_FAILURE_RETRY(syscall(__NR_getdents64, fd, buff[index], sizeof(buff[0]), 0));
136 if (rc <= 0) return false;
137 available_bytes = rc;
138 next = buff[index];
139 return true;
140 }
141
142 public:
143 dir() : fd(-1), available_bytes(0), next(nullptr) {}
144
145 explicit dir(const char* directory)
146 : fd(__predict_true(directory != nullptr)
147 ? ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY)
148 : -1),
149 available_bytes(0),
150 next(nullptr) {}
151
152 explicit dir(const std::string&& directory)
153 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
154 available_bytes(0),
155 next(nullptr) {}
156
157 explicit dir(const std::string& directory)
158 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
159 available_bytes(0),
160 next(nullptr) {}
161
162 // Don't need any copy or move constructors.
163 explicit dir(const dir& c) = delete;
164 explicit dir(dir& c) = delete;
165 explicit dir(dir&& c) = delete;
166
167 ~dir() {
168 if (fd >= 0) {
169 ::close(fd);
170 }
171 }
172
173 operator bool() const { return fd >= 0; }
174
175 void reset(void) {
176 if (fd >= 0) {
177 ::close(fd);
178 fd = -1;
179 available_bytes = 0;
180 next = nullptr;
181 }
182 }
183
184 dir& reset(const char* directory) {
185 reset();
186 // available_bytes will _always_ be zero here as its value is
187 // intimately tied to fd < 0 or not.
188 fd = ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
189 return *this;
190 }
191
192 void rewind(void) {
193 if (fd >= 0) {
194 ::lseek(fd, off_t(0), SEEK_SET);
195 available_bytes = 0;
196 next = nullptr;
197 }
198 }
199
200 dirent* read(enum level index = proc, dirent* def = nullptr) {
201 if (!fill(index)) return def;
202 auto ret = next;
203 available_bytes -= next->d_reclen;
204 next = reinterpret_cast<dirent*>(reinterpret_cast<char*>(next) + next->d_reclen);
205 return ret;
206 }
207} llkTopDirectory;
208
209dirent dir::buff[dir::numLevels][dir::buffEntries];
210
211// helper functions
212
213bool llkIsMissingExeLink(pid_t tid) {
214 char c;
215 // CAP_SYS_PTRACE is required to prevent ret == -1, but ENOENT is signal
216 auto ret = ::readlink((procdir + std::to_string(tid) + "/exe").c_str(), &c, sizeof(c));
217 return (ret == -1) && (errno == ENOENT);
218}
219
220// Common routine where caller accepts empty content as error/passthrough.
221// Reduces the churn of reporting read errors in the callers.
222std::string ReadFile(std::string&& path) {
223 std::string content;
224 if (!android::base::ReadFileToString(path, &content)) {
225 PLOG(DEBUG) << "Read " << path << " failed";
226 content = "";
227 }
228 return content;
229}
230
231std::string llkProcGetName(pid_t tid, const char* node = "/cmdline") {
232 std::string content = ReadFile(procdir + std::to_string(tid) + node);
233 static constexpr char needles[] = " \t\r\n"; // including trailing nul
234 auto pos = content.find_first_of(needles, 0, sizeof(needles));
235 if (pos != std::string::npos) {
236 content.erase(pos);
237 }
238 return content;
239}
240
241uid_t llkProcGetUid(pid_t tid) {
242 // Get the process' uid. The following read from /status is admittedly
243 // racy, prone to corruption due to shape-changes. The consequences are
244 // not catastrophic as we sample a few times before taking action.
245 //
246 // If /loginuid worked on reliably, or on Android (all tasks report -1)...
247 // Android lmkd causes /cgroup to contain memory:/<dom>/uid_<uid>/pid_<pid>
248 // which is tighter, but also not reliable.
249 std::string content = ReadFile(procdir + std::to_string(tid) + "/status");
250 static constexpr char Uid[] = "\nUid:";
251 auto pos = content.find(Uid);
252 if (pos == std::string::npos) {
253 return -1;
254 }
255 pos += ::strlen(Uid);
256 while ((pos < content.size()) && ::isblank(content[pos])) {
257 ++pos;
258 }
259 content.erase(0, pos);
260 for (pos = 0; (pos < content.size()) && ::isdigit(content[pos]); ++pos) {
261 ;
262 }
263 // Content of form 'Uid: 0 0 0 0', newline is error
264 if ((pos >= content.size()) || !::isblank(content[pos])) {
265 return -1;
266 }
267 content.erase(pos);
268 uid_t ret;
Tom Cherrye0bc5a92018-10-05 14:29:47 -0700269 if (!android::base::ParseUint(content, &ret, uid_t(0))) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800270 return -1;
271 }
272 return ret;
273}
274
275struct proc {
276 pid_t tid; // monitored thread id (in Z or D state).
277 nanoseconds schedUpdate; // /proc/<tid>/sched "se.avg.lastUpdateTime",
278 uint64_t nrSwitches; // /proc/<tid>/sched "nr_switches" for
279 // refined ABA problem detection, determine
280 // forward scheduling progress.
281 milliseconds update; // llkUpdate millisecond signature of last.
282 milliseconds count; // duration in state.
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700283#ifdef __PTRACE_ENABLED__ // Privileged state checking
284 milliseconds count_stack; // duration where stack is stagnant.
285#endif // End privilege
Mark Salyzynf089e142018-02-20 10:47:40 -0800286 pid_t pid; // /proc/<pid> before iterating through
287 // /proc/<pid>/task/<tid> for threads.
288 pid_t ppid; // /proc/<tid>/stat field 4 parent pid.
289 uid_t uid; // /proc/<tid>/status Uid: field.
290 unsigned time; // sum of /proc/<tid>/stat field 14 utime &
291 // 15 stime for coarse ABA problem detection.
292 std::string cmdline; // cached /cmdline content
293 char state; // /proc/<tid>/stat field 3: Z or D
294 // (others we do not monitor: S, R, T or ?)
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700295#ifdef __PTRACE_ENABLED__ // Privileged state checking
296 char stack; // index in llkCheckStackSymbols for matches
297#endif // and with maximum index PROP_VALUE_MAX/2.
Mark Salyzynf089e142018-02-20 10:47:40 -0800298 char comm[TASK_COMM_LEN + 3]; // space for adding '[' and ']'
299 bool exeMissingValid; // exeMissing has been cached
300 bool cmdlineValid; // cmdline has been cached
301 bool updated; // cleared before monitoring pass.
302 bool killed; // sent a kill to this thread, next panic...
303
304 void setComm(const char* _comm) { strncpy(comm + 1, _comm, sizeof(comm) - 2); }
305
306 proc(pid_t tid, pid_t pid, pid_t ppid, const char* _comm, int time, char state)
307 : tid(tid),
308 schedUpdate(0),
309 nrSwitches(0),
310 update(llkUpdate),
Mark Salyzynacecaf72018-08-10 08:15:57 -0700311 count(0ms),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700312#ifdef __PTRACE_ENABLED__
313 count_stack(0ms),
314#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800315 pid(pid),
316 ppid(ppid),
317 uid(-1),
318 time(time),
319 state(state),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700320#ifdef __PTRACE_ENABLED__
321 stack(-1),
322#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800323 exeMissingValid(false),
324 cmdlineValid(false),
325 updated(true),
Mark Salyzynafd66f22018-03-19 15:16:29 -0700326 killed(!llkTestWithKill) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800327 memset(comm, '\0', sizeof(comm));
328 setComm(_comm);
329 }
330
331 const char* getComm(void) {
332 if (comm[1] == '\0') { // comm Valid?
333 strncpy(comm + 1, llkProcGetName(tid, "/comm").c_str(), sizeof(comm) - 2);
334 }
335 if (!exeMissingValid) {
336 if (llkIsMissingExeLink(tid)) {
337 comm[0] = '[';
338 }
339 exeMissingValid = true;
340 }
341 size_t len = strlen(comm + 1);
342 if (__predict_true(len < (sizeof(comm) - 1))) {
343 if (comm[0] == '[') {
344 if ((comm[len] != ']') && __predict_true(len < (sizeof(comm) - 2))) {
345 comm[++len] = ']';
346 comm[++len] = '\0';
347 }
348 } else {
349 if (comm[len] == ']') {
350 comm[len] = '\0';
351 }
352 }
353 }
354 return &comm[comm[0] != '['];
355 }
356
357 const char* getCmdline(void) {
358 if (!cmdlineValid) {
359 cmdline = llkProcGetName(tid);
360 cmdlineValid = true;
361 }
362 return cmdline.c_str();
363 }
364
365 uid_t getUid(void) {
366 if (uid <= 0) { // Churn on root user, because most likely to setuid()
367 uid = llkProcGetUid(tid);
368 }
369 return uid;
370 }
371
372 void reset(void) { // reset cache, if we detected pid rollover
373 uid = -1;
374 state = '?';
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700375#ifdef __PTRACE_ENABLED__
376 count_stack = 0ms;
377 stack = -1;
378#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800379 cmdline = "";
380 comm[0] = '\0';
381 exeMissingValid = false;
382 cmdlineValid = false;
383 }
384};
385
386std::unordered_map<pid_t, proc> tids;
387
388// Check range and setup defaults, in order of propagation:
389// llkTimeoutMs
390// llkCheckMs
391// ...
392// KISS to keep it all self-contained, and called multiple times as parameters
393// are interpreted so that defaults, llkCheckMs and llkCycle make sense.
394void llkValidate() {
395 if (llkTimeoutMs == 0ms) {
396 llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;
397 }
398 llkTimeoutMs = std::max(llkTimeoutMs, LLK_TIMEOUT_MS_MINIMUM);
399 if (llkCheckMs == 0ms) {
400 llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT;
401 }
402 llkCheckMs = std::min(llkCheckMs, llkTimeoutMs);
403
404 for (size_t state = 0; state < ARRAY_SIZE(llkStateTimeoutMs); ++state) {
405 if (llkStateTimeoutMs[state] == 0ms) {
406 llkStateTimeoutMs[state] = llkTimeoutMs;
407 }
408 llkStateTimeoutMs[state] =
409 std::min(std::max(llkStateTimeoutMs[state], LLK_TIMEOUT_MS_MINIMUM), llkTimeoutMs);
410 llkCheckMs = std::min(llkCheckMs, llkStateTimeoutMs[state]);
411 }
412
413 llkCheckMs = std::max(llkCheckMs, LLK_CHECK_MS_MINIMUM);
414 if (llkCycle == 0ms) {
415 llkCycle = llkCheckMs;
416 }
417 llkCycle = std::min(llkCycle, llkCheckMs);
418}
419
// Elapsed time from *from to *to in milliseconds. The seconds and
// nanoseconds fields are converted to milliseconds separately and summed.
milliseconds llkGetTimespecDiffMs(timespec* from, timespec* to) {
    const seconds wholeSeconds(to->tv_sec - from->tv_sec);
    const nanoseconds remainder(to->tv_nsec - from->tv_nsec);
    return duration_cast<milliseconds>(wholeSeconds) + duration_cast<milliseconds>(remainder);
}
424
425std::string llkProcGetName(pid_t tid, const char* comm, const char* cmdline) {
426 if ((cmdline != nullptr) && (*cmdline != '\0')) {
427 return cmdline;
428 }
429 if ((comm != nullptr) && (*comm != '\0')) {
430 return comm;
431 }
432
433 // UNLIKELY! Here because killed before we kill it?
434 // Assume change is afoot, do not call llkTidAlloc
435
436 // cmdline ?
437 std::string content = llkProcGetName(tid);
438 if (content.size() != 0) {
439 return content;
440 }
441 // Comm instead?
442 content = llkProcGetName(tid, "/comm");
443 if (llkIsMissingExeLink(tid) && (content.size() != 0)) {
444 return '[' + content + ']';
445 }
446 return content;
447}
448
449int llkKillOneProcess(pid_t pid, char state, pid_t tid, const char* tcomm = nullptr,
450 const char* tcmdline = nullptr, const char* pcomm = nullptr,
451 const char* pcmdline = nullptr) {
452 std::string forTid;
453 if (tid != pid) {
454 forTid = " for '" + llkProcGetName(tid, tcomm, tcmdline) + "' (" + std::to_string(tid) + ")";
455 }
456 LOG(INFO) << "Killing '" << llkProcGetName(pid, pcomm, pcmdline) << "' (" << pid
457 << ") to check forward scheduling progress in " << state << " state" << forTid;
458 // CAP_KILL required
459 errno = 0;
460 auto r = ::kill(pid, SIGKILL);
461 if (r) {
462 PLOG(ERROR) << "kill(" << pid << ")=" << r << ' ';
463 }
464
465 return r;
466}
467
468// Kill one process
469int llkKillOneProcess(pid_t pid, proc* tprocp) {
470 return llkKillOneProcess(pid, tprocp->state, tprocp->tid, tprocp->getComm(),
471 tprocp->getCmdline());
472}
473
474// Kill one process specified by kprocp
475int llkKillOneProcess(proc* kprocp, proc* tprocp) {
476 if (kprocp == nullptr) {
477 return -2;
478 }
479
480 return llkKillOneProcess(kprocp->tid, tprocp->state, tprocp->tid, tprocp->getComm(),
481 tprocp->getCmdline(), kprocp->getComm(), kprocp->getCmdline());
482}
483
484// Acquire file descriptor from environment, or open and cache it.
485// NB: cache is unnecessary in our current context, pedantically
486// required to prevent leakage of file descriptors in the future.
487int llkFileToWriteFd(const std::string& file) {
488 static std::unordered_map<std::string, int> cache;
489 auto search = cache.find(file);
490 if (search != cache.end()) return search->second;
491 auto fd = android_get_control_file(file.c_str());
492 if (fd >= 0) return fd;
493 fd = TEMP_FAILURE_RETRY(::open(file.c_str(), O_WRONLY | O_CLOEXEC));
494 if (fd >= 0) cache.emplace(std::make_pair(file, fd));
495 return fd;
496}
497
498// Wrap android::base::WriteStringToFile to use android_get_control_file.
499bool llkWriteStringToFile(const std::string& string, const std::string& file) {
500 auto fd = llkFileToWriteFd(file);
501 if (fd < 0) return false;
502 return android::base::WriteStringToFd(string, fd);
503}
504
505bool llkWriteStringToFileConfirm(const std::string& string, const std::string& file) {
506 auto fd = llkFileToWriteFd(file);
507 auto ret = (fd < 0) ? false : android::base::WriteStringToFd(string, fd);
508 std::string content;
509 if (!android::base::ReadFileToString(file, &content)) return ret;
510 return android::base::Trim(content) == string;
511}
512
Mark Salyzynfbc3a752018-12-04 10:30:45 -0800513void llkPanicKernel(bool dump, pid_t tid, const char* state, const std::string& message = "") {
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -0700514 if (!message.empty()) LOG(ERROR) << message;
Mark Salyzynf089e142018-02-20 10:47:40 -0800515 auto sysrqTriggerFd = llkFileToWriteFd("/proc/sysrq-trigger");
516 if (sysrqTriggerFd < 0) {
517 // DYB
518 llkKillOneProcess(initPid, 'R', tid);
519 // The answer to life, the universe and everything
520 ::exit(42);
521 // NOTREACHED
Mark Salyzynfbc3a752018-12-04 10:30:45 -0800522 return;
Mark Salyzynf089e142018-02-20 10:47:40 -0800523 }
524 ::sync();
525 if (dump) {
526 // Show all locks that are held
527 android::base::WriteStringToFd("d", sysrqTriggerFd);
Mark Salyzyn53e782d2018-10-31 16:03:45 -0700528 // Show all waiting tasks
529 android::base::WriteStringToFd("w", sysrqTriggerFd);
Mark Salyzynf089e142018-02-20 10:47:40 -0800530 // This can trigger hardware watchdog, that is somewhat _ok_.
531 // But useless if pstore configured for <256KB, low ram devices ...
Mark Salyzynbd7c8562018-10-31 10:02:08 -0700532 if (llkEnableSysrqT) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800533 android::base::WriteStringToFd("t", sysrqTriggerFd);
Mark Salyzyn53e782d2018-10-31 16:03:45 -0700534 // Show all locks that are held (in case 't' overflows ramoops)
535 android::base::WriteStringToFd("d", sysrqTriggerFd);
536 // Show all waiting tasks (in case 't' overflows ramoops)
537 android::base::WriteStringToFd("w", sysrqTriggerFd);
Mark Salyzynf089e142018-02-20 10:47:40 -0800538 }
539 ::usleep(200000); // let everything settle
540 }
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -0700541 // SysRq message matches kernel format, and propagates through bootstat
542 // ultimately to the boot reason into panic,livelock,<state>.
543 llkWriteStringToFile(message + (message.empty() ? "" : "\n") +
544 "SysRq : Trigger a crash : 'livelock,"s + state + "'\n",
545 "/dev/kmsg");
Mark Salyzynfbc3a752018-12-04 10:30:45 -0800546 // Because panic is such a serious thing to do, let us
547 // make sure that the tid being inspected still exists!
548 auto piddir = procdir + std::to_string(tid) + "/stat";
549 if (access(piddir.c_str(), F_OK) != 0) {
550 PLOG(WARNING) << piddir;
551 return;
552 }
Mark Salyzynf089e142018-02-20 10:47:40 -0800553 android::base::WriteStringToFd("c", sysrqTriggerFd);
554 // NOTREACHED
555 // DYB
556 llkKillOneProcess(initPid, 'R', tid);
557 // I sat at my desk, stared into the garden and thought '42 will do'.
558 // I typed it out. End of story
559 ::exit(42);
560 // NOTREACHED
561}
562
563void llkAlarmHandler(int) {
Mark Salyzynb3418a22018-11-19 15:24:03 -0800564 LOG(FATAL) << "alarm";
565 // NOTREACHED
566 llkPanicKernel(true, ::getpid(), "alarm");
Mark Salyzynf089e142018-02-20 10:47:40 -0800567}
568
569milliseconds GetUintProperty(const std::string& key, milliseconds def) {
570 return milliseconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
571 static_cast<uint64_t>(def.max().count())));
572}
573
574seconds GetUintProperty(const std::string& key, seconds def) {
575 return seconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
576 static_cast<uint64_t>(def.max().count())));
577}
578
579proc* llkTidLookup(pid_t tid) {
580 auto search = tids.find(tid);
581 if (search == tids.end()) {
582 return nullptr;
583 }
584 return &search->second;
585}
586
587void llkTidRemove(pid_t tid) {
588 tids.erase(tid);
589}
590
591proc* llkTidAlloc(pid_t tid, pid_t pid, pid_t ppid, const char* comm, int time, char state) {
592 auto it = tids.emplace(std::make_pair(tid, proc(tid, pid, ppid, comm, time, state)));
593 return &it.first->second;
594}
595
// Format a millisecond duration as "<seconds>.<milliseconds>s", with the
// fractional part zero-filled to three digits, e.g. 1500ms -> "1.500s".
std::string llkFormat(milliseconds ms) {
    const auto whole = duration_cast<seconds>(ms);
    const auto fraction = (ms - whole).count();

    std::ostringstream out;
    out << whole.count() << '.';
    // Zero-fill the fraction, then put the stream formatting back.
    const auto savedFill = out.fill('0');
    const auto savedWidth = out.width(3);
    out << std::right << fraction;
    out.width(savedWidth);
    out.fill(savedFill);
    out << 's';
    return out.str();
}
608
// Format a duration in seconds as "<count>s".
std::string llkFormat(seconds s) {
    std::string text = std::to_string(s.count());
    text.push_back('s');
    return text;
}
612
// Format a flag as "true" or "false".
std::string llkFormat(bool flag) {
    if (flag) return "true";
    return "false";
}
616
// Join the entries of a list into a comma separated string. The order of
// the entries follows the set's iteration order, which is unspecified.
std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
    std::string ret;
    // Iterate by reference; copying every entry just to append it is waste.
    for (const auto& entry : blacklist) {
        if (!ret.empty()) {
            ret += ',';
        }
        ret += entry;
    }
    return ret;
}
627
// Split a list into its unique, non-empty entries.
// We only officially support comma separators, but wetware being what they
// are will take some liberty and I do not believe they should be punished;
// blank, tab and colon are accepted as separators too.
std::unordered_set<std::string> llkSplit(const std::string& s) {
    std::unordered_set<std::string> result;

    // Special case, allow boolean false to empty the list, otherwise expected
    // source of input from android::base::GetProperty will supply the default
    // value on empty content in the property.
    if (s == "false") return result;

    static constexpr char separators[] = ", \t:";
    for (size_t base = 0; base < s.size();) {
        const auto found = s.find_first_of(separators, base);
        // Only emplace content, empty entries are not an option
        if (found != base) result.emplace(s.substr(base, found - base));
        if (found == std::string::npos) break;
        base = found + 1;
    }
    return result;
}
648
649bool llkSkipName(const std::string& name,
650 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
651 if ((name.size() == 0) || (blacklist.size() == 0)) {
652 return false;
653 }
654
655 return blacklist.find(name) != blacklist.end();
656}
657
658bool llkSkipPid(pid_t pid) {
659 return llkSkipName(std::to_string(pid), llkBlacklistProcess);
660}
661
662bool llkSkipPpid(pid_t ppid) {
663 return llkSkipName(std::to_string(ppid), llkBlacklistParent);
664}
665
666bool llkSkipUid(uid_t uid) {
667 // Match by number?
668 if (llkSkipName(std::to_string(uid), llkBlacklistUid)) {
669 return true;
670 }
671
672 // Match by name?
673 auto pwd = ::getpwuid(uid);
674 return (pwd != nullptr) && __predict_true(pwd->pw_name != nullptr) &&
675 __predict_true(pwd->pw_name[0] != '\0') && llkSkipName(pwd->pw_name, llkBlacklistUid);
676}
677
678bool getValidTidDir(dirent* dp, std::string* piddir) {
679 if (!::isdigit(dp->d_name[0])) {
680 return false;
681 }
682
683 // Corner case can not happen in reality b/c of above ::isdigit check
684 if (__predict_false(dp->d_type != DT_DIR)) {
685 if (__predict_false(dp->d_type == DT_UNKNOWN)) { // can't b/c procfs
686 struct stat st;
687 *piddir = procdir;
688 *piddir += dp->d_name;
689 return (lstat(piddir->c_str(), &st) == 0) && (st.st_mode & S_IFDIR);
690 }
691 return false;
692 }
693
694 *piddir = procdir;
695 *piddir += dp->d_name;
696 return true;
697}
698
// True for the task states that are monitored: 'Z' and 'D'.
bool llkIsMonitorState(char state) {
    switch (state) {
        case 'Z':
        case 'D':
            return true;
        default:
            return false;
    }
}
702
703// returns -1 if not found
704long long getSchedValue(const std::string& schedString, const char* key) {
705 auto pos = schedString.find(key);
706 if (pos == std::string::npos) {
707 return -1;
708 }
709 pos = schedString.find(':', pos);
710 if (__predict_false(pos == std::string::npos)) {
711 return -1;
712 }
713 while ((++pos < schedString.size()) && ::isblank(schedString[pos])) {
714 ;
715 }
716 long long ret;
717 if (!android::base::ParseInt(schedString.substr(pos), &ret, static_cast<long long>(0))) {
718 return -1;
719 }
720 return ret;
721}
722
#ifdef __PTRACE_ENABLED__
// Track how long one of the llkCheckStackSymbols has been present in the
// thread's kernel stack (<piddir>/stack). Returns true once the same symbol
// has persisted for llkStateTimeoutMs[llkStateStack].
bool llkCheckStack(proc* procp, const std::string& piddir) {
    if (llkCheckStackSymbols.empty()) return false;
    if (procp->state == 'Z') {  // No brains for Zombies
        procp->stack = -1;
        procp->count_stack = 0ms;
        return false;
    }

    // Don't check process that are known to block ptrace, save sepolicy noise.
    const bool skip =
        llkSkipName(std::to_string(procp->pid), llkBlacklistStack) ||
        llkSkipName(procp->getComm(), llkBlacklistStack) ||
        llkSkipName(procp->getCmdline(), llkBlacklistStack) ||
        llkSkipName(android::base::Basename(procp->getCmdline()), llkBlacklistStack);
    if (skip) return false;

    const auto kernel_stack = ReadFile(piddir + "/stack");
    if (kernel_stack.empty()) {
        LOG(INFO) << piddir << "/stack empty comm=" << procp->getComm()
                  << " cmdline=" << procp->getCmdline();
        return false;
    }
    // A scheduling incident that should not reset count_stack
    if (kernel_stack.find(" cpu_worker_pools+0x") != std::string::npos) return false;

    // Find the first listed symbol present in the stack; idx counts symbols
    // in iteration order and gives up should the counter go negative.
    char idx = -1;
    char match = -1;
    for (const auto& stack : llkCheckStackSymbols) {
        if (++idx < 0) break;
        const bool plain = kernel_stack.find(" "s + stack + "+0x") != std::string::npos;
        if (plain || (kernel_stack.find(" "s + stack + ".cfi+0x") != std::string::npos)) {
            match = idx;
            break;
        }
    }

    // A different symbol (or none) than last time restarts the clock.
    if (procp->stack != match) {
        procp->stack = match;
        procp->count_stack = 0ms;
        return false;
    }
    if (match == char(-1)) return false;

    procp->count_stack += llkCycle;
    return procp->count_stack >= llkStateTimeoutMs[llkStateStack];
}
#endif
766
Mark Salyzynf089e142018-02-20 10:47:40 -0800767// Primary ABA mitigation watching last time schedule activity happened
768void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
769 // Audit finds /proc/<tid>/sched is just over 1K, and
770 // is rarely larger than 2K, even less on Android.
771 // For example, the "se.avg.lastUpdateTime" field we are
772 // interested in typically within the primary set in
773 // the first 1K.
774 //
775 // Proc entries can not be read >1K atomically via libbase,
776 // but if there are problems we assume at least a few
777 // samples of reads occur before we take any real action.
778 std::string schedString = ReadFile(piddir + "/sched");
779 if (schedString.size() == 0) {
780 // /schedstat is not as standardized, but in 3.1+
781 // Android devices, the third field is nr_switches
782 // from /sched:
783 schedString = ReadFile(piddir + "/schedstat");
784 if (schedString.size() == 0) {
785 return;
786 }
787 auto val = static_cast<unsigned long long>(-1);
788 if (((::sscanf(schedString.c_str(), "%*d %*d %llu", &val)) == 1) &&
789 (val != static_cast<unsigned long long>(-1)) && (val != 0) &&
790 (val != procp->nrSwitches)) {
791 procp->nrSwitches = val;
792 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700793 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800794 }
795 return;
796 }
797
798 auto val = getSchedValue(schedString, "\nse.avg.lastUpdateTime");
799 if (val == -1) {
800 val = getSchedValue(schedString, "\nse.svg.last_update_time");
801 }
802 if (val != -1) {
803 auto schedUpdate = nanoseconds(val);
804 if (schedUpdate != procp->schedUpdate) {
805 procp->schedUpdate = schedUpdate;
806 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700807 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800808 }
809 }
810
811 val = getSchedValue(schedString, "\nnr_switches");
812 if (val != -1) {
813 if (static_cast<uint64_t>(val) != procp->nrSwitches) {
814 procp->nrSwitches = val;
815 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700816 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800817 }
818 }
819}
820
821void llkLogConfig(void) {
822 LOG(INFO) << "ro.config.low_ram=" << llkFormat(llkLowRam) << "\n"
Mark Salyzynbd7c8562018-10-31 10:02:08 -0700823 << LLK_ENABLE_SYSRQ_T_PROPERTY "=" << llkFormat(llkEnableSysrqT) << "\n"
Mark Salyzynf089e142018-02-20 10:47:40 -0800824 << LLK_ENABLE_PROPERTY "=" << llkFormat(llkEnable) << "\n"
825 << KHT_ENABLE_PROPERTY "=" << llkFormat(khtEnable) << "\n"
826 << LLK_MLOCKALL_PROPERTY "=" << llkFormat(llkMlockall) << "\n"
Mark Salyzynafd66f22018-03-19 15:16:29 -0700827 << LLK_KILLTEST_PROPERTY "=" << llkFormat(llkTestWithKill) << "\n"
Mark Salyzynf089e142018-02-20 10:47:40 -0800828 << KHT_TIMEOUT_PROPERTY "=" << llkFormat(khtTimeout) << "\n"
829 << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n"
830 << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n"
831 << LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n"
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700832#ifdef __PTRACE_ENABLED__
833 << LLK_STACK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateStack])
834 << "\n"
835#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800836 << LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n"
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700837#ifdef __PTRACE_ENABLED__
838 << LLK_CHECK_STACK_PROPERTY "=" << llkFormat(llkCheckStackSymbols) << "\n"
839 << LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
840#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800841 << LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
842 << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
843 << LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
844}
845
846void* llkThread(void* obj) {
Mark Salyzyn4832a8b2018-08-15 11:02:18 -0700847 prctl(PR_SET_DUMPABLE, 0);
848
Mark Salyzynf089e142018-02-20 10:47:40 -0800849 LOG(INFO) << "started";
850
851 std::string name = std::to_string(::gettid());
852 if (!llkSkipName(name)) {
853 llkBlacklistProcess.emplace(name);
854 }
855 name = static_cast<const char*>(obj);
856 prctl(PR_SET_NAME, name.c_str());
857 if (__predict_false(!llkSkipName(name))) {
858 llkBlacklistProcess.insert(name);
859 }
860 // No longer modifying llkBlacklistProcess.
861 llkRunning = true;
862 llkLogConfig();
863 while (llkRunning) {
864 ::usleep(duration_cast<microseconds>(llkCheck(true)).count());
865 }
866 // NOTREACHED
867 LOG(INFO) << "exiting";
868 return nullptr;
869}
870
871} // namespace
872
873milliseconds llkCheck(bool checkRunning) {
874 if (!llkEnable || (checkRunning != llkRunning)) {
875 return milliseconds::max();
876 }
877
878 // Reset internal watchdog, which is a healthy engineering margin of
879 // double the maximum wait or cycle time for the mainloop that calls us.
880 //
881 // This alarm is effectively the live lock detection of llkd, as
882 // we understandably can not monitor ourselves otherwise.
883 ::alarm(duration_cast<seconds>(llkTimeoutMs * 2).count());
884
885 // kernel jiffy precision fastest acquisition
886 static timespec last;
887 timespec now;
888 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
889 auto ms = llkGetTimespecDiffMs(&last, &now);
890 if (ms < llkCycle) {
891 return llkCycle - ms;
892 }
893 last = now;
894
895 LOG(VERBOSE) << "opendir(\"" << procdir << "\")";
896 if (__predict_false(!llkTopDirectory)) {
897 // gid containing AID_READPROC required
898 llkTopDirectory.reset(procdir);
899 if (__predict_false(!llkTopDirectory)) {
900 // Most likely reason we could be here is a resource limit.
901 // Keep our processing down to a minimum, but not so low that
902 // we do not recover in a timely manner should the issue be
903 // transitory.
904 LOG(DEBUG) << "opendir(\"" << procdir << "\") failed";
905 return llkTimeoutMs;
906 }
907 }
908
909 for (auto& it : tids) {
910 it.second.updated = false;
911 }
912
913 auto prevUpdate = llkUpdate;
914 llkUpdate += ms;
915 ms -= llkCycle;
916 auto myPid = ::getpid();
917 auto myTid = ::gettid();
Mark Salyzynfbc3a752018-12-04 10:30:45 -0800918 auto dump = true;
Mark Salyzynf089e142018-02-20 10:47:40 -0800919 for (auto dp = llkTopDirectory.read(); dp != nullptr; dp = llkTopDirectory.read()) {
920 std::string piddir;
921
922 if (!getValidTidDir(dp, &piddir)) {
923 continue;
924 }
925
926 // Get the process tasks
927 std::string taskdir = piddir + "/task/";
928 int pid = -1;
929 LOG(VERBOSE) << "+opendir(\"" << taskdir << "\")";
930 dir taskDirectory(taskdir);
931 if (__predict_false(!taskDirectory)) {
932 LOG(DEBUG) << "+opendir(\"" << taskdir << "\") failed";
933 }
934 for (auto tp = taskDirectory.read(dir::task, dp); tp != nullptr;
935 tp = taskDirectory.read(dir::task)) {
936 if (!getValidTidDir(tp, &piddir)) {
937 continue;
938 }
939
940 // Get the process stat
941 std::string stat = ReadFile(piddir + "/stat");
942 if (stat.size() == 0) {
943 continue;
944 }
945 unsigned tid = -1;
946 char pdir[TASK_COMM_LEN + 1];
947 char state = '?';
948 unsigned ppid = -1;
949 unsigned utime = -1;
950 unsigned stime = -1;
951 int dummy;
952 pdir[0] = '\0';
953 // tid should not change value
954 auto match = ::sscanf(
955 stat.c_str(),
956 "%u (%" ___STRING(
957 TASK_COMM_LEN) "[^)]) %c %u %*d %*d %*d %*d %*d %*d %*d %*d %*d %u %u %d",
958 &tid, pdir, &state, &ppid, &utime, &stime, &dummy);
959 if (pid == -1) {
960 pid = tid;
961 }
962 LOG(VERBOSE) << "match " << match << ' ' << tid << " (" << pdir << ") " << state << ' '
963 << ppid << " ... " << utime << ' ' << stime << ' ' << dummy;
964 if (match != 7) {
965 continue;
966 }
967
968 auto procp = llkTidLookup(tid);
969 if (procp == nullptr) {
970 procp = llkTidAlloc(tid, pid, ppid, pdir, utime + stime, state);
971 } else {
972 // comm can change ...
973 procp->setComm(pdir);
974 procp->updated = true;
975 // pid/ppid/tid wrap?
976 if (((procp->update != prevUpdate) && (procp->update != llkUpdate)) ||
977 (procp->ppid != ppid) || (procp->pid != pid)) {
978 procp->reset();
979 } else if (procp->time != (utime + stime)) { // secondary ABA.
980 // watching utime+stime granularity jiffy
981 procp->state = '?';
982 }
983 procp->update = llkUpdate;
984 procp->pid = pid;
985 procp->ppid = ppid;
986 procp->time = utime + stime;
987 if (procp->state != state) {
988 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700989 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800990 procp->state = state;
991 } else {
992 procp->count += llkCycle;
993 }
994 }
995
996 // Filter checks in intuitive order of CPU cost to evaluate
997 // If tid unique continue, if ppid or pid unique break
998
999 if (pid == myPid) {
1000 break;
1001 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001002#ifdef __PTRACE_ENABLED__
1003 // if no stack monitoring, we can quickly exit here
1004 if (!llkIsMonitorState(state) && llkCheckStackSymbols.empty()) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001005 continue;
1006 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001007#else
1008 if (!llkIsMonitorState(state)) continue;
1009#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001010 if ((tid == myTid) || llkSkipPid(tid)) {
1011 continue;
1012 }
1013 if (llkSkipPpid(ppid)) {
1014 break;
1015 }
1016
1017 if (llkSkipName(procp->getComm())) {
1018 continue;
1019 }
1020 if (llkSkipName(procp->getCmdline())) {
1021 break;
1022 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001023 if (llkSkipName(android::base::Basename(procp->getCmdline()))) {
1024 break;
1025 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001026
1027 auto pprocp = llkTidLookup(ppid);
1028 if (pprocp == nullptr) {
1029 pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
1030 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001031 if ((pprocp != nullptr) &&
1032 (llkSkipName(pprocp->getComm(), llkBlacklistParent) ||
1033 llkSkipName(pprocp->getCmdline(), llkBlacklistParent) ||
1034 llkSkipName(android::base::Basename(pprocp->getCmdline()), llkBlacklistParent))) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001035 break;
1036 }
1037
1038 if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) {
1039 continue;
1040 }
1041
1042 // ABA mitigation watching last time schedule activity happened
1043 llkCheckSchedUpdate(procp, piddir);
1044
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001045#ifdef __PTRACE_ENABLED__
1046 auto stuck = llkCheckStack(procp, piddir);
1047 if (llkIsMonitorState(state)) {
1048 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1049 stuck = true;
1050 } else if (procp->count != 0ms) {
1051 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1052 << pid << "->" << tid << ' ' << procp->getComm();
1053 }
1054 }
1055 if (!stuck) continue;
1056#else
1057 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1058 if (procp->count != 0ms) {
1059 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1060 << pid << "->" << tid << ' ' << procp->getComm();
1061 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001062 continue;
1063 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001064#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001065
1066 // We have to kill it to determine difference between live lock
1067 // and persistent state blocked on a resource. Is there something
1068 // wrong with a process that has no forward scheduling progress in
1069 // Z or D? Yes, generally means improper accounting in the
1070 // process, but not always ...
1071 //
1072 // Whomever we hit with a test kill must accept the Android
1073 // Aphorism that everything can be burned to the ground and
1074 // must survive.
1075 if (procp->killed == false) {
1076 procp->killed = true;
1077 // confirm: re-read uid before committing to a panic.
1078 procp->uid = -1;
1079 switch (state) {
1080 case 'Z': // kill ppid to free up a Zombie
1081 // Killing init will kernel panic without diagnostics
1082 // so skip right to controlled kernel panic with
1083 // diagnostics.
1084 if (ppid == initPid) {
1085 break;
1086 }
1087 LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
1088 << pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
1089 if ((llkKillOneProcess(pprocp, procp) >= 0) ||
1090 (llkKillOneProcess(ppid, procp) >= 0)) {
1091 continue;
1092 }
1093 break;
1094
1095 case 'D': // kill tid to free up an uninterruptible D
1096 // If ABA is doing its job, we would not need or
1097 // want the following. Test kill is a Hail Mary
1098 // to make absolutely sure there is no forward
1099 // scheduling progress. The cost when ABA is
1100 // not working is we kill a process that likes to
1101 // stay in 'D' state, instead of panicing the
1102 // kernel (worse).
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001103 default:
1104 LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
1105 << "->" << tid << ' ' << procp->getComm() << " [kill]";
Mark Salyzynf089e142018-02-20 10:47:40 -08001106 if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001107 (llkKillOneProcess(pid, state, tid) >= 0) ||
Mark Salyzynf089e142018-02-20 10:47:40 -08001108 (llkKillOneProcess(procp, procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001109 (llkKillOneProcess(tid, state, tid) >= 0)) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001110 continue;
1111 }
1112 break;
1113 }
1114 }
1115 // We are here because we have confirmed kernel live-lock
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -07001116 const auto message = state + " "s + llkFormat(procp->count) + " " +
1117 std::to_string(ppid) + "->" + std::to_string(pid) + "->" +
1118 std::to_string(tid) + " " + procp->getComm() + " [panic]";
Mark Salyzynfbc3a752018-12-04 10:30:45 -08001119 llkPanicKernel(dump, tid,
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -07001120 (state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping",
1121 message);
Mark Salyzynfbc3a752018-12-04 10:30:45 -08001122 dump = false;
Mark Salyzynf089e142018-02-20 10:47:40 -08001123 }
1124 LOG(VERBOSE) << "+closedir()";
1125 }
1126 llkTopDirectory.rewind();
1127 LOG(VERBOSE) << "closedir()";
1128
1129 // garbage collection of old process references
1130 for (auto p = tids.begin(); p != tids.end();) {
1131 if (!p->second.updated) {
1132 IF_ALOG(LOG_VERBOSE, LOG_TAG) {
1133 std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr);
1134 if (ppidCmdline.size()) {
1135 ppidCmdline = "(" + ppidCmdline + ")";
1136 }
1137 std::string pidCmdline;
1138 if (p->second.pid != p->second.tid) {
1139 pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline());
1140 if (pidCmdline.size()) {
1141 pidCmdline = "(" + pidCmdline + ")";
1142 }
1143 }
1144 std::string tidCmdline =
1145 llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline());
1146 if (tidCmdline.size()) {
1147 tidCmdline = "(" + tidCmdline + ")";
1148 }
1149 LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid
1150 << pidCmdline << "->" << p->second.tid << tidCmdline << " removed";
1151 }
1152 p = tids.erase(p);
1153 } else {
1154 ++p;
1155 }
1156 }
1157 if (__predict_false(tids.empty())) {
1158 llkTopDirectory.reset();
1159 }
1160
1161 llkCycle = llkCheckMs;
1162
1163 timespec end;
1164 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &end);
1165 auto milli = llkGetTimespecDiffMs(&now, &end);
1166 LOG((milli > 10s) ? ERROR : (milli > 1s) ? WARNING : VERBOSE) << "sample " << llkFormat(milli);
1167
1168 // cap to minimum sleep for 1 second since last cycle
1169 if (llkCycle < (ms + 1s)) {
1170 return 1s;
1171 }
1172 return llkCycle - ms;
1173}
1174
1175unsigned llkCheckMilliseconds() {
1176 return duration_cast<milliseconds>(llkCheck()).count();
1177}
1178
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001179bool llkCheckEng(const std::string& property) {
1180 return android::base::GetProperty(property, "eng") == "eng";
1181}
1182
Mark Salyzynf089e142018-02-20 10:47:40 -08001183bool llkInit(const char* threadname) {
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001184 auto debuggable = android::base::GetBoolProperty("ro.debuggable", false);
Mark Salyzynf089e142018-02-20 10:47:40 -08001185 llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false);
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001186 llkEnableSysrqT &= !llkLowRam;
1187 if (debuggable) {
1188 llkEnableSysrqT |= llkCheckEng(LLK_ENABLE_SYSRQ_T_PROPERTY);
1189 if (!LLK_ENABLE_DEFAULT) { // NB: default is currently true ...
1190 llkEnable |= llkCheckEng(LLK_ENABLE_PROPERTY);
1191 khtEnable |= llkCheckEng(KHT_ENABLE_PROPERTY);
1192 }
Mark Salyzynd035dbb2018-03-26 08:23:00 -07001193 }
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001194 llkEnableSysrqT = android::base::GetBoolProperty(LLK_ENABLE_SYSRQ_T_PROPERTY, llkEnableSysrqT);
Mark Salyzynf089e142018-02-20 10:47:40 -08001195 llkEnable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, llkEnable);
1196 if (llkEnable && !llkTopDirectory.reset(procdir)) {
1197 // Most likely reason we could be here is llkd was started
1198 // incorrectly without the readproc permissions. Keep our
1199 // processing down to a minimum.
1200 llkEnable = false;
1201 }
1202 khtEnable = android::base::GetBoolProperty(KHT_ENABLE_PROPERTY, khtEnable);
1203 llkMlockall = android::base::GetBoolProperty(LLK_MLOCKALL_PROPERTY, llkMlockall);
Mark Salyzynafd66f22018-03-19 15:16:29 -07001204 llkTestWithKill = android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, llkTestWithKill);
Mark Salyzynf089e142018-02-20 10:47:40 -08001205 // if LLK_TIMOUT_MS_PROPERTY was not set, we will use a set
1206 // KHT_TIMEOUT_PROPERTY as co-operative guidance for the default value.
1207 khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout);
1208 if (khtTimeout == 0s) {
1209 khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
1210 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1211 }
1212 llkTimeoutMs =
1213 khtTimeout * LLK_CHECKS_PER_TIMEOUT_DEFAULT / (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1214 llkTimeoutMs = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1215 llkValidate(); // validate llkTimeoutMs, llkCheckMs and llkCycle
1216 llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1217 llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001218#ifdef __PTRACE_ENABLED__
1219 llkStateTimeoutMs[llkStateStack] = GetUintProperty(LLK_STACK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1220#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001221 llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs);
1222 llkValidate(); // validate all (effectively minus llkTimeoutMs)
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001223#ifdef __PTRACE_ENABLED__
1224 if (debuggable) {
1225 llkCheckStackSymbols = llkSplit(
1226 android::base::GetProperty(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT));
1227 }
1228 std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT);
1229 if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd";
1230 llkBlacklistStack = llkSplit(
1231 android::base::GetProperty(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack));
1232#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001233 std::string defaultBlacklistProcess(
1234 std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
1235 std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," +
1236 std::to_string(::gettid()) + "," LLK_BLACKLIST_PROCESS_DEFAULT);
1237 if (threadname) {
Mark Salyzyn52e54a62018-08-07 08:13:13 -07001238 defaultBlacklistProcess += ","s + threadname;
Mark Salyzynf089e142018-02-20 10:47:40 -08001239 }
1240 for (int cpu = 1; cpu < get_nprocs_conf(); ++cpu) {
1241 defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]";
1242 }
1243 defaultBlacklistProcess =
1244 android::base::GetProperty(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
1245 llkBlacklistProcess = llkSplit(defaultBlacklistProcess);
1246 if (!llkSkipName("[khungtaskd]")) { // ALWAYS ignore as special
1247 llkBlacklistProcess.emplace("[khungtaskd]");
1248 }
1249 llkBlacklistParent = llkSplit(android::base::GetProperty(
1250 LLK_BLACKLIST_PARENT_PROPERTY, std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
1251 "," LLK_BLACKLIST_PARENT_DEFAULT));
1252 llkBlacklistUid =
1253 llkSplit(android::base::GetProperty(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT));
1254
1255 // internal watchdog
1256 ::signal(SIGALRM, llkAlarmHandler);
1257
1258 // kernel hung task configuration? Otherwise leave it as-is
1259 if (khtEnable) {
1260 // EUID must be AID_ROOT to write to /proc/sys/kernel/ nodes, there
1261 // are no capability overrides. For security reasons we do not want
1262 // to run as AID_ROOT. We may not be able to write them successfully,
1263 // we will try, but the least we can do is read the values back to
1264 // confirm expectations and report whether configured or not.
1265 auto configured = llkWriteStringToFileConfirm(std::to_string(khtTimeout.count()),
1266 "/proc/sys/kernel/hung_task_timeout_secs");
1267 if (configured) {
1268 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_warnings");
1269 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_check_count");
1270 configured = llkWriteStringToFileConfirm("1", "/proc/sys/kernel/hung_task_panic");
1271 }
1272 if (configured) {
1273 LOG(INFO) << "[khungtaskd] configured";
1274 } else {
1275 LOG(WARNING) << "[khungtaskd] not configurable";
1276 }
1277 }
1278
1279 bool logConfig = true;
1280 if (llkEnable) {
1281 if (llkMlockall &&
1282 // MCL_ONFAULT pins pages as they fault instead of loading
1283 // everything immediately all at once. (Which would be bad,
1284 // because as of this writing, we have a lot of mapped pages we
1285 // never use.) Old kernels will see MCL_ONFAULT and fail with
1286 // EINVAL; we ignore this failure.
1287 //
1288 // N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
1289 // pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
1290 // in pages.
1291
1292 // CAP_IPC_LOCK required
1293 mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
1294 PLOG(WARNING) << "mlockall failed ";
1295 }
1296
1297 if (threadname) {
1298 pthread_attr_t attr;
1299
1300 if (!pthread_attr_init(&attr)) {
1301 sched_param param;
1302
1303 memset(&param, 0, sizeof(param));
1304 pthread_attr_setschedparam(&attr, &param);
1305 pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
1306 if (!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
1307 pthread_t thread;
1308 if (!pthread_create(&thread, &attr, llkThread, const_cast<char*>(threadname))) {
1309 // wait a second for thread to start
1310 for (auto retry = 50; retry && !llkRunning; --retry) {
1311 ::usleep(20000);
1312 }
1313 logConfig = !llkRunning; // printed in llkd context?
1314 } else {
1315 LOG(ERROR) << "failed to spawn llkd thread";
1316 }
1317 } else {
1318 LOG(ERROR) << "failed to detach llkd thread";
1319 }
1320 pthread_attr_destroy(&attr);
1321 } else {
1322 LOG(ERROR) << "failed to allocate attibutes for llkd thread";
1323 }
1324 }
1325 } else {
1326 LOG(DEBUG) << "[khungtaskd] left unconfigured";
1327 }
1328 if (logConfig) {
1329 llkLogConfig();
1330 }
1331
1332 return llkEnable;
1333}