blob: 7f47fc964b63f605d7a66590665ebeed948e9712 [file] [log] [blame]
Mark Salyzynf089e142018-02-20 10:47:40 -08001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "llkd.h"
18
19#include <ctype.h>
20#include <dirent.h> // opendir() and readdir()
21#include <errno.h>
22#include <fcntl.h>
23#include <pthread.h>
24#include <pwd.h> // getpwuid()
25#include <signal.h>
26#include <stdint.h>
27#include <sys/cdefs.h> // ___STRING, __predict_true() and _predict_false()
28#include <sys/mman.h> // mlockall()
29#include <sys/prctl.h>
30#include <sys/stat.h> // lstat()
31#include <sys/syscall.h> // __NR_getdents64
32#include <sys/sysinfo.h> // get_nprocs_conf()
33#include <sys/types.h>
34#include <time.h>
35#include <unistd.h>
36
37#include <chrono>
38#include <ios>
39#include <sstream>
40#include <string>
41#include <unordered_map>
42#include <unordered_set>
43
44#include <android-base/file.h>
45#include <android-base/logging.h>
46#include <android-base/parseint.h>
47#include <android-base/properties.h>
48#include <android-base/strings.h>
49#include <cutils/android_get_control_file.h>
50#include <log/log_main.h>
51
52#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
53
54#define TASK_COMM_LEN 16 // internal kernel, not uapi, from .../linux/include/linux/sched.h
55
56using namespace std::chrono_literals;
57using namespace std::chrono;
Mark Salyzyn52e54a62018-08-07 08:13:13 -070058using namespace std::literals;
Mark Salyzynf089e142018-02-20 10:47:40 -080059
60namespace {
61
// Well-known process ids: the kernel (0), init (1) and [kthreadd] (2).
constexpr pid_t kernelPid = 0;
constexpr pid_t initPid = 1;
constexpr pid_t kthreaddPid = 2;

// Path prefix (with trailing slash) used to compose per-task procfs paths.
constexpr char procdir[] = "/proc/";
67
// Configuration
milliseconds llkUpdate;                              // last check ms signature
milliseconds llkCycle;                               // ms to next thread check
bool llkEnable = LLK_ENABLE_DEFAULT;                 // llk daemon enabled
bool llkRunning = false;                             // thread is running
bool llkMlockall = LLK_MLOCKALL_DEFAULT;             // run mlocked
bool llkTestWithKill = LLK_KILLTEST_DEFAULT;         // issue test kills
milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;  // default timeout
enum {                                               // enum of state indexes
    llkStateD,                                       // Persistent 'D' state
    llkStateZ,                                       // Persistent 'Z' state
#ifdef __PTRACE_ENABLED__                            // Extra privileged states
    llkStateStack,                                   // stack signature
#endif                                               // End of extra privilege
    llkNumStates,                                    // Maximum number of states
};                                                   // state indexes
// One entry per state index above; sized by llkNumStates.
milliseconds llkStateTimeoutMs[llkNumStates];        // timeout override for each detection state
milliseconds llkCheckMs;                             // checking interval to inspect any
                                                     // persistent live-locked states
bool llkLowRam;                                      // ro.config.low_ram
bool llkEnableSysrqT = LLK_ENABLE_SYSRQ_T_DEFAULT;   // sysrq stack trace dump
bool khtEnable = LLK_ENABLE_DEFAULT;                 // [khungtaskd] panic
// [khungtaskd] should have a timeout beyond the granularity of llkTimeoutMs.
// Provides a wide angle of margin b/c khtTimeout is also its granularity.
// Computed as llkTimeoutMs scaled by (1 + N) / N, N = LLK_CHECKS_PER_TIMEOUT_DEFAULT.
seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
                                            LLK_CHECKS_PER_TIMEOUT_DEFAULT);
#ifdef __PTRACE_ENABLED__
// list of stack symbols to search for persistence.
std::unordered_set<std::string> llkCheckStackSymbols;
#endif
Mark Salyzynf089e142018-02-20 10:47:40 -080098
// Blacklist variables, initialized with comma separated lists of high false
// positive and/or dangerous references, e.g. without self restart, for pid,
// ppid, name and uid. Each set holds plain strings, so numeric ids are matched
// by their decimal text form.

// list of pids, or tids or names to skip. kernel pid (0), init pid (1),
// [kthreadd] pid (2), ourselves, "init", "[kthreadd]", "lmkd", "llkd" or
// combinations of watchdogd in kernel and user space.
std::unordered_set<std::string> llkBlacklistProcess;
// list of parent pids, comm or cmdline names to skip. default:
// kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied
std::unordered_set<std::string> llkBlacklistParent;
// list of uids, and uid names, to skip, default nothing
std::unordered_set<std::string> llkBlacklistUid;
#ifdef __PTRACE_ENABLED__
// list of names to skip stack checking. "init", "lmkd", "llkd", "keystore" or
// "logd" (if not userdebug).
std::unordered_set<std::string> llkBlacklistStack;
#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800117
// Minimal directory reader built directly on the getdents64 system call.
//
// An instance owns one directory file descriptor and hands out entries one at
// a time from a statically allocated buffer. The buffer is a static member
// indexed by `level`, so every instance reading at the same level shares the
// same storage; a pointer returned by read() refers into that storage.
class dir {
  public:
    // Selects which static buffer row is used for reading.
    enum level { proc, task, numLevels };

  private:
    int fd;                  // directory descriptor, or < 0 when not open
    size_t available_bytes;  // unread bytes left in the buffer from the last fill
    dirent* next;            // next unread record inside the buffer
    // each directory level picked to be just north of 4K in size
    static constexpr size_t buffEntries = 15;
    static dirent buff[numLevels][buffEntries];

    // Ensure there is at least one unread record for `index`.
    // Returns false for an out-of-range level, a closed descriptor, or when
    // getdents64 reports end of directory or an error (rc <= 0).
    bool fill(enum level index) {
        if (index >= numLevels) return false;
        if (available_bytes != 0) return true;
        if (__predict_false(fd < 0)) return false;
        // getdents64 has no libc wrapper
        auto rc = TEMP_FAILURE_RETRY(syscall(__NR_getdents64, fd, buff[index], sizeof(buff[0]), 0));
        if (rc <= 0) return false;
        available_bytes = rc;
        next = buff[index];
        return true;
    }

  public:
    // Closed reader; nothing is opened.
    dir() : fd(-1), available_bytes(0), next(nullptr) {}

    // Open `directory` read-only; a null pointer yields a closed reader.
    explicit dir(const char* directory)
        : fd(__predict_true(directory != nullptr)
                 ? ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY)
                 : -1),
          available_bytes(0),
          next(nullptr) {}

    explicit dir(const std::string&& directory)
        : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
          available_bytes(0),
          next(nullptr) {}

    explicit dir(const std::string& directory)
        : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
          available_bytes(0),
          next(nullptr) {}

    // Don't need any copy or move constructors.
    explicit dir(const dir& c) = delete;
    explicit dir(dir& c) = delete;
    explicit dir(dir&& c) = delete;

    // Closes the descriptor if one is held.
    ~dir() {
        if (fd >= 0) {
            ::close(fd);
        }
    }

    // True when a directory descriptor is currently held.
    operator bool() const { return fd >= 0; }

    // Close the descriptor (if any) and forget buffered entries.
    void reset(void) {
        if (fd >= 0) {
            ::close(fd);
            fd = -1;
            available_bytes = 0;
            next = nullptr;
        }
    }

    // Close whatever is open and open `directory` instead. Unlike the
    // constructor, `directory` is passed to open() without a null check.
    dir& reset(const char* directory) {
        reset();
        // available_bytes will _always_ be zero here as its value is
        // intimately tied to fd < 0 or not.
        fd = ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
        return *this;
    }

    // Seek back to the first entry and drop anything already buffered.
    void rewind(void) {
        if (fd >= 0) {
            ::lseek(fd, off_t(0), SEEK_SET);
            available_bytes = 0;
            next = nullptr;
        }
    }

    // Return the next entry for `index`, or `def` when fill() has nothing more
    // to offer. Records are variable length, so advance by d_reclen.
    dirent* read(enum level index = proc, dirent* def = nullptr) {
        if (!fill(index)) return def;
        auto ret = next;
        available_bytes -= next->d_reclen;
        next = reinterpret_cast<dirent*>(reinterpret_cast<char*>(next) + next->d_reclen);
        return ret;
    }
} llkTopDirectory;

// Storage for the per-level read buffers declared in class dir.
dirent dir::buff[dir::numLevels][dir::buffEntries];
210
211// helper functions
212
213bool llkIsMissingExeLink(pid_t tid) {
214 char c;
215 // CAP_SYS_PTRACE is required to prevent ret == -1, but ENOENT is signal
216 auto ret = ::readlink((procdir + std::to_string(tid) + "/exe").c_str(), &c, sizeof(c));
217 return (ret == -1) && (errno == ENOENT);
218}
219
220// Common routine where caller accepts empty content as error/passthrough.
221// Reduces the churn of reporting read errors in the callers.
222std::string ReadFile(std::string&& path) {
223 std::string content;
224 if (!android::base::ReadFileToString(path, &content)) {
225 PLOG(DEBUG) << "Read " << path << " failed";
226 content = "";
227 }
228 return content;
229}
230
231std::string llkProcGetName(pid_t tid, const char* node = "/cmdline") {
232 std::string content = ReadFile(procdir + std::to_string(tid) + node);
233 static constexpr char needles[] = " \t\r\n"; // including trailing nul
234 auto pos = content.find_first_of(needles, 0, sizeof(needles));
235 if (pos != std::string::npos) {
236 content.erase(pos);
237 }
238 return content;
239}
240
241uid_t llkProcGetUid(pid_t tid) {
242 // Get the process' uid. The following read from /status is admittedly
243 // racy, prone to corruption due to shape-changes. The consequences are
244 // not catastrophic as we sample a few times before taking action.
245 //
246 // If /loginuid worked on reliably, or on Android (all tasks report -1)...
247 // Android lmkd causes /cgroup to contain memory:/<dom>/uid_<uid>/pid_<pid>
248 // which is tighter, but also not reliable.
249 std::string content = ReadFile(procdir + std::to_string(tid) + "/status");
250 static constexpr char Uid[] = "\nUid:";
251 auto pos = content.find(Uid);
252 if (pos == std::string::npos) {
253 return -1;
254 }
255 pos += ::strlen(Uid);
256 while ((pos < content.size()) && ::isblank(content[pos])) {
257 ++pos;
258 }
259 content.erase(0, pos);
260 for (pos = 0; (pos < content.size()) && ::isdigit(content[pos]); ++pos) {
261 ;
262 }
263 // Content of form 'Uid: 0 0 0 0', newline is error
264 if ((pos >= content.size()) || !::isblank(content[pos])) {
265 return -1;
266 }
267 content.erase(pos);
268 uid_t ret;
Tom Cherrye0bc5a92018-10-05 14:29:47 -0700269 if (!android::base::ParseUint(content, &ret, uid_t(0))) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800270 return -1;
271 }
272 return ret;
273}
274
275struct proc {
276 pid_t tid; // monitored thread id (in Z or D state).
277 nanoseconds schedUpdate; // /proc/<tid>/sched "se.avg.lastUpdateTime",
278 uint64_t nrSwitches; // /proc/<tid>/sched "nr_switches" for
279 // refined ABA problem detection, determine
280 // forward scheduling progress.
281 milliseconds update; // llkUpdate millisecond signature of last.
282 milliseconds count; // duration in state.
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700283#ifdef __PTRACE_ENABLED__ // Privileged state checking
284 milliseconds count_stack; // duration where stack is stagnant.
285#endif // End privilege
Mark Salyzynf089e142018-02-20 10:47:40 -0800286 pid_t pid; // /proc/<pid> before iterating through
287 // /proc/<pid>/task/<tid> for threads.
288 pid_t ppid; // /proc/<tid>/stat field 4 parent pid.
289 uid_t uid; // /proc/<tid>/status Uid: field.
290 unsigned time; // sum of /proc/<tid>/stat field 14 utime &
291 // 15 stime for coarse ABA problem detection.
292 std::string cmdline; // cached /cmdline content
293 char state; // /proc/<tid>/stat field 3: Z or D
294 // (others we do not monitor: S, R, T or ?)
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700295#ifdef __PTRACE_ENABLED__ // Privileged state checking
296 char stack; // index in llkCheckStackSymbols for matches
297#endif // and with maximum index PROP_VALUE_MAX/2.
Mark Salyzynf089e142018-02-20 10:47:40 -0800298 char comm[TASK_COMM_LEN + 3]; // space for adding '[' and ']'
299 bool exeMissingValid; // exeMissing has been cached
300 bool cmdlineValid; // cmdline has been cached
301 bool updated; // cleared before monitoring pass.
302 bool killed; // sent a kill to this thread, next panic...
303
304 void setComm(const char* _comm) { strncpy(comm + 1, _comm, sizeof(comm) - 2); }
305
306 proc(pid_t tid, pid_t pid, pid_t ppid, const char* _comm, int time, char state)
307 : tid(tid),
308 schedUpdate(0),
309 nrSwitches(0),
310 update(llkUpdate),
Mark Salyzynacecaf72018-08-10 08:15:57 -0700311 count(0ms),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700312#ifdef __PTRACE_ENABLED__
313 count_stack(0ms),
314#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800315 pid(pid),
316 ppid(ppid),
317 uid(-1),
318 time(time),
319 state(state),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700320#ifdef __PTRACE_ENABLED__
321 stack(-1),
322#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800323 exeMissingValid(false),
324 cmdlineValid(false),
325 updated(true),
Mark Salyzynafd66f22018-03-19 15:16:29 -0700326 killed(!llkTestWithKill) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800327 memset(comm, '\0', sizeof(comm));
328 setComm(_comm);
329 }
330
331 const char* getComm(void) {
332 if (comm[1] == '\0') { // comm Valid?
333 strncpy(comm + 1, llkProcGetName(tid, "/comm").c_str(), sizeof(comm) - 2);
334 }
335 if (!exeMissingValid) {
336 if (llkIsMissingExeLink(tid)) {
337 comm[0] = '[';
338 }
339 exeMissingValid = true;
340 }
341 size_t len = strlen(comm + 1);
342 if (__predict_true(len < (sizeof(comm) - 1))) {
343 if (comm[0] == '[') {
344 if ((comm[len] != ']') && __predict_true(len < (sizeof(comm) - 2))) {
345 comm[++len] = ']';
346 comm[++len] = '\0';
347 }
348 } else {
349 if (comm[len] == ']') {
350 comm[len] = '\0';
351 }
352 }
353 }
354 return &comm[comm[0] != '['];
355 }
356
357 const char* getCmdline(void) {
358 if (!cmdlineValid) {
359 cmdline = llkProcGetName(tid);
360 cmdlineValid = true;
361 }
362 return cmdline.c_str();
363 }
364
365 uid_t getUid(void) {
366 if (uid <= 0) { // Churn on root user, because most likely to setuid()
367 uid = llkProcGetUid(tid);
368 }
369 return uid;
370 }
371
372 void reset(void) { // reset cache, if we detected pid rollover
373 uid = -1;
374 state = '?';
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700375#ifdef __PTRACE_ENABLED__
376 count_stack = 0ms;
377 stack = -1;
378#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800379 cmdline = "";
380 comm[0] = '\0';
381 exeMissingValid = false;
382 cmdlineValid = false;
383 }
384};
385
386std::unordered_map<pid_t, proc> tids;
387
388// Check range and setup defaults, in order of propagation:
389// llkTimeoutMs
390// llkCheckMs
391// ...
392// KISS to keep it all self-contained, and called multiple times as parameters
393// are interpreted so that defaults, llkCheckMs and llkCycle make sense.
394void llkValidate() {
395 if (llkTimeoutMs == 0ms) {
396 llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;
397 }
398 llkTimeoutMs = std::max(llkTimeoutMs, LLK_TIMEOUT_MS_MINIMUM);
399 if (llkCheckMs == 0ms) {
400 llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT;
401 }
402 llkCheckMs = std::min(llkCheckMs, llkTimeoutMs);
403
404 for (size_t state = 0; state < ARRAY_SIZE(llkStateTimeoutMs); ++state) {
405 if (llkStateTimeoutMs[state] == 0ms) {
406 llkStateTimeoutMs[state] = llkTimeoutMs;
407 }
408 llkStateTimeoutMs[state] =
409 std::min(std::max(llkStateTimeoutMs[state], LLK_TIMEOUT_MS_MINIMUM), llkTimeoutMs);
410 llkCheckMs = std::min(llkCheckMs, llkStateTimeoutMs[state]);
411 }
412
413 llkCheckMs = std::max(llkCheckMs, LLK_CHECK_MS_MINIMUM);
414 if (llkCycle == 0ms) {
415 llkCycle = llkCheckMs;
416 }
417 llkCycle = std::min(llkCycle, llkCheckMs);
418}
419
// Return (*to - *from) in milliseconds.
// The seconds and nanoseconds fields are differenced separately, and the
// nanosecond difference is truncated to whole milliseconds on its own.
milliseconds llkGetTimespecDiffMs(timespec* from, timespec* to) {
    const seconds wholePart(to->tv_sec - from->tv_sec);
    const nanoseconds fractionalPart(to->tv_nsec - from->tv_nsec);
    milliseconds diff = duration_cast<milliseconds>(wholePart);
    diff += duration_cast<milliseconds>(fractionalPart);
    return diff;
}
424
425std::string llkProcGetName(pid_t tid, const char* comm, const char* cmdline) {
426 if ((cmdline != nullptr) && (*cmdline != '\0')) {
427 return cmdline;
428 }
429 if ((comm != nullptr) && (*comm != '\0')) {
430 return comm;
431 }
432
433 // UNLIKELY! Here because killed before we kill it?
434 // Assume change is afoot, do not call llkTidAlloc
435
436 // cmdline ?
437 std::string content = llkProcGetName(tid);
438 if (content.size() != 0) {
439 return content;
440 }
441 // Comm instead?
442 content = llkProcGetName(tid, "/comm");
443 if (llkIsMissingExeLink(tid) && (content.size() != 0)) {
444 return '[' + content + ']';
445 }
446 return content;
447}
448
449int llkKillOneProcess(pid_t pid, char state, pid_t tid, const char* tcomm = nullptr,
450 const char* tcmdline = nullptr, const char* pcomm = nullptr,
451 const char* pcmdline = nullptr) {
452 std::string forTid;
453 if (tid != pid) {
454 forTid = " for '" + llkProcGetName(tid, tcomm, tcmdline) + "' (" + std::to_string(tid) + ")";
455 }
456 LOG(INFO) << "Killing '" << llkProcGetName(pid, pcomm, pcmdline) << "' (" << pid
457 << ") to check forward scheduling progress in " << state << " state" << forTid;
458 // CAP_KILL required
459 errno = 0;
460 auto r = ::kill(pid, SIGKILL);
461 if (r) {
462 PLOG(ERROR) << "kill(" << pid << ")=" << r << ' ';
463 }
464
465 return r;
466}
467
468// Kill one process
469int llkKillOneProcess(pid_t pid, proc* tprocp) {
470 return llkKillOneProcess(pid, tprocp->state, tprocp->tid, tprocp->getComm(),
471 tprocp->getCmdline());
472}
473
474// Kill one process specified by kprocp
475int llkKillOneProcess(proc* kprocp, proc* tprocp) {
476 if (kprocp == nullptr) {
477 return -2;
478 }
479
480 return llkKillOneProcess(kprocp->tid, tprocp->state, tprocp->tid, tprocp->getComm(),
481 tprocp->getCmdline(), kprocp->getComm(), kprocp->getCmdline());
482}
483
484// Acquire file descriptor from environment, or open and cache it.
485// NB: cache is unnecessary in our current context, pedantically
486// required to prevent leakage of file descriptors in the future.
487int llkFileToWriteFd(const std::string& file) {
488 static std::unordered_map<std::string, int> cache;
489 auto search = cache.find(file);
490 if (search != cache.end()) return search->second;
491 auto fd = android_get_control_file(file.c_str());
492 if (fd >= 0) return fd;
493 fd = TEMP_FAILURE_RETRY(::open(file.c_str(), O_WRONLY | O_CLOEXEC));
494 if (fd >= 0) cache.emplace(std::make_pair(file, fd));
495 return fd;
496}
497
498// Wrap android::base::WriteStringToFile to use android_get_control_file.
499bool llkWriteStringToFile(const std::string& string, const std::string& file) {
500 auto fd = llkFileToWriteFd(file);
501 if (fd < 0) return false;
502 return android::base::WriteStringToFd(string, fd);
503}
504
505bool llkWriteStringToFileConfirm(const std::string& string, const std::string& file) {
506 auto fd = llkFileToWriteFd(file);
507 auto ret = (fd < 0) ? false : android::base::WriteStringToFd(string, fd);
508 std::string content;
509 if (!android::base::ReadFileToString(file, &content)) return ret;
510 return android::base::Trim(content) == string;
511}
512
// Crash the kernel through /proc/sysrq-trigger.
//   dump     when true, request sysrq diagnostics before the crash
//   tid      thread that prompted the panic; the crash is skipped if its
//            /proc/<tid>/stat is no longer accessible
//   state    text placed after "livelock," in the message written to /dev/kmsg
//   message  optional text logged at ERROR level and prepended to that message
// If the sysrq trigger cannot be opened, init is killed and the process exits
// with status 42 instead.
void llkPanicKernel(bool dump, pid_t tid, const char* state, const std::string& message = "") {
    if (!message.empty()) LOG(ERROR) << message;
    auto sysrqTriggerFd = llkFileToWriteFd("/proc/sysrq-trigger");
    if (sysrqTriggerFd < 0) {
        // DYB
        llkKillOneProcess(initPid, 'R', tid);
        // The answer to life, the universe and everything
        ::exit(42);
        // NOTREACHED
        return;
    }
    // Flush filesystem buffers before anything drastic happens.
    ::sync();
    if (dump) {
        // Show all locks that are held
        android::base::WriteStringToFd("d", sysrqTriggerFd);
        // Show all waiting tasks
        android::base::WriteStringToFd("w", sysrqTriggerFd);
        // This can trigger hardware watchdog, that is somewhat _ok_.
        // But useless if pstore configured for <256KB, low ram devices ...
        if (llkEnableSysrqT) {
            android::base::WriteStringToFd("t", sysrqTriggerFd);
            // Show all locks that are held (in case 't' overflows ramoops)
            android::base::WriteStringToFd("d", sysrqTriggerFd);
            // Show all waiting tasks (in case 't' overflows ramoops)
            android::base::WriteStringToFd("w", sysrqTriggerFd);
        }
        ::usleep(200000);  // let everything settle
    }
    // SysRq message matches kernel format, and propagates through bootstat
    // ultimately to the boot reason into panic,livelock,<state>.
    llkWriteStringToFile(message + (message.empty() ? "" : "\n") +
                                 "SysRq : Trigger a crash : 'livelock,"s + state + "'\n",
                         "/dev/kmsg");
    // Because panic is such a serious thing to do, let us
    // make sure that the tid being inspected still exists!
    auto piddir = procdir + std::to_string(tid) + "/stat";
    if (access(piddir.c_str(), F_OK) != 0) {
        PLOG(WARNING) << piddir;
        return;
    }
    // Request the crash itself.
    android::base::WriteStringToFd("c", sysrqTriggerFd);
    // NOTREACHED
    // DYB
    // Fallback in case the crash request did not take effect.
    llkKillOneProcess(initPid, 'R', tid);
    // I sat at my desk, stared into the garden and thought '42 will do'.
    // I typed it out. End of story
    ::exit(42);
    // NOTREACHED
}
562
// Signal handler taking the signal number (unused); named for the alarm case.
// Logs a FATAL "alarm" message; should execution continue past that, it
// falls back to panicking the kernel with a dump, naming this process as the
// culprit and "alarm" as the state.
void llkAlarmHandler(int) {
    LOG(FATAL) << "alarm";
    // NOTREACHED
    llkPanicKernel(true, ::getpid(), "alarm");
}
568
569milliseconds GetUintProperty(const std::string& key, milliseconds def) {
570 return milliseconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
571 static_cast<uint64_t>(def.max().count())));
572}
573
574seconds GetUintProperty(const std::string& key, seconds def) {
575 return seconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
576 static_cast<uint64_t>(def.max().count())));
577}
578
579proc* llkTidLookup(pid_t tid) {
580 auto search = tids.find(tid);
581 if (search == tids.end()) {
582 return nullptr;
583 }
584 return &search->second;
585}
586
587void llkTidRemove(pid_t tid) {
588 tids.erase(tid);
589}
590
591proc* llkTidAlloc(pid_t tid, pid_t pid, pid_t ppid, const char* comm, int time, char state) {
592 auto it = tids.emplace(std::make_pair(tid, proc(tid, pid, ppid, comm, time, state)));
593 return &it.first->second;
594}
595
// Render a duration as "<seconds>.<milliseconds>s", with the millisecond
// part left-padded with zeros to at least three characters (1234ms -> "1.234s").
std::string llkFormat(milliseconds ms) {
    const auto whole = duration_cast<seconds>(ms);
    std::string fraction = std::to_string((ms - whole).count());
    if (fraction.size() < 3) {
        fraction.insert(0, 3 - fraction.size(), '0');
    }

    std::string text = std::to_string(whole.count());
    text += '.';
    text += fraction;
    text += 's';
    return text;
}
608
// Render a whole number of seconds as "<count>s".
std::string llkFormat(seconds s) {
    std::string text = std::to_string(s.count());
    text.push_back('s');
    return text;
}
612
// Render a flag as the text "true" or "false".
std::string llkFormat(bool flag) {
    if (flag) {
        return "true";
    }
    return "false";
}
616
// Render a set of names as a comma separated list, in the set's iteration
// order. A comma is only inserted once something has already been written.
std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
    std::string joined;
    for (auto it = blacklist.cbegin(); it != blacklist.cend(); ++it) {
        if (!joined.empty()) {
            joined.push_back(',');
        }
        joined.append(*it);
    }
    return joined;
}
627
// Split a list into its distinct, non-empty entries.
// We only officially support comma separators, but wetware being what they
// are will take some liberty and I do not believe they should be punished:
// space, tab and colon are accepted as separators too.
std::unordered_set<std::string> llkSplit(const std::string& s) {
    std::unordered_set<std::string> entries;

    // Special case, allow boolean false to empty the list, otherwise expected
    // source of input from android::base::GetProperty will supply the default
    // value on empty content in the property.
    if (s == "false") {
        return entries;
    }

    static constexpr char separators[] = ", \t:";
    for (size_t start = 0; start < s.size();) {
        const auto stop = s.find_first_of(separators, start);
        if (stop == std::string::npos) {
            // Last entry, runs to the end of the input.
            entries.emplace(s.substr(start));
            break;
        }
        // Only emplace content, empty entries are not an option
        if (stop > start) {
            entries.emplace(s.substr(start, stop - start));
        }
        start = stop + 1;
    }
    return entries;
}
648
649bool llkSkipName(const std::string& name,
650 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
651 if ((name.size() == 0) || (blacklist.size() == 0)) {
652 return false;
653 }
654
655 return blacklist.find(name) != blacklist.end();
656}
657
658bool llkSkipPid(pid_t pid) {
659 return llkSkipName(std::to_string(pid), llkBlacklistProcess);
660}
661
662bool llkSkipPpid(pid_t ppid) {
663 return llkSkipName(std::to_string(ppid), llkBlacklistParent);
664}
665
666bool llkSkipUid(uid_t uid) {
667 // Match by number?
668 if (llkSkipName(std::to_string(uid), llkBlacklistUid)) {
669 return true;
670 }
671
672 // Match by name?
673 auto pwd = ::getpwuid(uid);
674 return (pwd != nullptr) && __predict_true(pwd->pw_name != nullptr) &&
675 __predict_true(pwd->pw_name[0] != '\0') && llkSkipName(pwd->pw_name, llkBlacklistUid);
676}
677
678bool getValidTidDir(dirent* dp, std::string* piddir) {
679 if (!::isdigit(dp->d_name[0])) {
680 return false;
681 }
682
683 // Corner case can not happen in reality b/c of above ::isdigit check
684 if (__predict_false(dp->d_type != DT_DIR)) {
685 if (__predict_false(dp->d_type == DT_UNKNOWN)) { // can't b/c procfs
686 struct stat st;
687 *piddir = procdir;
688 *piddir += dp->d_name;
689 return (lstat(piddir->c_str(), &st) == 0) && (st.st_mode & S_IFDIR);
690 }
691 return false;
692 }
693
694 *piddir = procdir;
695 *piddir += dp->d_name;
696 return true;
697}
698
// Report whether `state` is one of the task states that get monitored:
// 'Z' or 'D'.
bool llkIsMonitorState(char state) {
    switch (state) {
        case 'D':
        case 'Z':
            return true;
        default:
            return false;
    }
}
702
703// returns -1 if not found
704long long getSchedValue(const std::string& schedString, const char* key) {
705 auto pos = schedString.find(key);
706 if (pos == std::string::npos) {
707 return -1;
708 }
709 pos = schedString.find(':', pos);
710 if (__predict_false(pos == std::string::npos)) {
711 return -1;
712 }
713 while ((++pos < schedString.size()) && ::isblank(schedString[pos])) {
714 ;
715 }
716 long long ret;
717 if (!android::base::ParseInt(schedString.substr(pos), &ret, static_cast<long long>(0))) {
718 return -1;
719 }
720 return ret;
721}
722
#ifdef __PTRACE_ENABLED__
// Check whether the kernel stack of `procp` keeps showing one of the symbols
// in llkCheckStackSymbols.
//   procp   record of the thread; its stack and count_stack fields are updated
//   piddir  procfs directory of the thread, "/stack" is read beneath it
// Returns true only once the same symbol has been matched for at least
// llkStateTimeoutMs[llkStateStack]; every other outcome returns false.
bool llkCheckStack(proc* procp, const std::string& piddir) {
    if (llkCheckStackSymbols.empty()) return false;
    if (procp->state == 'Z') {  // No brains for Zombies
        procp->stack = -1;
        procp->count_stack = 0ms;
        return false;
    }

    //  Don't check process that are known to block ptrace, save sepolicy noise.
    if (llkSkipName(std::to_string(procp->pid), llkBlacklistStack)) return false;
    if (llkSkipName(procp->getComm(), llkBlacklistStack)) return false;
    if (llkSkipName(procp->getCmdline(), llkBlacklistStack)) return false;
    if (llkSkipName(android::base::Basename(procp->getCmdline()), llkBlacklistStack)) return false;

    auto kernel_stack = ReadFile(piddir + "/stack");
    if (kernel_stack.empty()) {
        LOG(VERBOSE) << piddir << "/stack empty comm=" << procp->getComm()
                     << " cmdline=" << procp->getCmdline();
        return false;
    }
    // A scheduling incident that should not reset count_stack
    if (kernel_stack.find(" cpu_worker_pools+0x") != std::string::npos) return false;
    // idx walks the symbol set; match stays -1 unless a symbol is found.
    char idx = -1;
    char match = -1;
    std::string matched_stack_symbol = "<unknown>";
    for (const auto& stack : llkCheckStackSymbols) {
        // Stop if the index no longer fits the char used to store it.
        if (++idx < 0) break;
        // Symbols are looked for as " <symbol>+0x" or " <symbol>.cfi+0x".
        if ((kernel_stack.find(" "s + stack + "+0x") != std::string::npos) ||
            (kernel_stack.find(" "s + stack + ".cfi+0x") != std::string::npos)) {
            match = idx;
            matched_stack_symbol = stack;
            break;
        }
    }
    // A different outcome than last time restarts the stagnation timer.
    if (procp->stack != match) {
        procp->stack = match;
        procp->count_stack = 0ms;
        return false;
    }
    if (match == char(-1)) return false;
    procp->count_stack += llkCycle;
    if (procp->count_stack < llkStateTimeoutMs[llkStateStack]) return false;
    LOG(WARNING) << "Found " << matched_stack_symbol << " in stack for pid " << procp->pid;
    return true;
}
#endif
770
Mark Salyzynf089e142018-02-20 10:47:40 -0800771// Primary ABA mitigation watching last time schedule activity happened
772void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
773 // Audit finds /proc/<tid>/sched is just over 1K, and
774 // is rarely larger than 2K, even less on Android.
775 // For example, the "se.avg.lastUpdateTime" field we are
776 // interested in typically within the primary set in
777 // the first 1K.
778 //
779 // Proc entries can not be read >1K atomically via libbase,
780 // but if there are problems we assume at least a few
781 // samples of reads occur before we take any real action.
782 std::string schedString = ReadFile(piddir + "/sched");
783 if (schedString.size() == 0) {
784 // /schedstat is not as standardized, but in 3.1+
785 // Android devices, the third field is nr_switches
786 // from /sched:
787 schedString = ReadFile(piddir + "/schedstat");
788 if (schedString.size() == 0) {
789 return;
790 }
791 auto val = static_cast<unsigned long long>(-1);
792 if (((::sscanf(schedString.c_str(), "%*d %*d %llu", &val)) == 1) &&
793 (val != static_cast<unsigned long long>(-1)) && (val != 0) &&
794 (val != procp->nrSwitches)) {
795 procp->nrSwitches = val;
796 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700797 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800798 }
799 return;
800 }
801
802 auto val = getSchedValue(schedString, "\nse.avg.lastUpdateTime");
803 if (val == -1) {
804 val = getSchedValue(schedString, "\nse.svg.last_update_time");
805 }
806 if (val != -1) {
807 auto schedUpdate = nanoseconds(val);
808 if (schedUpdate != procp->schedUpdate) {
809 procp->schedUpdate = schedUpdate;
810 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700811 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800812 }
813 }
814
815 val = getSchedValue(schedString, "\nnr_switches");
816 if (val != -1) {
817 if (static_cast<uint64_t>(val) != procp->nrSwitches) {
818 procp->nrSwitches = val;
819 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700820 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800821 }
822 }
823}
824
// Log the effective configuration as a single INFO message, one
// "<property>=<value>" pair per line. Property names come from the
// *_PROPERTY macros, values are rendered with the llkFormat overloads.
// The stack related entries are only present with __PTRACE_ENABLED__.
void llkLogConfig(void) {
    LOG(INFO) << "ro.config.low_ram=" << llkFormat(llkLowRam) << "\n"
              << LLK_ENABLE_SYSRQ_T_PROPERTY "=" << llkFormat(llkEnableSysrqT) << "\n"
              << LLK_ENABLE_PROPERTY "=" << llkFormat(llkEnable) << "\n"
              << KHT_ENABLE_PROPERTY "=" << llkFormat(khtEnable) << "\n"
              << LLK_MLOCKALL_PROPERTY "=" << llkFormat(llkMlockall) << "\n"
              << LLK_KILLTEST_PROPERTY "=" << llkFormat(llkTestWithKill) << "\n"
              << KHT_TIMEOUT_PROPERTY "=" << llkFormat(khtTimeout) << "\n"
              << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n"
              << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n"
              << LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n"
#ifdef __PTRACE_ENABLED__
              << LLK_STACK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateStack])
              << "\n"
#endif
              << LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n"
#ifdef __PTRACE_ENABLED__
              << LLK_CHECK_STACK_PROPERTY "=" << llkFormat(llkCheckStackSymbols) << "\n"
              << LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
#endif
              << LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
              << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
              << LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
}
849
850void* llkThread(void* obj) {
Mark Salyzyn4832a8b2018-08-15 11:02:18 -0700851 prctl(PR_SET_DUMPABLE, 0);
852
Mark Salyzynf089e142018-02-20 10:47:40 -0800853 LOG(INFO) << "started";
854
855 std::string name = std::to_string(::gettid());
856 if (!llkSkipName(name)) {
857 llkBlacklistProcess.emplace(name);
858 }
859 name = static_cast<const char*>(obj);
860 prctl(PR_SET_NAME, name.c_str());
861 if (__predict_false(!llkSkipName(name))) {
862 llkBlacklistProcess.insert(name);
863 }
864 // No longer modifying llkBlacklistProcess.
865 llkRunning = true;
866 llkLogConfig();
867 while (llkRunning) {
868 ::usleep(duration_cast<microseconds>(llkCheck(true)).count());
869 }
870 // NOTREACHED
871 LOG(INFO) << "exiting";
872 return nullptr;
873}
874
875} // namespace
876
877milliseconds llkCheck(bool checkRunning) {
878 if (!llkEnable || (checkRunning != llkRunning)) {
879 return milliseconds::max();
880 }
881
882 // Reset internal watchdog, which is a healthy engineering margin of
883 // double the maximum wait or cycle time for the mainloop that calls us.
884 //
885 // This alarm is effectively the live lock detection of llkd, as
886 // we understandably can not monitor ourselves otherwise.
887 ::alarm(duration_cast<seconds>(llkTimeoutMs * 2).count());
888
889 // kernel jiffy precision fastest acquisition
890 static timespec last;
891 timespec now;
892 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
893 auto ms = llkGetTimespecDiffMs(&last, &now);
894 if (ms < llkCycle) {
895 return llkCycle - ms;
896 }
897 last = now;
898
899 LOG(VERBOSE) << "opendir(\"" << procdir << "\")";
900 if (__predict_false(!llkTopDirectory)) {
901 // gid containing AID_READPROC required
902 llkTopDirectory.reset(procdir);
903 if (__predict_false(!llkTopDirectory)) {
904 // Most likely reason we could be here is a resource limit.
905 // Keep our processing down to a minimum, but not so low that
906 // we do not recover in a timely manner should the issue be
907 // transitory.
908 LOG(DEBUG) << "opendir(\"" << procdir << "\") failed";
909 return llkTimeoutMs;
910 }
911 }
912
913 for (auto& it : tids) {
914 it.second.updated = false;
915 }
916
917 auto prevUpdate = llkUpdate;
918 llkUpdate += ms;
919 ms -= llkCycle;
920 auto myPid = ::getpid();
921 auto myTid = ::gettid();
Mark Salyzynfbc3a752018-12-04 10:30:45 -0800922 auto dump = true;
Mark Salyzynf089e142018-02-20 10:47:40 -0800923 for (auto dp = llkTopDirectory.read(); dp != nullptr; dp = llkTopDirectory.read()) {
924 std::string piddir;
925
926 if (!getValidTidDir(dp, &piddir)) {
927 continue;
928 }
929
930 // Get the process tasks
931 std::string taskdir = piddir + "/task/";
932 int pid = -1;
933 LOG(VERBOSE) << "+opendir(\"" << taskdir << "\")";
934 dir taskDirectory(taskdir);
935 if (__predict_false(!taskDirectory)) {
936 LOG(DEBUG) << "+opendir(\"" << taskdir << "\") failed";
937 }
938 for (auto tp = taskDirectory.read(dir::task, dp); tp != nullptr;
939 tp = taskDirectory.read(dir::task)) {
940 if (!getValidTidDir(tp, &piddir)) {
941 continue;
942 }
943
944 // Get the process stat
945 std::string stat = ReadFile(piddir + "/stat");
946 if (stat.size() == 0) {
947 continue;
948 }
949 unsigned tid = -1;
950 char pdir[TASK_COMM_LEN + 1];
951 char state = '?';
952 unsigned ppid = -1;
953 unsigned utime = -1;
954 unsigned stime = -1;
955 int dummy;
956 pdir[0] = '\0';
957 // tid should not change value
958 auto match = ::sscanf(
959 stat.c_str(),
960 "%u (%" ___STRING(
961 TASK_COMM_LEN) "[^)]) %c %u %*d %*d %*d %*d %*d %*d %*d %*d %*d %u %u %d",
962 &tid, pdir, &state, &ppid, &utime, &stime, &dummy);
963 if (pid == -1) {
964 pid = tid;
965 }
966 LOG(VERBOSE) << "match " << match << ' ' << tid << " (" << pdir << ") " << state << ' '
967 << ppid << " ... " << utime << ' ' << stime << ' ' << dummy;
968 if (match != 7) {
969 continue;
970 }
971
972 auto procp = llkTidLookup(tid);
973 if (procp == nullptr) {
974 procp = llkTidAlloc(tid, pid, ppid, pdir, utime + stime, state);
975 } else {
976 // comm can change ...
977 procp->setComm(pdir);
978 procp->updated = true;
979 // pid/ppid/tid wrap?
980 if (((procp->update != prevUpdate) && (procp->update != llkUpdate)) ||
981 (procp->ppid != ppid) || (procp->pid != pid)) {
982 procp->reset();
983 } else if (procp->time != (utime + stime)) { // secondary ABA.
984 // watching utime+stime granularity jiffy
985 procp->state = '?';
986 }
987 procp->update = llkUpdate;
988 procp->pid = pid;
989 procp->ppid = ppid;
990 procp->time = utime + stime;
991 if (procp->state != state) {
992 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700993 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800994 procp->state = state;
995 } else {
996 procp->count += llkCycle;
997 }
998 }
999
1000 // Filter checks in intuitive order of CPU cost to evaluate
1001 // If tid unique continue, if ppid or pid unique break
1002
1003 if (pid == myPid) {
1004 break;
1005 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001006#ifdef __PTRACE_ENABLED__
1007 // if no stack monitoring, we can quickly exit here
1008 if (!llkIsMonitorState(state) && llkCheckStackSymbols.empty()) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001009 continue;
1010 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001011#else
1012 if (!llkIsMonitorState(state)) continue;
1013#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001014 if ((tid == myTid) || llkSkipPid(tid)) {
1015 continue;
1016 }
1017 if (llkSkipPpid(ppid)) {
1018 break;
1019 }
1020
1021 if (llkSkipName(procp->getComm())) {
1022 continue;
1023 }
1024 if (llkSkipName(procp->getCmdline())) {
1025 break;
1026 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001027 if (llkSkipName(android::base::Basename(procp->getCmdline()))) {
1028 break;
1029 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001030
1031 auto pprocp = llkTidLookup(ppid);
1032 if (pprocp == nullptr) {
1033 pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
1034 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001035 if ((pprocp != nullptr) &&
1036 (llkSkipName(pprocp->getComm(), llkBlacklistParent) ||
1037 llkSkipName(pprocp->getCmdline(), llkBlacklistParent) ||
1038 llkSkipName(android::base::Basename(pprocp->getCmdline()), llkBlacklistParent))) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001039 break;
1040 }
1041
1042 if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) {
1043 continue;
1044 }
1045
1046 // ABA mitigation watching last time schedule activity happened
1047 llkCheckSchedUpdate(procp, piddir);
1048
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001049#ifdef __PTRACE_ENABLED__
1050 auto stuck = llkCheckStack(procp, piddir);
1051 if (llkIsMonitorState(state)) {
1052 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1053 stuck = true;
1054 } else if (procp->count != 0ms) {
1055 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1056 << pid << "->" << tid << ' ' << procp->getComm();
1057 }
1058 }
1059 if (!stuck) continue;
1060#else
1061 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1062 if (procp->count != 0ms) {
1063 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1064 << pid << "->" << tid << ' ' << procp->getComm();
1065 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001066 continue;
1067 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001068#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001069
1070 // We have to kill it to determine difference between live lock
1071 // and persistent state blocked on a resource. Is there something
1072 // wrong with a process that has no forward scheduling progress in
1073 // Z or D? Yes, generally means improper accounting in the
1074 // process, but not always ...
1075 //
1076 // Whomever we hit with a test kill must accept the Android
1077 // Aphorism that everything can be burned to the ground and
1078 // must survive.
1079 if (procp->killed == false) {
1080 procp->killed = true;
1081 // confirm: re-read uid before committing to a panic.
1082 procp->uid = -1;
1083 switch (state) {
1084 case 'Z': // kill ppid to free up a Zombie
1085 // Killing init will kernel panic without diagnostics
1086 // so skip right to controlled kernel panic with
1087 // diagnostics.
1088 if (ppid == initPid) {
1089 break;
1090 }
1091 LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
1092 << pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
1093 if ((llkKillOneProcess(pprocp, procp) >= 0) ||
1094 (llkKillOneProcess(ppid, procp) >= 0)) {
1095 continue;
1096 }
1097 break;
1098
1099 case 'D': // kill tid to free up an uninterruptible D
1100 // If ABA is doing its job, we would not need or
1101 // want the following. Test kill is a Hail Mary
1102 // to make absolutely sure there is no forward
1103 // scheduling progress. The cost when ABA is
1104 // not working is we kill a process that likes to
1105 // stay in 'D' state, instead of panicing the
1106 // kernel (worse).
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001107 default:
1108 LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
1109 << "->" << tid << ' ' << procp->getComm() << " [kill]";
Mark Salyzynf089e142018-02-20 10:47:40 -08001110 if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001111 (llkKillOneProcess(pid, state, tid) >= 0) ||
Mark Salyzynf089e142018-02-20 10:47:40 -08001112 (llkKillOneProcess(procp, procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001113 (llkKillOneProcess(tid, state, tid) >= 0)) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001114 continue;
1115 }
1116 break;
1117 }
1118 }
1119 // We are here because we have confirmed kernel live-lock
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -07001120 const auto message = state + " "s + llkFormat(procp->count) + " " +
1121 std::to_string(ppid) + "->" + std::to_string(pid) + "->" +
1122 std::to_string(tid) + " " + procp->getComm() + " [panic]";
Mark Salyzynfbc3a752018-12-04 10:30:45 -08001123 llkPanicKernel(dump, tid,
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -07001124 (state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping",
1125 message);
Mark Salyzynfbc3a752018-12-04 10:30:45 -08001126 dump = false;
Mark Salyzynf089e142018-02-20 10:47:40 -08001127 }
1128 LOG(VERBOSE) << "+closedir()";
1129 }
1130 llkTopDirectory.rewind();
1131 LOG(VERBOSE) << "closedir()";
1132
1133 // garbage collection of old process references
1134 for (auto p = tids.begin(); p != tids.end();) {
1135 if (!p->second.updated) {
1136 IF_ALOG(LOG_VERBOSE, LOG_TAG) {
1137 std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr);
1138 if (ppidCmdline.size()) {
1139 ppidCmdline = "(" + ppidCmdline + ")";
1140 }
1141 std::string pidCmdline;
1142 if (p->second.pid != p->second.tid) {
1143 pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline());
1144 if (pidCmdline.size()) {
1145 pidCmdline = "(" + pidCmdline + ")";
1146 }
1147 }
1148 std::string tidCmdline =
1149 llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline());
1150 if (tidCmdline.size()) {
1151 tidCmdline = "(" + tidCmdline + ")";
1152 }
1153 LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid
1154 << pidCmdline << "->" << p->second.tid << tidCmdline << " removed";
1155 }
1156 p = tids.erase(p);
1157 } else {
1158 ++p;
1159 }
1160 }
1161 if (__predict_false(tids.empty())) {
1162 llkTopDirectory.reset();
1163 }
1164
1165 llkCycle = llkCheckMs;
1166
1167 timespec end;
1168 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &end);
1169 auto milli = llkGetTimespecDiffMs(&now, &end);
1170 LOG((milli > 10s) ? ERROR : (milli > 1s) ? WARNING : VERBOSE) << "sample " << llkFormat(milli);
1171
1172 // cap to minimum sleep for 1 second since last cycle
1173 if (llkCycle < (ms + 1s)) {
1174 return 1s;
1175 }
1176 return llkCycle - ms;
1177}
1178
1179unsigned llkCheckMilliseconds() {
1180 return duration_cast<milliseconds>(llkCheck()).count();
1181}
1182
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001183bool llkCheckEng(const std::string& property) {
1184 return android::base::GetProperty(property, "eng") == "eng";
1185}
1186
Mark Salyzynf089e142018-02-20 10:47:40 -08001187bool llkInit(const char* threadname) {
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001188 auto debuggable = android::base::GetBoolProperty("ro.debuggable", false);
Mark Salyzynf089e142018-02-20 10:47:40 -08001189 llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false);
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001190 llkEnableSysrqT &= !llkLowRam;
1191 if (debuggable) {
1192 llkEnableSysrqT |= llkCheckEng(LLK_ENABLE_SYSRQ_T_PROPERTY);
1193 if (!LLK_ENABLE_DEFAULT) { // NB: default is currently true ...
1194 llkEnable |= llkCheckEng(LLK_ENABLE_PROPERTY);
1195 khtEnable |= llkCheckEng(KHT_ENABLE_PROPERTY);
1196 }
Mark Salyzynd035dbb2018-03-26 08:23:00 -07001197 }
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001198 llkEnableSysrqT = android::base::GetBoolProperty(LLK_ENABLE_SYSRQ_T_PROPERTY, llkEnableSysrqT);
Mark Salyzynf089e142018-02-20 10:47:40 -08001199 llkEnable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, llkEnable);
1200 if (llkEnable && !llkTopDirectory.reset(procdir)) {
1201 // Most likely reason we could be here is llkd was started
1202 // incorrectly without the readproc permissions. Keep our
1203 // processing down to a minimum.
1204 llkEnable = false;
1205 }
1206 khtEnable = android::base::GetBoolProperty(KHT_ENABLE_PROPERTY, khtEnable);
1207 llkMlockall = android::base::GetBoolProperty(LLK_MLOCKALL_PROPERTY, llkMlockall);
Mark Salyzynafd66f22018-03-19 15:16:29 -07001208 llkTestWithKill = android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, llkTestWithKill);
Mark Salyzynf089e142018-02-20 10:47:40 -08001209 // if LLK_TIMOUT_MS_PROPERTY was not set, we will use a set
1210 // KHT_TIMEOUT_PROPERTY as co-operative guidance for the default value.
1211 khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout);
1212 if (khtTimeout == 0s) {
1213 khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
1214 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1215 }
1216 llkTimeoutMs =
1217 khtTimeout * LLK_CHECKS_PER_TIMEOUT_DEFAULT / (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1218 llkTimeoutMs = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1219 llkValidate(); // validate llkTimeoutMs, llkCheckMs and llkCycle
1220 llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1221 llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001222#ifdef __PTRACE_ENABLED__
1223 llkStateTimeoutMs[llkStateStack] = GetUintProperty(LLK_STACK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1224#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001225 llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs);
1226 llkValidate(); // validate all (effectively minus llkTimeoutMs)
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001227#ifdef __PTRACE_ENABLED__
1228 if (debuggable) {
1229 llkCheckStackSymbols = llkSplit(
1230 android::base::GetProperty(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT));
1231 }
1232 std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT);
1233 if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd";
1234 llkBlacklistStack = llkSplit(
1235 android::base::GetProperty(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack));
1236#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001237 std::string defaultBlacklistProcess(
1238 std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
1239 std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," +
1240 std::to_string(::gettid()) + "," LLK_BLACKLIST_PROCESS_DEFAULT);
1241 if (threadname) {
Mark Salyzyn52e54a62018-08-07 08:13:13 -07001242 defaultBlacklistProcess += ","s + threadname;
Mark Salyzynf089e142018-02-20 10:47:40 -08001243 }
1244 for (int cpu = 1; cpu < get_nprocs_conf(); ++cpu) {
1245 defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]";
1246 }
1247 defaultBlacklistProcess =
1248 android::base::GetProperty(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
1249 llkBlacklistProcess = llkSplit(defaultBlacklistProcess);
1250 if (!llkSkipName("[khungtaskd]")) { // ALWAYS ignore as special
1251 llkBlacklistProcess.emplace("[khungtaskd]");
1252 }
1253 llkBlacklistParent = llkSplit(android::base::GetProperty(
1254 LLK_BLACKLIST_PARENT_PROPERTY, std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
1255 "," LLK_BLACKLIST_PARENT_DEFAULT));
1256 llkBlacklistUid =
1257 llkSplit(android::base::GetProperty(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT));
1258
1259 // internal watchdog
1260 ::signal(SIGALRM, llkAlarmHandler);
1261
1262 // kernel hung task configuration? Otherwise leave it as-is
1263 if (khtEnable) {
1264 // EUID must be AID_ROOT to write to /proc/sys/kernel/ nodes, there
1265 // are no capability overrides. For security reasons we do not want
1266 // to run as AID_ROOT. We may not be able to write them successfully,
1267 // we will try, but the least we can do is read the values back to
1268 // confirm expectations and report whether configured or not.
1269 auto configured = llkWriteStringToFileConfirm(std::to_string(khtTimeout.count()),
1270 "/proc/sys/kernel/hung_task_timeout_secs");
1271 if (configured) {
1272 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_warnings");
1273 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_check_count");
1274 configured = llkWriteStringToFileConfirm("1", "/proc/sys/kernel/hung_task_panic");
1275 }
1276 if (configured) {
1277 LOG(INFO) << "[khungtaskd] configured";
1278 } else {
1279 LOG(WARNING) << "[khungtaskd] not configurable";
1280 }
1281 }
1282
1283 bool logConfig = true;
1284 if (llkEnable) {
1285 if (llkMlockall &&
1286 // MCL_ONFAULT pins pages as they fault instead of loading
1287 // everything immediately all at once. (Which would be bad,
1288 // because as of this writing, we have a lot of mapped pages we
1289 // never use.) Old kernels will see MCL_ONFAULT and fail with
1290 // EINVAL; we ignore this failure.
1291 //
1292 // N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
1293 // pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
1294 // in pages.
1295
1296 // CAP_IPC_LOCK required
1297 mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
1298 PLOG(WARNING) << "mlockall failed ";
1299 }
1300
1301 if (threadname) {
1302 pthread_attr_t attr;
1303
1304 if (!pthread_attr_init(&attr)) {
1305 sched_param param;
1306
1307 memset(&param, 0, sizeof(param));
1308 pthread_attr_setschedparam(&attr, &param);
1309 pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
1310 if (!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
1311 pthread_t thread;
1312 if (!pthread_create(&thread, &attr, llkThread, const_cast<char*>(threadname))) {
1313 // wait a second for thread to start
1314 for (auto retry = 50; retry && !llkRunning; --retry) {
1315 ::usleep(20000);
1316 }
1317 logConfig = !llkRunning; // printed in llkd context?
1318 } else {
1319 LOG(ERROR) << "failed to spawn llkd thread";
1320 }
1321 } else {
1322 LOG(ERROR) << "failed to detach llkd thread";
1323 }
1324 pthread_attr_destroy(&attr);
1325 } else {
1326 LOG(ERROR) << "failed to allocate attibutes for llkd thread";
1327 }
1328 }
1329 } else {
1330 LOG(DEBUG) << "[khungtaskd] left unconfigured";
1331 }
1332 if (logConfig) {
1333 llkLogConfig();
1334 }
1335
1336 return llkEnable;
1337}