blob: d92c0cd573ff5987b0dd514692b5aea86cf45fd5 [file] [log] [blame]
Mark Salyzynf089e142018-02-20 10:47:40 -08001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "llkd.h"
18
19#include <ctype.h>
20#include <dirent.h> // opendir() and readdir()
21#include <errno.h>
22#include <fcntl.h>
23#include <pthread.h>
24#include <pwd.h> // getpwuid()
25#include <signal.h>
26#include <stdint.h>
27#include <sys/cdefs.h> // ___STRING, __predict_true() and _predict_false()
28#include <sys/mman.h> // mlockall()
29#include <sys/prctl.h>
30#include <sys/stat.h> // lstat()
31#include <sys/syscall.h> // __NR_getdents64
32#include <sys/sysinfo.h> // get_nprocs_conf()
33#include <sys/types.h>
34#include <time.h>
35#include <unistd.h>
36
37#include <chrono>
38#include <ios>
39#include <sstream>
40#include <string>
41#include <unordered_map>
42#include <unordered_set>
43
44#include <android-base/file.h>
45#include <android-base/logging.h>
46#include <android-base/parseint.h>
47#include <android-base/properties.h>
48#include <android-base/strings.h>
49#include <cutils/android_get_control_file.h>
50#include <log/log_main.h>
51
// Number of elements in a true array; divides the array's byte size by the
// byte size of its first element.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

// internal kernel, not uapi, from .../linux/include/linux/sched.h
#define TASK_COMM_LEN 16

// Bring the chrono types (milliseconds, seconds, ...) and the 0ms / ""s
// literal suffixes into file scope.
using namespace std::chrono_literals;
using namespace std::chrono;
using namespace std::literals;
Mark Salyzynf089e142018-02-20 10:47:40 -080059
60namespace {
61
// Well known process ids.
constexpr pid_t kernelPid{0};
constexpr pid_t initPid{1};
constexpr pid_t kthreaddPid{2};

// Root of the process file system, with trailing slash so that a pid can be
// appended directly.
constexpr char procdir[] = "/proc/";
67
68// Configuration
69milliseconds llkUpdate; // last check ms signature
70milliseconds llkCycle; // ms to next thread check
71bool llkEnable = LLK_ENABLE_DEFAULT; // llk daemon enabled
72bool llkRunning = false; // thread is running
73bool llkMlockall = LLK_MLOCKALL_DEFAULT; // run mlocked
Mark Salyzynafd66f22018-03-19 15:16:29 -070074bool llkTestWithKill = LLK_KILLTEST_DEFAULT; // issue test kills
Mark Salyzynf089e142018-02-20 10:47:40 -080075milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT; // default timeout
Mark Salyzyn96505fa2018-08-07 08:13:13 -070076enum { // enum of state indexes
77 llkStateD, // Persistent 'D' state
78 llkStateZ, // Persistent 'Z' state
79#ifdef __PTRACE_ENABLED__ // Extra privileged states
80 llkStateStack, // stack signature
81#endif // End of extra privilege
82 llkNumStates, // Maxumum number of states
83}; // state indexes
Mark Salyzynf089e142018-02-20 10:47:40 -080084milliseconds llkStateTimeoutMs[llkNumStates]; // timeout override for each detection state
85milliseconds llkCheckMs; // checking interval to inspect any
86 // persistent live-locked states
87bool llkLowRam; // ro.config.low_ram
88bool khtEnable = LLK_ENABLE_DEFAULT; // [khungtaskd] panic
89// [khungtaskd] should have a timeout beyond the granularity of llkTimeoutMs.
90// Provides a wide angle of margin b/c khtTimeout is also its granularity.
91seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
92 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
Mark Salyzyn96505fa2018-08-07 08:13:13 -070093#ifdef __PTRACE_ENABLED__
94// list of stack symbols to search for persistence.
95std::unordered_set<std::string> llkCheckStackSymbols;
96#endif
Mark Salyzynf089e142018-02-20 10:47:40 -080097
// Blacklist variables.  Each is initialized from a comma separated list of
// references that are high false positive and/or dangerous (e.g. without
// self restart), keyed by pid, ppid, name or uid.

// Processes to skip, by pid, tid or name: kernel pid (0), init pid (1),
// [kthreadd] pid (2), ourselves, "init", "[kthreadd]", "lmkd", "llkd" or
// combinations of watchdogd in kernel and user space.
std::unordered_set<std::string> llkBlacklistProcess;

// Parents to skip, by pid, comm or cmdline name.  default:
// kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied
std::unordered_set<std::string> llkBlacklistParent;

// Users to skip, by uid number or uid name.  default nothing
std::unordered_set<std::string> llkBlacklistUid;

#ifdef __PTRACE_ENABLED__
// Names to skip stack checking for: "init", "lmkd", "llkd", "keystore" or
// "logd" (if not userdebug).
std::unordered_set<std::string> llkBlacklistStack;
#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800116
117class dir {
118 public:
119 enum level { proc, task, numLevels };
120
121 private:
122 int fd;
123 size_t available_bytes;
124 dirent* next;
125 // each directory level picked to be just north of 4K in size
126 static constexpr size_t buffEntries = 15;
127 static dirent buff[numLevels][buffEntries];
128
129 bool fill(enum level index) {
130 if (index >= numLevels) return false;
131 if (available_bytes != 0) return true;
132 if (__predict_false(fd < 0)) return false;
133 // getdents64 has no libc wrapper
134 auto rc = TEMP_FAILURE_RETRY(syscall(__NR_getdents64, fd, buff[index], sizeof(buff[0]), 0));
135 if (rc <= 0) return false;
136 available_bytes = rc;
137 next = buff[index];
138 return true;
139 }
140
141 public:
142 dir() : fd(-1), available_bytes(0), next(nullptr) {}
143
144 explicit dir(const char* directory)
145 : fd(__predict_true(directory != nullptr)
146 ? ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY)
147 : -1),
148 available_bytes(0),
149 next(nullptr) {}
150
151 explicit dir(const std::string&& directory)
152 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
153 available_bytes(0),
154 next(nullptr) {}
155
156 explicit dir(const std::string& directory)
157 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
158 available_bytes(0),
159 next(nullptr) {}
160
161 // Don't need any copy or move constructors.
162 explicit dir(const dir& c) = delete;
163 explicit dir(dir& c) = delete;
164 explicit dir(dir&& c) = delete;
165
166 ~dir() {
167 if (fd >= 0) {
168 ::close(fd);
169 }
170 }
171
172 operator bool() const { return fd >= 0; }
173
174 void reset(void) {
175 if (fd >= 0) {
176 ::close(fd);
177 fd = -1;
178 available_bytes = 0;
179 next = nullptr;
180 }
181 }
182
183 dir& reset(const char* directory) {
184 reset();
185 // available_bytes will _always_ be zero here as its value is
186 // intimately tied to fd < 0 or not.
187 fd = ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
188 return *this;
189 }
190
191 void rewind(void) {
192 if (fd >= 0) {
193 ::lseek(fd, off_t(0), SEEK_SET);
194 available_bytes = 0;
195 next = nullptr;
196 }
197 }
198
199 dirent* read(enum level index = proc, dirent* def = nullptr) {
200 if (!fill(index)) return def;
201 auto ret = next;
202 available_bytes -= next->d_reclen;
203 next = reinterpret_cast<dirent*>(reinterpret_cast<char*>(next) + next->d_reclen);
204 return ret;
205 }
206} llkTopDirectory;
207
208dirent dir::buff[dir::numLevels][dir::buffEntries];
209
210// helper functions
211
212bool llkIsMissingExeLink(pid_t tid) {
213 char c;
214 // CAP_SYS_PTRACE is required to prevent ret == -1, but ENOENT is signal
215 auto ret = ::readlink((procdir + std::to_string(tid) + "/exe").c_str(), &c, sizeof(c));
216 return (ret == -1) && (errno == ENOENT);
217}
218
219// Common routine where caller accepts empty content as error/passthrough.
220// Reduces the churn of reporting read errors in the callers.
221std::string ReadFile(std::string&& path) {
222 std::string content;
223 if (!android::base::ReadFileToString(path, &content)) {
224 PLOG(DEBUG) << "Read " << path << " failed";
225 content = "";
226 }
227 return content;
228}
229
230std::string llkProcGetName(pid_t tid, const char* node = "/cmdline") {
231 std::string content = ReadFile(procdir + std::to_string(tid) + node);
232 static constexpr char needles[] = " \t\r\n"; // including trailing nul
233 auto pos = content.find_first_of(needles, 0, sizeof(needles));
234 if (pos != std::string::npos) {
235 content.erase(pos);
236 }
237 return content;
238}
239
240uid_t llkProcGetUid(pid_t tid) {
241 // Get the process' uid. The following read from /status is admittedly
242 // racy, prone to corruption due to shape-changes. The consequences are
243 // not catastrophic as we sample a few times before taking action.
244 //
245 // If /loginuid worked on reliably, or on Android (all tasks report -1)...
246 // Android lmkd causes /cgroup to contain memory:/<dom>/uid_<uid>/pid_<pid>
247 // which is tighter, but also not reliable.
248 std::string content = ReadFile(procdir + std::to_string(tid) + "/status");
249 static constexpr char Uid[] = "\nUid:";
250 auto pos = content.find(Uid);
251 if (pos == std::string::npos) {
252 return -1;
253 }
254 pos += ::strlen(Uid);
255 while ((pos < content.size()) && ::isblank(content[pos])) {
256 ++pos;
257 }
258 content.erase(0, pos);
259 for (pos = 0; (pos < content.size()) && ::isdigit(content[pos]); ++pos) {
260 ;
261 }
262 // Content of form 'Uid: 0 0 0 0', newline is error
263 if ((pos >= content.size()) || !::isblank(content[pos])) {
264 return -1;
265 }
266 content.erase(pos);
267 uid_t ret;
Tom Cherrye0bc5a92018-10-05 14:29:47 -0700268 if (!android::base::ParseUint(content, &ret, uid_t(0))) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800269 return -1;
270 }
271 return ret;
272}
273
274struct proc {
275 pid_t tid; // monitored thread id (in Z or D state).
276 nanoseconds schedUpdate; // /proc/<tid>/sched "se.avg.lastUpdateTime",
277 uint64_t nrSwitches; // /proc/<tid>/sched "nr_switches" for
278 // refined ABA problem detection, determine
279 // forward scheduling progress.
280 milliseconds update; // llkUpdate millisecond signature of last.
281 milliseconds count; // duration in state.
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700282#ifdef __PTRACE_ENABLED__ // Privileged state checking
283 milliseconds count_stack; // duration where stack is stagnant.
284#endif // End privilege
Mark Salyzynf089e142018-02-20 10:47:40 -0800285 pid_t pid; // /proc/<pid> before iterating through
286 // /proc/<pid>/task/<tid> for threads.
287 pid_t ppid; // /proc/<tid>/stat field 4 parent pid.
288 uid_t uid; // /proc/<tid>/status Uid: field.
289 unsigned time; // sum of /proc/<tid>/stat field 14 utime &
290 // 15 stime for coarse ABA problem detection.
291 std::string cmdline; // cached /cmdline content
292 char state; // /proc/<tid>/stat field 3: Z or D
293 // (others we do not monitor: S, R, T or ?)
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700294#ifdef __PTRACE_ENABLED__ // Privileged state checking
295 char stack; // index in llkCheckStackSymbols for matches
296#endif // and with maximum index PROP_VALUE_MAX/2.
Mark Salyzynf089e142018-02-20 10:47:40 -0800297 char comm[TASK_COMM_LEN + 3]; // space for adding '[' and ']'
298 bool exeMissingValid; // exeMissing has been cached
299 bool cmdlineValid; // cmdline has been cached
300 bool updated; // cleared before monitoring pass.
301 bool killed; // sent a kill to this thread, next panic...
302
303 void setComm(const char* _comm) { strncpy(comm + 1, _comm, sizeof(comm) - 2); }
304
305 proc(pid_t tid, pid_t pid, pid_t ppid, const char* _comm, int time, char state)
306 : tid(tid),
307 schedUpdate(0),
308 nrSwitches(0),
309 update(llkUpdate),
Mark Salyzynacecaf72018-08-10 08:15:57 -0700310 count(0ms),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700311#ifdef __PTRACE_ENABLED__
312 count_stack(0ms),
313#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800314 pid(pid),
315 ppid(ppid),
316 uid(-1),
317 time(time),
318 state(state),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700319#ifdef __PTRACE_ENABLED__
320 stack(-1),
321#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800322 exeMissingValid(false),
323 cmdlineValid(false),
324 updated(true),
Mark Salyzynafd66f22018-03-19 15:16:29 -0700325 killed(!llkTestWithKill) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800326 memset(comm, '\0', sizeof(comm));
327 setComm(_comm);
328 }
329
330 const char* getComm(void) {
331 if (comm[1] == '\0') { // comm Valid?
332 strncpy(comm + 1, llkProcGetName(tid, "/comm").c_str(), sizeof(comm) - 2);
333 }
334 if (!exeMissingValid) {
335 if (llkIsMissingExeLink(tid)) {
336 comm[0] = '[';
337 }
338 exeMissingValid = true;
339 }
340 size_t len = strlen(comm + 1);
341 if (__predict_true(len < (sizeof(comm) - 1))) {
342 if (comm[0] == '[') {
343 if ((comm[len] != ']') && __predict_true(len < (sizeof(comm) - 2))) {
344 comm[++len] = ']';
345 comm[++len] = '\0';
346 }
347 } else {
348 if (comm[len] == ']') {
349 comm[len] = '\0';
350 }
351 }
352 }
353 return &comm[comm[0] != '['];
354 }
355
356 const char* getCmdline(void) {
357 if (!cmdlineValid) {
358 cmdline = llkProcGetName(tid);
359 cmdlineValid = true;
360 }
361 return cmdline.c_str();
362 }
363
364 uid_t getUid(void) {
365 if (uid <= 0) { // Churn on root user, because most likely to setuid()
366 uid = llkProcGetUid(tid);
367 }
368 return uid;
369 }
370
371 void reset(void) { // reset cache, if we detected pid rollover
372 uid = -1;
373 state = '?';
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700374#ifdef __PTRACE_ENABLED__
375 count_stack = 0ms;
376 stack = -1;
377#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800378 cmdline = "";
379 comm[0] = '\0';
380 exeMissingValid = false;
381 cmdlineValid = false;
382 }
383};
384
385std::unordered_map<pid_t, proc> tids;
386
387// Check range and setup defaults, in order of propagation:
388// llkTimeoutMs
389// llkCheckMs
390// ...
391// KISS to keep it all self-contained, and called multiple times as parameters
392// are interpreted so that defaults, llkCheckMs and llkCycle make sense.
393void llkValidate() {
394 if (llkTimeoutMs == 0ms) {
395 llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;
396 }
397 llkTimeoutMs = std::max(llkTimeoutMs, LLK_TIMEOUT_MS_MINIMUM);
398 if (llkCheckMs == 0ms) {
399 llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT;
400 }
401 llkCheckMs = std::min(llkCheckMs, llkTimeoutMs);
402
403 for (size_t state = 0; state < ARRAY_SIZE(llkStateTimeoutMs); ++state) {
404 if (llkStateTimeoutMs[state] == 0ms) {
405 llkStateTimeoutMs[state] = llkTimeoutMs;
406 }
407 llkStateTimeoutMs[state] =
408 std::min(std::max(llkStateTimeoutMs[state], LLK_TIMEOUT_MS_MINIMUM), llkTimeoutMs);
409 llkCheckMs = std::min(llkCheckMs, llkStateTimeoutMs[state]);
410 }
411
412 llkCheckMs = std::max(llkCheckMs, LLK_CHECK_MS_MINIMUM);
413 if (llkCycle == 0ms) {
414 llkCycle = llkCheckMs;
415 }
416 llkCycle = std::min(llkCycle, llkCheckMs);
417}
418
// Return (to - from) in milliseconds.  The seconds and the nanoseconds
// differences are each converted to milliseconds before being summed.
milliseconds llkGetTimespecDiffMs(timespec* from, timespec* to) {
    const seconds secondsPart(to->tv_sec - from->tv_sec);
    const nanoseconds nanosecondsPart(to->tv_nsec - from->tv_nsec);
    return duration_cast<milliseconds>(secondsPart) + duration_cast<milliseconds>(nanosecondsPart);
}
423
424std::string llkProcGetName(pid_t tid, const char* comm, const char* cmdline) {
425 if ((cmdline != nullptr) && (*cmdline != '\0')) {
426 return cmdline;
427 }
428 if ((comm != nullptr) && (*comm != '\0')) {
429 return comm;
430 }
431
432 // UNLIKELY! Here because killed before we kill it?
433 // Assume change is afoot, do not call llkTidAlloc
434
435 // cmdline ?
436 std::string content = llkProcGetName(tid);
437 if (content.size() != 0) {
438 return content;
439 }
440 // Comm instead?
441 content = llkProcGetName(tid, "/comm");
442 if (llkIsMissingExeLink(tid) && (content.size() != 0)) {
443 return '[' + content + ']';
444 }
445 return content;
446}
447
448int llkKillOneProcess(pid_t pid, char state, pid_t tid, const char* tcomm = nullptr,
449 const char* tcmdline = nullptr, const char* pcomm = nullptr,
450 const char* pcmdline = nullptr) {
451 std::string forTid;
452 if (tid != pid) {
453 forTid = " for '" + llkProcGetName(tid, tcomm, tcmdline) + "' (" + std::to_string(tid) + ")";
454 }
455 LOG(INFO) << "Killing '" << llkProcGetName(pid, pcomm, pcmdline) << "' (" << pid
456 << ") to check forward scheduling progress in " << state << " state" << forTid;
457 // CAP_KILL required
458 errno = 0;
459 auto r = ::kill(pid, SIGKILL);
460 if (r) {
461 PLOG(ERROR) << "kill(" << pid << ")=" << r << ' ';
462 }
463
464 return r;
465}
466
467// Kill one process
468int llkKillOneProcess(pid_t pid, proc* tprocp) {
469 return llkKillOneProcess(pid, tprocp->state, tprocp->tid, tprocp->getComm(),
470 tprocp->getCmdline());
471}
472
473// Kill one process specified by kprocp
474int llkKillOneProcess(proc* kprocp, proc* tprocp) {
475 if (kprocp == nullptr) {
476 return -2;
477 }
478
479 return llkKillOneProcess(kprocp->tid, tprocp->state, tprocp->tid, tprocp->getComm(),
480 tprocp->getCmdline(), kprocp->getComm(), kprocp->getCmdline());
481}
482
483// Acquire file descriptor from environment, or open and cache it.
484// NB: cache is unnecessary in our current context, pedantically
485// required to prevent leakage of file descriptors in the future.
486int llkFileToWriteFd(const std::string& file) {
487 static std::unordered_map<std::string, int> cache;
488 auto search = cache.find(file);
489 if (search != cache.end()) return search->second;
490 auto fd = android_get_control_file(file.c_str());
491 if (fd >= 0) return fd;
492 fd = TEMP_FAILURE_RETRY(::open(file.c_str(), O_WRONLY | O_CLOEXEC));
493 if (fd >= 0) cache.emplace(std::make_pair(file, fd));
494 return fd;
495}
496
497// Wrap android::base::WriteStringToFile to use android_get_control_file.
498bool llkWriteStringToFile(const std::string& string, const std::string& file) {
499 auto fd = llkFileToWriteFd(file);
500 if (fd < 0) return false;
501 return android::base::WriteStringToFd(string, fd);
502}
503
504bool llkWriteStringToFileConfirm(const std::string& string, const std::string& file) {
505 auto fd = llkFileToWriteFd(file);
506 auto ret = (fd < 0) ? false : android::base::WriteStringToFd(string, fd);
507 std::string content;
508 if (!android::base::ReadFileToString(file, &content)) return ret;
509 return android::base::Trim(content) == string;
510}
511
Mark Salyzynafd66f22018-03-19 15:16:29 -0700512void llkPanicKernel(bool dump, pid_t tid, const char* state) __noreturn;
513void llkPanicKernel(bool dump, pid_t tid, const char* state) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800514 auto sysrqTriggerFd = llkFileToWriteFd("/proc/sysrq-trigger");
515 if (sysrqTriggerFd < 0) {
516 // DYB
517 llkKillOneProcess(initPid, 'R', tid);
518 // The answer to life, the universe and everything
519 ::exit(42);
520 // NOTREACHED
521 }
522 ::sync();
523 if (dump) {
524 // Show all locks that are held
525 android::base::WriteStringToFd("d", sysrqTriggerFd);
526 // This can trigger hardware watchdog, that is somewhat _ok_.
527 // But useless if pstore configured for <256KB, low ram devices ...
528 if (!llkLowRam) {
529 android::base::WriteStringToFd("t", sysrqTriggerFd);
530 }
531 ::usleep(200000); // let everything settle
532 }
Mark Salyzyn52e54a62018-08-07 08:13:13 -0700533 llkWriteStringToFile("SysRq : Trigger a crash : 'livelock,"s + state + "'\n", "/dev/kmsg");
Mark Salyzynf089e142018-02-20 10:47:40 -0800534 android::base::WriteStringToFd("c", sysrqTriggerFd);
535 // NOTREACHED
536 // DYB
537 llkKillOneProcess(initPid, 'R', tid);
538 // I sat at my desk, stared into the garden and thought '42 will do'.
539 // I typed it out. End of story
540 ::exit(42);
541 // NOTREACHED
542}
543
544void llkAlarmHandler(int) {
Mark Salyzynafd66f22018-03-19 15:16:29 -0700545 llkPanicKernel(false, ::getpid(), "alarm");
Mark Salyzynf089e142018-02-20 10:47:40 -0800546}
547
548milliseconds GetUintProperty(const std::string& key, milliseconds def) {
549 return milliseconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
550 static_cast<uint64_t>(def.max().count())));
551}
552
553seconds GetUintProperty(const std::string& key, seconds def) {
554 return seconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
555 static_cast<uint64_t>(def.max().count())));
556}
557
558proc* llkTidLookup(pid_t tid) {
559 auto search = tids.find(tid);
560 if (search == tids.end()) {
561 return nullptr;
562 }
563 return &search->second;
564}
565
566void llkTidRemove(pid_t tid) {
567 tids.erase(tid);
568}
569
570proc* llkTidAlloc(pid_t tid, pid_t pid, pid_t ppid, const char* comm, int time, char state) {
571 auto it = tids.emplace(std::make_pair(tid, proc(tid, pid, ppid, comm, time, state)));
572 return &it.first->second;
573}
574
// Format a millisecond duration as "<seconds>.<milliseconds>s", with the
// millisecond part zero padded to three places.
std::string llkFormat(milliseconds ms) {
    const auto whole = duration_cast<seconds>(ms);
    const auto fraction = ms - whole;

    std::ostringstream out;
    out << whole.count() << '.';
    // Pad the fraction, then put the stream formatting back as it was.
    const auto savedFill = out.fill('0');
    const auto savedWidth = out.width(3);
    out << std::right << fraction.count();
    out.width(savedWidth);
    out.fill(savedFill);
    out << 's';
    return out.str();
}
587
// Format a seconds duration as "<count>s".
std::string llkFormat(seconds s) {
    std::string text = std::to_string(s.count());
    text.push_back('s');
    return text;
}
591
// Format a flag as "true" or "false".
std::string llkFormat(bool flag) {
    if (flag) return "true";
    return "false";
}
595
// Format a set of names as a comma separated list, in the set's iteration
// order.  An empty set yields an empty string.
std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
    std::string ret;
    // Iterate by reference; there is no need to copy each entry.
    for (const auto& entry : blacklist) {
        if (!ret.empty()) {
            ret += ',';
        }
        ret += entry;
    }
    return ret;
}
606
// Split a list into the set of its non-empty entries.
// We only officially support comma separators, but wetware being what they
// are will take some liberty and I do not believe they should be punished;
// space, tab and colon separate entries as well.
std::unordered_set<std::string> llkSplit(const std::string& s) {
    static constexpr char separators[] = ", \t:";
    std::unordered_set<std::string> entries;

    // Special case, allow boolean false to empty the list, otherwise expected
    // source of input from android::base::GetProperty will supply the default
    // value on empty content in the property.
    if (s == "false") return entries;

    for (size_t start = 0; start < s.size();) {
        const auto stop = s.find_first_of(separators, start);
        if (stop == std::string::npos) {
            // Last entry runs to the end of the string.
            entries.emplace(s.substr(start));
            break;
        }
        // Only emplace content, empty entries are not an option
        if (stop != start) entries.emplace(s.substr(start, stop - start));
        start = stop + 1;
    }
    return entries;
}
627
628bool llkSkipName(const std::string& name,
629 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
630 if ((name.size() == 0) || (blacklist.size() == 0)) {
631 return false;
632 }
633
634 return blacklist.find(name) != blacklist.end();
635}
636
637bool llkSkipPid(pid_t pid) {
638 return llkSkipName(std::to_string(pid), llkBlacklistProcess);
639}
640
641bool llkSkipPpid(pid_t ppid) {
642 return llkSkipName(std::to_string(ppid), llkBlacklistParent);
643}
644
645bool llkSkipUid(uid_t uid) {
646 // Match by number?
647 if (llkSkipName(std::to_string(uid), llkBlacklistUid)) {
648 return true;
649 }
650
651 // Match by name?
652 auto pwd = ::getpwuid(uid);
653 return (pwd != nullptr) && __predict_true(pwd->pw_name != nullptr) &&
654 __predict_true(pwd->pw_name[0] != '\0') && llkSkipName(pwd->pw_name, llkBlacklistUid);
655}
656
657bool getValidTidDir(dirent* dp, std::string* piddir) {
658 if (!::isdigit(dp->d_name[0])) {
659 return false;
660 }
661
662 // Corner case can not happen in reality b/c of above ::isdigit check
663 if (__predict_false(dp->d_type != DT_DIR)) {
664 if (__predict_false(dp->d_type == DT_UNKNOWN)) { // can't b/c procfs
665 struct stat st;
666 *piddir = procdir;
667 *piddir += dp->d_name;
668 return (lstat(piddir->c_str(), &st) == 0) && (st.st_mode & S_IFDIR);
669 }
670 return false;
671 }
672
673 *piddir = procdir;
674 *piddir += dp->d_name;
675 return true;
676}
677
// True for the task states that are monitored: 'D' and 'Z'.
bool llkIsMonitorState(char state) {
    switch (state) {
        case 'D':
        case 'Z':
            return true;
        default:
            return false;
    }
}
681
682// returns -1 if not found
683long long getSchedValue(const std::string& schedString, const char* key) {
684 auto pos = schedString.find(key);
685 if (pos == std::string::npos) {
686 return -1;
687 }
688 pos = schedString.find(':', pos);
689 if (__predict_false(pos == std::string::npos)) {
690 return -1;
691 }
692 while ((++pos < schedString.size()) && ::isblank(schedString[pos])) {
693 ;
694 }
695 long long ret;
696 if (!android::base::ParseInt(schedString.substr(pos), &ret, static_cast<long long>(0))) {
697 return -1;
698 }
699 return ret;
700}
701
#ifdef __PTRACE_ENABLED__
// Look for one of llkCheckStackSymbols in the thread's kernel stack and track
// how long the same symbol has been matching.  Returns true once the match
// has persisted for the stack state timeout.
bool llkCheckStack(proc* procp, const std::string& piddir) {
    if (llkCheckStackSymbols.empty()) return false;

    if (procp->state == 'Z') {  // No brains for Zombies
        procp->stack = -1;
        procp->count_stack = 0ms;
        return false;
    }

    // Don't check process that are known to block ptrace, save sepolicy noise.
    const bool exempt =
        llkSkipName(std::to_string(procp->pid), llkBlacklistStack) ||
        llkSkipName(procp->getComm(), llkBlacklistStack) ||
        llkSkipName(procp->getCmdline(), llkBlacklistStack) ||
        llkSkipName(android::base::Basename(procp->getCmdline()), llkBlacklistStack);
    if (exempt) return false;

    const auto kernel_stack = ReadFile(piddir + "/stack");
    if (kernel_stack.empty()) {
        LOG(INFO) << piddir << "/stack empty comm=" << procp->getComm()
                  << " cmdline=" << procp->getCmdline();
        return false;
    }
    // A scheduling incident that should not reset count_stack
    if (kernel_stack.find(" cpu_worker_pools+0x") != std::string::npos) return false;

    // Position, in iteration order, of the first symbol found in the stack.
    char idx = -1;
    char match = -1;
    for (const auto& symbol : llkCheckStackSymbols) {
        if (++idx < 0) break;
        const bool plain = kernel_stack.find(" "s + symbol + "+0x") != std::string::npos;
        if (!plain && (kernel_stack.find(" "s + symbol + ".cfi+0x") == std::string::npos)) {
            continue;
        }
        match = idx;
        break;
    }

    // A different result than last time restarts the clock.
    if (procp->stack != match) {
        procp->stack = match;
        procp->count_stack = 0ms;
        return false;
    }
    if (match == char(-1)) return false;

    procp->count_stack += llkCycle;
    return procp->count_stack >= llkStateTimeoutMs[llkStateStack];
}
#endif
745
Mark Salyzynf089e142018-02-20 10:47:40 -0800746// Primary ABA mitigation watching last time schedule activity happened
747void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
748 // Audit finds /proc/<tid>/sched is just over 1K, and
749 // is rarely larger than 2K, even less on Android.
750 // For example, the "se.avg.lastUpdateTime" field we are
751 // interested in typically within the primary set in
752 // the first 1K.
753 //
754 // Proc entries can not be read >1K atomically via libbase,
755 // but if there are problems we assume at least a few
756 // samples of reads occur before we take any real action.
757 std::string schedString = ReadFile(piddir + "/sched");
758 if (schedString.size() == 0) {
759 // /schedstat is not as standardized, but in 3.1+
760 // Android devices, the third field is nr_switches
761 // from /sched:
762 schedString = ReadFile(piddir + "/schedstat");
763 if (schedString.size() == 0) {
764 return;
765 }
766 auto val = static_cast<unsigned long long>(-1);
767 if (((::sscanf(schedString.c_str(), "%*d %*d %llu", &val)) == 1) &&
768 (val != static_cast<unsigned long long>(-1)) && (val != 0) &&
769 (val != procp->nrSwitches)) {
770 procp->nrSwitches = val;
771 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700772 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800773 }
774 return;
775 }
776
777 auto val = getSchedValue(schedString, "\nse.avg.lastUpdateTime");
778 if (val == -1) {
779 val = getSchedValue(schedString, "\nse.svg.last_update_time");
780 }
781 if (val != -1) {
782 auto schedUpdate = nanoseconds(val);
783 if (schedUpdate != procp->schedUpdate) {
784 procp->schedUpdate = schedUpdate;
785 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700786 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800787 }
788 }
789
790 val = getSchedValue(schedString, "\nnr_switches");
791 if (val != -1) {
792 if (static_cast<uint64_t>(val) != procp->nrSwitches) {
793 procp->nrSwitches = val;
794 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700795 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800796 }
797 }
798}
799
800void llkLogConfig(void) {
801 LOG(INFO) << "ro.config.low_ram=" << llkFormat(llkLowRam) << "\n"
802 << LLK_ENABLE_PROPERTY "=" << llkFormat(llkEnable) << "\n"
803 << KHT_ENABLE_PROPERTY "=" << llkFormat(khtEnable) << "\n"
804 << LLK_MLOCKALL_PROPERTY "=" << llkFormat(llkMlockall) << "\n"
Mark Salyzynafd66f22018-03-19 15:16:29 -0700805 << LLK_KILLTEST_PROPERTY "=" << llkFormat(llkTestWithKill) << "\n"
Mark Salyzynf089e142018-02-20 10:47:40 -0800806 << KHT_TIMEOUT_PROPERTY "=" << llkFormat(khtTimeout) << "\n"
807 << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n"
808 << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n"
809 << LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n"
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700810#ifdef __PTRACE_ENABLED__
811 << LLK_STACK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateStack])
812 << "\n"
813#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800814 << LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n"
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700815#ifdef __PTRACE_ENABLED__
816 << LLK_CHECK_STACK_PROPERTY "=" << llkFormat(llkCheckStackSymbols) << "\n"
817 << LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
818#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800819 << LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
820 << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
821 << LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
822}
823
824void* llkThread(void* obj) {
Mark Salyzyn4832a8b2018-08-15 11:02:18 -0700825 prctl(PR_SET_DUMPABLE, 0);
826
Mark Salyzynf089e142018-02-20 10:47:40 -0800827 LOG(INFO) << "started";
828
829 std::string name = std::to_string(::gettid());
830 if (!llkSkipName(name)) {
831 llkBlacklistProcess.emplace(name);
832 }
833 name = static_cast<const char*>(obj);
834 prctl(PR_SET_NAME, name.c_str());
835 if (__predict_false(!llkSkipName(name))) {
836 llkBlacklistProcess.insert(name);
837 }
838 // No longer modifying llkBlacklistProcess.
839 llkRunning = true;
840 llkLogConfig();
841 while (llkRunning) {
842 ::usleep(duration_cast<microseconds>(llkCheck(true)).count());
843 }
844 // NOTREACHED
845 LOG(INFO) << "exiting";
846 return nullptr;
847}
848
849} // namespace
850
851milliseconds llkCheck(bool checkRunning) {
852 if (!llkEnable || (checkRunning != llkRunning)) {
853 return milliseconds::max();
854 }
855
856 // Reset internal watchdog, which is a healthy engineering margin of
857 // double the maximum wait or cycle time for the mainloop that calls us.
858 //
859 // This alarm is effectively the live lock detection of llkd, as
860 // we understandably can not monitor ourselves otherwise.
861 ::alarm(duration_cast<seconds>(llkTimeoutMs * 2).count());
862
863 // kernel jiffy precision fastest acquisition
864 static timespec last;
865 timespec now;
866 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
867 auto ms = llkGetTimespecDiffMs(&last, &now);
868 if (ms < llkCycle) {
869 return llkCycle - ms;
870 }
871 last = now;
872
873 LOG(VERBOSE) << "opendir(\"" << procdir << "\")";
874 if (__predict_false(!llkTopDirectory)) {
875 // gid containing AID_READPROC required
876 llkTopDirectory.reset(procdir);
877 if (__predict_false(!llkTopDirectory)) {
878 // Most likely reason we could be here is a resource limit.
879 // Keep our processing down to a minimum, but not so low that
880 // we do not recover in a timely manner should the issue be
881 // transitory.
882 LOG(DEBUG) << "opendir(\"" << procdir << "\") failed";
883 return llkTimeoutMs;
884 }
885 }
886
887 for (auto& it : tids) {
888 it.second.updated = false;
889 }
890
891 auto prevUpdate = llkUpdate;
892 llkUpdate += ms;
893 ms -= llkCycle;
894 auto myPid = ::getpid();
895 auto myTid = ::gettid();
896 for (auto dp = llkTopDirectory.read(); dp != nullptr; dp = llkTopDirectory.read()) {
897 std::string piddir;
898
899 if (!getValidTidDir(dp, &piddir)) {
900 continue;
901 }
902
903 // Get the process tasks
904 std::string taskdir = piddir + "/task/";
905 int pid = -1;
906 LOG(VERBOSE) << "+opendir(\"" << taskdir << "\")";
907 dir taskDirectory(taskdir);
908 if (__predict_false(!taskDirectory)) {
909 LOG(DEBUG) << "+opendir(\"" << taskdir << "\") failed";
910 }
911 for (auto tp = taskDirectory.read(dir::task, dp); tp != nullptr;
912 tp = taskDirectory.read(dir::task)) {
913 if (!getValidTidDir(tp, &piddir)) {
914 continue;
915 }
916
917 // Get the process stat
918 std::string stat = ReadFile(piddir + "/stat");
919 if (stat.size() == 0) {
920 continue;
921 }
922 unsigned tid = -1;
923 char pdir[TASK_COMM_LEN + 1];
924 char state = '?';
925 unsigned ppid = -1;
926 unsigned utime = -1;
927 unsigned stime = -1;
928 int dummy;
929 pdir[0] = '\0';
930 // tid should not change value
931 auto match = ::sscanf(
932 stat.c_str(),
933 "%u (%" ___STRING(
934 TASK_COMM_LEN) "[^)]) %c %u %*d %*d %*d %*d %*d %*d %*d %*d %*d %u %u %d",
935 &tid, pdir, &state, &ppid, &utime, &stime, &dummy);
936 if (pid == -1) {
937 pid = tid;
938 }
939 LOG(VERBOSE) << "match " << match << ' ' << tid << " (" << pdir << ") " << state << ' '
940 << ppid << " ... " << utime << ' ' << stime << ' ' << dummy;
941 if (match != 7) {
942 continue;
943 }
944
945 auto procp = llkTidLookup(tid);
946 if (procp == nullptr) {
947 procp = llkTidAlloc(tid, pid, ppid, pdir, utime + stime, state);
948 } else {
949 // comm can change ...
950 procp->setComm(pdir);
951 procp->updated = true;
952 // pid/ppid/tid wrap?
953 if (((procp->update != prevUpdate) && (procp->update != llkUpdate)) ||
954 (procp->ppid != ppid) || (procp->pid != pid)) {
955 procp->reset();
956 } else if (procp->time != (utime + stime)) { // secondary ABA.
957 // watching utime+stime granularity jiffy
958 procp->state = '?';
959 }
960 procp->update = llkUpdate;
961 procp->pid = pid;
962 procp->ppid = ppid;
963 procp->time = utime + stime;
964 if (procp->state != state) {
965 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700966 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800967 procp->state = state;
968 } else {
969 procp->count += llkCycle;
970 }
971 }
972
973 // Filter checks in intuitive order of CPU cost to evaluate
974 // If tid unique continue, if ppid or pid unique break
975
976 if (pid == myPid) {
977 break;
978 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700979#ifdef __PTRACE_ENABLED__
980 // if no stack monitoring, we can quickly exit here
981 if (!llkIsMonitorState(state) && llkCheckStackSymbols.empty()) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800982 continue;
983 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700984#else
985 if (!llkIsMonitorState(state)) continue;
986#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800987 if ((tid == myTid) || llkSkipPid(tid)) {
988 continue;
989 }
990 if (llkSkipPpid(ppid)) {
991 break;
992 }
993
994 if (llkSkipName(procp->getComm())) {
995 continue;
996 }
997 if (llkSkipName(procp->getCmdline())) {
998 break;
999 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001000 if (llkSkipName(android::base::Basename(procp->getCmdline()))) {
1001 break;
1002 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001003
1004 auto pprocp = llkTidLookup(ppid);
1005 if (pprocp == nullptr) {
1006 pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
1007 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001008 if ((pprocp != nullptr) &&
1009 (llkSkipName(pprocp->getComm(), llkBlacklistParent) ||
1010 llkSkipName(pprocp->getCmdline(), llkBlacklistParent) ||
1011 llkSkipName(android::base::Basename(pprocp->getCmdline()), llkBlacklistParent))) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001012 break;
1013 }
1014
1015 if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) {
1016 continue;
1017 }
1018
1019 // ABA mitigation watching last time schedule activity happened
1020 llkCheckSchedUpdate(procp, piddir);
1021
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001022#ifdef __PTRACE_ENABLED__
1023 auto stuck = llkCheckStack(procp, piddir);
1024 if (llkIsMonitorState(state)) {
1025 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1026 stuck = true;
1027 } else if (procp->count != 0ms) {
1028 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1029 << pid << "->" << tid << ' ' << procp->getComm();
1030 }
1031 }
1032 if (!stuck) continue;
1033#else
1034 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1035 if (procp->count != 0ms) {
1036 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1037 << pid << "->" << tid << ' ' << procp->getComm();
1038 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001039 continue;
1040 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001041#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001042
1043 // We have to kill it to determine difference between live lock
1044 // and persistent state blocked on a resource. Is there something
1045 // wrong with a process that has no forward scheduling progress in
1046 // Z or D? Yes, generally means improper accounting in the
1047 // process, but not always ...
1048 //
1049 // Whomever we hit with a test kill must accept the Android
1050 // Aphorism that everything can be burned to the ground and
1051 // must survive.
1052 if (procp->killed == false) {
1053 procp->killed = true;
1054 // confirm: re-read uid before committing to a panic.
1055 procp->uid = -1;
1056 switch (state) {
1057 case 'Z': // kill ppid to free up a Zombie
1058 // Killing init will kernel panic without diagnostics
1059 // so skip right to controlled kernel panic with
1060 // diagnostics.
1061 if (ppid == initPid) {
1062 break;
1063 }
1064 LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
1065 << pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
1066 if ((llkKillOneProcess(pprocp, procp) >= 0) ||
1067 (llkKillOneProcess(ppid, procp) >= 0)) {
1068 continue;
1069 }
1070 break;
1071
1072 case 'D': // kill tid to free up an uninterruptible D
1073 // If ABA is doing its job, we would not need or
1074 // want the following. Test kill is a Hail Mary
1075 // to make absolutely sure there is no forward
1076 // scheduling progress. The cost when ABA is
1077 // not working is we kill a process that likes to
1078 // stay in 'D' state, instead of panicing the
1079 // kernel (worse).
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001080 default:
1081 LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
1082 << "->" << tid << ' ' << procp->getComm() << " [kill]";
Mark Salyzynf089e142018-02-20 10:47:40 -08001083 if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001084 (llkKillOneProcess(pid, state, tid) >= 0) ||
Mark Salyzynf089e142018-02-20 10:47:40 -08001085 (llkKillOneProcess(procp, procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001086 (llkKillOneProcess(tid, state, tid) >= 0)) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001087 continue;
1088 }
1089 break;
1090 }
1091 }
1092 // We are here because we have confirmed kernel live-lock
1093 LOG(ERROR) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" << pid
1094 << "->" << tid << ' ' << procp->getComm() << " [panic]";
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001095 llkPanicKernel(true, tid,
1096 (state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping");
Mark Salyzynf089e142018-02-20 10:47:40 -08001097 }
1098 LOG(VERBOSE) << "+closedir()";
1099 }
1100 llkTopDirectory.rewind();
1101 LOG(VERBOSE) << "closedir()";
1102
1103 // garbage collection of old process references
1104 for (auto p = tids.begin(); p != tids.end();) {
1105 if (!p->second.updated) {
1106 IF_ALOG(LOG_VERBOSE, LOG_TAG) {
1107 std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr);
1108 if (ppidCmdline.size()) {
1109 ppidCmdline = "(" + ppidCmdline + ")";
1110 }
1111 std::string pidCmdline;
1112 if (p->second.pid != p->second.tid) {
1113 pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline());
1114 if (pidCmdline.size()) {
1115 pidCmdline = "(" + pidCmdline + ")";
1116 }
1117 }
1118 std::string tidCmdline =
1119 llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline());
1120 if (tidCmdline.size()) {
1121 tidCmdline = "(" + tidCmdline + ")";
1122 }
1123 LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid
1124 << pidCmdline << "->" << p->second.tid << tidCmdline << " removed";
1125 }
1126 p = tids.erase(p);
1127 } else {
1128 ++p;
1129 }
1130 }
1131 if (__predict_false(tids.empty())) {
1132 llkTopDirectory.reset();
1133 }
1134
1135 llkCycle = llkCheckMs;
1136
1137 timespec end;
1138 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &end);
1139 auto milli = llkGetTimespecDiffMs(&now, &end);
1140 LOG((milli > 10s) ? ERROR : (milli > 1s) ? WARNING : VERBOSE) << "sample " << llkFormat(milli);
1141
1142 // cap to minimum sleep for 1 second since last cycle
1143 if (llkCycle < (ms + 1s)) {
1144 return 1s;
1145 }
1146 return llkCycle - ms;
1147}
1148
1149unsigned llkCheckMilliseconds() {
1150 return duration_cast<milliseconds>(llkCheck()).count();
1151}
1152
1153bool llkInit(const char* threadname) {
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001154 auto debuggable = android::base::GetBoolProperty("ro.debuggable", false);
Mark Salyzynf089e142018-02-20 10:47:40 -08001155 llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false);
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001156 if (!LLK_ENABLE_DEFAULT && debuggable) {
Mark Salyzynd035dbb2018-03-26 08:23:00 -07001157 llkEnable = android::base::GetProperty(LLK_ENABLE_PROPERTY, "eng") == "eng";
1158 khtEnable = android::base::GetProperty(KHT_ENABLE_PROPERTY, "eng") == "eng";
1159 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001160 llkEnable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, llkEnable);
1161 if (llkEnable && !llkTopDirectory.reset(procdir)) {
1162 // Most likely reason we could be here is llkd was started
1163 // incorrectly without the readproc permissions. Keep our
1164 // processing down to a minimum.
1165 llkEnable = false;
1166 }
1167 khtEnable = android::base::GetBoolProperty(KHT_ENABLE_PROPERTY, khtEnable);
1168 llkMlockall = android::base::GetBoolProperty(LLK_MLOCKALL_PROPERTY, llkMlockall);
Mark Salyzynafd66f22018-03-19 15:16:29 -07001169 llkTestWithKill = android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, llkTestWithKill);
Mark Salyzynf089e142018-02-20 10:47:40 -08001170 // if LLK_TIMOUT_MS_PROPERTY was not set, we will use a set
1171 // KHT_TIMEOUT_PROPERTY as co-operative guidance for the default value.
1172 khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout);
1173 if (khtTimeout == 0s) {
1174 khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
1175 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1176 }
1177 llkTimeoutMs =
1178 khtTimeout * LLK_CHECKS_PER_TIMEOUT_DEFAULT / (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1179 llkTimeoutMs = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1180 llkValidate(); // validate llkTimeoutMs, llkCheckMs and llkCycle
1181 llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1182 llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001183#ifdef __PTRACE_ENABLED__
1184 llkStateTimeoutMs[llkStateStack] = GetUintProperty(LLK_STACK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1185#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001186 llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs);
1187 llkValidate(); // validate all (effectively minus llkTimeoutMs)
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001188#ifdef __PTRACE_ENABLED__
1189 if (debuggable) {
1190 llkCheckStackSymbols = llkSplit(
1191 android::base::GetProperty(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT));
1192 }
1193 std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT);
1194 if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd";
1195 llkBlacklistStack = llkSplit(
1196 android::base::GetProperty(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack));
1197#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001198 std::string defaultBlacklistProcess(
1199 std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
1200 std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," +
1201 std::to_string(::gettid()) + "," LLK_BLACKLIST_PROCESS_DEFAULT);
1202 if (threadname) {
Mark Salyzyn52e54a62018-08-07 08:13:13 -07001203 defaultBlacklistProcess += ","s + threadname;
Mark Salyzynf089e142018-02-20 10:47:40 -08001204 }
1205 for (int cpu = 1; cpu < get_nprocs_conf(); ++cpu) {
1206 defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]";
1207 }
1208 defaultBlacklistProcess =
1209 android::base::GetProperty(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
1210 llkBlacklistProcess = llkSplit(defaultBlacklistProcess);
1211 if (!llkSkipName("[khungtaskd]")) { // ALWAYS ignore as special
1212 llkBlacklistProcess.emplace("[khungtaskd]");
1213 }
1214 llkBlacklistParent = llkSplit(android::base::GetProperty(
1215 LLK_BLACKLIST_PARENT_PROPERTY, std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
1216 "," LLK_BLACKLIST_PARENT_DEFAULT));
1217 llkBlacklistUid =
1218 llkSplit(android::base::GetProperty(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT));
1219
1220 // internal watchdog
1221 ::signal(SIGALRM, llkAlarmHandler);
1222
1223 // kernel hung task configuration? Otherwise leave it as-is
1224 if (khtEnable) {
1225 // EUID must be AID_ROOT to write to /proc/sys/kernel/ nodes, there
1226 // are no capability overrides. For security reasons we do not want
1227 // to run as AID_ROOT. We may not be able to write them successfully,
1228 // we will try, but the least we can do is read the values back to
1229 // confirm expectations and report whether configured or not.
1230 auto configured = llkWriteStringToFileConfirm(std::to_string(khtTimeout.count()),
1231 "/proc/sys/kernel/hung_task_timeout_secs");
1232 if (configured) {
1233 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_warnings");
1234 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_check_count");
1235 configured = llkWriteStringToFileConfirm("1", "/proc/sys/kernel/hung_task_panic");
1236 }
1237 if (configured) {
1238 LOG(INFO) << "[khungtaskd] configured";
1239 } else {
1240 LOG(WARNING) << "[khungtaskd] not configurable";
1241 }
1242 }
1243
1244 bool logConfig = true;
1245 if (llkEnable) {
1246 if (llkMlockall &&
1247 // MCL_ONFAULT pins pages as they fault instead of loading
1248 // everything immediately all at once. (Which would be bad,
1249 // because as of this writing, we have a lot of mapped pages we
1250 // never use.) Old kernels will see MCL_ONFAULT and fail with
1251 // EINVAL; we ignore this failure.
1252 //
1253 // N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
1254 // pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
1255 // in pages.
1256
1257 // CAP_IPC_LOCK required
1258 mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
1259 PLOG(WARNING) << "mlockall failed ";
1260 }
1261
1262 if (threadname) {
1263 pthread_attr_t attr;
1264
1265 if (!pthread_attr_init(&attr)) {
1266 sched_param param;
1267
1268 memset(&param, 0, sizeof(param));
1269 pthread_attr_setschedparam(&attr, &param);
1270 pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
1271 if (!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
1272 pthread_t thread;
1273 if (!pthread_create(&thread, &attr, llkThread, const_cast<char*>(threadname))) {
1274 // wait a second for thread to start
1275 for (auto retry = 50; retry && !llkRunning; --retry) {
1276 ::usleep(20000);
1277 }
1278 logConfig = !llkRunning; // printed in llkd context?
1279 } else {
1280 LOG(ERROR) << "failed to spawn llkd thread";
1281 }
1282 } else {
1283 LOG(ERROR) << "failed to detach llkd thread";
1284 }
1285 pthread_attr_destroy(&attr);
1286 } else {
1287 LOG(ERROR) << "failed to allocate attibutes for llkd thread";
1288 }
1289 }
1290 } else {
1291 LOG(DEBUG) << "[khungtaskd] left unconfigured";
1292 }
1293 if (logConfig) {
1294 llkLogConfig();
1295 }
1296
1297 return llkEnable;
1298}