// blob: eec3f90a9aa656c5ba2fc0e27b14e9a3516d46d5
/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "llkd.h"

#include <ctype.h>
#include <dirent.h>  // opendir() and readdir()
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <pwd.h>  // getpwuid()
#include <signal.h>
#include <stdint.h>
#include <stdio.h>   // sscanf()
#include <string.h>  // memset(), strlen() and strncpy()
#include <sys/cdefs.h>  // ___STRING, __predict_true() and __predict_false()
#include <sys/mman.h>   // mlockall()
#include <sys/prctl.h>
#include <sys/stat.h>     // lstat()
#include <sys/syscall.h>  // __NR_getdents64
#include <sys/sysinfo.h>  // get_nprocs_conf()
#include <sys/types.h>
#include <time.h>
#include <unistd.h>

#include <algorithm>  // std::max() and std::min()
#include <chrono>
#include <ios>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>

#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/parseint.h>
#include <android-base/properties.h>
#include <android-base/strings.h>
#include <cutils/android_get_control_file.h>
#include <log/log_main.h>

// Element count of a true array (must not be handed a pointer).
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))

#define TASK_COMM_LEN 16  // internal kernel, not uapi, from .../linux/include/linux/sched.h

// Bring duration types, duration_cast and the "ms"/"s" literal suffixes into
// scope for the rest of this file.
using namespace std::chrono_literals;
using namespace std::chrono;
using namespace std::literals;

60namespace {
61
// Well-known process ids.
constexpr pid_t kernelPid = 0;
constexpr pid_t initPid = 1;
constexpr pid_t kthreaddPid = 2;

// Prefix for every per-process path built in this file (trailing '/' included).
constexpr char procdir[] = "/proc/";

68// Configuration
69milliseconds llkUpdate; // last check ms signature
70milliseconds llkCycle; // ms to next thread check
71bool llkEnable = LLK_ENABLE_DEFAULT; // llk daemon enabled
72bool llkRunning = false; // thread is running
73bool llkMlockall = LLK_MLOCKALL_DEFAULT; // run mlocked
Mark Salyzynafd66f22018-03-19 15:16:29 -070074bool llkTestWithKill = LLK_KILLTEST_DEFAULT; // issue test kills
Mark Salyzynf089e142018-02-20 10:47:40 -080075milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT; // default timeout
Mark Salyzyn96505fa2018-08-07 08:13:13 -070076enum { // enum of state indexes
77 llkStateD, // Persistent 'D' state
78 llkStateZ, // Persistent 'Z' state
79#ifdef __PTRACE_ENABLED__ // Extra privileged states
80 llkStateStack, // stack signature
81#endif // End of extra privilege
82 llkNumStates, // Maxumum number of states
83}; // state indexes
Mark Salyzynf089e142018-02-20 10:47:40 -080084milliseconds llkStateTimeoutMs[llkNumStates]; // timeout override for each detection state
85milliseconds llkCheckMs; // checking interval to inspect any
86 // persistent live-locked states
87bool llkLowRam; // ro.config.low_ram
Mark Salyzynbd7c8562018-10-31 10:02:08 -070088bool llkEnableSysrqT = LLK_ENABLE_SYSRQ_T_DEFAULT; // sysrq stack trace dump
Mark Salyzynf089e142018-02-20 10:47:40 -080089bool khtEnable = LLK_ENABLE_DEFAULT; // [khungtaskd] panic
90// [khungtaskd] should have a timeout beyond the granularity of llkTimeoutMs.
91// Provides a wide angle of margin b/c khtTimeout is also its granularity.
92seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
93 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
Mark Salyzyn96505fa2018-08-07 08:13:13 -070094#ifdef __PTRACE_ENABLED__
95// list of stack symbols to search for persistence.
96std::unordered_set<std::string> llkCheckStackSymbols;
97#endif
Mark Salyzynf089e142018-02-20 10:47:40 -080098
// Blacklist variables, initialized with comma separated lists of high false
// positive and/or dangerous references, e.g. without self restart, for pid,
// ppid, name and uid:

// list of pids, or tids or names to skip. kernel pid (0), init pid (1),
// [kthreadd] pid (2), ourselves, "init", "[kthreadd]", "lmkd", "llkd" or
// combinations of watchdogd in kernel and user space.
std::unordered_set<std::string> llkBlacklistProcess;
// list of parent pids, comm or cmdline names to skip. default:
// kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied
std::unordered_set<std::string> llkBlacklistParent;
// list of uids, and uid names, to skip, default nothing
std::unordered_set<std::string> llkBlacklistUid;
#ifdef __PTRACE_ENABLED__
// list of names to skip stack checking. "init", "lmkd", "llkd", "keystore" or
// "logd" (if not userdebug).
std::unordered_set<std::string> llkBlacklistStack;
#endif

118class dir {
119 public:
120 enum level { proc, task, numLevels };
121
122 private:
123 int fd;
124 size_t available_bytes;
125 dirent* next;
126 // each directory level picked to be just north of 4K in size
127 static constexpr size_t buffEntries = 15;
128 static dirent buff[numLevels][buffEntries];
129
130 bool fill(enum level index) {
131 if (index >= numLevels) return false;
132 if (available_bytes != 0) return true;
133 if (__predict_false(fd < 0)) return false;
134 // getdents64 has no libc wrapper
135 auto rc = TEMP_FAILURE_RETRY(syscall(__NR_getdents64, fd, buff[index], sizeof(buff[0]), 0));
136 if (rc <= 0) return false;
137 available_bytes = rc;
138 next = buff[index];
139 return true;
140 }
141
142 public:
143 dir() : fd(-1), available_bytes(0), next(nullptr) {}
144
145 explicit dir(const char* directory)
146 : fd(__predict_true(directory != nullptr)
147 ? ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY)
148 : -1),
149 available_bytes(0),
150 next(nullptr) {}
151
152 explicit dir(const std::string&& directory)
153 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
154 available_bytes(0),
155 next(nullptr) {}
156
157 explicit dir(const std::string& directory)
158 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
159 available_bytes(0),
160 next(nullptr) {}
161
162 // Don't need any copy or move constructors.
163 explicit dir(const dir& c) = delete;
164 explicit dir(dir& c) = delete;
165 explicit dir(dir&& c) = delete;
166
167 ~dir() {
168 if (fd >= 0) {
169 ::close(fd);
170 }
171 }
172
173 operator bool() const { return fd >= 0; }
174
175 void reset(void) {
176 if (fd >= 0) {
177 ::close(fd);
178 fd = -1;
179 available_bytes = 0;
180 next = nullptr;
181 }
182 }
183
184 dir& reset(const char* directory) {
185 reset();
186 // available_bytes will _always_ be zero here as its value is
187 // intimately tied to fd < 0 or not.
188 fd = ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
189 return *this;
190 }
191
192 void rewind(void) {
193 if (fd >= 0) {
194 ::lseek(fd, off_t(0), SEEK_SET);
195 available_bytes = 0;
196 next = nullptr;
197 }
198 }
199
200 dirent* read(enum level index = proc, dirent* def = nullptr) {
201 if (!fill(index)) return def;
202 auto ret = next;
203 available_bytes -= next->d_reclen;
204 next = reinterpret_cast<dirent*>(reinterpret_cast<char*>(next) + next->d_reclen);
205 return ret;
206 }
207} llkTopDirectory;
208
209dirent dir::buff[dir::numLevels][dir::buffEntries];
210
211// helper functions
212
213bool llkIsMissingExeLink(pid_t tid) {
214 char c;
215 // CAP_SYS_PTRACE is required to prevent ret == -1, but ENOENT is signal
216 auto ret = ::readlink((procdir + std::to_string(tid) + "/exe").c_str(), &c, sizeof(c));
217 return (ret == -1) && (errno == ENOENT);
218}
219
220// Common routine where caller accepts empty content as error/passthrough.
221// Reduces the churn of reporting read errors in the callers.
222std::string ReadFile(std::string&& path) {
223 std::string content;
224 if (!android::base::ReadFileToString(path, &content)) {
225 PLOG(DEBUG) << "Read " << path << " failed";
226 content = "";
227 }
228 return content;
229}
230
231std::string llkProcGetName(pid_t tid, const char* node = "/cmdline") {
232 std::string content = ReadFile(procdir + std::to_string(tid) + node);
233 static constexpr char needles[] = " \t\r\n"; // including trailing nul
234 auto pos = content.find_first_of(needles, 0, sizeof(needles));
235 if (pos != std::string::npos) {
236 content.erase(pos);
237 }
238 return content;
239}
240
241uid_t llkProcGetUid(pid_t tid) {
242 // Get the process' uid. The following read from /status is admittedly
243 // racy, prone to corruption due to shape-changes. The consequences are
244 // not catastrophic as we sample a few times before taking action.
245 //
246 // If /loginuid worked on reliably, or on Android (all tasks report -1)...
247 // Android lmkd causes /cgroup to contain memory:/<dom>/uid_<uid>/pid_<pid>
248 // which is tighter, but also not reliable.
249 std::string content = ReadFile(procdir + std::to_string(tid) + "/status");
250 static constexpr char Uid[] = "\nUid:";
251 auto pos = content.find(Uid);
252 if (pos == std::string::npos) {
253 return -1;
254 }
255 pos += ::strlen(Uid);
256 while ((pos < content.size()) && ::isblank(content[pos])) {
257 ++pos;
258 }
259 content.erase(0, pos);
260 for (pos = 0; (pos < content.size()) && ::isdigit(content[pos]); ++pos) {
261 ;
262 }
263 // Content of form 'Uid: 0 0 0 0', newline is error
264 if ((pos >= content.size()) || !::isblank(content[pos])) {
265 return -1;
266 }
267 content.erase(pos);
268 uid_t ret;
Tom Cherrye0bc5a92018-10-05 14:29:47 -0700269 if (!android::base::ParseUint(content, &ret, uid_t(0))) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800270 return -1;
271 }
272 return ret;
273}
274
275struct proc {
276 pid_t tid; // monitored thread id (in Z or D state).
277 nanoseconds schedUpdate; // /proc/<tid>/sched "se.avg.lastUpdateTime",
278 uint64_t nrSwitches; // /proc/<tid>/sched "nr_switches" for
279 // refined ABA problem detection, determine
280 // forward scheduling progress.
281 milliseconds update; // llkUpdate millisecond signature of last.
282 milliseconds count; // duration in state.
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700283#ifdef __PTRACE_ENABLED__ // Privileged state checking
284 milliseconds count_stack; // duration where stack is stagnant.
285#endif // End privilege
Mark Salyzynf089e142018-02-20 10:47:40 -0800286 pid_t pid; // /proc/<pid> before iterating through
287 // /proc/<pid>/task/<tid> for threads.
288 pid_t ppid; // /proc/<tid>/stat field 4 parent pid.
289 uid_t uid; // /proc/<tid>/status Uid: field.
290 unsigned time; // sum of /proc/<tid>/stat field 14 utime &
291 // 15 stime for coarse ABA problem detection.
292 std::string cmdline; // cached /cmdline content
293 char state; // /proc/<tid>/stat field 3: Z or D
294 // (others we do not monitor: S, R, T or ?)
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700295#ifdef __PTRACE_ENABLED__ // Privileged state checking
296 char stack; // index in llkCheckStackSymbols for matches
297#endif // and with maximum index PROP_VALUE_MAX/2.
Mark Salyzynf089e142018-02-20 10:47:40 -0800298 char comm[TASK_COMM_LEN + 3]; // space for adding '[' and ']'
299 bool exeMissingValid; // exeMissing has been cached
300 bool cmdlineValid; // cmdline has been cached
301 bool updated; // cleared before monitoring pass.
302 bool killed; // sent a kill to this thread, next panic...
303
304 void setComm(const char* _comm) { strncpy(comm + 1, _comm, sizeof(comm) - 2); }
305
306 proc(pid_t tid, pid_t pid, pid_t ppid, const char* _comm, int time, char state)
307 : tid(tid),
308 schedUpdate(0),
309 nrSwitches(0),
310 update(llkUpdate),
Mark Salyzynacecaf72018-08-10 08:15:57 -0700311 count(0ms),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700312#ifdef __PTRACE_ENABLED__
313 count_stack(0ms),
314#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800315 pid(pid),
316 ppid(ppid),
317 uid(-1),
318 time(time),
319 state(state),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700320#ifdef __PTRACE_ENABLED__
321 stack(-1),
322#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800323 exeMissingValid(false),
324 cmdlineValid(false),
325 updated(true),
Mark Salyzynafd66f22018-03-19 15:16:29 -0700326 killed(!llkTestWithKill) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800327 memset(comm, '\0', sizeof(comm));
328 setComm(_comm);
329 }
330
331 const char* getComm(void) {
332 if (comm[1] == '\0') { // comm Valid?
333 strncpy(comm + 1, llkProcGetName(tid, "/comm").c_str(), sizeof(comm) - 2);
334 }
335 if (!exeMissingValid) {
336 if (llkIsMissingExeLink(tid)) {
337 comm[0] = '[';
338 }
339 exeMissingValid = true;
340 }
341 size_t len = strlen(comm + 1);
342 if (__predict_true(len < (sizeof(comm) - 1))) {
343 if (comm[0] == '[') {
344 if ((comm[len] != ']') && __predict_true(len < (sizeof(comm) - 2))) {
345 comm[++len] = ']';
346 comm[++len] = '\0';
347 }
348 } else {
349 if (comm[len] == ']') {
350 comm[len] = '\0';
351 }
352 }
353 }
354 return &comm[comm[0] != '['];
355 }
356
357 const char* getCmdline(void) {
358 if (!cmdlineValid) {
359 cmdline = llkProcGetName(tid);
360 cmdlineValid = true;
361 }
362 return cmdline.c_str();
363 }
364
365 uid_t getUid(void) {
366 if (uid <= 0) { // Churn on root user, because most likely to setuid()
367 uid = llkProcGetUid(tid);
368 }
369 return uid;
370 }
371
372 void reset(void) { // reset cache, if we detected pid rollover
373 uid = -1;
374 state = '?';
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700375#ifdef __PTRACE_ENABLED__
376 count_stack = 0ms;
377 stack = -1;
378#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800379 cmdline = "";
380 comm[0] = '\0';
381 exeMissingValid = false;
382 cmdlineValid = false;
383 }
384};
385
386std::unordered_map<pid_t, proc> tids;
387
388// Check range and setup defaults, in order of propagation:
389// llkTimeoutMs
390// llkCheckMs
391// ...
392// KISS to keep it all self-contained, and called multiple times as parameters
393// are interpreted so that defaults, llkCheckMs and llkCycle make sense.
394void llkValidate() {
395 if (llkTimeoutMs == 0ms) {
396 llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;
397 }
398 llkTimeoutMs = std::max(llkTimeoutMs, LLK_TIMEOUT_MS_MINIMUM);
399 if (llkCheckMs == 0ms) {
400 llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT;
401 }
402 llkCheckMs = std::min(llkCheckMs, llkTimeoutMs);
403
404 for (size_t state = 0; state < ARRAY_SIZE(llkStateTimeoutMs); ++state) {
405 if (llkStateTimeoutMs[state] == 0ms) {
406 llkStateTimeoutMs[state] = llkTimeoutMs;
407 }
408 llkStateTimeoutMs[state] =
409 std::min(std::max(llkStateTimeoutMs[state], LLK_TIMEOUT_MS_MINIMUM), llkTimeoutMs);
410 llkCheckMs = std::min(llkCheckMs, llkStateTimeoutMs[state]);
411 }
412
413 llkCheckMs = std::max(llkCheckMs, LLK_CHECK_MS_MINIMUM);
414 if (llkCycle == 0ms) {
415 llkCycle = llkCheckMs;
416 }
417 llkCycle = std::min(llkCycle, llkCheckMs);
418}
419
// Return the time elapsed from *from to *to in whole milliseconds, truncated
// toward zero.  The two fields are combined at nanosecond resolution before
// the conversion; converting them separately overstates the interval by up
// to a millisecond whenever the nanosecond difference is negative.
milliseconds llkGetTimespecDiffMs(timespec* from, timespec* to) {
    const auto elapsed =
        seconds(to->tv_sec - from->tv_sec) + nanoseconds(to->tv_nsec - from->tv_nsec);
    return duration_cast<milliseconds>(elapsed);
}

425std::string llkProcGetName(pid_t tid, const char* comm, const char* cmdline) {
426 if ((cmdline != nullptr) && (*cmdline != '\0')) {
427 return cmdline;
428 }
429 if ((comm != nullptr) && (*comm != '\0')) {
430 return comm;
431 }
432
433 // UNLIKELY! Here because killed before we kill it?
434 // Assume change is afoot, do not call llkTidAlloc
435
436 // cmdline ?
437 std::string content = llkProcGetName(tid);
438 if (content.size() != 0) {
439 return content;
440 }
441 // Comm instead?
442 content = llkProcGetName(tid, "/comm");
443 if (llkIsMissingExeLink(tid) && (content.size() != 0)) {
444 return '[' + content + ']';
445 }
446 return content;
447}
448
449int llkKillOneProcess(pid_t pid, char state, pid_t tid, const char* tcomm = nullptr,
450 const char* tcmdline = nullptr, const char* pcomm = nullptr,
451 const char* pcmdline = nullptr) {
452 std::string forTid;
453 if (tid != pid) {
454 forTid = " for '" + llkProcGetName(tid, tcomm, tcmdline) + "' (" + std::to_string(tid) + ")";
455 }
456 LOG(INFO) << "Killing '" << llkProcGetName(pid, pcomm, pcmdline) << "' (" << pid
457 << ") to check forward scheduling progress in " << state << " state" << forTid;
458 // CAP_KILL required
459 errno = 0;
460 auto r = ::kill(pid, SIGKILL);
461 if (r) {
462 PLOG(ERROR) << "kill(" << pid << ")=" << r << ' ';
463 }
464
465 return r;
466}
467
468// Kill one process
469int llkKillOneProcess(pid_t pid, proc* tprocp) {
470 return llkKillOneProcess(pid, tprocp->state, tprocp->tid, tprocp->getComm(),
471 tprocp->getCmdline());
472}
473
474// Kill one process specified by kprocp
475int llkKillOneProcess(proc* kprocp, proc* tprocp) {
476 if (kprocp == nullptr) {
477 return -2;
478 }
479
480 return llkKillOneProcess(kprocp->tid, tprocp->state, tprocp->tid, tprocp->getComm(),
481 tprocp->getCmdline(), kprocp->getComm(), kprocp->getCmdline());
482}
483
484// Acquire file descriptor from environment, or open and cache it.
485// NB: cache is unnecessary in our current context, pedantically
486// required to prevent leakage of file descriptors in the future.
487int llkFileToWriteFd(const std::string& file) {
488 static std::unordered_map<std::string, int> cache;
489 auto search = cache.find(file);
490 if (search != cache.end()) return search->second;
491 auto fd = android_get_control_file(file.c_str());
492 if (fd >= 0) return fd;
493 fd = TEMP_FAILURE_RETRY(::open(file.c_str(), O_WRONLY | O_CLOEXEC));
494 if (fd >= 0) cache.emplace(std::make_pair(file, fd));
495 return fd;
496}
497
498// Wrap android::base::WriteStringToFile to use android_get_control_file.
499bool llkWriteStringToFile(const std::string& string, const std::string& file) {
500 auto fd = llkFileToWriteFd(file);
501 if (fd < 0) return false;
502 return android::base::WriteStringToFd(string, fd);
503}
504
505bool llkWriteStringToFileConfirm(const std::string& string, const std::string& file) {
506 auto fd = llkFileToWriteFd(file);
507 auto ret = (fd < 0) ? false : android::base::WriteStringToFd(string, fd);
508 std::string content;
509 if (!android::base::ReadFileToString(file, &content)) return ret;
510 return android::base::Trim(content) == string;
511}
512
Mark Salyzynafd66f22018-03-19 15:16:29 -0700513void llkPanicKernel(bool dump, pid_t tid, const char* state) __noreturn;
514void llkPanicKernel(bool dump, pid_t tid, const char* state) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800515 auto sysrqTriggerFd = llkFileToWriteFd("/proc/sysrq-trigger");
516 if (sysrqTriggerFd < 0) {
517 // DYB
518 llkKillOneProcess(initPid, 'R', tid);
519 // The answer to life, the universe and everything
520 ::exit(42);
521 // NOTREACHED
522 }
523 ::sync();
524 if (dump) {
525 // Show all locks that are held
526 android::base::WriteStringToFd("d", sysrqTriggerFd);
527 // This can trigger hardware watchdog, that is somewhat _ok_.
528 // But useless if pstore configured for <256KB, low ram devices ...
Mark Salyzynbd7c8562018-10-31 10:02:08 -0700529 if (llkEnableSysrqT) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800530 android::base::WriteStringToFd("t", sysrqTriggerFd);
531 }
532 ::usleep(200000); // let everything settle
533 }
Mark Salyzyn52e54a62018-08-07 08:13:13 -0700534 llkWriteStringToFile("SysRq : Trigger a crash : 'livelock,"s + state + "'\n", "/dev/kmsg");
Mark Salyzynf089e142018-02-20 10:47:40 -0800535 android::base::WriteStringToFd("c", sysrqTriggerFd);
536 // NOTREACHED
537 // DYB
538 llkKillOneProcess(initPid, 'R', tid);
539 // I sat at my desk, stared into the garden and thought '42 will do'.
540 // I typed it out. End of story
541 ::exit(42);
542 // NOTREACHED
543}
544
545void llkAlarmHandler(int) {
Mark Salyzynafd66f22018-03-19 15:16:29 -0700546 llkPanicKernel(false, ::getpid(), "alarm");
Mark Salyzynf089e142018-02-20 10:47:40 -0800547}
548
549milliseconds GetUintProperty(const std::string& key, milliseconds def) {
550 return milliseconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
551 static_cast<uint64_t>(def.max().count())));
552}
553
554seconds GetUintProperty(const std::string& key, seconds def) {
555 return seconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
556 static_cast<uint64_t>(def.max().count())));
557}
558
559proc* llkTidLookup(pid_t tid) {
560 auto search = tids.find(tid);
561 if (search == tids.end()) {
562 return nullptr;
563 }
564 return &search->second;
565}
566
567void llkTidRemove(pid_t tid) {
568 tids.erase(tid);
569}
570
571proc* llkTidAlloc(pid_t tid, pid_t pid, pid_t ppid, const char* comm, int time, char state) {
572 auto it = tids.emplace(std::make_pair(tid, proc(tid, pid, ppid, comm, time, state)));
573 return &it.first->second;
574}
575
// Format a duration as seconds with three decimals, e.g. 1234ms -> "1.234s".
std::string llkFormat(milliseconds ms) {
    const auto wholeSeconds = duration_cast<seconds>(ms);
    const auto fraction = (ms - wholeSeconds).count();
    std::ostringstream out;
    out << wholeSeconds.count() << '.';
    // Zero pad the fraction to three digits, then restore the stream state.
    const auto savedFill = out.fill('0');
    const auto savedWidth = out.width(3);
    out << std::right << fraction;
    out.width(savedWidth);
    out.fill(savedFill);
    out << 's';
    return out.str();
}

// Format a whole number of seconds, e.g. 5s -> "5s".
std::string llkFormat(seconds s) {
    return std::to_string(s.count()) + 's';
}

// Format a flag as "true" or "false".
std::string llkFormat(bool flag) {
    return flag ? "true" : "false";
}

// Join the entries of a list with commas, in the set's iteration order.
// An empty set gives an empty string.
std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
    std::string ret;
    for (const auto& entry : blacklist) {  // by reference, no copy per entry
        if (!ret.empty()) {
            ret += ",";
        }
        ret += entry;
    }
    return ret;
}

// We only officially support comma separators, but wetware being what they
// are will take some liberty and I do not believe they should be punished.
//
// Split s on any of ',', ' ', '\t' and ':' into a set of non-empty entries.
std::unordered_set<std::string> llkSplit(const std::string& s) {
    std::unordered_set<std::string> result;

    // Special case, allow boolean false to empty the list, otherwise expected
    // source of input from android::base::GetProperty will supply the default
    // value on empty content in the property.
    if (s == "false") return result;

    static constexpr char separators[] = ", \t:";
    for (size_t base = 0; base < s.size();) {
        const auto found = s.find_first_of(separators, base);
        if (found == std::string::npos) {
            result.emplace(s.substr(base));  // final entry, runs to the end
            break;
        }
        // Only emplace content, empty entries are not an option
        if (found != base) result.emplace(s.substr(base, found - base));
        base = found + 1;
    }
    return result;
}

629bool llkSkipName(const std::string& name,
630 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
631 if ((name.size() == 0) || (blacklist.size() == 0)) {
632 return false;
633 }
634
635 return blacklist.find(name) != blacklist.end();
636}
637
638bool llkSkipPid(pid_t pid) {
639 return llkSkipName(std::to_string(pid), llkBlacklistProcess);
640}
641
642bool llkSkipPpid(pid_t ppid) {
643 return llkSkipName(std::to_string(ppid), llkBlacklistParent);
644}
645
646bool llkSkipUid(uid_t uid) {
647 // Match by number?
648 if (llkSkipName(std::to_string(uid), llkBlacklistUid)) {
649 return true;
650 }
651
652 // Match by name?
653 auto pwd = ::getpwuid(uid);
654 return (pwd != nullptr) && __predict_true(pwd->pw_name != nullptr) &&
655 __predict_true(pwd->pw_name[0] != '\0') && llkSkipName(pwd->pw_name, llkBlacklistUid);
656}
657
658bool getValidTidDir(dirent* dp, std::string* piddir) {
659 if (!::isdigit(dp->d_name[0])) {
660 return false;
661 }
662
663 // Corner case can not happen in reality b/c of above ::isdigit check
664 if (__predict_false(dp->d_type != DT_DIR)) {
665 if (__predict_false(dp->d_type == DT_UNKNOWN)) { // can't b/c procfs
666 struct stat st;
667 *piddir = procdir;
668 *piddir += dp->d_name;
669 return (lstat(piddir->c_str(), &st) == 0) && (st.st_mode & S_IFDIR);
670 }
671 return false;
672 }
673
674 *piddir = procdir;
675 *piddir += dp->d_name;
676 return true;
677}
678
// True for the two task states that are monitored: 'Z' and 'D'.
bool llkIsMonitorState(char state) {
    switch (state) {
        case 'Z':
        case 'D':
            return true;
        default:
            return false;
    }
}

683// returns -1 if not found
684long long getSchedValue(const std::string& schedString, const char* key) {
685 auto pos = schedString.find(key);
686 if (pos == std::string::npos) {
687 return -1;
688 }
689 pos = schedString.find(':', pos);
690 if (__predict_false(pos == std::string::npos)) {
691 return -1;
692 }
693 while ((++pos < schedString.size()) && ::isblank(schedString[pos])) {
694 ;
695 }
696 long long ret;
697 if (!android::base::ParseInt(schedString.substr(pos), &ret, static_cast<long long>(0))) {
698 return -1;
699 }
700 return ret;
701}
702
#ifdef __PTRACE_ENABLED__
// Sample /proc/<tid>/stack, read through piddir, and track how long one of
// the symbols in llkCheckStackSymbols has been continuously present.
// Returns true once the same symbol has persisted for the stack state
// timeout; each sample that matches the previous one adds llkCycle.
bool llkCheckStack(proc* procp, const std::string& piddir) {
    if (llkCheckStackSymbols.empty()) return false;
    if (procp->state == 'Z') {  // No brains for Zombies
        procp->stack = -1;
        procp->count_stack = 0ms;
        return false;
    }

    // Don't check process that are known to block ptrace, save sepolicy noise.
    if (llkSkipName(std::to_string(procp->pid), llkBlacklistStack)) return false;
    if (llkSkipName(procp->getComm(), llkBlacklistStack)) return false;
    if (llkSkipName(procp->getCmdline(), llkBlacklistStack)) return false;
    if (llkSkipName(android::base::Basename(procp->getCmdline()), llkBlacklistStack)) return false;

    const auto kernel_stack = ReadFile(piddir + "/stack");
    if (kernel_stack.empty()) {
        LOG(INFO) << piddir << "/stack empty comm=" << procp->getComm()
                  << " cmdline=" << procp->getCmdline();
        return false;
    }
    // A scheduling incident that should not reset count_stack
    if (kernel_stack.find(" cpu_worker_pools+0x") != std::string::npos) return false;
    // Position of the first matching symbol in iteration order, -1 for none.
    char idx = -1;
    char match = -1;
    for (const auto& stack : llkCheckStackSymbols) {
        if (++idx < 0) break;  // ran out of indexes a (signed) char can hold
        if ((kernel_stack.find(" "s + stack + "+0x") != std::string::npos) ||
            (kernel_stack.find(" "s + stack + ".cfi+0x") != std::string::npos)) {
            match = idx;
            break;
        }
    }
    if (procp->stack != match) {
        // Different symbol, or none, since the last sample: start over.
        procp->stack = match;
        procp->count_stack = 0ms;
        return false;
    }
    if (match == char(-1)) return false;
    procp->count_stack += llkCycle;
    return procp->count_stack >= llkStateTimeoutMs[llkStateStack];
}
#endif

Mark Salyzynf089e142018-02-20 10:47:40 -0800747// Primary ABA mitigation watching last time schedule activity happened
748void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
749 // Audit finds /proc/<tid>/sched is just over 1K, and
750 // is rarely larger than 2K, even less on Android.
751 // For example, the "se.avg.lastUpdateTime" field we are
752 // interested in typically within the primary set in
753 // the first 1K.
754 //
755 // Proc entries can not be read >1K atomically via libbase,
756 // but if there are problems we assume at least a few
757 // samples of reads occur before we take any real action.
758 std::string schedString = ReadFile(piddir + "/sched");
759 if (schedString.size() == 0) {
760 // /schedstat is not as standardized, but in 3.1+
761 // Android devices, the third field is nr_switches
762 // from /sched:
763 schedString = ReadFile(piddir + "/schedstat");
764 if (schedString.size() == 0) {
765 return;
766 }
767 auto val = static_cast<unsigned long long>(-1);
768 if (((::sscanf(schedString.c_str(), "%*d %*d %llu", &val)) == 1) &&
769 (val != static_cast<unsigned long long>(-1)) && (val != 0) &&
770 (val != procp->nrSwitches)) {
771 procp->nrSwitches = val;
772 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700773 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800774 }
775 return;
776 }
777
778 auto val = getSchedValue(schedString, "\nse.avg.lastUpdateTime");
779 if (val == -1) {
780 val = getSchedValue(schedString, "\nse.svg.last_update_time");
781 }
782 if (val != -1) {
783 auto schedUpdate = nanoseconds(val);
784 if (schedUpdate != procp->schedUpdate) {
785 procp->schedUpdate = schedUpdate;
786 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700787 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800788 }
789 }
790
791 val = getSchedValue(schedString, "\nnr_switches");
792 if (val != -1) {
793 if (static_cast<uint64_t>(val) != procp->nrSwitches) {
794 procp->nrSwitches = val;
795 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700796 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800797 }
798 }
799}
800
801void llkLogConfig(void) {
802 LOG(INFO) << "ro.config.low_ram=" << llkFormat(llkLowRam) << "\n"
Mark Salyzynbd7c8562018-10-31 10:02:08 -0700803 << LLK_ENABLE_SYSRQ_T_PROPERTY "=" << llkFormat(llkEnableSysrqT) << "\n"
Mark Salyzynf089e142018-02-20 10:47:40 -0800804 << LLK_ENABLE_PROPERTY "=" << llkFormat(llkEnable) << "\n"
805 << KHT_ENABLE_PROPERTY "=" << llkFormat(khtEnable) << "\n"
806 << LLK_MLOCKALL_PROPERTY "=" << llkFormat(llkMlockall) << "\n"
Mark Salyzynafd66f22018-03-19 15:16:29 -0700807 << LLK_KILLTEST_PROPERTY "=" << llkFormat(llkTestWithKill) << "\n"
Mark Salyzynf089e142018-02-20 10:47:40 -0800808 << KHT_TIMEOUT_PROPERTY "=" << llkFormat(khtTimeout) << "\n"
809 << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n"
810 << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n"
811 << LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n"
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700812#ifdef __PTRACE_ENABLED__
813 << LLK_STACK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateStack])
814 << "\n"
815#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800816 << LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n"
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700817#ifdef __PTRACE_ENABLED__
818 << LLK_CHECK_STACK_PROPERTY "=" << llkFormat(llkCheckStackSymbols) << "\n"
819 << LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
820#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800821 << LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
822 << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
823 << LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
824}
825
826void* llkThread(void* obj) {
Mark Salyzyn4832a8b2018-08-15 11:02:18 -0700827 prctl(PR_SET_DUMPABLE, 0);
828
Mark Salyzynf089e142018-02-20 10:47:40 -0800829 LOG(INFO) << "started";
830
831 std::string name = std::to_string(::gettid());
832 if (!llkSkipName(name)) {
833 llkBlacklistProcess.emplace(name);
834 }
835 name = static_cast<const char*>(obj);
836 prctl(PR_SET_NAME, name.c_str());
837 if (__predict_false(!llkSkipName(name))) {
838 llkBlacklistProcess.insert(name);
839 }
840 // No longer modifying llkBlacklistProcess.
841 llkRunning = true;
842 llkLogConfig();
843 while (llkRunning) {
844 ::usleep(duration_cast<microseconds>(llkCheck(true)).count());
845 }
846 // NOTREACHED
847 LOG(INFO) << "exiting";
848 return nullptr;
849}
850
851} // namespace
852
853milliseconds llkCheck(bool checkRunning) {
854 if (!llkEnable || (checkRunning != llkRunning)) {
855 return milliseconds::max();
856 }
857
858 // Reset internal watchdog, which is a healthy engineering margin of
859 // double the maximum wait or cycle time for the mainloop that calls us.
860 //
861 // This alarm is effectively the live lock detection of llkd, as
862 // we understandably can not monitor ourselves otherwise.
863 ::alarm(duration_cast<seconds>(llkTimeoutMs * 2).count());
864
865 // kernel jiffy precision fastest acquisition
866 static timespec last;
867 timespec now;
868 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
869 auto ms = llkGetTimespecDiffMs(&last, &now);
870 if (ms < llkCycle) {
871 return llkCycle - ms;
872 }
873 last = now;
874
875 LOG(VERBOSE) << "opendir(\"" << procdir << "\")";
876 if (__predict_false(!llkTopDirectory)) {
877 // gid containing AID_READPROC required
878 llkTopDirectory.reset(procdir);
879 if (__predict_false(!llkTopDirectory)) {
880 // Most likely reason we could be here is a resource limit.
881 // Keep our processing down to a minimum, but not so low that
882 // we do not recover in a timely manner should the issue be
883 // transitory.
884 LOG(DEBUG) << "opendir(\"" << procdir << "\") failed";
885 return llkTimeoutMs;
886 }
887 }
888
889 for (auto& it : tids) {
890 it.second.updated = false;
891 }
892
893 auto prevUpdate = llkUpdate;
894 llkUpdate += ms;
895 ms -= llkCycle;
896 auto myPid = ::getpid();
897 auto myTid = ::gettid();
898 for (auto dp = llkTopDirectory.read(); dp != nullptr; dp = llkTopDirectory.read()) {
899 std::string piddir;
900
901 if (!getValidTidDir(dp, &piddir)) {
902 continue;
903 }
904
905 // Get the process tasks
906 std::string taskdir = piddir + "/task/";
907 int pid = -1;
908 LOG(VERBOSE) << "+opendir(\"" << taskdir << "\")";
909 dir taskDirectory(taskdir);
910 if (__predict_false(!taskDirectory)) {
911 LOG(DEBUG) << "+opendir(\"" << taskdir << "\") failed";
912 }
913 for (auto tp = taskDirectory.read(dir::task, dp); tp != nullptr;
914 tp = taskDirectory.read(dir::task)) {
915 if (!getValidTidDir(tp, &piddir)) {
916 continue;
917 }
918
919 // Get the process stat
920 std::string stat = ReadFile(piddir + "/stat");
921 if (stat.size() == 0) {
922 continue;
923 }
924 unsigned tid = -1;
925 char pdir[TASK_COMM_LEN + 1];
926 char state = '?';
927 unsigned ppid = -1;
928 unsigned utime = -1;
929 unsigned stime = -1;
930 int dummy;
931 pdir[0] = '\0';
932 // tid should not change value
933 auto match = ::sscanf(
934 stat.c_str(),
935 "%u (%" ___STRING(
936 TASK_COMM_LEN) "[^)]) %c %u %*d %*d %*d %*d %*d %*d %*d %*d %*d %u %u %d",
937 &tid, pdir, &state, &ppid, &utime, &stime, &dummy);
938 if (pid == -1) {
939 pid = tid;
940 }
941 LOG(VERBOSE) << "match " << match << ' ' << tid << " (" << pdir << ") " << state << ' '
942 << ppid << " ... " << utime << ' ' << stime << ' ' << dummy;
943 if (match != 7) {
944 continue;
945 }
946
947 auto procp = llkTidLookup(tid);
948 if (procp == nullptr) {
949 procp = llkTidAlloc(tid, pid, ppid, pdir, utime + stime, state);
950 } else {
951 // comm can change ...
952 procp->setComm(pdir);
953 procp->updated = true;
954 // pid/ppid/tid wrap?
955 if (((procp->update != prevUpdate) && (procp->update != llkUpdate)) ||
956 (procp->ppid != ppid) || (procp->pid != pid)) {
957 procp->reset();
958 } else if (procp->time != (utime + stime)) { // secondary ABA.
959 // watching utime+stime granularity jiffy
960 procp->state = '?';
961 }
962 procp->update = llkUpdate;
963 procp->pid = pid;
964 procp->ppid = ppid;
965 procp->time = utime + stime;
966 if (procp->state != state) {
967 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700968 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800969 procp->state = state;
970 } else {
971 procp->count += llkCycle;
972 }
973 }
974
975 // Filter checks in intuitive order of CPU cost to evaluate
976 // If tid unique continue, if ppid or pid unique break
977
978 if (pid == myPid) {
979 break;
980 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700981#ifdef __PTRACE_ENABLED__
982 // if no stack monitoring, we can quickly exit here
983 if (!llkIsMonitorState(state) && llkCheckStackSymbols.empty()) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800984 continue;
985 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700986#else
987 if (!llkIsMonitorState(state)) continue;
988#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800989 if ((tid == myTid) || llkSkipPid(tid)) {
990 continue;
991 }
992 if (llkSkipPpid(ppid)) {
993 break;
994 }
995
996 if (llkSkipName(procp->getComm())) {
997 continue;
998 }
999 if (llkSkipName(procp->getCmdline())) {
1000 break;
1001 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001002 if (llkSkipName(android::base::Basename(procp->getCmdline()))) {
1003 break;
1004 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001005
1006 auto pprocp = llkTidLookup(ppid);
1007 if (pprocp == nullptr) {
1008 pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
1009 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001010 if ((pprocp != nullptr) &&
1011 (llkSkipName(pprocp->getComm(), llkBlacklistParent) ||
1012 llkSkipName(pprocp->getCmdline(), llkBlacklistParent) ||
1013 llkSkipName(android::base::Basename(pprocp->getCmdline()), llkBlacklistParent))) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001014 break;
1015 }
1016
1017 if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) {
1018 continue;
1019 }
1020
1021 // ABA mitigation watching last time schedule activity happened
1022 llkCheckSchedUpdate(procp, piddir);
1023
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001024#ifdef __PTRACE_ENABLED__
1025 auto stuck = llkCheckStack(procp, piddir);
1026 if (llkIsMonitorState(state)) {
1027 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1028 stuck = true;
1029 } else if (procp->count != 0ms) {
1030 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1031 << pid << "->" << tid << ' ' << procp->getComm();
1032 }
1033 }
1034 if (!stuck) continue;
1035#else
1036 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1037 if (procp->count != 0ms) {
1038 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1039 << pid << "->" << tid << ' ' << procp->getComm();
1040 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001041 continue;
1042 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001043#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001044
1045 // We have to kill it to determine difference between live lock
1046 // and persistent state blocked on a resource. Is there something
1047 // wrong with a process that has no forward scheduling progress in
1048 // Z or D? Yes, generally means improper accounting in the
1049 // process, but not always ...
1050 //
1051 // Whomever we hit with a test kill must accept the Android
1052 // Aphorism that everything can be burned to the ground and
1053 // must survive.
1054 if (procp->killed == false) {
1055 procp->killed = true;
1056 // confirm: re-read uid before committing to a panic.
1057 procp->uid = -1;
1058 switch (state) {
1059 case 'Z': // kill ppid to free up a Zombie
1060 // Killing init will kernel panic without diagnostics
1061 // so skip right to controlled kernel panic with
1062 // diagnostics.
1063 if (ppid == initPid) {
1064 break;
1065 }
1066 LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
1067 << pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
1068 if ((llkKillOneProcess(pprocp, procp) >= 0) ||
1069 (llkKillOneProcess(ppid, procp) >= 0)) {
1070 continue;
1071 }
1072 break;
1073
1074 case 'D': // kill tid to free up an uninterruptible D
1075 // If ABA is doing its job, we would not need or
1076 // want the following. Test kill is a Hail Mary
1077 // to make absolutely sure there is no forward
1078 // scheduling progress. The cost when ABA is
1079 // not working is we kill a process that likes to
1080 // stay in 'D' state, instead of panicing the
1081 // kernel (worse).
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001082 default:
1083 LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
1084 << "->" << tid << ' ' << procp->getComm() << " [kill]";
Mark Salyzynf089e142018-02-20 10:47:40 -08001085 if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001086 (llkKillOneProcess(pid, state, tid) >= 0) ||
Mark Salyzynf089e142018-02-20 10:47:40 -08001087 (llkKillOneProcess(procp, procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001088 (llkKillOneProcess(tid, state, tid) >= 0)) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001089 continue;
1090 }
1091 break;
1092 }
1093 }
1094 // We are here because we have confirmed kernel live-lock
1095 LOG(ERROR) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" << pid
1096 << "->" << tid << ' ' << procp->getComm() << " [panic]";
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001097 llkPanicKernel(true, tid,
1098 (state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping");
Mark Salyzynf089e142018-02-20 10:47:40 -08001099 }
1100 LOG(VERBOSE) << "+closedir()";
1101 }
1102 llkTopDirectory.rewind();
1103 LOG(VERBOSE) << "closedir()";
1104
1105 // garbage collection of old process references
1106 for (auto p = tids.begin(); p != tids.end();) {
1107 if (!p->second.updated) {
1108 IF_ALOG(LOG_VERBOSE, LOG_TAG) {
1109 std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr);
1110 if (ppidCmdline.size()) {
1111 ppidCmdline = "(" + ppidCmdline + ")";
1112 }
1113 std::string pidCmdline;
1114 if (p->second.pid != p->second.tid) {
1115 pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline());
1116 if (pidCmdline.size()) {
1117 pidCmdline = "(" + pidCmdline + ")";
1118 }
1119 }
1120 std::string tidCmdline =
1121 llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline());
1122 if (tidCmdline.size()) {
1123 tidCmdline = "(" + tidCmdline + ")";
1124 }
1125 LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid
1126 << pidCmdline << "->" << p->second.tid << tidCmdline << " removed";
1127 }
1128 p = tids.erase(p);
1129 } else {
1130 ++p;
1131 }
1132 }
1133 if (__predict_false(tids.empty())) {
1134 llkTopDirectory.reset();
1135 }
1136
1137 llkCycle = llkCheckMs;
1138
1139 timespec end;
1140 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &end);
1141 auto milli = llkGetTimespecDiffMs(&now, &end);
1142 LOG((milli > 10s) ? ERROR : (milli > 1s) ? WARNING : VERBOSE) << "sample " << llkFormat(milli);
1143
1144 // cap to minimum sleep for 1 second since last cycle
1145 if (llkCycle < (ms + 1s)) {
1146 return 1s;
1147 }
1148 return llkCycle - ms;
1149}
1150
1151unsigned llkCheckMilliseconds() {
1152 return duration_cast<milliseconds>(llkCheck()).count();
1153}
1154
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001155bool llkCheckEng(const std::string& property) {
1156 return android::base::GetProperty(property, "eng") == "eng";
1157}
1158
Mark Salyzynf089e142018-02-20 10:47:40 -08001159bool llkInit(const char* threadname) {
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001160 auto debuggable = android::base::GetBoolProperty("ro.debuggable", false);
Mark Salyzynf089e142018-02-20 10:47:40 -08001161 llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false);
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001162 llkEnableSysrqT &= !llkLowRam;
1163 if (debuggable) {
1164 llkEnableSysrqT |= llkCheckEng(LLK_ENABLE_SYSRQ_T_PROPERTY);
1165 if (!LLK_ENABLE_DEFAULT) { // NB: default is currently true ...
1166 llkEnable |= llkCheckEng(LLK_ENABLE_PROPERTY);
1167 khtEnable |= llkCheckEng(KHT_ENABLE_PROPERTY);
1168 }
Mark Salyzynd035dbb2018-03-26 08:23:00 -07001169 }
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001170 llkEnableSysrqT = android::base::GetBoolProperty(LLK_ENABLE_SYSRQ_T_PROPERTY, llkEnableSysrqT);
Mark Salyzynf089e142018-02-20 10:47:40 -08001171 llkEnable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, llkEnable);
1172 if (llkEnable && !llkTopDirectory.reset(procdir)) {
1173 // Most likely reason we could be here is llkd was started
1174 // incorrectly without the readproc permissions. Keep our
1175 // processing down to a minimum.
1176 llkEnable = false;
1177 }
1178 khtEnable = android::base::GetBoolProperty(KHT_ENABLE_PROPERTY, khtEnable);
1179 llkMlockall = android::base::GetBoolProperty(LLK_MLOCKALL_PROPERTY, llkMlockall);
Mark Salyzynafd66f22018-03-19 15:16:29 -07001180 llkTestWithKill = android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, llkTestWithKill);
Mark Salyzynf089e142018-02-20 10:47:40 -08001181 // if LLK_TIMOUT_MS_PROPERTY was not set, we will use a set
1182 // KHT_TIMEOUT_PROPERTY as co-operative guidance for the default value.
1183 khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout);
1184 if (khtTimeout == 0s) {
1185 khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
1186 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1187 }
1188 llkTimeoutMs =
1189 khtTimeout * LLK_CHECKS_PER_TIMEOUT_DEFAULT / (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1190 llkTimeoutMs = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1191 llkValidate(); // validate llkTimeoutMs, llkCheckMs and llkCycle
1192 llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1193 llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001194#ifdef __PTRACE_ENABLED__
1195 llkStateTimeoutMs[llkStateStack] = GetUintProperty(LLK_STACK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1196#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001197 llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs);
1198 llkValidate(); // validate all (effectively minus llkTimeoutMs)
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001199#ifdef __PTRACE_ENABLED__
1200 if (debuggable) {
1201 llkCheckStackSymbols = llkSplit(
1202 android::base::GetProperty(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT));
1203 }
1204 std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT);
1205 if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd";
1206 llkBlacklistStack = llkSplit(
1207 android::base::GetProperty(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack));
1208#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001209 std::string defaultBlacklistProcess(
1210 std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
1211 std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," +
1212 std::to_string(::gettid()) + "," LLK_BLACKLIST_PROCESS_DEFAULT);
1213 if (threadname) {
Mark Salyzyn52e54a62018-08-07 08:13:13 -07001214 defaultBlacklistProcess += ","s + threadname;
Mark Salyzynf089e142018-02-20 10:47:40 -08001215 }
1216 for (int cpu = 1; cpu < get_nprocs_conf(); ++cpu) {
1217 defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]";
1218 }
1219 defaultBlacklistProcess =
1220 android::base::GetProperty(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
1221 llkBlacklistProcess = llkSplit(defaultBlacklistProcess);
1222 if (!llkSkipName("[khungtaskd]")) { // ALWAYS ignore as special
1223 llkBlacklistProcess.emplace("[khungtaskd]");
1224 }
1225 llkBlacklistParent = llkSplit(android::base::GetProperty(
1226 LLK_BLACKLIST_PARENT_PROPERTY, std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
1227 "," LLK_BLACKLIST_PARENT_DEFAULT));
1228 llkBlacklistUid =
1229 llkSplit(android::base::GetProperty(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT));
1230
1231 // internal watchdog
1232 ::signal(SIGALRM, llkAlarmHandler);
1233
1234 // kernel hung task configuration? Otherwise leave it as-is
1235 if (khtEnable) {
1236 // EUID must be AID_ROOT to write to /proc/sys/kernel/ nodes, there
1237 // are no capability overrides. For security reasons we do not want
1238 // to run as AID_ROOT. We may not be able to write them successfully,
1239 // we will try, but the least we can do is read the values back to
1240 // confirm expectations and report whether configured or not.
1241 auto configured = llkWriteStringToFileConfirm(std::to_string(khtTimeout.count()),
1242 "/proc/sys/kernel/hung_task_timeout_secs");
1243 if (configured) {
1244 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_warnings");
1245 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_check_count");
1246 configured = llkWriteStringToFileConfirm("1", "/proc/sys/kernel/hung_task_panic");
1247 }
1248 if (configured) {
1249 LOG(INFO) << "[khungtaskd] configured";
1250 } else {
1251 LOG(WARNING) << "[khungtaskd] not configurable";
1252 }
1253 }
1254
1255 bool logConfig = true;
1256 if (llkEnable) {
1257 if (llkMlockall &&
1258 // MCL_ONFAULT pins pages as they fault instead of loading
1259 // everything immediately all at once. (Which would be bad,
1260 // because as of this writing, we have a lot of mapped pages we
1261 // never use.) Old kernels will see MCL_ONFAULT and fail with
1262 // EINVAL; we ignore this failure.
1263 //
1264 // N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
1265 // pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
1266 // in pages.
1267
1268 // CAP_IPC_LOCK required
1269 mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
1270 PLOG(WARNING) << "mlockall failed ";
1271 }
1272
1273 if (threadname) {
1274 pthread_attr_t attr;
1275
1276 if (!pthread_attr_init(&attr)) {
1277 sched_param param;
1278
1279 memset(&param, 0, sizeof(param));
1280 pthread_attr_setschedparam(&attr, &param);
1281 pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
1282 if (!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
1283 pthread_t thread;
1284 if (!pthread_create(&thread, &attr, llkThread, const_cast<char*>(threadname))) {
1285 // wait a second for thread to start
1286 for (auto retry = 50; retry && !llkRunning; --retry) {
1287 ::usleep(20000);
1288 }
1289 logConfig = !llkRunning; // printed in llkd context?
1290 } else {
1291 LOG(ERROR) << "failed to spawn llkd thread";
1292 }
1293 } else {
1294 LOG(ERROR) << "failed to detach llkd thread";
1295 }
1296 pthread_attr_destroy(&attr);
1297 } else {
1298 LOG(ERROR) << "failed to allocate attibutes for llkd thread";
1299 }
1300 }
1301 } else {
1302 LOG(DEBUG) << "[khungtaskd] left unconfigured";
1303 }
1304 if (logConfig) {
1305 llkLogConfig();
1306 }
1307
1308 return llkEnable;
1309}