blob: 0827470af701a757398c4a956c227bf418a4f1b1 [file] [log] [blame]
Mark Salyzynf089e142018-02-20 10:47:40 -08001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "llkd.h"
18
19#include <ctype.h>
20#include <dirent.h> // opendir() and readdir()
21#include <errno.h>
22#include <fcntl.h>
23#include <pthread.h>
24#include <pwd.h> // getpwuid()
25#include <signal.h>
26#include <stdint.h>
27#include <sys/cdefs.h> // ___STRING, __predict_true() and _predict_false()
28#include <sys/mman.h> // mlockall()
29#include <sys/prctl.h>
30#include <sys/stat.h> // lstat()
31#include <sys/syscall.h> // __NR_getdents64
32#include <sys/sysinfo.h> // get_nprocs_conf()
33#include <sys/types.h>
34#include <time.h>
35#include <unistd.h>
36
37#include <chrono>
38#include <ios>
39#include <sstream>
40#include <string>
41#include <unordered_map>
42#include <unordered_set>
43
44#include <android-base/file.h>
45#include <android-base/logging.h>
46#include <android-base/parseint.h>
47#include <android-base/properties.h>
48#include <android-base/strings.h>
49#include <cutils/android_get_control_file.h>
50#include <log/log_main.h>
51
52#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
53
54#define TASK_COMM_LEN 16 // internal kernel, not uapi, from .../linux/include/linux/sched.h
55
56using namespace std::chrono_literals;
57using namespace std::chrono;
Mark Salyzyn52e54a62018-08-07 08:13:13 -070058using namespace std::literals;
Mark Salyzynf089e142018-02-20 10:47:40 -080059
60namespace {
61
62constexpr pid_t kernelPid = 0;
63constexpr pid_t initPid = 1;
64constexpr pid_t kthreaddPid = 2;
65
66constexpr char procdir[] = "/proc/";
67
68// Configuration
69milliseconds llkUpdate; // last check ms signature
70milliseconds llkCycle; // ms to next thread check
71bool llkEnable = LLK_ENABLE_DEFAULT; // llk daemon enabled
72bool llkRunning = false; // thread is running
73bool llkMlockall = LLK_MLOCKALL_DEFAULT; // run mlocked
Mark Salyzynafd66f22018-03-19 15:16:29 -070074bool llkTestWithKill = LLK_KILLTEST_DEFAULT; // issue test kills
Mark Salyzynf089e142018-02-20 10:47:40 -080075milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT; // default timeout
Mark Salyzyn96505fa2018-08-07 08:13:13 -070076enum { // enum of state indexes, used to index llkStateTimeoutMs[]
77 llkStateD, // Persistent 'D' state
78 llkStateZ, // Persistent 'Z' state
79#ifdef __PTRACE_ENABLED__ // Extra privileged states
80 llkStateStack, // stack signature
81#endif // End of extra privilege
82 llkNumStates, // Maximum number of states
83}; // state indexes
Mark Salyzynf089e142018-02-20 10:47:40 -080084milliseconds llkStateTimeoutMs[llkNumStates]; // timeout override for each detection state
85milliseconds llkCheckMs; // checking interval to inspect any
86 // persistent live-locked states
87bool llkLowRam; // ro.config.low_ram
Mark Salyzynbd7c8562018-10-31 10:02:08 -070088bool llkEnableSysrqT = LLK_ENABLE_SYSRQ_T_DEFAULT; // sysrq stack trace dump
Mark Salyzynf089e142018-02-20 10:47:40 -080089bool khtEnable = LLK_ENABLE_DEFAULT; // [khungtaskd] panic
90// [khungtaskd] should have a timeout beyond the granularity of llkTimeoutMs.
91// Provides a wide angle of margin b/c khtTimeout is also its granularity.
92seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
93 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
Mark Salyzyn96505fa2018-08-07 08:13:13 -070094#ifdef __PTRACE_ENABLED__
95// list of stack symbols to search for persistence.
96std::unordered_set<std::string> llkCheckStackSymbols;
97#endif
Mark Salyzynf089e142018-02-20 10:47:40 -080098
99// Blacklist variables, initialized with comma separated lists of high false
100// positive and/or dangerous references, e.g. without self restart, for pid,
101// ppid, name and uid:
102
103// list of pids, or tids or names to skip. kernel pid (0), init pid (1),
104// [kthreadd] pid (2), ourselves, "init", "[kthreadd]", "lmkd", "llkd" or
105// combinations of watchdogd in kernel and user space.
106std::unordered_set<std::string> llkBlacklistProcess;
107// list of parent pids, comm or cmdline names to skip. default:
108// kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied
109std::unordered_set<std::string> llkBlacklistParent;
110// list of uids, and uid names, to skip, default nothing
111std::unordered_set<std::string> llkBlacklistUid;
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700112#ifdef __PTRACE_ENABLED__
113// list of names to skip stack checking. "init", "lmkd", "llkd", "keystore" or
114// "logd" (if not userdebug).
115std::unordered_set<std::string> llkBlacklistStack;
116#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800117
118class dir {
119 public:
120 enum level { proc, task, numLevels };
121
122 private:
123 int fd;
124 size_t available_bytes;
125 dirent* next;
126 // each directory level picked to be just north of 4K in size
127 static constexpr size_t buffEntries = 15;
128 static dirent buff[numLevels][buffEntries];
129
130 bool fill(enum level index) {
131 if (index >= numLevels) return false;
132 if (available_bytes != 0) return true;
133 if (__predict_false(fd < 0)) return false;
134 // getdents64 has no libc wrapper
135 auto rc = TEMP_FAILURE_RETRY(syscall(__NR_getdents64, fd, buff[index], sizeof(buff[0]), 0));
136 if (rc <= 0) return false;
137 available_bytes = rc;
138 next = buff[index];
139 return true;
140 }
141
142 public:
143 dir() : fd(-1), available_bytes(0), next(nullptr) {}
144
145 explicit dir(const char* directory)
146 : fd(__predict_true(directory != nullptr)
147 ? ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY)
148 : -1),
149 available_bytes(0),
150 next(nullptr) {}
151
152 explicit dir(const std::string&& directory)
153 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
154 available_bytes(0),
155 next(nullptr) {}
156
157 explicit dir(const std::string& directory)
158 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
159 available_bytes(0),
160 next(nullptr) {}
161
162 // Don't need any copy or move constructors.
163 explicit dir(const dir& c) = delete;
164 explicit dir(dir& c) = delete;
165 explicit dir(dir&& c) = delete;
166
167 ~dir() {
168 if (fd >= 0) {
169 ::close(fd);
170 }
171 }
172
173 operator bool() const { return fd >= 0; }
174
175 void reset(void) {
176 if (fd >= 0) {
177 ::close(fd);
178 fd = -1;
179 available_bytes = 0;
180 next = nullptr;
181 }
182 }
183
184 dir& reset(const char* directory) {
185 reset();
186 // available_bytes will _always_ be zero here as its value is
187 // intimately tied to fd < 0 or not.
188 fd = ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
189 return *this;
190 }
191
192 void rewind(void) {
193 if (fd >= 0) {
194 ::lseek(fd, off_t(0), SEEK_SET);
195 available_bytes = 0;
196 next = nullptr;
197 }
198 }
199
200 dirent* read(enum level index = proc, dirent* def = nullptr) {
201 if (!fill(index)) return def;
202 auto ret = next;
203 available_bytes -= next->d_reclen;
204 next = reinterpret_cast<dirent*>(reinterpret_cast<char*>(next) + next->d_reclen);
205 return ret;
206 }
207} llkTopDirectory;
208
209dirent dir::buff[dir::numLevels][dir::buffEntries];
210
211// helper functions
212
213bool llkIsMissingExeLink(pid_t tid) {
214 char c;
215 // CAP_SYS_PTRACE is required to prevent ret == -1, but ENOENT is signal
216 auto ret = ::readlink((procdir + std::to_string(tid) + "/exe").c_str(), &c, sizeof(c));
217 return (ret == -1) && (errno == ENOENT);
218}
219
220// Common routine where caller accepts empty content as error/passthrough.
221// Reduces the churn of reporting read errors in the callers.
222std::string ReadFile(std::string&& path) {
223 std::string content;
224 if (!android::base::ReadFileToString(path, &content)) {
225 PLOG(DEBUG) << "Read " << path << " failed";
226 content = "";
227 }
228 return content;
229}
230
231std::string llkProcGetName(pid_t tid, const char* node = "/cmdline") {
232 std::string content = ReadFile(procdir + std::to_string(tid) + node);
233 static constexpr char needles[] = " \t\r\n"; // including trailing nul
234 auto pos = content.find_first_of(needles, 0, sizeof(needles));
235 if (pos != std::string::npos) {
236 content.erase(pos);
237 }
238 return content;
239}
240
241uid_t llkProcGetUid(pid_t tid) {
242 // Get the process' uid. The following read from /status is admittedly
243 // racy, prone to corruption due to shape-changes. The consequences are
244 // not catastrophic as we sample a few times before taking action.
245 //
246 // If /loginuid worked on reliably, or on Android (all tasks report -1)...
247 // Android lmkd causes /cgroup to contain memory:/<dom>/uid_<uid>/pid_<pid>
248 // which is tighter, but also not reliable.
249 std::string content = ReadFile(procdir + std::to_string(tid) + "/status");
250 static constexpr char Uid[] = "\nUid:";
251 auto pos = content.find(Uid);
252 if (pos == std::string::npos) {
253 return -1;
254 }
255 pos += ::strlen(Uid);
256 while ((pos < content.size()) && ::isblank(content[pos])) {
257 ++pos;
258 }
259 content.erase(0, pos);
260 for (pos = 0; (pos < content.size()) && ::isdigit(content[pos]); ++pos) {
261 ;
262 }
263 // Content of form 'Uid: 0 0 0 0', newline is error
264 if ((pos >= content.size()) || !::isblank(content[pos])) {
265 return -1;
266 }
267 content.erase(pos);
268 uid_t ret;
Tom Cherrye0bc5a92018-10-05 14:29:47 -0700269 if (!android::base::ParseUint(content, &ret, uid_t(0))) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800270 return -1;
271 }
272 return ret;
273}
274
275struct proc {
276 pid_t tid; // monitored thread id (in Z or D state).
277 nanoseconds schedUpdate; // /proc/<tid>/sched "se.avg.lastUpdateTime",
278 uint64_t nrSwitches; // /proc/<tid>/sched "nr_switches" for
279 // refined ABA problem detection, determine
280 // forward scheduling progress.
281 milliseconds update; // llkUpdate millisecond signature of last.
282 milliseconds count; // duration in state.
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700283#ifdef __PTRACE_ENABLED__ // Privileged state checking
284 milliseconds count_stack; // duration where stack is stagnant.
285#endif // End privilege
Mark Salyzynf089e142018-02-20 10:47:40 -0800286 pid_t pid; // /proc/<pid> before iterating through
287 // /proc/<pid>/task/<tid> for threads.
288 pid_t ppid; // /proc/<tid>/stat field 4 parent pid.
289 uid_t uid; // /proc/<tid>/status Uid: field.
290 unsigned time; // sum of /proc/<tid>/stat field 14 utime &
291 // 15 stime for coarse ABA problem detection.
292 std::string cmdline; // cached /cmdline content
293 char state; // /proc/<tid>/stat field 3: Z or D
294 // (others we do not monitor: S, R, T or ?)
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700295#ifdef __PTRACE_ENABLED__ // Privileged state checking
296 char stack; // index in llkCheckStackSymbols for matches
297#endif // and with maximum index PROP_VALUE_MAX/2.
Mark Salyzynf089e142018-02-20 10:47:40 -0800298 char comm[TASK_COMM_LEN + 3]; // space for adding '[' and ']'
299 bool exeMissingValid; // exeMissing has been cached
300 bool cmdlineValid; // cmdline has been cached
301 bool updated; // cleared before monitoring pass.
302 bool killed; // sent a kill to this thread, next panic...
303
304 void setComm(const char* _comm) { strncpy(comm + 1, _comm, sizeof(comm) - 2); }
305
306 proc(pid_t tid, pid_t pid, pid_t ppid, const char* _comm, int time, char state)
307 : tid(tid),
308 schedUpdate(0),
309 nrSwitches(0),
310 update(llkUpdate),
Mark Salyzynacecaf72018-08-10 08:15:57 -0700311 count(0ms),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700312#ifdef __PTRACE_ENABLED__
313 count_stack(0ms),
314#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800315 pid(pid),
316 ppid(ppid),
317 uid(-1),
318 time(time),
319 state(state),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700320#ifdef __PTRACE_ENABLED__
321 stack(-1),
322#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800323 exeMissingValid(false),
324 cmdlineValid(false),
325 updated(true),
Mark Salyzynafd66f22018-03-19 15:16:29 -0700326 killed(!llkTestWithKill) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800327 memset(comm, '\0', sizeof(comm));
328 setComm(_comm);
329 }
330
331 const char* getComm(void) {
332 if (comm[1] == '\0') { // comm Valid?
333 strncpy(comm + 1, llkProcGetName(tid, "/comm").c_str(), sizeof(comm) - 2);
334 }
335 if (!exeMissingValid) {
336 if (llkIsMissingExeLink(tid)) {
337 comm[0] = '[';
338 }
339 exeMissingValid = true;
340 }
341 size_t len = strlen(comm + 1);
342 if (__predict_true(len < (sizeof(comm) - 1))) {
343 if (comm[0] == '[') {
344 if ((comm[len] != ']') && __predict_true(len < (sizeof(comm) - 2))) {
345 comm[++len] = ']';
346 comm[++len] = '\0';
347 }
348 } else {
349 if (comm[len] == ']') {
350 comm[len] = '\0';
351 }
352 }
353 }
354 return &comm[comm[0] != '['];
355 }
356
357 const char* getCmdline(void) {
358 if (!cmdlineValid) {
359 cmdline = llkProcGetName(tid);
360 cmdlineValid = true;
361 }
362 return cmdline.c_str();
363 }
364
365 uid_t getUid(void) {
366 if (uid <= 0) { // Churn on root user, because most likely to setuid()
367 uid = llkProcGetUid(tid);
368 }
369 return uid;
370 }
371
372 void reset(void) { // reset cache, if we detected pid rollover
373 uid = -1;
374 state = '?';
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700375#ifdef __PTRACE_ENABLED__
376 count_stack = 0ms;
377 stack = -1;
378#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800379 cmdline = "";
380 comm[0] = '\0';
381 exeMissingValid = false;
382 cmdlineValid = false;
383 }
384};
385
386std::unordered_map<pid_t, proc> tids;
387
388// Check range and setup defaults, in order of propagation:
389// llkTimeoutMs
390// llkCheckMs
391// ...
392// KISS to keep it all self-contained, and called multiple times as parameters
393// are interpreted so that defaults, llkCheckMs and llkCycle make sense.
394void llkValidate() {
395 if (llkTimeoutMs == 0ms) {
396 llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;
397 }
398 llkTimeoutMs = std::max(llkTimeoutMs, LLK_TIMEOUT_MS_MINIMUM);
399 if (llkCheckMs == 0ms) {
400 llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT;
401 }
402 llkCheckMs = std::min(llkCheckMs, llkTimeoutMs);
403
404 for (size_t state = 0; state < ARRAY_SIZE(llkStateTimeoutMs); ++state) {
405 if (llkStateTimeoutMs[state] == 0ms) {
406 llkStateTimeoutMs[state] = llkTimeoutMs;
407 }
408 llkStateTimeoutMs[state] =
409 std::min(std::max(llkStateTimeoutMs[state], LLK_TIMEOUT_MS_MINIMUM), llkTimeoutMs);
410 llkCheckMs = std::min(llkCheckMs, llkStateTimeoutMs[state]);
411 }
412
413 llkCheckMs = std::max(llkCheckMs, LLK_CHECK_MS_MINIMUM);
414 if (llkCycle == 0ms) {
415 llkCycle = llkCheckMs;
416 }
417 llkCycle = std::min(llkCycle, llkCheckMs);
418}
419
// Returns (*to - *from) in milliseconds.  The seconds and nanoseconds parts
// are each truncated to whole milliseconds before being summed.
std::chrono::milliseconds llkGetTimespecDiffMs(timespec* from, timespec* to) {
    const std::chrono::seconds wholeSeconds(to->tv_sec - from->tv_sec);
    const std::chrono::nanoseconds fraction(to->tv_nsec - from->tv_nsec);
    return std::chrono::duration_cast<std::chrono::milliseconds>(wholeSeconds) +
           std::chrono::duration_cast<std::chrono::milliseconds>(fraction);
}
424
425std::string llkProcGetName(pid_t tid, const char* comm, const char* cmdline) {
426 if ((cmdline != nullptr) && (*cmdline != '\0')) {
427 return cmdline;
428 }
429 if ((comm != nullptr) && (*comm != '\0')) {
430 return comm;
431 }
432
433 // UNLIKELY! Here because killed before we kill it?
434 // Assume change is afoot, do not call llkTidAlloc
435
436 // cmdline ?
437 std::string content = llkProcGetName(tid);
438 if (content.size() != 0) {
439 return content;
440 }
441 // Comm instead?
442 content = llkProcGetName(tid, "/comm");
443 if (llkIsMissingExeLink(tid) && (content.size() != 0)) {
444 return '[' + content + ']';
445 }
446 return content;
447}
448
449int llkKillOneProcess(pid_t pid, char state, pid_t tid, const char* tcomm = nullptr,
450 const char* tcmdline = nullptr, const char* pcomm = nullptr,
451 const char* pcmdline = nullptr) {
452 std::string forTid;
453 if (tid != pid) {
454 forTid = " for '" + llkProcGetName(tid, tcomm, tcmdline) + "' (" + std::to_string(tid) + ")";
455 }
456 LOG(INFO) << "Killing '" << llkProcGetName(pid, pcomm, pcmdline) << "' (" << pid
457 << ") to check forward scheduling progress in " << state << " state" << forTid;
458 // CAP_KILL required
459 errno = 0;
460 auto r = ::kill(pid, SIGKILL);
461 if (r) {
462 PLOG(ERROR) << "kill(" << pid << ")=" << r << ' ';
463 }
464
465 return r;
466}
467
468// Kill one process
469int llkKillOneProcess(pid_t pid, proc* tprocp) {
470 return llkKillOneProcess(pid, tprocp->state, tprocp->tid, tprocp->getComm(),
471 tprocp->getCmdline());
472}
473
474// Kill one process specified by kprocp
475int llkKillOneProcess(proc* kprocp, proc* tprocp) {
476 if (kprocp == nullptr) {
477 return -2;
478 }
479
480 return llkKillOneProcess(kprocp->tid, tprocp->state, tprocp->tid, tprocp->getComm(),
481 tprocp->getCmdline(), kprocp->getComm(), kprocp->getCmdline());
482}
483
484// Acquire file descriptor from environment, or open and cache it.
485// NB: cache is unnecessary in our current context, pedantically
486// required to prevent leakage of file descriptors in the future.
487int llkFileToWriteFd(const std::string& file) {
488 static std::unordered_map<std::string, int> cache;
489 auto search = cache.find(file);
490 if (search != cache.end()) return search->second;
491 auto fd = android_get_control_file(file.c_str());
492 if (fd >= 0) return fd;
493 fd = TEMP_FAILURE_RETRY(::open(file.c_str(), O_WRONLY | O_CLOEXEC));
494 if (fd >= 0) cache.emplace(std::make_pair(file, fd));
495 return fd;
496}
497
498// Wrap android::base::WriteStringToFile to use android_get_control_file.
499bool llkWriteStringToFile(const std::string& string, const std::string& file) {
500 auto fd = llkFileToWriteFd(file);
501 if (fd < 0) return false;
502 return android::base::WriteStringToFd(string, fd);
503}
504
505bool llkWriteStringToFileConfirm(const std::string& string, const std::string& file) {
506 auto fd = llkFileToWriteFd(file);
507 auto ret = (fd < 0) ? false : android::base::WriteStringToFd(string, fd);
508 std::string content;
509 if (!android::base::ReadFileToString(file, &content)) return ret;
510 return android::base::Trim(content) == string;
511}
512
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -0700513void llkPanicKernel(bool dump, pid_t tid, const char* state,
514 const std::string& message = "") __noreturn;
515void llkPanicKernel(bool dump, pid_t tid, const char* state, const std::string& message) {
516 if (!message.empty()) LOG(ERROR) << message;
Mark Salyzynf089e142018-02-20 10:47:40 -0800517 auto sysrqTriggerFd = llkFileToWriteFd("/proc/sysrq-trigger");
518 if (sysrqTriggerFd < 0) {
519 // DYB
520 llkKillOneProcess(initPid, 'R', tid);
521 // The answer to life, the universe and everything
522 ::exit(42);
523 // NOTREACHED
524 }
525 ::sync();
526 if (dump) {
527 // Show all locks that are held
528 android::base::WriteStringToFd("d", sysrqTriggerFd);
Mark Salyzyn53e782d2018-10-31 16:03:45 -0700529 // Show all waiting tasks
530 android::base::WriteStringToFd("w", sysrqTriggerFd);
Mark Salyzynf089e142018-02-20 10:47:40 -0800531 // This can trigger hardware watchdog, that is somewhat _ok_.
532 // But useless if pstore configured for <256KB, low ram devices ...
Mark Salyzynbd7c8562018-10-31 10:02:08 -0700533 if (llkEnableSysrqT) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800534 android::base::WriteStringToFd("t", sysrqTriggerFd);
Mark Salyzyn53e782d2018-10-31 16:03:45 -0700535 // Show all locks that are held (in case 't' overflows ramoops)
536 android::base::WriteStringToFd("d", sysrqTriggerFd);
537 // Show all waiting tasks (in case 't' overflows ramoops)
538 android::base::WriteStringToFd("w", sysrqTriggerFd);
Mark Salyzynf089e142018-02-20 10:47:40 -0800539 }
540 ::usleep(200000); // let everything settle
541 }
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -0700542 // SysRq message matches kernel format, and propagates through bootstat
543 // ultimately to the boot reason into panic,livelock,<state>.
544 llkWriteStringToFile(message + (message.empty() ? "" : "\n") +
545 "SysRq : Trigger a crash : 'livelock,"s + state + "'\n",
546 "/dev/kmsg");
Mark Salyzynf089e142018-02-20 10:47:40 -0800547 android::base::WriteStringToFd("c", sysrqTriggerFd);
548 // NOTREACHED
549 // DYB
550 llkKillOneProcess(initPid, 'R', tid);
551 // I sat at my desk, stared into the garden and thought '42 will do'.
552 // I typed it out. End of story
553 ::exit(42);
554 // NOTREACHED
555}
556
557void llkAlarmHandler(int) {
Mark Salyzynafd66f22018-03-19 15:16:29 -0700558 llkPanicKernel(false, ::getpid(), "alarm");
Mark Salyzynf089e142018-02-20 10:47:40 -0800559}
560
561milliseconds GetUintProperty(const std::string& key, milliseconds def) {
562 return milliseconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
563 static_cast<uint64_t>(def.max().count())));
564}
565
566seconds GetUintProperty(const std::string& key, seconds def) {
567 return seconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
568 static_cast<uint64_t>(def.max().count())));
569}
570
571proc* llkTidLookup(pid_t tid) {
572 auto search = tids.find(tid);
573 if (search == tids.end()) {
574 return nullptr;
575 }
576 return &search->second;
577}
578
579void llkTidRemove(pid_t tid) {
580 tids.erase(tid);
581}
582
583proc* llkTidAlloc(pid_t tid, pid_t pid, pid_t ppid, const char* comm, int time, char state) {
584 auto it = tids.emplace(std::make_pair(tid, proc(tid, pid, ppid, comm, time, state)));
585 return &it.first->second;
586}
587
// Formats a millisecond duration as "<seconds>.<mmm>s", the fraction being
// left-padded with zeros to three characters.
std::string llkFormat(std::chrono::milliseconds ms) {
    const auto sec = std::chrono::duration_cast<std::chrono::seconds>(ms);
    std::string fraction = std::to_string((ms - sec).count());
    if (fraction.size() < 3) {
        fraction.insert(0, 3 - fraction.size(), '0');
    }
    return std::to_string(sec.count()) + '.' + fraction + 's';
}
600
// Formats a whole-second duration as "<count>s".
std::string llkFormat(std::chrono::seconds s) {
    std::string text = std::to_string(s.count());
    text.push_back('s');
    return text;
}
604
// Formats a flag as the word "true" or "false".
std::string llkFormat(bool flag) {
    if (flag) {
        return "true";
    }
    return "false";
}
608
// Joins the entries of a blacklist with commas, in the set's iteration
// order (which is unspecified).  An empty set gives an empty string.
std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
    std::string ret;
    // Iterate by reference: taking each entry by value copied every string.
    for (const auto& entry : blacklist) {
        if (!ret.empty()) {
            ret += ",";
        }
        ret += entry;
    }
    return ret;
}
619
// Splits a list into a set of non-empty tokens.
// We only officially support comma separators, but wetware being what they
// are will take some liberty and I do not believe they should be punished:
// blank, tab and colon separate entries as well.
std::unordered_set<std::string> llkSplit(const std::string& s) {
    std::unordered_set<std::string> tokens;

    // Special case, allow boolean false to empty the list, otherwise expected
    // source of input from android::base::GetProperty will supply the default
    // value on empty content in the property.
    if (s == "false") return tokens;

    for (size_t start = 0; start < s.size();) {
        const auto stop = s.find_first_of(", \t:", start);
        // Only emplace content, empty entries are not an option
        if (stop != start) tokens.emplace(s.substr(start, stop - start));
        if (stop == std::string::npos) break;
        start = stop + 1;
    }
    return tokens;
}
640
641bool llkSkipName(const std::string& name,
642 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
643 if ((name.size() == 0) || (blacklist.size() == 0)) {
644 return false;
645 }
646
647 return blacklist.find(name) != blacklist.end();
648}
649
650bool llkSkipPid(pid_t pid) {
651 return llkSkipName(std::to_string(pid), llkBlacklistProcess);
652}
653
654bool llkSkipPpid(pid_t ppid) {
655 return llkSkipName(std::to_string(ppid), llkBlacklistParent);
656}
657
658bool llkSkipUid(uid_t uid) {
659 // Match by number?
660 if (llkSkipName(std::to_string(uid), llkBlacklistUid)) {
661 return true;
662 }
663
664 // Match by name?
665 auto pwd = ::getpwuid(uid);
666 return (pwd != nullptr) && __predict_true(pwd->pw_name != nullptr) &&
667 __predict_true(pwd->pw_name[0] != '\0') && llkSkipName(pwd->pw_name, llkBlacklistUid);
668}
669
670bool getValidTidDir(dirent* dp, std::string* piddir) {
671 if (!::isdigit(dp->d_name[0])) {
672 return false;
673 }
674
675 // Corner case can not happen in reality b/c of above ::isdigit check
676 if (__predict_false(dp->d_type != DT_DIR)) {
677 if (__predict_false(dp->d_type == DT_UNKNOWN)) { // can't b/c procfs
678 struct stat st;
679 *piddir = procdir;
680 *piddir += dp->d_name;
681 return (lstat(piddir->c_str(), &st) == 0) && (st.st_mode & S_IFDIR);
682 }
683 return false;
684 }
685
686 *piddir = procdir;
687 *piddir += dp->d_name;
688 return true;
689}
690
// True for the two task states that are monitored: 'Z' and 'D'.
bool llkIsMonitorState(char state) {
    switch (state) {
        case 'Z':
        case 'D':
            return true;
        default:
            return false;
    }
}
694
695// returns -1 if not found
696long long getSchedValue(const std::string& schedString, const char* key) {
697 auto pos = schedString.find(key);
698 if (pos == std::string::npos) {
699 return -1;
700 }
701 pos = schedString.find(':', pos);
702 if (__predict_false(pos == std::string::npos)) {
703 return -1;
704 }
705 while ((++pos < schedString.size()) && ::isblank(schedString[pos])) {
706 ;
707 }
708 long long ret;
709 if (!android::base::ParseInt(schedString.substr(pos), &ret, static_cast<long long>(0))) {
710 return -1;
711 }
712 return ret;
713}
714
#ifdef __PTRACE_ENABLED__
// Looks for one of llkCheckStackSymbols in /proc/.../stack of the given task
// and accumulates how long the same symbol has been matching.  Returns true
// once that duration reaches llkStateTimeoutMs[llkStateStack].
bool llkCheckStack(proc* procp, const std::string& piddir) {
    if (llkCheckStackSymbols.empty()) return false;

    if (procp->state == 'Z') {  // No brains for Zombies
        procp->stack = -1;
        procp->count_stack = 0ms;
        return false;
    }

    // Don't check process that are known to block ptrace, save sepolicy noise.
    if (llkSkipName(std::to_string(procp->pid), llkBlacklistStack) ||
        llkSkipName(procp->getComm(), llkBlacklistStack) ||
        llkSkipName(procp->getCmdline(), llkBlacklistStack) ||
        llkSkipName(android::base::Basename(procp->getCmdline()), llkBlacklistStack)) {
        return false;
    }

    const auto kernel_stack = ReadFile(piddir + "/stack");
    if (kernel_stack.empty()) {
        LOG(INFO) << piddir << "/stack empty comm=" << procp->getComm()
                  << " cmdline=" << procp->getCmdline();
        return false;
    }
    // A scheduling incident that should not reset count_stack
    if (kernel_stack.find(" cpu_worker_pools+0x") != std::string::npos) return false;

    // Position, in iteration order, of the first symbol found in the stack;
    // stays at -1 when none is.
    char idx = -1;
    char match = -1;
    for (const auto& symbol : llkCheckStackSymbols) {
        if (++idx < 0) break;  // index no longer fits in a char
        const bool plain = kernel_stack.find(" "s + symbol + "+0x") != std::string::npos;
        if (plain || (kernel_stack.find(" "s + symbol + ".cfi+0x") != std::string::npos)) {
            match = idx;
            break;
        }
    }

    // A different result than last time restarts the stagnation clock.
    if (procp->stack != match) {
        procp->stack = match;
        procp->count_stack = 0ms;
        return false;
    }
    if (match == char(-1)) return false;

    procp->count_stack += llkCycle;
    return procp->count_stack >= llkStateTimeoutMs[llkStateStack];
}
#endif
758
Mark Salyzynf089e142018-02-20 10:47:40 -0800759// Primary ABA mitigation watching last time schedule activity happened
760void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
761 // Audit finds /proc/<tid>/sched is just over 1K, and
762 // is rarely larger than 2K, even less on Android.
763 // For example, the "se.avg.lastUpdateTime" field we are
764 // interested in typically within the primary set in
765 // the first 1K.
766 //
767 // Proc entries can not be read >1K atomically via libbase,
768 // but if there are problems we assume at least a few
769 // samples of reads occur before we take any real action.
770 std::string schedString = ReadFile(piddir + "/sched");
771 if (schedString.size() == 0) {
772 // /schedstat is not as standardized, but in 3.1+
773 // Android devices, the third field is nr_switches
774 // from /sched:
775 schedString = ReadFile(piddir + "/schedstat");
776 if (schedString.size() == 0) {
777 return;
778 }
779 auto val = static_cast<unsigned long long>(-1);
780 if (((::sscanf(schedString.c_str(), "%*d %*d %llu", &val)) == 1) &&
781 (val != static_cast<unsigned long long>(-1)) && (val != 0) &&
782 (val != procp->nrSwitches)) {
783 procp->nrSwitches = val;
784 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700785 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800786 }
787 return;
788 }
789
790 auto val = getSchedValue(schedString, "\nse.avg.lastUpdateTime");
791 if (val == -1) {
792 val = getSchedValue(schedString, "\nse.svg.last_update_time");
793 }
794 if (val != -1) {
795 auto schedUpdate = nanoseconds(val);
796 if (schedUpdate != procp->schedUpdate) {
797 procp->schedUpdate = schedUpdate;
798 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700799 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800800 }
801 }
802
803 val = getSchedValue(schedString, "\nnr_switches");
804 if (val != -1) {
805 if (static_cast<uint64_t>(val) != procp->nrSwitches) {
806 procp->nrSwitches = val;
807 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700808 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800809 }
810 }
811}
812
813void llkLogConfig(void) {
814 LOG(INFO) << "ro.config.low_ram=" << llkFormat(llkLowRam) << "\n"
Mark Salyzynbd7c8562018-10-31 10:02:08 -0700815 << LLK_ENABLE_SYSRQ_T_PROPERTY "=" << llkFormat(llkEnableSysrqT) << "\n"
Mark Salyzynf089e142018-02-20 10:47:40 -0800816 << LLK_ENABLE_PROPERTY "=" << llkFormat(llkEnable) << "\n"
817 << KHT_ENABLE_PROPERTY "=" << llkFormat(khtEnable) << "\n"
818 << LLK_MLOCKALL_PROPERTY "=" << llkFormat(llkMlockall) << "\n"
Mark Salyzynafd66f22018-03-19 15:16:29 -0700819 << LLK_KILLTEST_PROPERTY "=" << llkFormat(llkTestWithKill) << "\n"
Mark Salyzynf089e142018-02-20 10:47:40 -0800820 << KHT_TIMEOUT_PROPERTY "=" << llkFormat(khtTimeout) << "\n"
821 << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n"
822 << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n"
823 << LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n"
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700824#ifdef __PTRACE_ENABLED__
825 << LLK_STACK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateStack])
826 << "\n"
827#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800828 << LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n"
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700829#ifdef __PTRACE_ENABLED__
830 << LLK_CHECK_STACK_PROPERTY "=" << llkFormat(llkCheckStackSymbols) << "\n"
831 << LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
832#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800833 << LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
834 << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
835 << LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
836}
837
838void* llkThread(void* obj) {
Mark Salyzyn4832a8b2018-08-15 11:02:18 -0700839 prctl(PR_SET_DUMPABLE, 0);
840
Mark Salyzynf089e142018-02-20 10:47:40 -0800841 LOG(INFO) << "started";
842
843 std::string name = std::to_string(::gettid());
844 if (!llkSkipName(name)) {
845 llkBlacklistProcess.emplace(name);
846 }
847 name = static_cast<const char*>(obj);
848 prctl(PR_SET_NAME, name.c_str());
849 if (__predict_false(!llkSkipName(name))) {
850 llkBlacklistProcess.insert(name);
851 }
852 // No longer modifying llkBlacklistProcess.
853 llkRunning = true;
854 llkLogConfig();
855 while (llkRunning) {
856 ::usleep(duration_cast<microseconds>(llkCheck(true)).count());
857 }
858 // NOTREACHED
859 LOG(INFO) << "exiting";
860 return nullptr;
861}
862
863} // namespace
864
865milliseconds llkCheck(bool checkRunning) {
866 if (!llkEnable || (checkRunning != llkRunning)) {
867 return milliseconds::max();
868 }
869
870 // Reset internal watchdog, which is a healthy engineering margin of
871 // double the maximum wait or cycle time for the mainloop that calls us.
872 //
873 // This alarm is effectively the live lock detection of llkd, as
874 // we understandably can not monitor ourselves otherwise.
875 ::alarm(duration_cast<seconds>(llkTimeoutMs * 2).count());
876
877 // kernel jiffy precision fastest acquisition
878 static timespec last;
879 timespec now;
880 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
881 auto ms = llkGetTimespecDiffMs(&last, &now);
882 if (ms < llkCycle) {
883 return llkCycle - ms;
884 }
885 last = now;
886
887 LOG(VERBOSE) << "opendir(\"" << procdir << "\")";
888 if (__predict_false(!llkTopDirectory)) {
889 // gid containing AID_READPROC required
890 llkTopDirectory.reset(procdir);
891 if (__predict_false(!llkTopDirectory)) {
892 // Most likely reason we could be here is a resource limit.
893 // Keep our processing down to a minimum, but not so low that
894 // we do not recover in a timely manner should the issue be
895 // transitory.
896 LOG(DEBUG) << "opendir(\"" << procdir << "\") failed";
897 return llkTimeoutMs;
898 }
899 }
900
901 for (auto& it : tids) {
902 it.second.updated = false;
903 }
904
905 auto prevUpdate = llkUpdate;
906 llkUpdate += ms;
907 ms -= llkCycle;
908 auto myPid = ::getpid();
909 auto myTid = ::gettid();
910 for (auto dp = llkTopDirectory.read(); dp != nullptr; dp = llkTopDirectory.read()) {
911 std::string piddir;
912
913 if (!getValidTidDir(dp, &piddir)) {
914 continue;
915 }
916
917 // Get the process tasks
918 std::string taskdir = piddir + "/task/";
919 int pid = -1;
920 LOG(VERBOSE) << "+opendir(\"" << taskdir << "\")";
921 dir taskDirectory(taskdir);
922 if (__predict_false(!taskDirectory)) {
923 LOG(DEBUG) << "+opendir(\"" << taskdir << "\") failed";
924 }
925 for (auto tp = taskDirectory.read(dir::task, dp); tp != nullptr;
926 tp = taskDirectory.read(dir::task)) {
927 if (!getValidTidDir(tp, &piddir)) {
928 continue;
929 }
930
931 // Get the process stat
932 std::string stat = ReadFile(piddir + "/stat");
933 if (stat.size() == 0) {
934 continue;
935 }
936 unsigned tid = -1;
937 char pdir[TASK_COMM_LEN + 1];
938 char state = '?';
939 unsigned ppid = -1;
940 unsigned utime = -1;
941 unsigned stime = -1;
942 int dummy;
943 pdir[0] = '\0';
944 // tid should not change value
945 auto match = ::sscanf(
946 stat.c_str(),
947 "%u (%" ___STRING(
948 TASK_COMM_LEN) "[^)]) %c %u %*d %*d %*d %*d %*d %*d %*d %*d %*d %u %u %d",
949 &tid, pdir, &state, &ppid, &utime, &stime, &dummy);
950 if (pid == -1) {
951 pid = tid;
952 }
953 LOG(VERBOSE) << "match " << match << ' ' << tid << " (" << pdir << ") " << state << ' '
954 << ppid << " ... " << utime << ' ' << stime << ' ' << dummy;
955 if (match != 7) {
956 continue;
957 }
958
959 auto procp = llkTidLookup(tid);
960 if (procp == nullptr) {
961 procp = llkTidAlloc(tid, pid, ppid, pdir, utime + stime, state);
962 } else {
963 // comm can change ...
964 procp->setComm(pdir);
965 procp->updated = true;
966 // pid/ppid/tid wrap?
967 if (((procp->update != prevUpdate) && (procp->update != llkUpdate)) ||
968 (procp->ppid != ppid) || (procp->pid != pid)) {
969 procp->reset();
970 } else if (procp->time != (utime + stime)) { // secondary ABA.
971 // watching utime+stime granularity jiffy
972 procp->state = '?';
973 }
974 procp->update = llkUpdate;
975 procp->pid = pid;
976 procp->ppid = ppid;
977 procp->time = utime + stime;
978 if (procp->state != state) {
979 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700980 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800981 procp->state = state;
982 } else {
983 procp->count += llkCycle;
984 }
985 }
986
987 // Filter checks in intuitive order of CPU cost to evaluate
988 // If tid unique continue, if ppid or pid unique break
989
990 if (pid == myPid) {
991 break;
992 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700993#ifdef __PTRACE_ENABLED__
994 // if no stack monitoring, we can quickly exit here
995 if (!llkIsMonitorState(state) && llkCheckStackSymbols.empty()) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800996 continue;
997 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700998#else
999 if (!llkIsMonitorState(state)) continue;
1000#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001001 if ((tid == myTid) || llkSkipPid(tid)) {
1002 continue;
1003 }
1004 if (llkSkipPpid(ppid)) {
1005 break;
1006 }
1007
1008 if (llkSkipName(procp->getComm())) {
1009 continue;
1010 }
1011 if (llkSkipName(procp->getCmdline())) {
1012 break;
1013 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001014 if (llkSkipName(android::base::Basename(procp->getCmdline()))) {
1015 break;
1016 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001017
1018 auto pprocp = llkTidLookup(ppid);
1019 if (pprocp == nullptr) {
1020 pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
1021 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001022 if ((pprocp != nullptr) &&
1023 (llkSkipName(pprocp->getComm(), llkBlacklistParent) ||
1024 llkSkipName(pprocp->getCmdline(), llkBlacklistParent) ||
1025 llkSkipName(android::base::Basename(pprocp->getCmdline()), llkBlacklistParent))) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001026 break;
1027 }
1028
1029 if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) {
1030 continue;
1031 }
1032
1033 // ABA mitigation watching last time schedule activity happened
1034 llkCheckSchedUpdate(procp, piddir);
1035
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001036#ifdef __PTRACE_ENABLED__
1037 auto stuck = llkCheckStack(procp, piddir);
1038 if (llkIsMonitorState(state)) {
1039 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1040 stuck = true;
1041 } else if (procp->count != 0ms) {
1042 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1043 << pid << "->" << tid << ' ' << procp->getComm();
1044 }
1045 }
1046 if (!stuck) continue;
1047#else
1048 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1049 if (procp->count != 0ms) {
1050 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1051 << pid << "->" << tid << ' ' << procp->getComm();
1052 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001053 continue;
1054 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001055#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001056
1057 // We have to kill it to determine difference between live lock
1058 // and persistent state blocked on a resource. Is there something
1059 // wrong with a process that has no forward scheduling progress in
1060 // Z or D? Yes, generally means improper accounting in the
1061 // process, but not always ...
1062 //
1063 // Whomever we hit with a test kill must accept the Android
1064 // Aphorism that everything can be burned to the ground and
1065 // must survive.
1066 if (procp->killed == false) {
1067 procp->killed = true;
1068 // confirm: re-read uid before committing to a panic.
1069 procp->uid = -1;
1070 switch (state) {
1071 case 'Z': // kill ppid to free up a Zombie
1072 // Killing init will kernel panic without diagnostics
1073 // so skip right to controlled kernel panic with
1074 // diagnostics.
1075 if (ppid == initPid) {
1076 break;
1077 }
1078 LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
1079 << pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
1080 if ((llkKillOneProcess(pprocp, procp) >= 0) ||
1081 (llkKillOneProcess(ppid, procp) >= 0)) {
1082 continue;
1083 }
1084 break;
1085
1086 case 'D': // kill tid to free up an uninterruptible D
1087 // If ABA is doing its job, we would not need or
1088 // want the following. Test kill is a Hail Mary
1089 // to make absolutely sure there is no forward
1090 // scheduling progress. The cost when ABA is
1091 // not working is we kill a process that likes to
1092 // stay in 'D' state, instead of panicing the
1093 // kernel (worse).
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001094 default:
1095 LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
1096 << "->" << tid << ' ' << procp->getComm() << " [kill]";
Mark Salyzynf089e142018-02-20 10:47:40 -08001097 if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001098 (llkKillOneProcess(pid, state, tid) >= 0) ||
Mark Salyzynf089e142018-02-20 10:47:40 -08001099 (llkKillOneProcess(procp, procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001100 (llkKillOneProcess(tid, state, tid) >= 0)) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001101 continue;
1102 }
1103 break;
1104 }
1105 }
1106 // We are here because we have confirmed kernel live-lock
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -07001107 const auto message = state + " "s + llkFormat(procp->count) + " " +
1108 std::to_string(ppid) + "->" + std::to_string(pid) + "->" +
1109 std::to_string(tid) + " " + procp->getComm() + " [panic]";
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001110 llkPanicKernel(true, tid,
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -07001111 (state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping",
1112 message);
Mark Salyzynf089e142018-02-20 10:47:40 -08001113 }
1114 LOG(VERBOSE) << "+closedir()";
1115 }
1116 llkTopDirectory.rewind();
1117 LOG(VERBOSE) << "closedir()";
1118
1119 // garbage collection of old process references
1120 for (auto p = tids.begin(); p != tids.end();) {
1121 if (!p->second.updated) {
1122 IF_ALOG(LOG_VERBOSE, LOG_TAG) {
1123 std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr);
1124 if (ppidCmdline.size()) {
1125 ppidCmdline = "(" + ppidCmdline + ")";
1126 }
1127 std::string pidCmdline;
1128 if (p->second.pid != p->second.tid) {
1129 pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline());
1130 if (pidCmdline.size()) {
1131 pidCmdline = "(" + pidCmdline + ")";
1132 }
1133 }
1134 std::string tidCmdline =
1135 llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline());
1136 if (tidCmdline.size()) {
1137 tidCmdline = "(" + tidCmdline + ")";
1138 }
1139 LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid
1140 << pidCmdline << "->" << p->second.tid << tidCmdline << " removed";
1141 }
1142 p = tids.erase(p);
1143 } else {
1144 ++p;
1145 }
1146 }
1147 if (__predict_false(tids.empty())) {
1148 llkTopDirectory.reset();
1149 }
1150
1151 llkCycle = llkCheckMs;
1152
1153 timespec end;
1154 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &end);
1155 auto milli = llkGetTimespecDiffMs(&now, &end);
1156 LOG((milli > 10s) ? ERROR : (milli > 1s) ? WARNING : VERBOSE) << "sample " << llkFormat(milli);
1157
1158 // cap to minimum sleep for 1 second since last cycle
1159 if (llkCycle < (ms + 1s)) {
1160 return 1s;
1161 }
1162 return llkCycle - ms;
1163}
1164
1165unsigned llkCheckMilliseconds() {
1166 return duration_cast<milliseconds>(llkCheck()).count();
1167}
1168
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001169bool llkCheckEng(const std::string& property) {
1170 return android::base::GetProperty(property, "eng") == "eng";
1171}
1172
Mark Salyzynf089e142018-02-20 10:47:40 -08001173bool llkInit(const char* threadname) {
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001174 auto debuggable = android::base::GetBoolProperty("ro.debuggable", false);
Mark Salyzynf089e142018-02-20 10:47:40 -08001175 llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false);
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001176 llkEnableSysrqT &= !llkLowRam;
1177 if (debuggable) {
1178 llkEnableSysrqT |= llkCheckEng(LLK_ENABLE_SYSRQ_T_PROPERTY);
1179 if (!LLK_ENABLE_DEFAULT) { // NB: default is currently true ...
1180 llkEnable |= llkCheckEng(LLK_ENABLE_PROPERTY);
1181 khtEnable |= llkCheckEng(KHT_ENABLE_PROPERTY);
1182 }
Mark Salyzynd035dbb2018-03-26 08:23:00 -07001183 }
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001184 llkEnableSysrqT = android::base::GetBoolProperty(LLK_ENABLE_SYSRQ_T_PROPERTY, llkEnableSysrqT);
Mark Salyzynf089e142018-02-20 10:47:40 -08001185 llkEnable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, llkEnable);
1186 if (llkEnable && !llkTopDirectory.reset(procdir)) {
1187 // Most likely reason we could be here is llkd was started
1188 // incorrectly without the readproc permissions. Keep our
1189 // processing down to a minimum.
1190 llkEnable = false;
1191 }
1192 khtEnable = android::base::GetBoolProperty(KHT_ENABLE_PROPERTY, khtEnable);
1193 llkMlockall = android::base::GetBoolProperty(LLK_MLOCKALL_PROPERTY, llkMlockall);
Mark Salyzynafd66f22018-03-19 15:16:29 -07001194 llkTestWithKill = android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, llkTestWithKill);
Mark Salyzynf089e142018-02-20 10:47:40 -08001195 // if LLK_TIMOUT_MS_PROPERTY was not set, we will use a set
1196 // KHT_TIMEOUT_PROPERTY as co-operative guidance for the default value.
1197 khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout);
1198 if (khtTimeout == 0s) {
1199 khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
1200 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1201 }
1202 llkTimeoutMs =
1203 khtTimeout * LLK_CHECKS_PER_TIMEOUT_DEFAULT / (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1204 llkTimeoutMs = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1205 llkValidate(); // validate llkTimeoutMs, llkCheckMs and llkCycle
1206 llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1207 llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001208#ifdef __PTRACE_ENABLED__
1209 llkStateTimeoutMs[llkStateStack] = GetUintProperty(LLK_STACK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1210#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001211 llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs);
1212 llkValidate(); // validate all (effectively minus llkTimeoutMs)
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001213#ifdef __PTRACE_ENABLED__
1214 if (debuggable) {
1215 llkCheckStackSymbols = llkSplit(
1216 android::base::GetProperty(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT));
1217 }
1218 std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT);
1219 if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd";
1220 llkBlacklistStack = llkSplit(
1221 android::base::GetProperty(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack));
1222#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001223 std::string defaultBlacklistProcess(
1224 std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
1225 std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," +
1226 std::to_string(::gettid()) + "," LLK_BLACKLIST_PROCESS_DEFAULT);
1227 if (threadname) {
Mark Salyzyn52e54a62018-08-07 08:13:13 -07001228 defaultBlacklistProcess += ","s + threadname;
Mark Salyzynf089e142018-02-20 10:47:40 -08001229 }
1230 for (int cpu = 1; cpu < get_nprocs_conf(); ++cpu) {
1231 defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]";
1232 }
1233 defaultBlacklistProcess =
1234 android::base::GetProperty(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
1235 llkBlacklistProcess = llkSplit(defaultBlacklistProcess);
1236 if (!llkSkipName("[khungtaskd]")) { // ALWAYS ignore as special
1237 llkBlacklistProcess.emplace("[khungtaskd]");
1238 }
1239 llkBlacklistParent = llkSplit(android::base::GetProperty(
1240 LLK_BLACKLIST_PARENT_PROPERTY, std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
1241 "," LLK_BLACKLIST_PARENT_DEFAULT));
1242 llkBlacklistUid =
1243 llkSplit(android::base::GetProperty(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT));
1244
1245 // internal watchdog
1246 ::signal(SIGALRM, llkAlarmHandler);
1247
1248 // kernel hung task configuration? Otherwise leave it as-is
1249 if (khtEnable) {
1250 // EUID must be AID_ROOT to write to /proc/sys/kernel/ nodes, there
1251 // are no capability overrides. For security reasons we do not want
1252 // to run as AID_ROOT. We may not be able to write them successfully,
1253 // we will try, but the least we can do is read the values back to
1254 // confirm expectations and report whether configured or not.
1255 auto configured = llkWriteStringToFileConfirm(std::to_string(khtTimeout.count()),
1256 "/proc/sys/kernel/hung_task_timeout_secs");
1257 if (configured) {
1258 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_warnings");
1259 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_check_count");
1260 configured = llkWriteStringToFileConfirm("1", "/proc/sys/kernel/hung_task_panic");
1261 }
1262 if (configured) {
1263 LOG(INFO) << "[khungtaskd] configured";
1264 } else {
1265 LOG(WARNING) << "[khungtaskd] not configurable";
1266 }
1267 }
1268
1269 bool logConfig = true;
1270 if (llkEnable) {
1271 if (llkMlockall &&
1272 // MCL_ONFAULT pins pages as they fault instead of loading
1273 // everything immediately all at once. (Which would be bad,
1274 // because as of this writing, we have a lot of mapped pages we
1275 // never use.) Old kernels will see MCL_ONFAULT and fail with
1276 // EINVAL; we ignore this failure.
1277 //
1278 // N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
1279 // pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
1280 // in pages.
1281
1282 // CAP_IPC_LOCK required
1283 mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
1284 PLOG(WARNING) << "mlockall failed ";
1285 }
1286
1287 if (threadname) {
1288 pthread_attr_t attr;
1289
1290 if (!pthread_attr_init(&attr)) {
1291 sched_param param;
1292
1293 memset(&param, 0, sizeof(param));
1294 pthread_attr_setschedparam(&attr, &param);
1295 pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
1296 if (!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
1297 pthread_t thread;
1298 if (!pthread_create(&thread, &attr, llkThread, const_cast<char*>(threadname))) {
1299 // wait a second for thread to start
1300 for (auto retry = 50; retry && !llkRunning; --retry) {
1301 ::usleep(20000);
1302 }
1303 logConfig = !llkRunning; // printed in llkd context?
1304 } else {
1305 LOG(ERROR) << "failed to spawn llkd thread";
1306 }
1307 } else {
1308 LOG(ERROR) << "failed to detach llkd thread";
1309 }
1310 pthread_attr_destroy(&attr);
1311 } else {
1312 LOG(ERROR) << "failed to allocate attibutes for llkd thread";
1313 }
1314 }
1315 } else {
1316 LOG(DEBUG) << "[khungtaskd] left unconfigured";
1317 }
1318 if (logConfig) {
1319 llkLogConfig();
1320 }
1321
1322 return llkEnable;
1323}