blob: 427dacef145de828f50d53f7a4fde8857d26eac8 [file] [log] [blame]
Mark Salyzynf089e142018-02-20 10:47:40 -08001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "llkd.h"
18
19#include <ctype.h>
20#include <dirent.h> // opendir() and readdir()
21#include <errno.h>
22#include <fcntl.h>
23#include <pthread.h>
24#include <pwd.h> // getpwuid()
25#include <signal.h>
26#include <stdint.h>
27#include <sys/cdefs.h> // ___STRING, __predict_true() and _predict_false()
28#include <sys/mman.h> // mlockall()
29#include <sys/prctl.h>
30#include <sys/stat.h> // lstat()
31#include <sys/syscall.h> // __NR_getdents64
32#include <sys/sysinfo.h> // get_nprocs_conf()
33#include <sys/types.h>
34#include <time.h>
35#include <unistd.h>
36
37#include <chrono>
38#include <ios>
39#include <sstream>
40#include <string>
41#include <unordered_map>
42#include <unordered_set>
43
44#include <android-base/file.h>
45#include <android-base/logging.h>
46#include <android-base/parseint.h>
47#include <android-base/properties.h>
48#include <android-base/strings.h>
49#include <cutils/android_get_control_file.h>
50#include <log/log_main.h>
51
// Element count of a true array; must not be used on a pointer.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))

// Used below to size the per-thread comm buffer in struct proc.
#define TASK_COMM_LEN 16  // internal kernel, not uapi, from .../linux/include/linux/sched.h

// Duration types (milliseconds, seconds, ...) and the ms / s literal suffixes
// are used unqualified throughout this file.
using namespace std::chrono_literals;
using namespace std::chrono;
using namespace std::literals;
Mark Salyzynf089e142018-02-20 10:47:40 -080059
60namespace {
61
// Well-known process ids referenced by the blacklist defaults and panic path.
constexpr pid_t kernelPid = 0;    // kernel pid
constexpr pid_t initPid = 1;      // init pid
constexpr pid_t kthreaddPid = 2;  // [kthreadd] pid

// Root of procfs; keeps its trailing slash so a pid/tid can be appended directly.
constexpr char procdir[] = "/proc/";
67
// Configuration
milliseconds llkUpdate;                              // last check ms signature
milliseconds llkCycle;                               // ms to next thread check
bool llkEnable = LLK_ENABLE_DEFAULT;                 // llk daemon enabled
bool llkRunning = false;                             // thread is running
bool llkMlockall = LLK_MLOCKALL_DEFAULT;             // run mlocked
bool llkTestWithKill = LLK_KILLTEST_DEFAULT;         // issue test kills
milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;  // default timeout
enum {                                               // enum of state indexes
    llkStateD,                                       // Persistent 'D' state
    llkStateZ,                                       // Persistent 'Z' state
#ifdef __PTRACE_ENABLED__                            // Extra privileged states
    llkStateStack,                                   // stack signature
#endif                                               // End of extra privilege
    llkNumStates,                                    // Number of states (array size)
};                                                   // state indexes
milliseconds llkStateTimeoutMs[llkNumStates];        // timeout override for each detection state
milliseconds llkCheckMs;                             // checking interval to inspect any
                                                     // persistent live-locked states
bool llkLowRam;                                      // ro.config.low_ram
bool llkEnableSysrqT = LLK_ENABLE_SYSRQ_T_DEFAULT;   // sysrq stack trace dump
bool khtEnable = LLK_ENABLE_DEFAULT;                 // [khungtaskd] panic
// [khungtaskd] should have a timeout beyond the granularity of llkTimeoutMs.
// Provides a wide angle of margin b/c khtTimeout is also its granularity.
seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
                                            LLK_CHECKS_PER_TIMEOUT_DEFAULT);
#ifdef __PTRACE_ENABLED__
// list of stack symbols to search for persistence.
std::unordered_set<std::string> llkCheckStackSymbols;
#endif
Mark Salyzynf089e142018-02-20 10:47:40 -080098
// Blacklist variables, initialized with comma separated lists of high false
// positive and/or dangerous references, e.g. without self restart, for pid,
// ppid, name and uid. Each set holds plain strings; numeric ids are matched
// through their decimal text form (see llkSkipPid / llkSkipPpid / llkSkipUid).

// list of pids, or tids or names to skip. kernel pid (0), init pid (1),
// [kthreadd] pid (2), ourselves, "init", "[kthreadd]", "lmkd", "llkd" or
// combinations of watchdogd in kernel and user space.
std::unordered_set<std::string> llkBlacklistProcess;
// list of parent pids, comm or cmdline names to skip. default:
// kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied
std::unordered_set<std::string> llkBlacklistParent;
// list of uids, and uid names, to skip, default nothing
std::unordered_set<std::string> llkBlacklistUid;
#ifdef __PTRACE_ENABLED__
// list of names to skip stack checking. "init", "lmkd", "llkd", "keystore" or
// "logd" (if not userdebug).
std::unordered_set<std::string> llkBlacklistStack;
#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800117
118class dir {
119 public:
120 enum level { proc, task, numLevels };
121
122 private:
123 int fd;
124 size_t available_bytes;
125 dirent* next;
126 // each directory level picked to be just north of 4K in size
127 static constexpr size_t buffEntries = 15;
128 static dirent buff[numLevels][buffEntries];
129
130 bool fill(enum level index) {
131 if (index >= numLevels) return false;
132 if (available_bytes != 0) return true;
133 if (__predict_false(fd < 0)) return false;
134 // getdents64 has no libc wrapper
135 auto rc = TEMP_FAILURE_RETRY(syscall(__NR_getdents64, fd, buff[index], sizeof(buff[0]), 0));
136 if (rc <= 0) return false;
137 available_bytes = rc;
138 next = buff[index];
139 return true;
140 }
141
142 public:
143 dir() : fd(-1), available_bytes(0), next(nullptr) {}
144
145 explicit dir(const char* directory)
146 : fd(__predict_true(directory != nullptr)
147 ? ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY)
148 : -1),
149 available_bytes(0),
150 next(nullptr) {}
151
152 explicit dir(const std::string&& directory)
153 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
154 available_bytes(0),
155 next(nullptr) {}
156
157 explicit dir(const std::string& directory)
158 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
159 available_bytes(0),
160 next(nullptr) {}
161
162 // Don't need any copy or move constructors.
163 explicit dir(const dir& c) = delete;
164 explicit dir(dir& c) = delete;
165 explicit dir(dir&& c) = delete;
166
167 ~dir() {
168 if (fd >= 0) {
169 ::close(fd);
170 }
171 }
172
173 operator bool() const { return fd >= 0; }
174
175 void reset(void) {
176 if (fd >= 0) {
177 ::close(fd);
178 fd = -1;
179 available_bytes = 0;
180 next = nullptr;
181 }
182 }
183
184 dir& reset(const char* directory) {
185 reset();
186 // available_bytes will _always_ be zero here as its value is
187 // intimately tied to fd < 0 or not.
188 fd = ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
189 return *this;
190 }
191
192 void rewind(void) {
193 if (fd >= 0) {
194 ::lseek(fd, off_t(0), SEEK_SET);
195 available_bytes = 0;
196 next = nullptr;
197 }
198 }
199
200 dirent* read(enum level index = proc, dirent* def = nullptr) {
201 if (!fill(index)) return def;
202 auto ret = next;
203 available_bytes -= next->d_reclen;
204 next = reinterpret_cast<dirent*>(reinterpret_cast<char*>(next) + next->d_reclen);
205 return ret;
206 }
207} llkTopDirectory;
208
// Out-of-class definition of the static record storage declared in class dir:
// one row of buffEntries dirent slots per directory level.
dirent dir::buff[dir::numLevels][dir::buffEntries];
210
211// helper functions
212
213bool llkIsMissingExeLink(pid_t tid) {
214 char c;
215 // CAP_SYS_PTRACE is required to prevent ret == -1, but ENOENT is signal
216 auto ret = ::readlink((procdir + std::to_string(tid) + "/exe").c_str(), &c, sizeof(c));
217 return (ret == -1) && (errno == ENOENT);
218}
219
220// Common routine where caller accepts empty content as error/passthrough.
221// Reduces the churn of reporting read errors in the callers.
222std::string ReadFile(std::string&& path) {
223 std::string content;
224 if (!android::base::ReadFileToString(path, &content)) {
225 PLOG(DEBUG) << "Read " << path << " failed";
226 content = "";
227 }
228 return content;
229}
230
231std::string llkProcGetName(pid_t tid, const char* node = "/cmdline") {
232 std::string content = ReadFile(procdir + std::to_string(tid) + node);
233 static constexpr char needles[] = " \t\r\n"; // including trailing nul
234 auto pos = content.find_first_of(needles, 0, sizeof(needles));
235 if (pos != std::string::npos) {
236 content.erase(pos);
237 }
238 return content;
239}
240
241uid_t llkProcGetUid(pid_t tid) {
242 // Get the process' uid. The following read from /status is admittedly
243 // racy, prone to corruption due to shape-changes. The consequences are
244 // not catastrophic as we sample a few times before taking action.
245 //
246 // If /loginuid worked on reliably, or on Android (all tasks report -1)...
247 // Android lmkd causes /cgroup to contain memory:/<dom>/uid_<uid>/pid_<pid>
248 // which is tighter, but also not reliable.
249 std::string content = ReadFile(procdir + std::to_string(tid) + "/status");
250 static constexpr char Uid[] = "\nUid:";
251 auto pos = content.find(Uid);
252 if (pos == std::string::npos) {
253 return -1;
254 }
255 pos += ::strlen(Uid);
256 while ((pos < content.size()) && ::isblank(content[pos])) {
257 ++pos;
258 }
259 content.erase(0, pos);
260 for (pos = 0; (pos < content.size()) && ::isdigit(content[pos]); ++pos) {
261 ;
262 }
263 // Content of form 'Uid: 0 0 0 0', newline is error
264 if ((pos >= content.size()) || !::isblank(content[pos])) {
265 return -1;
266 }
267 content.erase(pos);
268 uid_t ret;
Tom Cherrye0bc5a92018-10-05 14:29:47 -0700269 if (!android::base::ParseUint(content, &ret, uid_t(0))) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800270 return -1;
271 }
272 return ret;
273}
274
275struct proc {
276 pid_t tid; // monitored thread id (in Z or D state).
277 nanoseconds schedUpdate; // /proc/<tid>/sched "se.avg.lastUpdateTime",
278 uint64_t nrSwitches; // /proc/<tid>/sched "nr_switches" for
279 // refined ABA problem detection, determine
280 // forward scheduling progress.
281 milliseconds update; // llkUpdate millisecond signature of last.
282 milliseconds count; // duration in state.
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700283#ifdef __PTRACE_ENABLED__ // Privileged state checking
284 milliseconds count_stack; // duration where stack is stagnant.
285#endif // End privilege
Mark Salyzynf089e142018-02-20 10:47:40 -0800286 pid_t pid; // /proc/<pid> before iterating through
287 // /proc/<pid>/task/<tid> for threads.
288 pid_t ppid; // /proc/<tid>/stat field 4 parent pid.
289 uid_t uid; // /proc/<tid>/status Uid: field.
290 unsigned time; // sum of /proc/<tid>/stat field 14 utime &
291 // 15 stime for coarse ABA problem detection.
292 std::string cmdline; // cached /cmdline content
293 char state; // /proc/<tid>/stat field 3: Z or D
294 // (others we do not monitor: S, R, T or ?)
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700295#ifdef __PTRACE_ENABLED__ // Privileged state checking
296 char stack; // index in llkCheckStackSymbols for matches
297#endif // and with maximum index PROP_VALUE_MAX/2.
Mark Salyzynf089e142018-02-20 10:47:40 -0800298 char comm[TASK_COMM_LEN + 3]; // space for adding '[' and ']'
299 bool exeMissingValid; // exeMissing has been cached
300 bool cmdlineValid; // cmdline has been cached
301 bool updated; // cleared before monitoring pass.
302 bool killed; // sent a kill to this thread, next panic...
303
304 void setComm(const char* _comm) { strncpy(comm + 1, _comm, sizeof(comm) - 2); }
305
306 proc(pid_t tid, pid_t pid, pid_t ppid, const char* _comm, int time, char state)
307 : tid(tid),
308 schedUpdate(0),
309 nrSwitches(0),
310 update(llkUpdate),
Mark Salyzynacecaf72018-08-10 08:15:57 -0700311 count(0ms),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700312#ifdef __PTRACE_ENABLED__
313 count_stack(0ms),
314#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800315 pid(pid),
316 ppid(ppid),
317 uid(-1),
318 time(time),
319 state(state),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700320#ifdef __PTRACE_ENABLED__
321 stack(-1),
322#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800323 exeMissingValid(false),
324 cmdlineValid(false),
325 updated(true),
Mark Salyzynafd66f22018-03-19 15:16:29 -0700326 killed(!llkTestWithKill) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800327 memset(comm, '\0', sizeof(comm));
328 setComm(_comm);
329 }
330
331 const char* getComm(void) {
332 if (comm[1] == '\0') { // comm Valid?
333 strncpy(comm + 1, llkProcGetName(tid, "/comm").c_str(), sizeof(comm) - 2);
334 }
335 if (!exeMissingValid) {
336 if (llkIsMissingExeLink(tid)) {
337 comm[0] = '[';
338 }
339 exeMissingValid = true;
340 }
341 size_t len = strlen(comm + 1);
342 if (__predict_true(len < (sizeof(comm) - 1))) {
343 if (comm[0] == '[') {
344 if ((comm[len] != ']') && __predict_true(len < (sizeof(comm) - 2))) {
345 comm[++len] = ']';
346 comm[++len] = '\0';
347 }
348 } else {
349 if (comm[len] == ']') {
350 comm[len] = '\0';
351 }
352 }
353 }
354 return &comm[comm[0] != '['];
355 }
356
357 const char* getCmdline(void) {
358 if (!cmdlineValid) {
359 cmdline = llkProcGetName(tid);
360 cmdlineValid = true;
361 }
362 return cmdline.c_str();
363 }
364
365 uid_t getUid(void) {
366 if (uid <= 0) { // Churn on root user, because most likely to setuid()
367 uid = llkProcGetUid(tid);
368 }
369 return uid;
370 }
371
372 void reset(void) { // reset cache, if we detected pid rollover
373 uid = -1;
374 state = '?';
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700375#ifdef __PTRACE_ENABLED__
376 count_stack = 0ms;
377 stack = -1;
378#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800379 cmdline = "";
380 comm[0] = '\0';
381 exeMissingValid = false;
382 cmdlineValid = false;
383 }
384};
385
// All threads currently tracked, keyed by thread id. Entries are created by
// llkTidAlloc(), found by llkTidLookup() and dropped by llkTidRemove().
std::unordered_map<pid_t, proc> tids;
387
388// Check range and setup defaults, in order of propagation:
389// llkTimeoutMs
390// llkCheckMs
391// ...
392// KISS to keep it all self-contained, and called multiple times as parameters
393// are interpreted so that defaults, llkCheckMs and llkCycle make sense.
394void llkValidate() {
395 if (llkTimeoutMs == 0ms) {
396 llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;
397 }
398 llkTimeoutMs = std::max(llkTimeoutMs, LLK_TIMEOUT_MS_MINIMUM);
399 if (llkCheckMs == 0ms) {
400 llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT;
401 }
402 llkCheckMs = std::min(llkCheckMs, llkTimeoutMs);
403
404 for (size_t state = 0; state < ARRAY_SIZE(llkStateTimeoutMs); ++state) {
405 if (llkStateTimeoutMs[state] == 0ms) {
406 llkStateTimeoutMs[state] = llkTimeoutMs;
407 }
408 llkStateTimeoutMs[state] =
409 std::min(std::max(llkStateTimeoutMs[state], LLK_TIMEOUT_MS_MINIMUM), llkTimeoutMs);
410 llkCheckMs = std::min(llkCheckMs, llkStateTimeoutMs[state]);
411 }
412
413 llkCheckMs = std::max(llkCheckMs, LLK_CHECK_MS_MINIMUM);
414 if (llkCycle == 0ms) {
415 llkCycle = llkCheckMs;
416 }
417 llkCycle = std::min(llkCycle, llkCheckMs);
418}
419
// Elapsed time from *from to *to in milliseconds. The seconds and
// nanoseconds parts are converted separately, each truncating toward zero,
// and then added.
milliseconds llkGetTimespecDiffMs(timespec* from, timespec* to) {
    const seconds wholePart(to->tv_sec - from->tv_sec);
    const nanoseconds fractionPart(to->tv_nsec - from->tv_nsec);
    return duration_cast<milliseconds>(wholePart) + duration_cast<milliseconds>(fractionPart);
}
424
425std::string llkProcGetName(pid_t tid, const char* comm, const char* cmdline) {
426 if ((cmdline != nullptr) && (*cmdline != '\0')) {
427 return cmdline;
428 }
429 if ((comm != nullptr) && (*comm != '\0')) {
430 return comm;
431 }
432
433 // UNLIKELY! Here because killed before we kill it?
434 // Assume change is afoot, do not call llkTidAlloc
435
436 // cmdline ?
437 std::string content = llkProcGetName(tid);
438 if (content.size() != 0) {
439 return content;
440 }
441 // Comm instead?
442 content = llkProcGetName(tid, "/comm");
443 if (llkIsMissingExeLink(tid) && (content.size() != 0)) {
444 return '[' + content + ']';
445 }
446 return content;
447}
448
449int llkKillOneProcess(pid_t pid, char state, pid_t tid, const char* tcomm = nullptr,
450 const char* tcmdline = nullptr, const char* pcomm = nullptr,
451 const char* pcmdline = nullptr) {
452 std::string forTid;
453 if (tid != pid) {
454 forTid = " for '" + llkProcGetName(tid, tcomm, tcmdline) + "' (" + std::to_string(tid) + ")";
455 }
456 LOG(INFO) << "Killing '" << llkProcGetName(pid, pcomm, pcmdline) << "' (" << pid
457 << ") to check forward scheduling progress in " << state << " state" << forTid;
458 // CAP_KILL required
459 errno = 0;
460 auto r = ::kill(pid, SIGKILL);
461 if (r) {
462 PLOG(ERROR) << "kill(" << pid << ")=" << r << ' ';
463 }
464
465 return r;
466}
467
468// Kill one process
469int llkKillOneProcess(pid_t pid, proc* tprocp) {
470 return llkKillOneProcess(pid, tprocp->state, tprocp->tid, tprocp->getComm(),
471 tprocp->getCmdline());
472}
473
474// Kill one process specified by kprocp
475int llkKillOneProcess(proc* kprocp, proc* tprocp) {
476 if (kprocp == nullptr) {
477 return -2;
478 }
479
480 return llkKillOneProcess(kprocp->tid, tprocp->state, tprocp->tid, tprocp->getComm(),
481 tprocp->getCmdline(), kprocp->getComm(), kprocp->getCmdline());
482}
483
484// Acquire file descriptor from environment, or open and cache it.
485// NB: cache is unnecessary in our current context, pedantically
486// required to prevent leakage of file descriptors in the future.
487int llkFileToWriteFd(const std::string& file) {
488 static std::unordered_map<std::string, int> cache;
489 auto search = cache.find(file);
490 if (search != cache.end()) return search->second;
491 auto fd = android_get_control_file(file.c_str());
492 if (fd >= 0) return fd;
493 fd = TEMP_FAILURE_RETRY(::open(file.c_str(), O_WRONLY | O_CLOEXEC));
494 if (fd >= 0) cache.emplace(std::make_pair(file, fd));
495 return fd;
496}
497
498// Wrap android::base::WriteStringToFile to use android_get_control_file.
499bool llkWriteStringToFile(const std::string& string, const std::string& file) {
500 auto fd = llkFileToWriteFd(file);
501 if (fd < 0) return false;
502 return android::base::WriteStringToFd(string, fd);
503}
504
505bool llkWriteStringToFileConfirm(const std::string& string, const std::string& file) {
506 auto fd = llkFileToWriteFd(file);
507 auto ret = (fd < 0) ? false : android::base::WriteStringToFd(string, fd);
508 std::string content;
509 if (!android::base::ReadFileToString(file, &content)) return ret;
510 return android::base::Trim(content) == string;
511}
512
// Bring the system down because of a detected live-lock; never returns.
//   dump:    also request sysrq diagnostic dumps before crashing.
//   tid:     thread that triggered the panic, used in the fallback kill log.
//   state:   short tag appended to the "livelock," crash message.
//   message: optional text logged first and prepended to the kmsg line.
// When /proc/sysrq-trigger is unavailable, or the crash request returns,
// init is killed instead and this process exits with status 42.
void llkPanicKernel(bool dump, pid_t tid, const char* state,
                    const std::string& message = "") __noreturn;
void llkPanicKernel(bool dump, pid_t tid, const char* state, const std::string& message) {
    if (!message.empty()) LOG(ERROR) << message;
    auto sysrqTriggerFd = llkFileToWriteFd("/proc/sysrq-trigger");
    if (sysrqTriggerFd < 0) {
        // DYB
        llkKillOneProcess(initPid, 'R', tid);
        // The answer to life, the universe and everything
        ::exit(42);
        // NOTREACHED
    }
    // Flush filesystem buffers before the crash.
    ::sync();
    if (dump) {
        // Show all locks that are held
        android::base::WriteStringToFd("d", sysrqTriggerFd);
        // Show all waiting tasks
        android::base::WriteStringToFd("w", sysrqTriggerFd);
        // This can trigger hardware watchdog, that is somewhat _ok_.
        // But useless if pstore configured for <256KB, low ram devices ...
        if (llkEnableSysrqT) {
            android::base::WriteStringToFd("t", sysrqTriggerFd);
            // Show all locks that are held (in case 't' overflows ramoops)
            android::base::WriteStringToFd("d", sysrqTriggerFd);
            // Show all waiting tasks (in case 't' overflows ramoops)
            android::base::WriteStringToFd("w", sysrqTriggerFd);
        }
        ::usleep(200000);  // let everything settle
    }
    // SysRq message matches kernel format, and propagates through bootstat
    // ultimately to the boot reason into panic,livelock,<state>.
    llkWriteStringToFile(message + (message.empty() ? "" : "\n") +
                                 "SysRq : Trigger a crash : 'livelock,"s + state + "'\n",
                         "/dev/kmsg");
    // Request the crash itself.
    android::base::WriteStringToFd("c", sysrqTriggerFd);
    // NOTREACHED
    // DYB
    llkKillOneProcess(initPid, 'R', tid);
    // I sat at my desk, stared into the garden and thought '42 will do'.
    // I typed it out. End of story
    ::exit(42);
    // NOTREACHED
}
556
// Signal handler for the internal watchdog alarm that llkCheck() re-arms on
// each call: reaching it means the daemon itself stopped making progress.
// The signal number is unused.
void llkAlarmHandler(int) {
    LOG(FATAL) << "alarm";
    // NOTREACHED
    // Fallback should the fatal log return: panic with state tag "alarm".
    llkPanicKernel(true, ::getpid(), "alarm");
}
562
563milliseconds GetUintProperty(const std::string& key, milliseconds def) {
564 return milliseconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
565 static_cast<uint64_t>(def.max().count())));
566}
567
568seconds GetUintProperty(const std::string& key, seconds def) {
569 return seconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
570 static_cast<uint64_t>(def.max().count())));
571}
572
573proc* llkTidLookup(pid_t tid) {
574 auto search = tids.find(tid);
575 if (search == tids.end()) {
576 return nullptr;
577 }
578 return &search->second;
579}
580
// Stop tracking tid; does nothing when the thread is not tracked.
void llkTidRemove(pid_t tid) {
    tids.erase(tid);
}
584
585proc* llkTidAlloc(pid_t tid, pid_t pid, pid_t ppid, const char* comm, int time, char state) {
586 auto it = tids.emplace(std::make_pair(tid, proc(tid, pid, ppid, comm, time, state)));
587 return &it.first->second;
588}
589
// Render a millisecond duration as "<seconds>.<mmm>s", with the fractional
// part zero padded to three digits (e.g. 1500ms -> "1.500s").
std::string llkFormat(milliseconds ms) {
    const auto wholeSeconds = duration_cast<seconds>(ms);
    const auto remainder = ms - wholeSeconds;

    std::ostringstream out;
    out << wholeSeconds.count() << '.';
    // Pad only the fraction; the width applies to the next insertion alone
    // and the fill character is put back afterwards.
    const auto previousFill = out.fill('0');
    out.width(3);
    out << std::right << remainder.count();
    out.fill(previousFill);
    out << 's';
    return out.str();
}
602
// Render a whole-second duration as "<count>s".
std::string llkFormat(seconds s) {
    std::string text = std::to_string(s.count());
    text.push_back('s');
    return text;
}
606
// Render a flag as the text "true" or "false".
std::string llkFormat(bool flag) {
    if (flag) {
        return "true";
    }
    return "false";
}
610
// Render a blacklist as a comma separated list. The order of the entries
// follows the set's iteration order and is therefore unspecified; an empty
// set yields an empty string.
std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
    std::string ret;
    // Iterate by reference so each entry is not copied just to be appended.
    for (const auto& entry : blacklist) {
        if (!ret.empty()) {
            ret += ",";
        }
        ret += entry;
    }
    return ret;
}
621
// We only officially support comma separators, but wetware being what they
// are will take some liberty and I do not believe they should be punished.
// Space, tab and ':' are therefore accepted as separators too. Empty fields
// are dropped, and the exact input "false" produces an empty set.
std::unordered_set<std::string> llkSplit(const std::string& s) {
    std::unordered_set<std::string> tokens;

    // Special case, allow boolean false to empty the list, otherwise expected
    // source of input from android::base::GetProperty will supply the default
    // value on empty content in the property.
    if (s == "false") return tokens;

    for (size_t start = 0; start < s.size();) {
        const auto stop = s.find_first_of(", \t:", start);
        if (stop == s.npos) {
            // Last field runs to the end of the input.
            tokens.emplace(s.substr(start));
            break;
        }
        // Only emplace content, empty entries are not an option
        if (stop > start) tokens.emplace(s.substr(start, stop - start));
        start = stop + 1;
    }
    return tokens;
}
642
643bool llkSkipName(const std::string& name,
644 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
645 if ((name.size() == 0) || (blacklist.size() == 0)) {
646 return false;
647 }
648
649 return blacklist.find(name) != blacklist.end();
650}
651
652bool llkSkipPid(pid_t pid) {
653 return llkSkipName(std::to_string(pid), llkBlacklistProcess);
654}
655
656bool llkSkipPpid(pid_t ppid) {
657 return llkSkipName(std::to_string(ppid), llkBlacklistParent);
658}
659
660bool llkSkipUid(uid_t uid) {
661 // Match by number?
662 if (llkSkipName(std::to_string(uid), llkBlacklistUid)) {
663 return true;
664 }
665
666 // Match by name?
667 auto pwd = ::getpwuid(uid);
668 return (pwd != nullptr) && __predict_true(pwd->pw_name != nullptr) &&
669 __predict_true(pwd->pw_name[0] != '\0') && llkSkipName(pwd->pw_name, llkBlacklistUid);
670}
671
672bool getValidTidDir(dirent* dp, std::string* piddir) {
673 if (!::isdigit(dp->d_name[0])) {
674 return false;
675 }
676
677 // Corner case can not happen in reality b/c of above ::isdigit check
678 if (__predict_false(dp->d_type != DT_DIR)) {
679 if (__predict_false(dp->d_type == DT_UNKNOWN)) { // can't b/c procfs
680 struct stat st;
681 *piddir = procdir;
682 *piddir += dp->d_name;
683 return (lstat(piddir->c_str(), &st) == 0) && (st.st_mode & S_IFDIR);
684 }
685 return false;
686 }
687
688 *piddir = procdir;
689 *piddir += dp->d_name;
690 return true;
691}
692
// Only the 'Z' and 'D' task states are monitored.
bool llkIsMonitorState(char state) {
    switch (state) {
        case 'Z':
        case 'D':
            return true;
        default:
            return false;
    }
}
696
697// returns -1 if not found
698long long getSchedValue(const std::string& schedString, const char* key) {
699 auto pos = schedString.find(key);
700 if (pos == std::string::npos) {
701 return -1;
702 }
703 pos = schedString.find(':', pos);
704 if (__predict_false(pos == std::string::npos)) {
705 return -1;
706 }
707 while ((++pos < schedString.size()) && ::isblank(schedString[pos])) {
708 ;
709 }
710 long long ret;
711 if (!android::base::ParseInt(schedString.substr(pos), &ret, static_cast<long long>(0))) {
712 return -1;
713 }
714 return ret;
715}
716
#ifdef __PTRACE_ENABLED__
// Check whether the kernel stack of the thread at piddir keeps showing the
// same symbol from llkCheckStackSymbols.
//   procp:  tracked record; its stack / count_stack fields are updated.
//   piddir: /proc path of the thread, "/stack" is appended to it.
// Returns true once the same symbol has been matched for at least
// llkStateTimeoutMs[llkStateStack]; false in every other case.
bool llkCheckStack(proc* procp, const std::string& piddir) {
    if (llkCheckStackSymbols.empty()) return false;
    if (procp->state == 'Z') {  // No brains for Zombies
        procp->stack = -1;
        procp->count_stack = 0ms;
        return false;
    }

    //  Don't check process that are known to block ptrace, save sepolicy noise.
    if (llkSkipName(std::to_string(procp->pid), llkBlacklistStack)) return false;
    if (llkSkipName(procp->getComm(), llkBlacklistStack)) return false;
    if (llkSkipName(procp->getCmdline(), llkBlacklistStack)) return false;
    if (llkSkipName(android::base::Basename(procp->getCmdline()), llkBlacklistStack)) return false;

    auto kernel_stack = ReadFile(piddir + "/stack");
    if (kernel_stack.empty()) {
        LOG(INFO) << piddir << "/stack empty comm=" << procp->getComm()
                  << " cmdline=" << procp->getCmdline();
        return false;
    }
    // A scheduling incident that should not reset count_stack
    if (kernel_stack.find(" cpu_worker_pools+0x") != std::string::npos) return false;
    // idx counts the symbols in set iteration order; match is the index of
    // the first symbol found in the stack text, or -1 for none.
    char idx = -1;
    char match = -1;
    for (const auto& stack : llkCheckStackSymbols) {
        // NOTE(review): this wrap-around guard can only trigger where plain
        // char is signed -- confirm the intended behavior on targets where
        // char is unsigned.
        if (++idx < 0) break;
        if ((kernel_stack.find(" "s + stack + "+0x") != std::string::npos) ||
            (kernel_stack.find(" "s + stack + ".cfi+0x") != std::string::npos)) {
            match = idx;
            break;
        }
    }
    // A different result than last time restarts the stagnation count.
    if (procp->stack != match) {
        procp->stack = match;
        procp->count_stack = 0ms;
        return false;
    }
    if (match == char(-1)) return false;
    procp->count_stack += llkCycle;
    return procp->count_stack >= llkStateTimeoutMs[llkStateStack];
}
#endif
760
Mark Salyzynf089e142018-02-20 10:47:40 -0800761// Primary ABA mitigation watching last time schedule activity happened
762void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
763 // Audit finds /proc/<tid>/sched is just over 1K, and
764 // is rarely larger than 2K, even less on Android.
765 // For example, the "se.avg.lastUpdateTime" field we are
766 // interested in typically within the primary set in
767 // the first 1K.
768 //
769 // Proc entries can not be read >1K atomically via libbase,
770 // but if there are problems we assume at least a few
771 // samples of reads occur before we take any real action.
772 std::string schedString = ReadFile(piddir + "/sched");
773 if (schedString.size() == 0) {
774 // /schedstat is not as standardized, but in 3.1+
775 // Android devices, the third field is nr_switches
776 // from /sched:
777 schedString = ReadFile(piddir + "/schedstat");
778 if (schedString.size() == 0) {
779 return;
780 }
781 auto val = static_cast<unsigned long long>(-1);
782 if (((::sscanf(schedString.c_str(), "%*d %*d %llu", &val)) == 1) &&
783 (val != static_cast<unsigned long long>(-1)) && (val != 0) &&
784 (val != procp->nrSwitches)) {
785 procp->nrSwitches = val;
786 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700787 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800788 }
789 return;
790 }
791
792 auto val = getSchedValue(schedString, "\nse.avg.lastUpdateTime");
793 if (val == -1) {
794 val = getSchedValue(schedString, "\nse.svg.last_update_time");
795 }
796 if (val != -1) {
797 auto schedUpdate = nanoseconds(val);
798 if (schedUpdate != procp->schedUpdate) {
799 procp->schedUpdate = schedUpdate;
800 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700801 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800802 }
803 }
804
805 val = getSchedValue(schedString, "\nnr_switches");
806 if (val != -1) {
807 if (static_cast<uint64_t>(val) != procp->nrSwitches) {
808 procp->nrSwitches = val;
809 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700810 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800811 }
812 }
813}
814
// Log the effective configuration as one INFO message, one "property=value"
// pair per line. The stack related entries are only present when built with
// __PTRACE_ENABLED__.
void llkLogConfig(void) {
    LOG(INFO) << "ro.config.low_ram=" << llkFormat(llkLowRam) << "\n"
              << LLK_ENABLE_SYSRQ_T_PROPERTY "=" << llkFormat(llkEnableSysrqT) << "\n"
              << LLK_ENABLE_PROPERTY "=" << llkFormat(llkEnable) << "\n"
              << KHT_ENABLE_PROPERTY "=" << llkFormat(khtEnable) << "\n"
              << LLK_MLOCKALL_PROPERTY "=" << llkFormat(llkMlockall) << "\n"
              << LLK_KILLTEST_PROPERTY "=" << llkFormat(llkTestWithKill) << "\n"
              << KHT_TIMEOUT_PROPERTY "=" << llkFormat(khtTimeout) << "\n"
              << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n"
              << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n"
              << LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n"
#ifdef __PTRACE_ENABLED__
              << LLK_STACK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateStack])
              << "\n"
#endif
              << LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n"
#ifdef __PTRACE_ENABLED__
              << LLK_CHECK_STACK_PROPERTY "=" << llkFormat(llkCheckStackSymbols) << "\n"
              << LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
#endif
              << LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
              << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
              << LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
}
839
840void* llkThread(void* obj) {
Mark Salyzyn4832a8b2018-08-15 11:02:18 -0700841 prctl(PR_SET_DUMPABLE, 0);
842
Mark Salyzynf089e142018-02-20 10:47:40 -0800843 LOG(INFO) << "started";
844
845 std::string name = std::to_string(::gettid());
846 if (!llkSkipName(name)) {
847 llkBlacklistProcess.emplace(name);
848 }
849 name = static_cast<const char*>(obj);
850 prctl(PR_SET_NAME, name.c_str());
851 if (__predict_false(!llkSkipName(name))) {
852 llkBlacklistProcess.insert(name);
853 }
854 // No longer modifying llkBlacklistProcess.
855 llkRunning = true;
856 llkLogConfig();
857 while (llkRunning) {
858 ::usleep(duration_cast<microseconds>(llkCheck(true)).count());
859 }
860 // NOTREACHED
861 LOG(INFO) << "exiting";
862 return nullptr;
863}
864
865} // namespace
866
867milliseconds llkCheck(bool checkRunning) {
868 if (!llkEnable || (checkRunning != llkRunning)) {
869 return milliseconds::max();
870 }
871
872 // Reset internal watchdog, which is a healthy engineering margin of
873 // double the maximum wait or cycle time for the mainloop that calls us.
874 //
875 // This alarm is effectively the live lock detection of llkd, as
876 // we understandably can not monitor ourselves otherwise.
877 ::alarm(duration_cast<seconds>(llkTimeoutMs * 2).count());
878
879 // kernel jiffy precision fastest acquisition
880 static timespec last;
881 timespec now;
882 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
883 auto ms = llkGetTimespecDiffMs(&last, &now);
884 if (ms < llkCycle) {
885 return llkCycle - ms;
886 }
887 last = now;
888
889 LOG(VERBOSE) << "opendir(\"" << procdir << "\")";
890 if (__predict_false(!llkTopDirectory)) {
891 // gid containing AID_READPROC required
892 llkTopDirectory.reset(procdir);
893 if (__predict_false(!llkTopDirectory)) {
894 // Most likely reason we could be here is a resource limit.
895 // Keep our processing down to a minimum, but not so low that
896 // we do not recover in a timely manner should the issue be
897 // transitory.
898 LOG(DEBUG) << "opendir(\"" << procdir << "\") failed";
899 return llkTimeoutMs;
900 }
901 }
902
903 for (auto& it : tids) {
904 it.second.updated = false;
905 }
906
907 auto prevUpdate = llkUpdate;
908 llkUpdate += ms;
909 ms -= llkCycle;
910 auto myPid = ::getpid();
911 auto myTid = ::gettid();
912 for (auto dp = llkTopDirectory.read(); dp != nullptr; dp = llkTopDirectory.read()) {
913 std::string piddir;
914
915 if (!getValidTidDir(dp, &piddir)) {
916 continue;
917 }
918
919 // Get the process tasks
920 std::string taskdir = piddir + "/task/";
921 int pid = -1;
922 LOG(VERBOSE) << "+opendir(\"" << taskdir << "\")";
923 dir taskDirectory(taskdir);
924 if (__predict_false(!taskDirectory)) {
925 LOG(DEBUG) << "+opendir(\"" << taskdir << "\") failed";
926 }
927 for (auto tp = taskDirectory.read(dir::task, dp); tp != nullptr;
928 tp = taskDirectory.read(dir::task)) {
929 if (!getValidTidDir(tp, &piddir)) {
930 continue;
931 }
932
933 // Get the process stat
934 std::string stat = ReadFile(piddir + "/stat");
935 if (stat.size() == 0) {
936 continue;
937 }
938 unsigned tid = -1;
939 char pdir[TASK_COMM_LEN + 1];
940 char state = '?';
941 unsigned ppid = -1;
942 unsigned utime = -1;
943 unsigned stime = -1;
944 int dummy;
945 pdir[0] = '\0';
946 // tid should not change value
947 auto match = ::sscanf(
948 stat.c_str(),
949 "%u (%" ___STRING(
950 TASK_COMM_LEN) "[^)]) %c %u %*d %*d %*d %*d %*d %*d %*d %*d %*d %u %u %d",
951 &tid, pdir, &state, &ppid, &utime, &stime, &dummy);
952 if (pid == -1) {
953 pid = tid;
954 }
955 LOG(VERBOSE) << "match " << match << ' ' << tid << " (" << pdir << ") " << state << ' '
956 << ppid << " ... " << utime << ' ' << stime << ' ' << dummy;
957 if (match != 7) {
958 continue;
959 }
960
961 auto procp = llkTidLookup(tid);
962 if (procp == nullptr) {
963 procp = llkTidAlloc(tid, pid, ppid, pdir, utime + stime, state);
964 } else {
965 // comm can change ...
966 procp->setComm(pdir);
967 procp->updated = true;
968 // pid/ppid/tid wrap?
969 if (((procp->update != prevUpdate) && (procp->update != llkUpdate)) ||
970 (procp->ppid != ppid) || (procp->pid != pid)) {
971 procp->reset();
972 } else if (procp->time != (utime + stime)) { // secondary ABA.
973 // watching utime+stime granularity jiffy
974 procp->state = '?';
975 }
976 procp->update = llkUpdate;
977 procp->pid = pid;
978 procp->ppid = ppid;
979 procp->time = utime + stime;
980 if (procp->state != state) {
981 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700982 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800983 procp->state = state;
984 } else {
985 procp->count += llkCycle;
986 }
987 }
988
989 // Filter checks in intuitive order of CPU cost to evaluate
990 // If tid unique continue, if ppid or pid unique break
991
992 if (pid == myPid) {
993 break;
994 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700995#ifdef __PTRACE_ENABLED__
996 // if no stack monitoring, we can quickly exit here
997 if (!llkIsMonitorState(state) && llkCheckStackSymbols.empty()) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800998 continue;
999 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001000#else
1001 if (!llkIsMonitorState(state)) continue;
1002#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001003 if ((tid == myTid) || llkSkipPid(tid)) {
1004 continue;
1005 }
1006 if (llkSkipPpid(ppid)) {
1007 break;
1008 }
1009
1010 if (llkSkipName(procp->getComm())) {
1011 continue;
1012 }
1013 if (llkSkipName(procp->getCmdline())) {
1014 break;
1015 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001016 if (llkSkipName(android::base::Basename(procp->getCmdline()))) {
1017 break;
1018 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001019
1020 auto pprocp = llkTidLookup(ppid);
1021 if (pprocp == nullptr) {
1022 pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
1023 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001024 if ((pprocp != nullptr) &&
1025 (llkSkipName(pprocp->getComm(), llkBlacklistParent) ||
1026 llkSkipName(pprocp->getCmdline(), llkBlacklistParent) ||
1027 llkSkipName(android::base::Basename(pprocp->getCmdline()), llkBlacklistParent))) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001028 break;
1029 }
1030
1031 if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) {
1032 continue;
1033 }
1034
1035 // ABA mitigation watching last time schedule activity happened
1036 llkCheckSchedUpdate(procp, piddir);
1037
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001038#ifdef __PTRACE_ENABLED__
1039 auto stuck = llkCheckStack(procp, piddir);
1040 if (llkIsMonitorState(state)) {
1041 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1042 stuck = true;
1043 } else if (procp->count != 0ms) {
1044 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1045 << pid << "->" << tid << ' ' << procp->getComm();
1046 }
1047 }
1048 if (!stuck) continue;
1049#else
1050 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1051 if (procp->count != 0ms) {
1052 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1053 << pid << "->" << tid << ' ' << procp->getComm();
1054 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001055 continue;
1056 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001057#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001058
1059 // We have to kill it to determine difference between live lock
1060 // and persistent state blocked on a resource. Is there something
1061 // wrong with a process that has no forward scheduling progress in
1062 // Z or D? Yes, generally means improper accounting in the
1063 // process, but not always ...
1064 //
1065 // Whomever we hit with a test kill must accept the Android
1066 // Aphorism that everything can be burned to the ground and
1067 // must survive.
1068 if (procp->killed == false) {
1069 procp->killed = true;
1070 // confirm: re-read uid before committing to a panic.
1071 procp->uid = -1;
1072 switch (state) {
1073 case 'Z': // kill ppid to free up a Zombie
1074 // Killing init will kernel panic without diagnostics
1075 // so skip right to controlled kernel panic with
1076 // diagnostics.
1077 if (ppid == initPid) {
1078 break;
1079 }
1080 LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
1081 << pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
1082 if ((llkKillOneProcess(pprocp, procp) >= 0) ||
1083 (llkKillOneProcess(ppid, procp) >= 0)) {
1084 continue;
1085 }
1086 break;
1087
1088 case 'D': // kill tid to free up an uninterruptible D
1089 // If ABA is doing its job, we would not need or
1090 // want the following. Test kill is a Hail Mary
1091 // to make absolutely sure there is no forward
1092 // scheduling progress. The cost when ABA is
1093 // not working is we kill a process that likes to
1094 // stay in 'D' state, instead of panicing the
1095 // kernel (worse).
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001096 default:
1097 LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
1098 << "->" << tid << ' ' << procp->getComm() << " [kill]";
Mark Salyzynf089e142018-02-20 10:47:40 -08001099 if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001100 (llkKillOneProcess(pid, state, tid) >= 0) ||
Mark Salyzynf089e142018-02-20 10:47:40 -08001101 (llkKillOneProcess(procp, procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001102 (llkKillOneProcess(tid, state, tid) >= 0)) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001103 continue;
1104 }
1105 break;
1106 }
1107 }
1108 // We are here because we have confirmed kernel live-lock
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -07001109 const auto message = state + " "s + llkFormat(procp->count) + " " +
1110 std::to_string(ppid) + "->" + std::to_string(pid) + "->" +
1111 std::to_string(tid) + " " + procp->getComm() + " [panic]";
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001112 llkPanicKernel(true, tid,
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -07001113 (state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping",
1114 message);
Mark Salyzynf089e142018-02-20 10:47:40 -08001115 }
1116 LOG(VERBOSE) << "+closedir()";
1117 }
1118 llkTopDirectory.rewind();
1119 LOG(VERBOSE) << "closedir()";
1120
1121 // garbage collection of old process references
1122 for (auto p = tids.begin(); p != tids.end();) {
1123 if (!p->second.updated) {
1124 IF_ALOG(LOG_VERBOSE, LOG_TAG) {
1125 std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr);
1126 if (ppidCmdline.size()) {
1127 ppidCmdline = "(" + ppidCmdline + ")";
1128 }
1129 std::string pidCmdline;
1130 if (p->second.pid != p->second.tid) {
1131 pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline());
1132 if (pidCmdline.size()) {
1133 pidCmdline = "(" + pidCmdline + ")";
1134 }
1135 }
1136 std::string tidCmdline =
1137 llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline());
1138 if (tidCmdline.size()) {
1139 tidCmdline = "(" + tidCmdline + ")";
1140 }
1141 LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid
1142 << pidCmdline << "->" << p->second.tid << tidCmdline << " removed";
1143 }
1144 p = tids.erase(p);
1145 } else {
1146 ++p;
1147 }
1148 }
1149 if (__predict_false(tids.empty())) {
1150 llkTopDirectory.reset();
1151 }
1152
1153 llkCycle = llkCheckMs;
1154
1155 timespec end;
1156 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &end);
1157 auto milli = llkGetTimespecDiffMs(&now, &end);
1158 LOG((milli > 10s) ? ERROR : (milli > 1s) ? WARNING : VERBOSE) << "sample " << llkFormat(milli);
1159
1160 // cap to minimum sleep for 1 second since last cycle
1161 if (llkCycle < (ms + 1s)) {
1162 return 1s;
1163 }
1164 return llkCycle - ms;
1165}
1166
1167unsigned llkCheckMilliseconds() {
1168 return duration_cast<milliseconds>(llkCheck()).count();
1169}
1170
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001171bool llkCheckEng(const std::string& property) {
1172 return android::base::GetProperty(property, "eng") == "eng";
1173}
1174
Mark Salyzynf089e142018-02-20 10:47:40 -08001175bool llkInit(const char* threadname) {
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001176 auto debuggable = android::base::GetBoolProperty("ro.debuggable", false);
Mark Salyzynf089e142018-02-20 10:47:40 -08001177 llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false);
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001178 llkEnableSysrqT &= !llkLowRam;
1179 if (debuggable) {
1180 llkEnableSysrqT |= llkCheckEng(LLK_ENABLE_SYSRQ_T_PROPERTY);
1181 if (!LLK_ENABLE_DEFAULT) { // NB: default is currently true ...
1182 llkEnable |= llkCheckEng(LLK_ENABLE_PROPERTY);
1183 khtEnable |= llkCheckEng(KHT_ENABLE_PROPERTY);
1184 }
Mark Salyzynd035dbb2018-03-26 08:23:00 -07001185 }
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001186 llkEnableSysrqT = android::base::GetBoolProperty(LLK_ENABLE_SYSRQ_T_PROPERTY, llkEnableSysrqT);
Mark Salyzynf089e142018-02-20 10:47:40 -08001187 llkEnable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, llkEnable);
1188 if (llkEnable && !llkTopDirectory.reset(procdir)) {
1189 // Most likely reason we could be here is llkd was started
1190 // incorrectly without the readproc permissions. Keep our
1191 // processing down to a minimum.
1192 llkEnable = false;
1193 }
1194 khtEnable = android::base::GetBoolProperty(KHT_ENABLE_PROPERTY, khtEnable);
1195 llkMlockall = android::base::GetBoolProperty(LLK_MLOCKALL_PROPERTY, llkMlockall);
Mark Salyzynafd66f22018-03-19 15:16:29 -07001196 llkTestWithKill = android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, llkTestWithKill);
Mark Salyzynf089e142018-02-20 10:47:40 -08001197 // if LLK_TIMOUT_MS_PROPERTY was not set, we will use a set
1198 // KHT_TIMEOUT_PROPERTY as co-operative guidance for the default value.
1199 khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout);
1200 if (khtTimeout == 0s) {
1201 khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
1202 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1203 }
1204 llkTimeoutMs =
1205 khtTimeout * LLK_CHECKS_PER_TIMEOUT_DEFAULT / (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1206 llkTimeoutMs = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1207 llkValidate(); // validate llkTimeoutMs, llkCheckMs and llkCycle
1208 llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1209 llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001210#ifdef __PTRACE_ENABLED__
1211 llkStateTimeoutMs[llkStateStack] = GetUintProperty(LLK_STACK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1212#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001213 llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs);
1214 llkValidate(); // validate all (effectively minus llkTimeoutMs)
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001215#ifdef __PTRACE_ENABLED__
1216 if (debuggable) {
1217 llkCheckStackSymbols = llkSplit(
1218 android::base::GetProperty(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT));
1219 }
1220 std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT);
1221 if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd";
1222 llkBlacklistStack = llkSplit(
1223 android::base::GetProperty(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack));
1224#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001225 std::string defaultBlacklistProcess(
1226 std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
1227 std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," +
1228 std::to_string(::gettid()) + "," LLK_BLACKLIST_PROCESS_DEFAULT);
1229 if (threadname) {
Mark Salyzyn52e54a62018-08-07 08:13:13 -07001230 defaultBlacklistProcess += ","s + threadname;
Mark Salyzynf089e142018-02-20 10:47:40 -08001231 }
1232 for (int cpu = 1; cpu < get_nprocs_conf(); ++cpu) {
1233 defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]";
1234 }
1235 defaultBlacklistProcess =
1236 android::base::GetProperty(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
1237 llkBlacklistProcess = llkSplit(defaultBlacklistProcess);
1238 if (!llkSkipName("[khungtaskd]")) { // ALWAYS ignore as special
1239 llkBlacklistProcess.emplace("[khungtaskd]");
1240 }
1241 llkBlacklistParent = llkSplit(android::base::GetProperty(
1242 LLK_BLACKLIST_PARENT_PROPERTY, std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
1243 "," LLK_BLACKLIST_PARENT_DEFAULT));
1244 llkBlacklistUid =
1245 llkSplit(android::base::GetProperty(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT));
1246
1247 // internal watchdog
1248 ::signal(SIGALRM, llkAlarmHandler);
1249
1250 // kernel hung task configuration? Otherwise leave it as-is
1251 if (khtEnable) {
1252 // EUID must be AID_ROOT to write to /proc/sys/kernel/ nodes, there
1253 // are no capability overrides. For security reasons we do not want
1254 // to run as AID_ROOT. We may not be able to write them successfully,
1255 // we will try, but the least we can do is read the values back to
1256 // confirm expectations and report whether configured or not.
1257 auto configured = llkWriteStringToFileConfirm(std::to_string(khtTimeout.count()),
1258 "/proc/sys/kernel/hung_task_timeout_secs");
1259 if (configured) {
1260 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_warnings");
1261 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_check_count");
1262 configured = llkWriteStringToFileConfirm("1", "/proc/sys/kernel/hung_task_panic");
1263 }
1264 if (configured) {
1265 LOG(INFO) << "[khungtaskd] configured";
1266 } else {
1267 LOG(WARNING) << "[khungtaskd] not configurable";
1268 }
1269 }
1270
1271 bool logConfig = true;
1272 if (llkEnable) {
1273 if (llkMlockall &&
1274 // MCL_ONFAULT pins pages as they fault instead of loading
1275 // everything immediately all at once. (Which would be bad,
1276 // because as of this writing, we have a lot of mapped pages we
1277 // never use.) Old kernels will see MCL_ONFAULT and fail with
1278 // EINVAL; we ignore this failure.
1279 //
1280 // N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
1281 // pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
1282 // in pages.
1283
1284 // CAP_IPC_LOCK required
1285 mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
1286 PLOG(WARNING) << "mlockall failed ";
1287 }
1288
1289 if (threadname) {
1290 pthread_attr_t attr;
1291
1292 if (!pthread_attr_init(&attr)) {
1293 sched_param param;
1294
1295 memset(&param, 0, sizeof(param));
1296 pthread_attr_setschedparam(&attr, &param);
1297 pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
1298 if (!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
1299 pthread_t thread;
1300 if (!pthread_create(&thread, &attr, llkThread, const_cast<char*>(threadname))) {
1301 // wait a second for thread to start
1302 for (auto retry = 50; retry && !llkRunning; --retry) {
1303 ::usleep(20000);
1304 }
1305 logConfig = !llkRunning; // printed in llkd context?
1306 } else {
1307 LOG(ERROR) << "failed to spawn llkd thread";
1308 }
1309 } else {
1310 LOG(ERROR) << "failed to detach llkd thread";
1311 }
1312 pthread_attr_destroy(&attr);
1313 } else {
1314 LOG(ERROR) << "failed to allocate attibutes for llkd thread";
1315 }
1316 }
1317 } else {
1318 LOG(DEBUG) << "[khungtaskd] left unconfigured";
1319 }
1320 if (logConfig) {
1321 llkLogConfig();
1322 }
1323
1324 return llkEnable;
1325}