blob: 3a593ecc1e98bb9f8267a4da0d8a009e3178ac53 [file] [log] [blame]
Mark Salyzynf089e142018-02-20 10:47:40 -08001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "llkd.h"
18
19#include <ctype.h>
20#include <dirent.h> // opendir() and readdir()
21#include <errno.h>
22#include <fcntl.h>
23#include <pthread.h>
24#include <pwd.h> // getpwuid()
25#include <signal.h>
26#include <stdint.h>
Mark Salyzyn8a5f0812019-01-03 08:39:38 -080027#include <string.h>
Mark Salyzynf089e142018-02-20 10:47:40 -080028#include <sys/cdefs.h> // ___STRING, __predict_true() and _predict_false()
29#include <sys/mman.h> // mlockall()
30#include <sys/prctl.h>
31#include <sys/stat.h> // lstat()
32#include <sys/syscall.h> // __NR_getdents64
33#include <sys/sysinfo.h> // get_nprocs_conf()
34#include <sys/types.h>
35#include <time.h>
36#include <unistd.h>
37
38#include <chrono>
39#include <ios>
40#include <sstream>
41#include <string>
42#include <unordered_map>
43#include <unordered_set>
44
45#include <android-base/file.h>
46#include <android-base/logging.h>
47#include <android-base/parseint.h>
48#include <android-base/properties.h>
49#include <android-base/strings.h>
50#include <cutils/android_get_control_file.h>
51#include <log/log_main.h>
52
53#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
54
55#define TASK_COMM_LEN 16 // internal kernel, not uapi, from .../linux/include/linux/sched.h
56
57using namespace std::chrono_literals;
58using namespace std::chrono;
Mark Salyzyn52e54a62018-08-07 08:13:13 -070059using namespace std::literals;
Mark Salyzynf089e142018-02-20 10:47:40 -080060
61namespace {
62
// Well known process ids.
constexpr pid_t kernelPid = 0;    // kernel pid
constexpr pid_t initPid = 1;      // init pid
constexpr pid_t kthreaddPid = 2;  // [kthreadd] pid

// Prefix used to build every /proc/<pid>/... path in this file; note the
// trailing slash.
constexpr char procdir[] = "/proc/";
68
// Configuration
milliseconds llkUpdate;                              // last check ms signature
milliseconds llkCycle;                               // ms to next thread check
bool llkEnable = LLK_ENABLE_DEFAULT;                 // llk daemon enabled
bool llkRunning = false;                             // thread is running
bool llkMlockall = LLK_MLOCKALL_DEFAULT;             // run mlocked
bool llkTestWithKill = LLK_KILLTEST_DEFAULT;         // issue test kills
milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;  // default timeout
enum {                                               // enum of state indexes
    llkStateD,                                       // Persistent 'D' state
    llkStateZ,                                       // Persistent 'Z' state
#ifdef __PTRACE_ENABLED__                            // Extra privileged states
    llkStateStack,                                   // stack signature
#endif                                               // End of extra privilege
    llkNumStates,                                    // Maximum number of states
};                                                   // state indexes
milliseconds llkStateTimeoutMs[llkNumStates];        // timeout override for each detection state
milliseconds llkCheckMs;                             // checking interval to inspect any
                                                     // persistent live-locked states
bool llkLowRam;                                      // ro.config.low_ram
bool llkEnableSysrqT = LLK_ENABLE_SYSRQ_T_DEFAULT;   // sysrq stack trace dump
bool khtEnable = LLK_ENABLE_DEFAULT;                 // [khungtaskd] panic
// [khungtaskd] should have a timeout beyond the granularity of llkTimeoutMs.
// Provides a wide angle of margin b/c khtTimeout is also its granularity.
seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
                                            LLK_CHECKS_PER_TIMEOUT_DEFAULT);
#ifdef __PTRACE_ENABLED__
// list of stack symbols to search for persistence.
std::unordered_set<std::string> llkCheckStackSymbols;
#endif
Mark Salyzynf089e142018-02-20 10:47:40 -080099
// Blacklist variables, initialized with comma separated lists of high false
// positive and/or dangerous references, e.g. without self restart, for pid,
// ppid, name and uid:

// list of pids, or tids or names to skip. kernel pid (0), init pid (1),
// [kthreadd] pid (2), ourselves, "init", "[kthreadd]", "lmkd", "llkd" or
// combinations of watchdogd in kernel and user space.
std::unordered_set<std::string> llkBlacklistProcess;
// list of parent pids, comm or cmdline names to skip. default:
// kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied
std::unordered_set<std::string> llkBlacklistParent;
// list of uids, and uid names, to skip, default nothing
std::unordered_set<std::string> llkBlacklistUid;
#ifdef __PTRACE_ENABLED__
// list of names to skip stack checking. "init", "lmkd", "llkd", "keystore" or
// "logd" (if not userdebug).
std::unordered_set<std::string> llkBlacklistStack;
#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800118
119class dir {
120 public:
121 enum level { proc, task, numLevels };
122
123 private:
124 int fd;
125 size_t available_bytes;
126 dirent* next;
127 // each directory level picked to be just north of 4K in size
128 static constexpr size_t buffEntries = 15;
129 static dirent buff[numLevels][buffEntries];
130
131 bool fill(enum level index) {
132 if (index >= numLevels) return false;
133 if (available_bytes != 0) return true;
134 if (__predict_false(fd < 0)) return false;
135 // getdents64 has no libc wrapper
136 auto rc = TEMP_FAILURE_RETRY(syscall(__NR_getdents64, fd, buff[index], sizeof(buff[0]), 0));
137 if (rc <= 0) return false;
138 available_bytes = rc;
139 next = buff[index];
140 return true;
141 }
142
143 public:
144 dir() : fd(-1), available_bytes(0), next(nullptr) {}
145
146 explicit dir(const char* directory)
147 : fd(__predict_true(directory != nullptr)
148 ? ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY)
149 : -1),
150 available_bytes(0),
151 next(nullptr) {}
152
153 explicit dir(const std::string&& directory)
154 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
155 available_bytes(0),
156 next(nullptr) {}
157
158 explicit dir(const std::string& directory)
159 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
160 available_bytes(0),
161 next(nullptr) {}
162
163 // Don't need any copy or move constructors.
164 explicit dir(const dir& c) = delete;
165 explicit dir(dir& c) = delete;
166 explicit dir(dir&& c) = delete;
167
168 ~dir() {
169 if (fd >= 0) {
170 ::close(fd);
171 }
172 }
173
174 operator bool() const { return fd >= 0; }
175
176 void reset(void) {
177 if (fd >= 0) {
178 ::close(fd);
179 fd = -1;
180 available_bytes = 0;
181 next = nullptr;
182 }
183 }
184
185 dir& reset(const char* directory) {
186 reset();
187 // available_bytes will _always_ be zero here as its value is
188 // intimately tied to fd < 0 or not.
189 fd = ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
190 return *this;
191 }
192
193 void rewind(void) {
194 if (fd >= 0) {
195 ::lseek(fd, off_t(0), SEEK_SET);
196 available_bytes = 0;
197 next = nullptr;
198 }
199 }
200
201 dirent* read(enum level index = proc, dirent* def = nullptr) {
202 if (!fill(index)) return def;
203 auto ret = next;
204 available_bytes -= next->d_reclen;
205 next = reinterpret_cast<dirent*>(reinterpret_cast<char*>(next) + next->d_reclen);
206 return ret;
207 }
208} llkTopDirectory;
209
210dirent dir::buff[dir::numLevels][dir::buffEntries];
211
212// helper functions
213
214bool llkIsMissingExeLink(pid_t tid) {
215 char c;
216 // CAP_SYS_PTRACE is required to prevent ret == -1, but ENOENT is signal
217 auto ret = ::readlink((procdir + std::to_string(tid) + "/exe").c_str(), &c, sizeof(c));
218 return (ret == -1) && (errno == ENOENT);
219}
220
221// Common routine where caller accepts empty content as error/passthrough.
222// Reduces the churn of reporting read errors in the callers.
223std::string ReadFile(std::string&& path) {
224 std::string content;
225 if (!android::base::ReadFileToString(path, &content)) {
226 PLOG(DEBUG) << "Read " << path << " failed";
227 content = "";
228 }
229 return content;
230}
231
232std::string llkProcGetName(pid_t tid, const char* node = "/cmdline") {
233 std::string content = ReadFile(procdir + std::to_string(tid) + node);
234 static constexpr char needles[] = " \t\r\n"; // including trailing nul
235 auto pos = content.find_first_of(needles, 0, sizeof(needles));
236 if (pos != std::string::npos) {
237 content.erase(pos);
238 }
239 return content;
240}
241
242uid_t llkProcGetUid(pid_t tid) {
243 // Get the process' uid. The following read from /status is admittedly
244 // racy, prone to corruption due to shape-changes. The consequences are
245 // not catastrophic as we sample a few times before taking action.
246 //
247 // If /loginuid worked on reliably, or on Android (all tasks report -1)...
248 // Android lmkd causes /cgroup to contain memory:/<dom>/uid_<uid>/pid_<pid>
249 // which is tighter, but also not reliable.
250 std::string content = ReadFile(procdir + std::to_string(tid) + "/status");
251 static constexpr char Uid[] = "\nUid:";
252 auto pos = content.find(Uid);
253 if (pos == std::string::npos) {
254 return -1;
255 }
256 pos += ::strlen(Uid);
257 while ((pos < content.size()) && ::isblank(content[pos])) {
258 ++pos;
259 }
260 content.erase(0, pos);
261 for (pos = 0; (pos < content.size()) && ::isdigit(content[pos]); ++pos) {
262 ;
263 }
264 // Content of form 'Uid: 0 0 0 0', newline is error
265 if ((pos >= content.size()) || !::isblank(content[pos])) {
266 return -1;
267 }
268 content.erase(pos);
269 uid_t ret;
Tom Cherrye0bc5a92018-10-05 14:29:47 -0700270 if (!android::base::ParseUint(content, &ret, uid_t(0))) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800271 return -1;
272 }
273 return ret;
274}
275
276struct proc {
277 pid_t tid; // monitored thread id (in Z or D state).
278 nanoseconds schedUpdate; // /proc/<tid>/sched "se.avg.lastUpdateTime",
279 uint64_t nrSwitches; // /proc/<tid>/sched "nr_switches" for
280 // refined ABA problem detection, determine
281 // forward scheduling progress.
282 milliseconds update; // llkUpdate millisecond signature of last.
283 milliseconds count; // duration in state.
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700284#ifdef __PTRACE_ENABLED__ // Privileged state checking
285 milliseconds count_stack; // duration where stack is stagnant.
286#endif // End privilege
Mark Salyzynf089e142018-02-20 10:47:40 -0800287 pid_t pid; // /proc/<pid> before iterating through
288 // /proc/<pid>/task/<tid> for threads.
289 pid_t ppid; // /proc/<tid>/stat field 4 parent pid.
290 uid_t uid; // /proc/<tid>/status Uid: field.
291 unsigned time; // sum of /proc/<tid>/stat field 14 utime &
292 // 15 stime for coarse ABA problem detection.
293 std::string cmdline; // cached /cmdline content
294 char state; // /proc/<tid>/stat field 3: Z or D
295 // (others we do not monitor: S, R, T or ?)
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700296#ifdef __PTRACE_ENABLED__ // Privileged state checking
297 char stack; // index in llkCheckStackSymbols for matches
298#endif // and with maximum index PROP_VALUE_MAX/2.
Mark Salyzynf089e142018-02-20 10:47:40 -0800299 char comm[TASK_COMM_LEN + 3]; // space for adding '[' and ']'
300 bool exeMissingValid; // exeMissing has been cached
301 bool cmdlineValid; // cmdline has been cached
302 bool updated; // cleared before monitoring pass.
303 bool killed; // sent a kill to this thread, next panic...
304
305 void setComm(const char* _comm) { strncpy(comm + 1, _comm, sizeof(comm) - 2); }
306
307 proc(pid_t tid, pid_t pid, pid_t ppid, const char* _comm, int time, char state)
308 : tid(tid),
309 schedUpdate(0),
310 nrSwitches(0),
311 update(llkUpdate),
Mark Salyzynacecaf72018-08-10 08:15:57 -0700312 count(0ms),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700313#ifdef __PTRACE_ENABLED__
314 count_stack(0ms),
315#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800316 pid(pid),
317 ppid(ppid),
318 uid(-1),
319 time(time),
320 state(state),
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700321#ifdef __PTRACE_ENABLED__
322 stack(-1),
323#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800324 exeMissingValid(false),
325 cmdlineValid(false),
326 updated(true),
Mark Salyzynafd66f22018-03-19 15:16:29 -0700327 killed(!llkTestWithKill) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800328 memset(comm, '\0', sizeof(comm));
329 setComm(_comm);
330 }
331
332 const char* getComm(void) {
333 if (comm[1] == '\0') { // comm Valid?
334 strncpy(comm + 1, llkProcGetName(tid, "/comm").c_str(), sizeof(comm) - 2);
335 }
336 if (!exeMissingValid) {
337 if (llkIsMissingExeLink(tid)) {
338 comm[0] = '[';
339 }
340 exeMissingValid = true;
341 }
342 size_t len = strlen(comm + 1);
343 if (__predict_true(len < (sizeof(comm) - 1))) {
344 if (comm[0] == '[') {
345 if ((comm[len] != ']') && __predict_true(len < (sizeof(comm) - 2))) {
346 comm[++len] = ']';
347 comm[++len] = '\0';
348 }
349 } else {
350 if (comm[len] == ']') {
351 comm[len] = '\0';
352 }
353 }
354 }
355 return &comm[comm[0] != '['];
356 }
357
358 const char* getCmdline(void) {
359 if (!cmdlineValid) {
360 cmdline = llkProcGetName(tid);
361 cmdlineValid = true;
362 }
363 return cmdline.c_str();
364 }
365
366 uid_t getUid(void) {
367 if (uid <= 0) { // Churn on root user, because most likely to setuid()
368 uid = llkProcGetUid(tid);
369 }
370 return uid;
371 }
372
373 void reset(void) { // reset cache, if we detected pid rollover
374 uid = -1;
375 state = '?';
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700376#ifdef __PTRACE_ENABLED__
377 count_stack = 0ms;
378 stack = -1;
379#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800380 cmdline = "";
381 comm[0] = '\0';
382 exeMissingValid = false;
383 cmdlineValid = false;
384 }
385};
386
// Table of monitoring records, keyed by thread id.
std::unordered_map<pid_t, proc> tids;
388
389// Check range and setup defaults, in order of propagation:
390// llkTimeoutMs
391// llkCheckMs
392// ...
393// KISS to keep it all self-contained, and called multiple times as parameters
394// are interpreted so that defaults, llkCheckMs and llkCycle make sense.
395void llkValidate() {
396 if (llkTimeoutMs == 0ms) {
397 llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;
398 }
399 llkTimeoutMs = std::max(llkTimeoutMs, LLK_TIMEOUT_MS_MINIMUM);
400 if (llkCheckMs == 0ms) {
401 llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT;
402 }
403 llkCheckMs = std::min(llkCheckMs, llkTimeoutMs);
404
405 for (size_t state = 0; state < ARRAY_SIZE(llkStateTimeoutMs); ++state) {
406 if (llkStateTimeoutMs[state] == 0ms) {
407 llkStateTimeoutMs[state] = llkTimeoutMs;
408 }
409 llkStateTimeoutMs[state] =
410 std::min(std::max(llkStateTimeoutMs[state], LLK_TIMEOUT_MS_MINIMUM), llkTimeoutMs);
411 llkCheckMs = std::min(llkCheckMs, llkStateTimeoutMs[state]);
412 }
413
414 llkCheckMs = std::max(llkCheckMs, LLK_CHECK_MS_MINIMUM);
415 if (llkCycle == 0ms) {
416 llkCycle = llkCheckMs;
417 }
418 llkCycle = std::min(llkCycle, llkCheckMs);
419}
420
// Return the time elapsed from *from to *to, truncated to whole
// milliseconds.
milliseconds llkGetTimespecDiffMs(timespec* from, timespec* to) {
    // Combine both fields at nanosecond resolution before truncating.
    // Truncating the seconds and nanoseconds deltas separately reports up to
    // one millisecond too much whenever the nanoseconds delta is negative.
    const nanoseconds elapsed =
        seconds(to->tv_sec - from->tv_sec) + nanoseconds(to->tv_nsec - from->tv_nsec);
    return duration_cast<milliseconds>(elapsed);
}
425
426std::string llkProcGetName(pid_t tid, const char* comm, const char* cmdline) {
427 if ((cmdline != nullptr) && (*cmdline != '\0')) {
428 return cmdline;
429 }
430 if ((comm != nullptr) && (*comm != '\0')) {
431 return comm;
432 }
433
434 // UNLIKELY! Here because killed before we kill it?
435 // Assume change is afoot, do not call llkTidAlloc
436
437 // cmdline ?
438 std::string content = llkProcGetName(tid);
439 if (content.size() != 0) {
440 return content;
441 }
442 // Comm instead?
443 content = llkProcGetName(tid, "/comm");
444 if (llkIsMissingExeLink(tid) && (content.size() != 0)) {
445 return '[' + content + ']';
446 }
447 return content;
448}
449
450int llkKillOneProcess(pid_t pid, char state, pid_t tid, const char* tcomm = nullptr,
451 const char* tcmdline = nullptr, const char* pcomm = nullptr,
452 const char* pcmdline = nullptr) {
453 std::string forTid;
454 if (tid != pid) {
455 forTid = " for '" + llkProcGetName(tid, tcomm, tcmdline) + "' (" + std::to_string(tid) + ")";
456 }
457 LOG(INFO) << "Killing '" << llkProcGetName(pid, pcomm, pcmdline) << "' (" << pid
458 << ") to check forward scheduling progress in " << state << " state" << forTid;
459 // CAP_KILL required
460 errno = 0;
461 auto r = ::kill(pid, SIGKILL);
462 if (r) {
463 PLOG(ERROR) << "kill(" << pid << ")=" << r << ' ';
464 }
465
466 return r;
467}
468
469// Kill one process
470int llkKillOneProcess(pid_t pid, proc* tprocp) {
471 return llkKillOneProcess(pid, tprocp->state, tprocp->tid, tprocp->getComm(),
472 tprocp->getCmdline());
473}
474
475// Kill one process specified by kprocp
476int llkKillOneProcess(proc* kprocp, proc* tprocp) {
477 if (kprocp == nullptr) {
478 return -2;
479 }
480
481 return llkKillOneProcess(kprocp->tid, tprocp->state, tprocp->tid, tprocp->getComm(),
482 tprocp->getCmdline(), kprocp->getComm(), kprocp->getCmdline());
483}
484
485// Acquire file descriptor from environment, or open and cache it.
486// NB: cache is unnecessary in our current context, pedantically
487// required to prevent leakage of file descriptors in the future.
488int llkFileToWriteFd(const std::string& file) {
489 static std::unordered_map<std::string, int> cache;
490 auto search = cache.find(file);
491 if (search != cache.end()) return search->second;
492 auto fd = android_get_control_file(file.c_str());
493 if (fd >= 0) return fd;
494 fd = TEMP_FAILURE_RETRY(::open(file.c_str(), O_WRONLY | O_CLOEXEC));
495 if (fd >= 0) cache.emplace(std::make_pair(file, fd));
496 return fd;
497}
498
499// Wrap android::base::WriteStringToFile to use android_get_control_file.
500bool llkWriteStringToFile(const std::string& string, const std::string& file) {
501 auto fd = llkFileToWriteFd(file);
502 if (fd < 0) return false;
503 return android::base::WriteStringToFd(string, fd);
504}
505
506bool llkWriteStringToFileConfirm(const std::string& string, const std::string& file) {
507 auto fd = llkFileToWriteFd(file);
508 auto ret = (fd < 0) ? false : android::base::WriteStringToFd(string, fd);
509 std::string content;
510 if (!android::base::ReadFileToString(file, &content)) return ret;
511 return android::base::Trim(content) == string;
512}
513
Mark Salyzynfbc3a752018-12-04 10:30:45 -0800514void llkPanicKernel(bool dump, pid_t tid, const char* state, const std::string& message = "") {
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -0700515 if (!message.empty()) LOG(ERROR) << message;
Mark Salyzynf089e142018-02-20 10:47:40 -0800516 auto sysrqTriggerFd = llkFileToWriteFd("/proc/sysrq-trigger");
517 if (sysrqTriggerFd < 0) {
518 // DYB
519 llkKillOneProcess(initPid, 'R', tid);
520 // The answer to life, the universe and everything
521 ::exit(42);
522 // NOTREACHED
Mark Salyzynfbc3a752018-12-04 10:30:45 -0800523 return;
Mark Salyzynf089e142018-02-20 10:47:40 -0800524 }
525 ::sync();
526 if (dump) {
527 // Show all locks that are held
528 android::base::WriteStringToFd("d", sysrqTriggerFd);
Mark Salyzyn53e782d2018-10-31 16:03:45 -0700529 // Show all waiting tasks
530 android::base::WriteStringToFd("w", sysrqTriggerFd);
Mark Salyzynf089e142018-02-20 10:47:40 -0800531 // This can trigger hardware watchdog, that is somewhat _ok_.
532 // But useless if pstore configured for <256KB, low ram devices ...
Mark Salyzynbd7c8562018-10-31 10:02:08 -0700533 if (llkEnableSysrqT) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800534 android::base::WriteStringToFd("t", sysrqTriggerFd);
Mark Salyzyn53e782d2018-10-31 16:03:45 -0700535 // Show all locks that are held (in case 't' overflows ramoops)
536 android::base::WriteStringToFd("d", sysrqTriggerFd);
537 // Show all waiting tasks (in case 't' overflows ramoops)
538 android::base::WriteStringToFd("w", sysrqTriggerFd);
Mark Salyzynf089e142018-02-20 10:47:40 -0800539 }
540 ::usleep(200000); // let everything settle
541 }
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -0700542 // SysRq message matches kernel format, and propagates through bootstat
543 // ultimately to the boot reason into panic,livelock,<state>.
544 llkWriteStringToFile(message + (message.empty() ? "" : "\n") +
545 "SysRq : Trigger a crash : 'livelock,"s + state + "'\n",
546 "/dev/kmsg");
Mark Salyzynfbc3a752018-12-04 10:30:45 -0800547 // Because panic is such a serious thing to do, let us
548 // make sure that the tid being inspected still exists!
549 auto piddir = procdir + std::to_string(tid) + "/stat";
550 if (access(piddir.c_str(), F_OK) != 0) {
551 PLOG(WARNING) << piddir;
552 return;
553 }
Mark Salyzynf089e142018-02-20 10:47:40 -0800554 android::base::WriteStringToFd("c", sysrqTriggerFd);
555 // NOTREACHED
556 // DYB
557 llkKillOneProcess(initPid, 'R', tid);
558 // I sat at my desk, stared into the garden and thought '42 will do'.
559 // I typed it out. End of story
560 ::exit(42);
561 // NOTREACHED
562}
563
564void llkAlarmHandler(int) {
Mark Salyzynb3418a22018-11-19 15:24:03 -0800565 LOG(FATAL) << "alarm";
566 // NOTREACHED
567 llkPanicKernel(true, ::getpid(), "alarm");
Mark Salyzynf089e142018-02-20 10:47:40 -0800568}
569
570milliseconds GetUintProperty(const std::string& key, milliseconds def) {
571 return milliseconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
572 static_cast<uint64_t>(def.max().count())));
573}
574
575seconds GetUintProperty(const std::string& key, seconds def) {
576 return seconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
577 static_cast<uint64_t>(def.max().count())));
578}
579
580proc* llkTidLookup(pid_t tid) {
581 auto search = tids.find(tid);
582 if (search == tids.end()) {
583 return nullptr;
584 }
585 return &search->second;
586}
587
588void llkTidRemove(pid_t tid) {
589 tids.erase(tid);
590}
591
592proc* llkTidAlloc(pid_t tid, pid_t pid, pid_t ppid, const char* comm, int time, char state) {
593 auto it = tids.emplace(std::make_pair(tid, proc(tid, pid, ppid, comm, time, state)));
594 return &it.first->second;
595}
596
// Format a duration as seconds with three decimals and an 's' suffix,
// e.g. 1234ms as "1.234s".
std::string llkFormat(milliseconds ms) {
    const auto whole = duration_cast<seconds>(ms);
    const auto fraction = ms - whole;

    std::ostringstream out;
    out << whole.count() << '.';
    // Zero pad the milliseconds to three digits, then restore the stream.
    const auto savedFill = out.fill('0');
    const auto savedWidth = out.width(3);
    out << std::right << fraction.count();
    out.width(savedWidth);
    out.fill(savedFill);
    out << 's';
    return out.str();
}
609
// Format a duration as whole seconds with an 's' suffix, e.g. "30s".
std::string llkFormat(seconds s) {
    std::string text = std::to_string(s.count());
    text += 's';
    return text;
}
613
// Format a flag as "true" or "false".
std::string llkFormat(bool flag) {
    if (flag) return "true";
    return "false";
}
617
// Format a blacklist as a comma separated list, in the iteration order of
// the set.
std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
    std::string joined;
    for (auto entry = blacklist.begin(); entry != blacklist.end(); ++entry) {
        // Separate from what has been collected so far, if anything.
        if (!joined.empty()) joined += ",";
        joined += *entry;
    }
    return joined;
}
626
Mark Salyzyn8a5f0812019-01-03 08:39:38 -0800627// This function parses the properties as a list, incorporating the supplied
628// default. A leading comma separator means preserve the defaults and add
629// entries (with an optional leading + sign), or removes entries with a leading
630// - sign.
631//
Mark Salyzynf089e142018-02-20 10:47:40 -0800632// We only officially support comma separators, but wetware being what they
633// are will take some liberty and I do not believe they should be punished.
Mark Salyzyn8a5f0812019-01-03 08:39:38 -0800634std::unordered_set<std::string> llkSplit(const std::string& prop, const std::string& def) {
635 auto s = android::base::GetProperty(prop, def);
636 constexpr char separators[] = ", \t:;";
637 if (!s.empty() && (s != def) && strchr(separators, s[0])) s = def + s;
638
Mark Salyzynf089e142018-02-20 10:47:40 -0800639 std::unordered_set<std::string> result;
640
Mark Salyzynacecaf72018-08-10 08:15:57 -0700641 // Special case, allow boolean false to empty the list, otherwise expected
642 // source of input from android::base::GetProperty will supply the default
643 // value on empty content in the property.
644 if (s == "false") return result;
645
Mark Salyzynf089e142018-02-20 10:47:40 -0800646 size_t base = 0;
Mark Salyzynacecaf72018-08-10 08:15:57 -0700647 while (s.size() > base) {
Mark Salyzyn8a5f0812019-01-03 08:39:38 -0800648 auto found = s.find_first_of(separators, base);
649 // Only emplace unique content, empty entries are not an option
650 if (found != base) {
651 switch (s[base]) {
652 case '-':
653 ++base;
654 if (base >= s.size()) break;
655 if (base != found) {
656 auto have = result.find(s.substr(base, found - base));
657 if (have != result.end()) result.erase(have);
658 }
659 break;
660 case '+':
661 ++base;
662 if (base >= s.size()) break;
663 if (base == found) break;
664 // FALLTHRU (for gcc, lint, pcc, etc; following for clang)
665 FALLTHROUGH_INTENDED;
666 default:
667 result.emplace(s.substr(base, found - base));
668 break;
669 }
670 }
Mark Salyzynf089e142018-02-20 10:47:40 -0800671 if (found == s.npos) break;
672 base = found + 1;
673 }
674 return result;
675}
676
677bool llkSkipName(const std::string& name,
678 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
Mark Salyzyn8a5f0812019-01-03 08:39:38 -0800679 if (name.empty() || blacklist.empty()) return false;
Mark Salyzynf089e142018-02-20 10:47:40 -0800680
681 return blacklist.find(name) != blacklist.end();
682}
683
Mark Salyzyn8a5f0812019-01-03 08:39:38 -0800684bool llkSkipProc(proc* procp,
685 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
686 if (!procp) return false;
687 if (llkSkipName(std::to_string(procp->pid), blacklist)) return true;
688 if (llkSkipName(procp->getComm(), blacklist)) return true;
689 if (llkSkipName(procp->getCmdline(), blacklist)) return true;
690 if (llkSkipName(android::base::Basename(procp->getCmdline()), blacklist)) return true;
691 return false;
692}
693
Mark Salyzynf089e142018-02-20 10:47:40 -0800694bool llkSkipPid(pid_t pid) {
695 return llkSkipName(std::to_string(pid), llkBlacklistProcess);
696}
697
698bool llkSkipPpid(pid_t ppid) {
699 return llkSkipName(std::to_string(ppid), llkBlacklistParent);
700}
701
702bool llkSkipUid(uid_t uid) {
703 // Match by number?
704 if (llkSkipName(std::to_string(uid), llkBlacklistUid)) {
705 return true;
706 }
707
708 // Match by name?
709 auto pwd = ::getpwuid(uid);
710 return (pwd != nullptr) && __predict_true(pwd->pw_name != nullptr) &&
711 __predict_true(pwd->pw_name[0] != '\0') && llkSkipName(pwd->pw_name, llkBlacklistUid);
712}
713
714bool getValidTidDir(dirent* dp, std::string* piddir) {
715 if (!::isdigit(dp->d_name[0])) {
716 return false;
717 }
718
719 // Corner case can not happen in reality b/c of above ::isdigit check
720 if (__predict_false(dp->d_type != DT_DIR)) {
721 if (__predict_false(dp->d_type == DT_UNKNOWN)) { // can't b/c procfs
722 struct stat st;
723 *piddir = procdir;
724 *piddir += dp->d_name;
725 return (lstat(piddir->c_str(), &st) == 0) && (st.st_mode & S_IFDIR);
726 }
727 return false;
728 }
729
730 *piddir = procdir;
731 *piddir += dp->d_name;
732 return true;
733}
734
// True for the two thread states that are monitored: 'Z' and 'D'.
bool llkIsMonitorState(char state) {
    switch (state) {
        case 'Z':
        case 'D':
            return true;
        default:
            return false;
    }
}
738
739// returns -1 if not found
740long long getSchedValue(const std::string& schedString, const char* key) {
741 auto pos = schedString.find(key);
742 if (pos == std::string::npos) {
743 return -1;
744 }
745 pos = schedString.find(':', pos);
746 if (__predict_false(pos == std::string::npos)) {
747 return -1;
748 }
749 while ((++pos < schedString.size()) && ::isblank(schedString[pos])) {
750 ;
751 }
752 long long ret;
753 if (!android::base::ParseInt(schedString.substr(pos), &ret, static_cast<long long>(0))) {
754 return -1;
755 }
756 return ret;
757}
758
#ifdef __PTRACE_ENABLED__
// Track how long the same symbol from llkCheckStackSymbols has been present
// in /proc/<tid>/stack. Returns true once that has lasted for the stack
// state timeout.
//   procp  - record of the thread, its stack and count_stack are updated
//   piddir - /proc directory of the thread
bool llkCheckStack(proc* procp, const std::string& piddir) {
    if (llkCheckStackSymbols.empty()) return false;
    if (procp->state == 'Z') {  // No brains for Zombies
        procp->stack = -1;
        procp->count_stack = 0ms;
        return false;
    }

    //  Don't check process that are known to block ptrace, save sepolicy noise.
    if (llkSkipProc(procp, llkBlacklistStack)) return false;
    const auto kernel_stack = ReadFile(piddir + "/stack");
    if (kernel_stack.empty()) {
        LOG(VERBOSE) << piddir << "/stack empty comm=" << procp->getComm()
                     << " cmdline=" << procp->getCmdline();
        return false;
    }
    // A scheduling incident that should not reset count_stack
    if (kernel_stack.find(" cpu_worker_pools+0x") != std::string::npos) return false;

    // Find the first listed symbol present in the stack, with or without
    // the .cfi suffix, and remember its position in the list.
    const auto onStack = [&kernel_stack](const std::string& text) {
        return kernel_stack.find(text) != std::string::npos;
    };
    char position = -1;
    char match = -1;
    std::string matched_stack_symbol = "<unknown>";
    for (const auto& symbol : llkCheckStackSymbols) {
        if (++position < 0) break;
        if (onStack(" "s + symbol + "+0x") || onStack(" "s + symbol + ".cfi+0x")) {
            match = position;
            matched_stack_symbol = symbol;
            break;
        }
    }

    // A different outcome than last time restarts the clock.
    if (procp->stack != match) {
        procp->stack = match;
        procp->count_stack = 0ms;
        return false;
    }
    if (match == char(-1)) return false;
    procp->count_stack += llkCycle;
    if (procp->count_stack < llkStateTimeoutMs[llkStateStack]) return false;
    LOG(WARNING) << "Found " << matched_stack_symbol << " in stack for pid " << procp->pid;
    return true;
}
#endif
802
Mark Salyzynf089e142018-02-20 10:47:40 -0800803// Primary ABA mitigation watching last time schedule activity happened
804void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
805 // Audit finds /proc/<tid>/sched is just over 1K, and
806 // is rarely larger than 2K, even less on Android.
807 // For example, the "se.avg.lastUpdateTime" field we are
808 // interested in typically within the primary set in
809 // the first 1K.
810 //
811 // Proc entries can not be read >1K atomically via libbase,
812 // but if there are problems we assume at least a few
813 // samples of reads occur before we take any real action.
814 std::string schedString = ReadFile(piddir + "/sched");
Mark Salyzyn8a5f0812019-01-03 08:39:38 -0800815 if (schedString.empty()) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800816 // /schedstat is not as standardized, but in 3.1+
817 // Android devices, the third field is nr_switches
818 // from /sched:
819 schedString = ReadFile(piddir + "/schedstat");
Mark Salyzyn8a5f0812019-01-03 08:39:38 -0800820 if (schedString.empty()) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800821 return;
822 }
823 auto val = static_cast<unsigned long long>(-1);
824 if (((::sscanf(schedString.c_str(), "%*d %*d %llu", &val)) == 1) &&
825 (val != static_cast<unsigned long long>(-1)) && (val != 0) &&
826 (val != procp->nrSwitches)) {
827 procp->nrSwitches = val;
828 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700829 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800830 }
831 return;
832 }
833
834 auto val = getSchedValue(schedString, "\nse.avg.lastUpdateTime");
835 if (val == -1) {
836 val = getSchedValue(schedString, "\nse.svg.last_update_time");
837 }
838 if (val != -1) {
839 auto schedUpdate = nanoseconds(val);
840 if (schedUpdate != procp->schedUpdate) {
841 procp->schedUpdate = schedUpdate;
842 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700843 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800844 }
845 }
846
847 val = getSchedValue(schedString, "\nnr_switches");
848 if (val != -1) {
849 if (static_cast<uint64_t>(val) != procp->nrSwitches) {
850 procp->nrSwitches = val;
851 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700852 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800853 }
854 }
855}
856
857void llkLogConfig(void) {
858 LOG(INFO) << "ro.config.low_ram=" << llkFormat(llkLowRam) << "\n"
Mark Salyzynbd7c8562018-10-31 10:02:08 -0700859 << LLK_ENABLE_SYSRQ_T_PROPERTY "=" << llkFormat(llkEnableSysrqT) << "\n"
Mark Salyzynf089e142018-02-20 10:47:40 -0800860 << LLK_ENABLE_PROPERTY "=" << llkFormat(llkEnable) << "\n"
861 << KHT_ENABLE_PROPERTY "=" << llkFormat(khtEnable) << "\n"
862 << LLK_MLOCKALL_PROPERTY "=" << llkFormat(llkMlockall) << "\n"
Mark Salyzynafd66f22018-03-19 15:16:29 -0700863 << LLK_KILLTEST_PROPERTY "=" << llkFormat(llkTestWithKill) << "\n"
Mark Salyzynf089e142018-02-20 10:47:40 -0800864 << KHT_TIMEOUT_PROPERTY "=" << llkFormat(khtTimeout) << "\n"
865 << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n"
866 << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n"
867 << LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n"
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700868#ifdef __PTRACE_ENABLED__
869 << LLK_STACK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateStack])
870 << "\n"
871#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800872 << LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n"
Mark Salyzyn96505fa2018-08-07 08:13:13 -0700873#ifdef __PTRACE_ENABLED__
874 << LLK_CHECK_STACK_PROPERTY "=" << llkFormat(llkCheckStackSymbols) << "\n"
875 << LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
876#endif
Mark Salyzynf089e142018-02-20 10:47:40 -0800877 << LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
878 << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
879 << LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
880}
881
882void* llkThread(void* obj) {
Mark Salyzyn4832a8b2018-08-15 11:02:18 -0700883 prctl(PR_SET_DUMPABLE, 0);
884
Mark Salyzynf089e142018-02-20 10:47:40 -0800885 LOG(INFO) << "started";
886
887 std::string name = std::to_string(::gettid());
888 if (!llkSkipName(name)) {
889 llkBlacklistProcess.emplace(name);
890 }
891 name = static_cast<const char*>(obj);
892 prctl(PR_SET_NAME, name.c_str());
893 if (__predict_false(!llkSkipName(name))) {
894 llkBlacklistProcess.insert(name);
895 }
896 // No longer modifying llkBlacklistProcess.
897 llkRunning = true;
898 llkLogConfig();
899 while (llkRunning) {
900 ::usleep(duration_cast<microseconds>(llkCheck(true)).count());
901 }
902 // NOTREACHED
903 LOG(INFO) << "exiting";
904 return nullptr;
905}
906
907} // namespace
908
909milliseconds llkCheck(bool checkRunning) {
910 if (!llkEnable || (checkRunning != llkRunning)) {
911 return milliseconds::max();
912 }
913
914 // Reset internal watchdog, which is a healthy engineering margin of
915 // double the maximum wait or cycle time for the mainloop that calls us.
916 //
917 // This alarm is effectively the live lock detection of llkd, as
918 // we understandably can not monitor ourselves otherwise.
919 ::alarm(duration_cast<seconds>(llkTimeoutMs * 2).count());
920
921 // kernel jiffy precision fastest acquisition
922 static timespec last;
923 timespec now;
924 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
925 auto ms = llkGetTimespecDiffMs(&last, &now);
926 if (ms < llkCycle) {
927 return llkCycle - ms;
928 }
929 last = now;
930
931 LOG(VERBOSE) << "opendir(\"" << procdir << "\")";
932 if (__predict_false(!llkTopDirectory)) {
933 // gid containing AID_READPROC required
934 llkTopDirectory.reset(procdir);
935 if (__predict_false(!llkTopDirectory)) {
936 // Most likely reason we could be here is a resource limit.
937 // Keep our processing down to a minimum, but not so low that
938 // we do not recover in a timely manner should the issue be
939 // transitory.
940 LOG(DEBUG) << "opendir(\"" << procdir << "\") failed";
941 return llkTimeoutMs;
942 }
943 }
944
945 for (auto& it : tids) {
946 it.second.updated = false;
947 }
948
949 auto prevUpdate = llkUpdate;
950 llkUpdate += ms;
951 ms -= llkCycle;
952 auto myPid = ::getpid();
953 auto myTid = ::gettid();
Mark Salyzynfbc3a752018-12-04 10:30:45 -0800954 auto dump = true;
Mark Salyzynf089e142018-02-20 10:47:40 -0800955 for (auto dp = llkTopDirectory.read(); dp != nullptr; dp = llkTopDirectory.read()) {
956 std::string piddir;
957
958 if (!getValidTidDir(dp, &piddir)) {
959 continue;
960 }
961
962 // Get the process tasks
963 std::string taskdir = piddir + "/task/";
964 int pid = -1;
965 LOG(VERBOSE) << "+opendir(\"" << taskdir << "\")";
966 dir taskDirectory(taskdir);
967 if (__predict_false(!taskDirectory)) {
968 LOG(DEBUG) << "+opendir(\"" << taskdir << "\") failed";
969 }
970 for (auto tp = taskDirectory.read(dir::task, dp); tp != nullptr;
971 tp = taskDirectory.read(dir::task)) {
972 if (!getValidTidDir(tp, &piddir)) {
973 continue;
974 }
975
976 // Get the process stat
977 std::string stat = ReadFile(piddir + "/stat");
Mark Salyzyn8a5f0812019-01-03 08:39:38 -0800978 if (stat.empty()) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800979 continue;
980 }
981 unsigned tid = -1;
982 char pdir[TASK_COMM_LEN + 1];
983 char state = '?';
984 unsigned ppid = -1;
985 unsigned utime = -1;
986 unsigned stime = -1;
987 int dummy;
988 pdir[0] = '\0';
989 // tid should not change value
990 auto match = ::sscanf(
991 stat.c_str(),
992 "%u (%" ___STRING(
993 TASK_COMM_LEN) "[^)]) %c %u %*d %*d %*d %*d %*d %*d %*d %*d %*d %u %u %d",
994 &tid, pdir, &state, &ppid, &utime, &stime, &dummy);
995 if (pid == -1) {
996 pid = tid;
997 }
998 LOG(VERBOSE) << "match " << match << ' ' << tid << " (" << pdir << ") " << state << ' '
999 << ppid << " ... " << utime << ' ' << stime << ' ' << dummy;
1000 if (match != 7) {
1001 continue;
1002 }
1003
1004 auto procp = llkTidLookup(tid);
1005 if (procp == nullptr) {
1006 procp = llkTidAlloc(tid, pid, ppid, pdir, utime + stime, state);
1007 } else {
1008 // comm can change ...
1009 procp->setComm(pdir);
1010 procp->updated = true;
1011 // pid/ppid/tid wrap?
1012 if (((procp->update != prevUpdate) && (procp->update != llkUpdate)) ||
1013 (procp->ppid != ppid) || (procp->pid != pid)) {
1014 procp->reset();
1015 } else if (procp->time != (utime + stime)) { // secondary ABA.
1016 // watching utime+stime granularity jiffy
1017 procp->state = '?';
1018 }
1019 procp->update = llkUpdate;
1020 procp->pid = pid;
1021 procp->ppid = ppid;
1022 procp->time = utime + stime;
1023 if (procp->state != state) {
1024 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -07001025 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -08001026 procp->state = state;
1027 } else {
1028 procp->count += llkCycle;
1029 }
1030 }
1031
1032 // Filter checks in intuitive order of CPU cost to evaluate
1033 // If tid unique continue, if ppid or pid unique break
1034
1035 if (pid == myPid) {
1036 break;
1037 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001038#ifdef __PTRACE_ENABLED__
1039 // if no stack monitoring, we can quickly exit here
1040 if (!llkIsMonitorState(state) && llkCheckStackSymbols.empty()) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001041 continue;
1042 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001043#else
1044 if (!llkIsMonitorState(state)) continue;
1045#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001046 if ((tid == myTid) || llkSkipPid(tid)) {
1047 continue;
1048 }
1049 if (llkSkipPpid(ppid)) {
1050 break;
1051 }
1052
1053 if (llkSkipName(procp->getComm())) {
1054 continue;
1055 }
1056 if (llkSkipName(procp->getCmdline())) {
1057 break;
1058 }
Mark Salyzyne81ede82018-10-22 15:52:32 -07001059 if (llkSkipName(android::base::Basename(procp->getCmdline()))) {
1060 break;
1061 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001062
1063 auto pprocp = llkTidLookup(ppid);
1064 if (pprocp == nullptr) {
1065 pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
1066 }
Mark Salyzyn8a5f0812019-01-03 08:39:38 -08001067 if (pprocp) {
1068 if (llkSkipProc(pprocp, llkBlacklistParent)) break;
1069 } else {
1070 if (llkSkipName(std::to_string(ppid), llkBlacklistParent)) break;
Mark Salyzynf089e142018-02-20 10:47:40 -08001071 }
1072
1073 if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) {
1074 continue;
1075 }
1076
1077 // ABA mitigation watching last time schedule activity happened
1078 llkCheckSchedUpdate(procp, piddir);
1079
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001080#ifdef __PTRACE_ENABLED__
1081 auto stuck = llkCheckStack(procp, piddir);
1082 if (llkIsMonitorState(state)) {
1083 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1084 stuck = true;
1085 } else if (procp->count != 0ms) {
1086 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1087 << pid << "->" << tid << ' ' << procp->getComm();
1088 }
1089 }
1090 if (!stuck) continue;
1091#else
1092 if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
1093 if (procp->count != 0ms) {
1094 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
1095 << pid << "->" << tid << ' ' << procp->getComm();
1096 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001097 continue;
1098 }
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001099#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001100
1101 // We have to kill it to determine difference between live lock
1102 // and persistent state blocked on a resource. Is there something
1103 // wrong with a process that has no forward scheduling progress in
1104 // Z or D? Yes, generally means improper accounting in the
1105 // process, but not always ...
1106 //
1107 // Whomever we hit with a test kill must accept the Android
1108 // Aphorism that everything can be burned to the ground and
1109 // must survive.
1110 if (procp->killed == false) {
1111 procp->killed = true;
1112 // confirm: re-read uid before committing to a panic.
1113 procp->uid = -1;
1114 switch (state) {
1115 case 'Z': // kill ppid to free up a Zombie
1116 // Killing init will kernel panic without diagnostics
1117 // so skip right to controlled kernel panic with
1118 // diagnostics.
1119 if (ppid == initPid) {
1120 break;
1121 }
1122 LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
1123 << pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
1124 if ((llkKillOneProcess(pprocp, procp) >= 0) ||
1125 (llkKillOneProcess(ppid, procp) >= 0)) {
1126 continue;
1127 }
1128 break;
1129
1130 case 'D': // kill tid to free up an uninterruptible D
1131 // If ABA is doing its job, we would not need or
1132 // want the following. Test kill is a Hail Mary
1133 // to make absolutely sure there is no forward
1134 // scheduling progress. The cost when ABA is
1135 // not working is we kill a process that likes to
1136 // stay in 'D' state, instead of panicing the
1137 // kernel (worse).
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001138 default:
1139 LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
1140 << "->" << tid << ' ' << procp->getComm() << " [kill]";
Mark Salyzynf089e142018-02-20 10:47:40 -08001141 if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001142 (llkKillOneProcess(pid, state, tid) >= 0) ||
Mark Salyzynf089e142018-02-20 10:47:40 -08001143 (llkKillOneProcess(procp, procp) >= 0) ||
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001144 (llkKillOneProcess(tid, state, tid) >= 0)) {
Mark Salyzynf089e142018-02-20 10:47:40 -08001145 continue;
1146 }
1147 break;
1148 }
1149 }
1150 // We are here because we have confirmed kernel live-lock
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -07001151 const auto message = state + " "s + llkFormat(procp->count) + " " +
1152 std::to_string(ppid) + "->" + std::to_string(pid) + "->" +
1153 std::to_string(tid) + " " + procp->getComm() + " [panic]";
Mark Salyzynfbc3a752018-12-04 10:30:45 -08001154 llkPanicKernel(dump, tid,
Mark Salyzyn3c3b14d2018-10-31 10:38:15 -07001155 (state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping",
1156 message);
Mark Salyzynfbc3a752018-12-04 10:30:45 -08001157 dump = false;
Mark Salyzynf089e142018-02-20 10:47:40 -08001158 }
1159 LOG(VERBOSE) << "+closedir()";
1160 }
1161 llkTopDirectory.rewind();
1162 LOG(VERBOSE) << "closedir()";
1163
1164 // garbage collection of old process references
1165 for (auto p = tids.begin(); p != tids.end();) {
1166 if (!p->second.updated) {
1167 IF_ALOG(LOG_VERBOSE, LOG_TAG) {
1168 std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr);
Mark Salyzyn8a5f0812019-01-03 08:39:38 -08001169 if (!ppidCmdline.empty()) ppidCmdline = "(" + ppidCmdline + ")";
Mark Salyzynf089e142018-02-20 10:47:40 -08001170 std::string pidCmdline;
1171 if (p->second.pid != p->second.tid) {
1172 pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline());
Mark Salyzyn8a5f0812019-01-03 08:39:38 -08001173 if (!pidCmdline.empty()) pidCmdline = "(" + pidCmdline + ")";
Mark Salyzynf089e142018-02-20 10:47:40 -08001174 }
1175 std::string tidCmdline =
1176 llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline());
Mark Salyzyn8a5f0812019-01-03 08:39:38 -08001177 if (!tidCmdline.empty()) tidCmdline = "(" + tidCmdline + ")";
Mark Salyzynf089e142018-02-20 10:47:40 -08001178 LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid
1179 << pidCmdline << "->" << p->second.tid << tidCmdline << " removed";
1180 }
1181 p = tids.erase(p);
1182 } else {
1183 ++p;
1184 }
1185 }
1186 if (__predict_false(tids.empty())) {
1187 llkTopDirectory.reset();
1188 }
1189
1190 llkCycle = llkCheckMs;
1191
1192 timespec end;
1193 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &end);
1194 auto milli = llkGetTimespecDiffMs(&now, &end);
1195 LOG((milli > 10s) ? ERROR : (milli > 1s) ? WARNING : VERBOSE) << "sample " << llkFormat(milli);
1196
1197 // cap to minimum sleep for 1 second since last cycle
1198 if (llkCycle < (ms + 1s)) {
1199 return 1s;
1200 }
1201 return llkCycle - ms;
1202}
1203
1204unsigned llkCheckMilliseconds() {
1205 return duration_cast<milliseconds>(llkCheck()).count();
1206}
1207
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001208bool llkCheckEng(const std::string& property) {
1209 return android::base::GetProperty(property, "eng") == "eng";
1210}
1211
Mark Salyzynf089e142018-02-20 10:47:40 -08001212bool llkInit(const char* threadname) {
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001213 auto debuggable = android::base::GetBoolProperty("ro.debuggable", false);
Mark Salyzynf089e142018-02-20 10:47:40 -08001214 llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false);
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001215 llkEnableSysrqT &= !llkLowRam;
1216 if (debuggable) {
1217 llkEnableSysrqT |= llkCheckEng(LLK_ENABLE_SYSRQ_T_PROPERTY);
1218 if (!LLK_ENABLE_DEFAULT) { // NB: default is currently true ...
1219 llkEnable |= llkCheckEng(LLK_ENABLE_PROPERTY);
1220 khtEnable |= llkCheckEng(KHT_ENABLE_PROPERTY);
1221 }
Mark Salyzynd035dbb2018-03-26 08:23:00 -07001222 }
Mark Salyzynbd7c8562018-10-31 10:02:08 -07001223 llkEnableSysrqT = android::base::GetBoolProperty(LLK_ENABLE_SYSRQ_T_PROPERTY, llkEnableSysrqT);
Mark Salyzynf089e142018-02-20 10:47:40 -08001224 llkEnable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, llkEnable);
1225 if (llkEnable && !llkTopDirectory.reset(procdir)) {
1226 // Most likely reason we could be here is llkd was started
1227 // incorrectly without the readproc permissions. Keep our
1228 // processing down to a minimum.
1229 llkEnable = false;
1230 }
1231 khtEnable = android::base::GetBoolProperty(KHT_ENABLE_PROPERTY, khtEnable);
1232 llkMlockall = android::base::GetBoolProperty(LLK_MLOCKALL_PROPERTY, llkMlockall);
Mark Salyzynafd66f22018-03-19 15:16:29 -07001233 llkTestWithKill = android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, llkTestWithKill);
Mark Salyzynf089e142018-02-20 10:47:40 -08001234 // if LLK_TIMOUT_MS_PROPERTY was not set, we will use a set
1235 // KHT_TIMEOUT_PROPERTY as co-operative guidance for the default value.
1236 khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout);
1237 if (khtTimeout == 0s) {
1238 khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
1239 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1240 }
1241 llkTimeoutMs =
1242 khtTimeout * LLK_CHECKS_PER_TIMEOUT_DEFAULT / (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1243 llkTimeoutMs = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1244 llkValidate(); // validate llkTimeoutMs, llkCheckMs and llkCycle
1245 llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1246 llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001247#ifdef __PTRACE_ENABLED__
1248 llkStateTimeoutMs[llkStateStack] = GetUintProperty(LLK_STACK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1249#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001250 llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs);
1251 llkValidate(); // validate all (effectively minus llkTimeoutMs)
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001252#ifdef __PTRACE_ENABLED__
1253 if (debuggable) {
Mark Salyzyn8a5f0812019-01-03 08:39:38 -08001254 llkCheckStackSymbols = llkSplit(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT);
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001255 }
1256 std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT);
1257 if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd";
Mark Salyzyn8a5f0812019-01-03 08:39:38 -08001258 llkBlacklistStack = llkSplit(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack);
Mark Salyzyn96505fa2018-08-07 08:13:13 -07001259#endif
Mark Salyzynf089e142018-02-20 10:47:40 -08001260 std::string defaultBlacklistProcess(
1261 std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
1262 std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," +
1263 std::to_string(::gettid()) + "," LLK_BLACKLIST_PROCESS_DEFAULT);
1264 if (threadname) {
Mark Salyzyn52e54a62018-08-07 08:13:13 -07001265 defaultBlacklistProcess += ","s + threadname;
Mark Salyzynf089e142018-02-20 10:47:40 -08001266 }
1267 for (int cpu = 1; cpu < get_nprocs_conf(); ++cpu) {
1268 defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]";
1269 }
Mark Salyzyn8a5f0812019-01-03 08:39:38 -08001270 llkBlacklistProcess = llkSplit(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
Mark Salyzynf089e142018-02-20 10:47:40 -08001271 if (!llkSkipName("[khungtaskd]")) { // ALWAYS ignore as special
1272 llkBlacklistProcess.emplace("[khungtaskd]");
1273 }
Mark Salyzyn8a5f0812019-01-03 08:39:38 -08001274 llkBlacklistParent = llkSplit(LLK_BLACKLIST_PARENT_PROPERTY,
1275 std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
1276 "," LLK_BLACKLIST_PARENT_DEFAULT);
1277 llkBlacklistUid = llkSplit(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT);
Mark Salyzynf089e142018-02-20 10:47:40 -08001278
1279 // internal watchdog
1280 ::signal(SIGALRM, llkAlarmHandler);
1281
1282 // kernel hung task configuration? Otherwise leave it as-is
1283 if (khtEnable) {
1284 // EUID must be AID_ROOT to write to /proc/sys/kernel/ nodes, there
1285 // are no capability overrides. For security reasons we do not want
1286 // to run as AID_ROOT. We may not be able to write them successfully,
1287 // we will try, but the least we can do is read the values back to
1288 // confirm expectations and report whether configured or not.
1289 auto configured = llkWriteStringToFileConfirm(std::to_string(khtTimeout.count()),
1290 "/proc/sys/kernel/hung_task_timeout_secs");
1291 if (configured) {
1292 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_warnings");
1293 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_check_count");
1294 configured = llkWriteStringToFileConfirm("1", "/proc/sys/kernel/hung_task_panic");
1295 }
1296 if (configured) {
1297 LOG(INFO) << "[khungtaskd] configured";
1298 } else {
1299 LOG(WARNING) << "[khungtaskd] not configurable";
1300 }
1301 }
1302
1303 bool logConfig = true;
1304 if (llkEnable) {
1305 if (llkMlockall &&
1306 // MCL_ONFAULT pins pages as they fault instead of loading
1307 // everything immediately all at once. (Which would be bad,
1308 // because as of this writing, we have a lot of mapped pages we
1309 // never use.) Old kernels will see MCL_ONFAULT and fail with
1310 // EINVAL; we ignore this failure.
1311 //
1312 // N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
1313 // pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
1314 // in pages.
1315
1316 // CAP_IPC_LOCK required
1317 mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
1318 PLOG(WARNING) << "mlockall failed ";
1319 }
1320
1321 if (threadname) {
1322 pthread_attr_t attr;
1323
1324 if (!pthread_attr_init(&attr)) {
1325 sched_param param;
1326
1327 memset(&param, 0, sizeof(param));
1328 pthread_attr_setschedparam(&attr, &param);
1329 pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
1330 if (!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
1331 pthread_t thread;
1332 if (!pthread_create(&thread, &attr, llkThread, const_cast<char*>(threadname))) {
1333 // wait a second for thread to start
1334 for (auto retry = 50; retry && !llkRunning; --retry) {
1335 ::usleep(20000);
1336 }
1337 logConfig = !llkRunning; // printed in llkd context?
1338 } else {
1339 LOG(ERROR) << "failed to spawn llkd thread";
1340 }
1341 } else {
1342 LOG(ERROR) << "failed to detach llkd thread";
1343 }
1344 pthread_attr_destroy(&attr);
1345 } else {
1346 LOG(ERROR) << "failed to allocate attibutes for llkd thread";
1347 }
1348 }
1349 } else {
1350 LOG(DEBUG) << "[khungtaskd] left unconfigured";
1351 }
1352 if (logConfig) {
1353 llkLogConfig();
1354 }
1355
1356 return llkEnable;
1357}