/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include "llkd.h"
18
19#include <ctype.h>
20#include <dirent.h> // opendir() and readdir()
21#include <errno.h>
22#include <fcntl.h>
23#include <pthread.h>
24#include <pwd.h> // getpwuid()
25#include <signal.h>
26#include <stdint.h>
27#include <sys/cdefs.h> // ___STRING, __predict_true() and _predict_false()
28#include <sys/mman.h> // mlockall()
29#include <sys/prctl.h>
30#include <sys/stat.h> // lstat()
31#include <sys/syscall.h> // __NR_getdents64
32#include <sys/sysinfo.h> // get_nprocs_conf()
33#include <sys/types.h>
34#include <time.h>
35#include <unistd.h>
36
37#include <chrono>
38#include <ios>
39#include <sstream>
40#include <string>
41#include <unordered_map>
42#include <unordered_set>
43
44#include <android-base/file.h>
45#include <android-base/logging.h>
46#include <android-base/parseint.h>
47#include <android-base/properties.h>
48#include <android-base/strings.h>
49#include <cutils/android_get_control_file.h>
50#include <log/log_main.h>
51
// Number of elements in a true array; the argument must not be a pointer.
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))

// Internal kernel value, not uapi, from .../linux/include/linux/sched.h
#define TASK_COMM_LEN 16

// Bring duration types (milliseconds, seconds, ...), the 0ms style literals
// and the "..."s string literal into scope for the rest of this file.
using namespace std::chrono_literals;
using namespace std::chrono;
using namespace std::literals;
Mark Salyzynf089e142018-02-20 10:47:40 -080059
60namespace {
61
62constexpr pid_t kernelPid = 0;
63constexpr pid_t initPid = 1;
64constexpr pid_t kthreaddPid = 2;
65
66constexpr char procdir[] = "/proc/";
67
68// Configuration
69milliseconds llkUpdate; // last check ms signature
70milliseconds llkCycle; // ms to next thread check
71bool llkEnable = LLK_ENABLE_DEFAULT; // llk daemon enabled
72bool llkRunning = false; // thread is running
73bool llkMlockall = LLK_MLOCKALL_DEFAULT; // run mlocked
Mark Salyzynafd66f22018-03-19 15:16:29 -070074bool llkTestWithKill = LLK_KILLTEST_DEFAULT; // issue test kills
Mark Salyzynf089e142018-02-20 10:47:40 -080075milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT; // default timeout
76enum { llkStateD, llkStateZ, llkNumStates }; // state indexes
77milliseconds llkStateTimeoutMs[llkNumStates]; // timeout override for each detection state
78milliseconds llkCheckMs; // checking interval to inspect any
79 // persistent live-locked states
80bool llkLowRam; // ro.config.low_ram
81bool khtEnable = LLK_ENABLE_DEFAULT; // [khungtaskd] panic
82// [khungtaskd] should have a timeout beyond the granularity of llkTimeoutMs.
83// Provides a wide angle of margin b/c khtTimeout is also its granularity.
84seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
85 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
86
87// Blacklist variables, initialized with comma separated lists of high false
88// positive and/or dangerous references, e.g. without self restart, for pid,
89// ppid, name and uid:
90
91// list of pids, or tids or names to skip. kernel pid (0), init pid (1),
92// [kthreadd] pid (2), ourselves, "init", "[kthreadd]", "lmkd", "llkd" or
93// combinations of watchdogd in kernel and user space.
94std::unordered_set<std::string> llkBlacklistProcess;
95// list of parent pids, comm or cmdline names to skip. default:
96// kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied
97std::unordered_set<std::string> llkBlacklistParent;
98// list of uids, and uid names, to skip, default nothing
99std::unordered_set<std::string> llkBlacklistUid;
100
101class dir {
102 public:
103 enum level { proc, task, numLevels };
104
105 private:
106 int fd;
107 size_t available_bytes;
108 dirent* next;
109 // each directory level picked to be just north of 4K in size
110 static constexpr size_t buffEntries = 15;
111 static dirent buff[numLevels][buffEntries];
112
113 bool fill(enum level index) {
114 if (index >= numLevels) return false;
115 if (available_bytes != 0) return true;
116 if (__predict_false(fd < 0)) return false;
117 // getdents64 has no libc wrapper
118 auto rc = TEMP_FAILURE_RETRY(syscall(__NR_getdents64, fd, buff[index], sizeof(buff[0]), 0));
119 if (rc <= 0) return false;
120 available_bytes = rc;
121 next = buff[index];
122 return true;
123 }
124
125 public:
126 dir() : fd(-1), available_bytes(0), next(nullptr) {}
127
128 explicit dir(const char* directory)
129 : fd(__predict_true(directory != nullptr)
130 ? ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY)
131 : -1),
132 available_bytes(0),
133 next(nullptr) {}
134
135 explicit dir(const std::string&& directory)
136 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
137 available_bytes(0),
138 next(nullptr) {}
139
140 explicit dir(const std::string& directory)
141 : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)),
142 available_bytes(0),
143 next(nullptr) {}
144
145 // Don't need any copy or move constructors.
146 explicit dir(const dir& c) = delete;
147 explicit dir(dir& c) = delete;
148 explicit dir(dir&& c) = delete;
149
150 ~dir() {
151 if (fd >= 0) {
152 ::close(fd);
153 }
154 }
155
156 operator bool() const { return fd >= 0; }
157
158 void reset(void) {
159 if (fd >= 0) {
160 ::close(fd);
161 fd = -1;
162 available_bytes = 0;
163 next = nullptr;
164 }
165 }
166
167 dir& reset(const char* directory) {
168 reset();
169 // available_bytes will _always_ be zero here as its value is
170 // intimately tied to fd < 0 or not.
171 fd = ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY);
172 return *this;
173 }
174
175 void rewind(void) {
176 if (fd >= 0) {
177 ::lseek(fd, off_t(0), SEEK_SET);
178 available_bytes = 0;
179 next = nullptr;
180 }
181 }
182
183 dirent* read(enum level index = proc, dirent* def = nullptr) {
184 if (!fill(index)) return def;
185 auto ret = next;
186 available_bytes -= next->d_reclen;
187 next = reinterpret_cast<dirent*>(reinterpret_cast<char*>(next) + next->d_reclen);
188 return ret;
189 }
190} llkTopDirectory;
191
192dirent dir::buff[dir::numLevels][dir::buffEntries];
193
194// helper functions
195
196bool llkIsMissingExeLink(pid_t tid) {
197 char c;
198 // CAP_SYS_PTRACE is required to prevent ret == -1, but ENOENT is signal
199 auto ret = ::readlink((procdir + std::to_string(tid) + "/exe").c_str(), &c, sizeof(c));
200 return (ret == -1) && (errno == ENOENT);
201}
202
203// Common routine where caller accepts empty content as error/passthrough.
204// Reduces the churn of reporting read errors in the callers.
205std::string ReadFile(std::string&& path) {
206 std::string content;
207 if (!android::base::ReadFileToString(path, &content)) {
208 PLOG(DEBUG) << "Read " << path << " failed";
209 content = "";
210 }
211 return content;
212}
213
214std::string llkProcGetName(pid_t tid, const char* node = "/cmdline") {
215 std::string content = ReadFile(procdir + std::to_string(tid) + node);
216 static constexpr char needles[] = " \t\r\n"; // including trailing nul
217 auto pos = content.find_first_of(needles, 0, sizeof(needles));
218 if (pos != std::string::npos) {
219 content.erase(pos);
220 }
221 return content;
222}
223
224uid_t llkProcGetUid(pid_t tid) {
225 // Get the process' uid. The following read from /status is admittedly
226 // racy, prone to corruption due to shape-changes. The consequences are
227 // not catastrophic as we sample a few times before taking action.
228 //
229 // If /loginuid worked on reliably, or on Android (all tasks report -1)...
230 // Android lmkd causes /cgroup to contain memory:/<dom>/uid_<uid>/pid_<pid>
231 // which is tighter, but also not reliable.
232 std::string content = ReadFile(procdir + std::to_string(tid) + "/status");
233 static constexpr char Uid[] = "\nUid:";
234 auto pos = content.find(Uid);
235 if (pos == std::string::npos) {
236 return -1;
237 }
238 pos += ::strlen(Uid);
239 while ((pos < content.size()) && ::isblank(content[pos])) {
240 ++pos;
241 }
242 content.erase(0, pos);
243 for (pos = 0; (pos < content.size()) && ::isdigit(content[pos]); ++pos) {
244 ;
245 }
246 // Content of form 'Uid: 0 0 0 0', newline is error
247 if ((pos >= content.size()) || !::isblank(content[pos])) {
248 return -1;
249 }
250 content.erase(pos);
251 uid_t ret;
252 if (!android::base::ParseInt(content, &ret, uid_t(0))) {
253 return -1;
254 }
255 return ret;
256}
257
258struct proc {
259 pid_t tid; // monitored thread id (in Z or D state).
260 nanoseconds schedUpdate; // /proc/<tid>/sched "se.avg.lastUpdateTime",
261 uint64_t nrSwitches; // /proc/<tid>/sched "nr_switches" for
262 // refined ABA problem detection, determine
263 // forward scheduling progress.
264 milliseconds update; // llkUpdate millisecond signature of last.
265 milliseconds count; // duration in state.
266 pid_t pid; // /proc/<pid> before iterating through
267 // /proc/<pid>/task/<tid> for threads.
268 pid_t ppid; // /proc/<tid>/stat field 4 parent pid.
269 uid_t uid; // /proc/<tid>/status Uid: field.
270 unsigned time; // sum of /proc/<tid>/stat field 14 utime &
271 // 15 stime for coarse ABA problem detection.
272 std::string cmdline; // cached /cmdline content
273 char state; // /proc/<tid>/stat field 3: Z or D
274 // (others we do not monitor: S, R, T or ?)
275 char comm[TASK_COMM_LEN + 3]; // space for adding '[' and ']'
276 bool exeMissingValid; // exeMissing has been cached
277 bool cmdlineValid; // cmdline has been cached
278 bool updated; // cleared before monitoring pass.
279 bool killed; // sent a kill to this thread, next panic...
280
281 void setComm(const char* _comm) { strncpy(comm + 1, _comm, sizeof(comm) - 2); }
282
283 proc(pid_t tid, pid_t pid, pid_t ppid, const char* _comm, int time, char state)
284 : tid(tid),
285 schedUpdate(0),
286 nrSwitches(0),
287 update(llkUpdate),
Mark Salyzynacecaf72018-08-10 08:15:57 -0700288 count(0ms),
Mark Salyzynf089e142018-02-20 10:47:40 -0800289 pid(pid),
290 ppid(ppid),
291 uid(-1),
292 time(time),
293 state(state),
294 exeMissingValid(false),
295 cmdlineValid(false),
296 updated(true),
Mark Salyzynafd66f22018-03-19 15:16:29 -0700297 killed(!llkTestWithKill) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800298 memset(comm, '\0', sizeof(comm));
299 setComm(_comm);
300 }
301
302 const char* getComm(void) {
303 if (comm[1] == '\0') { // comm Valid?
304 strncpy(comm + 1, llkProcGetName(tid, "/comm").c_str(), sizeof(comm) - 2);
305 }
306 if (!exeMissingValid) {
307 if (llkIsMissingExeLink(tid)) {
308 comm[0] = '[';
309 }
310 exeMissingValid = true;
311 }
312 size_t len = strlen(comm + 1);
313 if (__predict_true(len < (sizeof(comm) - 1))) {
314 if (comm[0] == '[') {
315 if ((comm[len] != ']') && __predict_true(len < (sizeof(comm) - 2))) {
316 comm[++len] = ']';
317 comm[++len] = '\0';
318 }
319 } else {
320 if (comm[len] == ']') {
321 comm[len] = '\0';
322 }
323 }
324 }
325 return &comm[comm[0] != '['];
326 }
327
328 const char* getCmdline(void) {
329 if (!cmdlineValid) {
330 cmdline = llkProcGetName(tid);
331 cmdlineValid = true;
332 }
333 return cmdline.c_str();
334 }
335
336 uid_t getUid(void) {
337 if (uid <= 0) { // Churn on root user, because most likely to setuid()
338 uid = llkProcGetUid(tid);
339 }
340 return uid;
341 }
342
343 void reset(void) { // reset cache, if we detected pid rollover
344 uid = -1;
345 state = '?';
346 cmdline = "";
347 comm[0] = '\0';
348 exeMissingValid = false;
349 cmdlineValid = false;
350 }
351};
352
353std::unordered_map<pid_t, proc> tids;
354
355// Check range and setup defaults, in order of propagation:
356// llkTimeoutMs
357// llkCheckMs
358// ...
359// KISS to keep it all self-contained, and called multiple times as parameters
360// are interpreted so that defaults, llkCheckMs and llkCycle make sense.
361void llkValidate() {
362 if (llkTimeoutMs == 0ms) {
363 llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;
364 }
365 llkTimeoutMs = std::max(llkTimeoutMs, LLK_TIMEOUT_MS_MINIMUM);
366 if (llkCheckMs == 0ms) {
367 llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT;
368 }
369 llkCheckMs = std::min(llkCheckMs, llkTimeoutMs);
370
371 for (size_t state = 0; state < ARRAY_SIZE(llkStateTimeoutMs); ++state) {
372 if (llkStateTimeoutMs[state] == 0ms) {
373 llkStateTimeoutMs[state] = llkTimeoutMs;
374 }
375 llkStateTimeoutMs[state] =
376 std::min(std::max(llkStateTimeoutMs[state], LLK_TIMEOUT_MS_MINIMUM), llkTimeoutMs);
377 llkCheckMs = std::min(llkCheckMs, llkStateTimeoutMs[state]);
378 }
379
380 llkCheckMs = std::max(llkCheckMs, LLK_CHECK_MS_MINIMUM);
381 if (llkCycle == 0ms) {
382 llkCycle = llkCheckMs;
383 }
384 llkCycle = std::min(llkCycle, llkCheckMs);
385}
386
// Elapsed time between two timespecs (to - from) in milliseconds. The
// seconds and nanoseconds fields are converted separately, so the
// nanosecond difference is truncated to whole milliseconds on its own.
std::chrono::milliseconds llkGetTimespecDiffMs(timespec* from, timespec* to) {
    using std::chrono::duration_cast;
    using std::chrono::milliseconds;

    const std::chrono::seconds secDelta(to->tv_sec - from->tv_sec);
    const std::chrono::nanoseconds nsecDelta(to->tv_nsec - from->tv_nsec);
    return duration_cast<milliseconds>(secDelta) + duration_cast<milliseconds>(nsecDelta);
}
391
392std::string llkProcGetName(pid_t tid, const char* comm, const char* cmdline) {
393 if ((cmdline != nullptr) && (*cmdline != '\0')) {
394 return cmdline;
395 }
396 if ((comm != nullptr) && (*comm != '\0')) {
397 return comm;
398 }
399
400 // UNLIKELY! Here because killed before we kill it?
401 // Assume change is afoot, do not call llkTidAlloc
402
403 // cmdline ?
404 std::string content = llkProcGetName(tid);
405 if (content.size() != 0) {
406 return content;
407 }
408 // Comm instead?
409 content = llkProcGetName(tid, "/comm");
410 if (llkIsMissingExeLink(tid) && (content.size() != 0)) {
411 return '[' + content + ']';
412 }
413 return content;
414}
415
416int llkKillOneProcess(pid_t pid, char state, pid_t tid, const char* tcomm = nullptr,
417 const char* tcmdline = nullptr, const char* pcomm = nullptr,
418 const char* pcmdline = nullptr) {
419 std::string forTid;
420 if (tid != pid) {
421 forTid = " for '" + llkProcGetName(tid, tcomm, tcmdline) + "' (" + std::to_string(tid) + ")";
422 }
423 LOG(INFO) << "Killing '" << llkProcGetName(pid, pcomm, pcmdline) << "' (" << pid
424 << ") to check forward scheduling progress in " << state << " state" << forTid;
425 // CAP_KILL required
426 errno = 0;
427 auto r = ::kill(pid, SIGKILL);
428 if (r) {
429 PLOG(ERROR) << "kill(" << pid << ")=" << r << ' ';
430 }
431
432 return r;
433}
434
435// Kill one process
436int llkKillOneProcess(pid_t pid, proc* tprocp) {
437 return llkKillOneProcess(pid, tprocp->state, tprocp->tid, tprocp->getComm(),
438 tprocp->getCmdline());
439}
440
441// Kill one process specified by kprocp
442int llkKillOneProcess(proc* kprocp, proc* tprocp) {
443 if (kprocp == nullptr) {
444 return -2;
445 }
446
447 return llkKillOneProcess(kprocp->tid, tprocp->state, tprocp->tid, tprocp->getComm(),
448 tprocp->getCmdline(), kprocp->getComm(), kprocp->getCmdline());
449}
450
451// Acquire file descriptor from environment, or open and cache it.
452// NB: cache is unnecessary in our current context, pedantically
453// required to prevent leakage of file descriptors in the future.
454int llkFileToWriteFd(const std::string& file) {
455 static std::unordered_map<std::string, int> cache;
456 auto search = cache.find(file);
457 if (search != cache.end()) return search->second;
458 auto fd = android_get_control_file(file.c_str());
459 if (fd >= 0) return fd;
460 fd = TEMP_FAILURE_RETRY(::open(file.c_str(), O_WRONLY | O_CLOEXEC));
461 if (fd >= 0) cache.emplace(std::make_pair(file, fd));
462 return fd;
463}
464
465// Wrap android::base::WriteStringToFile to use android_get_control_file.
466bool llkWriteStringToFile(const std::string& string, const std::string& file) {
467 auto fd = llkFileToWriteFd(file);
468 if (fd < 0) return false;
469 return android::base::WriteStringToFd(string, fd);
470}
471
472bool llkWriteStringToFileConfirm(const std::string& string, const std::string& file) {
473 auto fd = llkFileToWriteFd(file);
474 auto ret = (fd < 0) ? false : android::base::WriteStringToFd(string, fd);
475 std::string content;
476 if (!android::base::ReadFileToString(file, &content)) return ret;
477 return android::base::Trim(content) == string;
478}
479
Mark Salyzynafd66f22018-03-19 15:16:29 -0700480void llkPanicKernel(bool dump, pid_t tid, const char* state) __noreturn;
481void llkPanicKernel(bool dump, pid_t tid, const char* state) {
Mark Salyzynf089e142018-02-20 10:47:40 -0800482 auto sysrqTriggerFd = llkFileToWriteFd("/proc/sysrq-trigger");
483 if (sysrqTriggerFd < 0) {
484 // DYB
485 llkKillOneProcess(initPid, 'R', tid);
486 // The answer to life, the universe and everything
487 ::exit(42);
488 // NOTREACHED
489 }
490 ::sync();
491 if (dump) {
492 // Show all locks that are held
493 android::base::WriteStringToFd("d", sysrqTriggerFd);
494 // This can trigger hardware watchdog, that is somewhat _ok_.
495 // But useless if pstore configured for <256KB, low ram devices ...
496 if (!llkLowRam) {
497 android::base::WriteStringToFd("t", sysrqTriggerFd);
498 }
499 ::usleep(200000); // let everything settle
500 }
Mark Salyzyn52e54a62018-08-07 08:13:13 -0700501 llkWriteStringToFile("SysRq : Trigger a crash : 'livelock,"s + state + "'\n", "/dev/kmsg");
Mark Salyzynf089e142018-02-20 10:47:40 -0800502 android::base::WriteStringToFd("c", sysrqTriggerFd);
503 // NOTREACHED
504 // DYB
505 llkKillOneProcess(initPid, 'R', tid);
506 // I sat at my desk, stared into the garden and thought '42 will do'.
507 // I typed it out. End of story
508 ::exit(42);
509 // NOTREACHED
510}
511
512void llkAlarmHandler(int) {
Mark Salyzynafd66f22018-03-19 15:16:29 -0700513 llkPanicKernel(false, ::getpid(), "alarm");
Mark Salyzynf089e142018-02-20 10:47:40 -0800514}
515
516milliseconds GetUintProperty(const std::string& key, milliseconds def) {
517 return milliseconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
518 static_cast<uint64_t>(def.max().count())));
519}
520
521seconds GetUintProperty(const std::string& key, seconds def) {
522 return seconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()),
523 static_cast<uint64_t>(def.max().count())));
524}
525
526proc* llkTidLookup(pid_t tid) {
527 auto search = tids.find(tid);
528 if (search == tids.end()) {
529 return nullptr;
530 }
531 return &search->second;
532}
533
534void llkTidRemove(pid_t tid) {
535 tids.erase(tid);
536}
537
538proc* llkTidAlloc(pid_t tid, pid_t pid, pid_t ppid, const char* comm, int time, char state) {
539 auto it = tids.emplace(std::make_pair(tid, proc(tid, pid, ppid, comm, time, state)));
540 return &it.first->second;
541}
542
// Render milliseconds as "<seconds>.<mmm>s", the fraction zero padded to
// three digits.
std::string llkFormat(std::chrono::milliseconds ms) {
    const auto whole = std::chrono::duration_cast<std::chrono::seconds>(ms);
    const auto fraction = ms - whole;

    std::ostringstream out;
    out << whole.count() << '.';
    // The width only applies to the next item written, the fraction.
    out.fill('0');
    out.width(3);
    out << std::right << fraction.count() << 's';
    return out.str();
}
555
// Render seconds as "<count>s".
std::string llkFormat(std::chrono::seconds s) {
    std::string text = std::to_string(s.count());
    text.push_back('s');
    return text;
}
559
// Render a flag as "true" or "false".
std::string llkFormat(bool flag) {
    if (flag) {
        return "true";
    }
    return "false";
}
563
// Render a blacklist as its entries joined by commas, in the set's iteration
// order. An empty set gives an empty string.
std::string llkFormat(const std::unordered_set<std::string>& blacklist) {
    std::string ret;
    // Iterate by reference: no need to copy every entry just to append it.
    for (const auto& entry : blacklist) {
        if (!ret.empty()) {
            ret += ",";
        }
        ret += entry;
    }
    return ret;
}
574
// Split a list into the set of its non-empty entries.
// We only officially support comma separators, but wetware being what they
// are will take some liberty and I do not believe they should be punished,
// so space, tab and colon separate entries as well.
std::unordered_set<std::string> llkSplit(const std::string& s) {
    static constexpr char separators[] = ", \t:";
    std::unordered_set<std::string> entries;

    // Special case, allow boolean false to empty the list, otherwise expected
    // source of input from android::base::GetProperty will supply the default
    // value on empty content in the property.
    if (s == "false") return entries;

    for (size_t start = 0; start < s.size();) {
        const auto stop = s.find_first_of(separators, start);
        // Only emplace content, empty entries are not an option
        if (stop != start) entries.emplace(s.substr(start, stop - start));
        if (stop == std::string::npos) break;
        start = stop + 1;
    }
    return entries;
}
595
596bool llkSkipName(const std::string& name,
597 const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) {
598 if ((name.size() == 0) || (blacklist.size() == 0)) {
599 return false;
600 }
601
602 return blacklist.find(name) != blacklist.end();
603}
604
605bool llkSkipPid(pid_t pid) {
606 return llkSkipName(std::to_string(pid), llkBlacklistProcess);
607}
608
609bool llkSkipPpid(pid_t ppid) {
610 return llkSkipName(std::to_string(ppid), llkBlacklistParent);
611}
612
613bool llkSkipUid(uid_t uid) {
614 // Match by number?
615 if (llkSkipName(std::to_string(uid), llkBlacklistUid)) {
616 return true;
617 }
618
619 // Match by name?
620 auto pwd = ::getpwuid(uid);
621 return (pwd != nullptr) && __predict_true(pwd->pw_name != nullptr) &&
622 __predict_true(pwd->pw_name[0] != '\0') && llkSkipName(pwd->pw_name, llkBlacklistUid);
623}
624
625bool getValidTidDir(dirent* dp, std::string* piddir) {
626 if (!::isdigit(dp->d_name[0])) {
627 return false;
628 }
629
630 // Corner case can not happen in reality b/c of above ::isdigit check
631 if (__predict_false(dp->d_type != DT_DIR)) {
632 if (__predict_false(dp->d_type == DT_UNKNOWN)) { // can't b/c procfs
633 struct stat st;
634 *piddir = procdir;
635 *piddir += dp->d_name;
636 return (lstat(piddir->c_str(), &st) == 0) && (st.st_mode & S_IFDIR);
637 }
638 return false;
639 }
640
641 *piddir = procdir;
642 *piddir += dp->d_name;
643 return true;
644}
645
// Only the Z and D task states are monitored.
bool llkIsMonitorState(char state) {
    switch (state) {
        case 'Z':
        case 'D':
            return true;
        default:
            return false;
    }
}
649
650// returns -1 if not found
651long long getSchedValue(const std::string& schedString, const char* key) {
652 auto pos = schedString.find(key);
653 if (pos == std::string::npos) {
654 return -1;
655 }
656 pos = schedString.find(':', pos);
657 if (__predict_false(pos == std::string::npos)) {
658 return -1;
659 }
660 while ((++pos < schedString.size()) && ::isblank(schedString[pos])) {
661 ;
662 }
663 long long ret;
664 if (!android::base::ParseInt(schedString.substr(pos), &ret, static_cast<long long>(0))) {
665 return -1;
666 }
667 return ret;
668}
669
670// Primary ABA mitigation watching last time schedule activity happened
671void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
672 // Audit finds /proc/<tid>/sched is just over 1K, and
673 // is rarely larger than 2K, even less on Android.
674 // For example, the "se.avg.lastUpdateTime" field we are
675 // interested in typically within the primary set in
676 // the first 1K.
677 //
678 // Proc entries can not be read >1K atomically via libbase,
679 // but if there are problems we assume at least a few
680 // samples of reads occur before we take any real action.
681 std::string schedString = ReadFile(piddir + "/sched");
682 if (schedString.size() == 0) {
683 // /schedstat is not as standardized, but in 3.1+
684 // Android devices, the third field is nr_switches
685 // from /sched:
686 schedString = ReadFile(piddir + "/schedstat");
687 if (schedString.size() == 0) {
688 return;
689 }
690 auto val = static_cast<unsigned long long>(-1);
691 if (((::sscanf(schedString.c_str(), "%*d %*d %llu", &val)) == 1) &&
692 (val != static_cast<unsigned long long>(-1)) && (val != 0) &&
693 (val != procp->nrSwitches)) {
694 procp->nrSwitches = val;
695 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700696 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800697 }
698 return;
699 }
700
701 auto val = getSchedValue(schedString, "\nse.avg.lastUpdateTime");
702 if (val == -1) {
703 val = getSchedValue(schedString, "\nse.svg.last_update_time");
704 }
705 if (val != -1) {
706 auto schedUpdate = nanoseconds(val);
707 if (schedUpdate != procp->schedUpdate) {
708 procp->schedUpdate = schedUpdate;
709 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700710 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800711 }
712 }
713
714 val = getSchedValue(schedString, "\nnr_switches");
715 if (val != -1) {
716 if (static_cast<uint64_t>(val) != procp->nrSwitches) {
717 procp->nrSwitches = val;
718 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700719 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800720 }
721 }
722}
723
724void llkLogConfig(void) {
725 LOG(INFO) << "ro.config.low_ram=" << llkFormat(llkLowRam) << "\n"
726 << LLK_ENABLE_PROPERTY "=" << llkFormat(llkEnable) << "\n"
727 << KHT_ENABLE_PROPERTY "=" << llkFormat(khtEnable) << "\n"
728 << LLK_MLOCKALL_PROPERTY "=" << llkFormat(llkMlockall) << "\n"
Mark Salyzynafd66f22018-03-19 15:16:29 -0700729 << LLK_KILLTEST_PROPERTY "=" << llkFormat(llkTestWithKill) << "\n"
Mark Salyzynf089e142018-02-20 10:47:40 -0800730 << KHT_TIMEOUT_PROPERTY "=" << llkFormat(khtTimeout) << "\n"
731 << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n"
732 << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n"
733 << LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n"
734 << LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n"
735 << LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
736 << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
737 << LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
738}
739
740void* llkThread(void* obj) {
Mark Salyzyn4832a8b2018-08-15 11:02:18 -0700741 prctl(PR_SET_DUMPABLE, 0);
742
Mark Salyzynf089e142018-02-20 10:47:40 -0800743 LOG(INFO) << "started";
744
745 std::string name = std::to_string(::gettid());
746 if (!llkSkipName(name)) {
747 llkBlacklistProcess.emplace(name);
748 }
749 name = static_cast<const char*>(obj);
750 prctl(PR_SET_NAME, name.c_str());
751 if (__predict_false(!llkSkipName(name))) {
752 llkBlacklistProcess.insert(name);
753 }
754 // No longer modifying llkBlacklistProcess.
755 llkRunning = true;
756 llkLogConfig();
757 while (llkRunning) {
758 ::usleep(duration_cast<microseconds>(llkCheck(true)).count());
759 }
760 // NOTREACHED
761 LOG(INFO) << "exiting";
762 return nullptr;
763}
764
765} // namespace
766
767milliseconds llkCheck(bool checkRunning) {
768 if (!llkEnable || (checkRunning != llkRunning)) {
769 return milliseconds::max();
770 }
771
772 // Reset internal watchdog, which is a healthy engineering margin of
773 // double the maximum wait or cycle time for the mainloop that calls us.
774 //
775 // This alarm is effectively the live lock detection of llkd, as
776 // we understandably can not monitor ourselves otherwise.
777 ::alarm(duration_cast<seconds>(llkTimeoutMs * 2).count());
778
779 // kernel jiffy precision fastest acquisition
780 static timespec last;
781 timespec now;
782 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
783 auto ms = llkGetTimespecDiffMs(&last, &now);
784 if (ms < llkCycle) {
785 return llkCycle - ms;
786 }
787 last = now;
788
789 LOG(VERBOSE) << "opendir(\"" << procdir << "\")";
790 if (__predict_false(!llkTopDirectory)) {
791 // gid containing AID_READPROC required
792 llkTopDirectory.reset(procdir);
793 if (__predict_false(!llkTopDirectory)) {
794 // Most likely reason we could be here is a resource limit.
795 // Keep our processing down to a minimum, but not so low that
796 // we do not recover in a timely manner should the issue be
797 // transitory.
798 LOG(DEBUG) << "opendir(\"" << procdir << "\") failed";
799 return llkTimeoutMs;
800 }
801 }
802
803 for (auto& it : tids) {
804 it.second.updated = false;
805 }
806
807 auto prevUpdate = llkUpdate;
808 llkUpdate += ms;
809 ms -= llkCycle;
810 auto myPid = ::getpid();
811 auto myTid = ::gettid();
812 for (auto dp = llkTopDirectory.read(); dp != nullptr; dp = llkTopDirectory.read()) {
813 std::string piddir;
814
815 if (!getValidTidDir(dp, &piddir)) {
816 continue;
817 }
818
819 // Get the process tasks
820 std::string taskdir = piddir + "/task/";
821 int pid = -1;
822 LOG(VERBOSE) << "+opendir(\"" << taskdir << "\")";
823 dir taskDirectory(taskdir);
824 if (__predict_false(!taskDirectory)) {
825 LOG(DEBUG) << "+opendir(\"" << taskdir << "\") failed";
826 }
827 for (auto tp = taskDirectory.read(dir::task, dp); tp != nullptr;
828 tp = taskDirectory.read(dir::task)) {
829 if (!getValidTidDir(tp, &piddir)) {
830 continue;
831 }
832
833 // Get the process stat
834 std::string stat = ReadFile(piddir + "/stat");
835 if (stat.size() == 0) {
836 continue;
837 }
838 unsigned tid = -1;
839 char pdir[TASK_COMM_LEN + 1];
840 char state = '?';
841 unsigned ppid = -1;
842 unsigned utime = -1;
843 unsigned stime = -1;
844 int dummy;
845 pdir[0] = '\0';
846 // tid should not change value
847 auto match = ::sscanf(
848 stat.c_str(),
849 "%u (%" ___STRING(
850 TASK_COMM_LEN) "[^)]) %c %u %*d %*d %*d %*d %*d %*d %*d %*d %*d %u %u %d",
851 &tid, pdir, &state, &ppid, &utime, &stime, &dummy);
852 if (pid == -1) {
853 pid = tid;
854 }
855 LOG(VERBOSE) << "match " << match << ' ' << tid << " (" << pdir << ") " << state << ' '
856 << ppid << " ... " << utime << ' ' << stime << ' ' << dummy;
857 if (match != 7) {
858 continue;
859 }
860
861 auto procp = llkTidLookup(tid);
862 if (procp == nullptr) {
863 procp = llkTidAlloc(tid, pid, ppid, pdir, utime + stime, state);
864 } else {
865 // comm can change ...
866 procp->setComm(pdir);
867 procp->updated = true;
868 // pid/ppid/tid wrap?
869 if (((procp->update != prevUpdate) && (procp->update != llkUpdate)) ||
870 (procp->ppid != ppid) || (procp->pid != pid)) {
871 procp->reset();
872 } else if (procp->time != (utime + stime)) { // secondary ABA.
873 // watching utime+stime granularity jiffy
874 procp->state = '?';
875 }
876 procp->update = llkUpdate;
877 procp->pid = pid;
878 procp->ppid = ppid;
879 procp->time = utime + stime;
880 if (procp->state != state) {
881 procp->count = 0ms;
Mark Salyzynafd66f22018-03-19 15:16:29 -0700882 procp->killed = !llkTestWithKill;
Mark Salyzynf089e142018-02-20 10:47:40 -0800883 procp->state = state;
884 } else {
885 procp->count += llkCycle;
886 }
887 }
888
889 // Filter checks in intuitive order of CPU cost to evaluate
890 // If tid unique continue, if ppid or pid unique break
891
892 if (pid == myPid) {
893 break;
894 }
895 if (!llkIsMonitorState(state)) {
896 continue;
897 }
898 if ((tid == myTid) || llkSkipPid(tid)) {
899 continue;
900 }
901 if (llkSkipPpid(ppid)) {
902 break;
903 }
904
905 if (llkSkipName(procp->getComm())) {
906 continue;
907 }
908 if (llkSkipName(procp->getCmdline())) {
909 break;
910 }
911
912 auto pprocp = llkTidLookup(ppid);
913 if (pprocp == nullptr) {
914 pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
915 }
916 if ((pprocp != nullptr) && (llkSkipName(pprocp->getComm(), llkBlacklistParent) ||
917 llkSkipName(pprocp->getCmdline(), llkBlacklistParent))) {
918 break;
919 }
920
921 if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) {
922 continue;
923 }
924
925 // ABA mitigation watching last time schedule activity happened
926 llkCheckSchedUpdate(procp, piddir);
927
928 // Can only fall through to here if registered D or Z state !!!
929 if (procp->count < llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
930 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
931 << pid << "->" << tid << ' ' << procp->getComm();
932 continue;
933 }
934
935 // We have to kill it to determine difference between live lock
936 // and persistent state blocked on a resource. Is there something
937 // wrong with a process that has no forward scheduling progress in
938 // Z or D? Yes, generally means improper accounting in the
939 // process, but not always ...
940 //
941 // Whomever we hit with a test kill must accept the Android
942 // Aphorism that everything can be burned to the ground and
943 // must survive.
944 if (procp->killed == false) {
945 procp->killed = true;
946 // confirm: re-read uid before committing to a panic.
947 procp->uid = -1;
948 switch (state) {
949 case 'Z': // kill ppid to free up a Zombie
950 // Killing init will kernel panic without diagnostics
951 // so skip right to controlled kernel panic with
952 // diagnostics.
953 if (ppid == initPid) {
954 break;
955 }
956 LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
957 << pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
958 if ((llkKillOneProcess(pprocp, procp) >= 0) ||
959 (llkKillOneProcess(ppid, procp) >= 0)) {
960 continue;
961 }
962 break;
963
964 case 'D': // kill tid to free up an uninterruptible D
965 // If ABA is doing its job, we would not need or
966 // want the following. Test kill is a Hail Mary
967 // to make absolutely sure there is no forward
968 // scheduling progress. The cost when ABA is
969 // not working is we kill a process that likes to
970 // stay in 'D' state, instead of panicing the
971 // kernel (worse).
972 LOG(WARNING) << "D " << llkFormat(procp->count) << ' ' << pid << "->" << tid
973 << ' ' << procp->getComm() << " [kill]";
974 if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
975 (llkKillOneProcess(pid, 'D', tid) >= 0) ||
976 (llkKillOneProcess(procp, procp) >= 0) ||
977 (llkKillOneProcess(tid, 'D', tid) >= 0)) {
978 continue;
979 }
980 break;
981 }
982 }
983 // We are here because we have confirmed kernel live-lock
984 LOG(ERROR) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" << pid
985 << "->" << tid << ' ' << procp->getComm() << " [panic]";
Mark Salyzynafd66f22018-03-19 15:16:29 -0700986 llkPanicKernel(true, tid, (state == 'Z') ? "zombie" : "driver");
Mark Salyzynf089e142018-02-20 10:47:40 -0800987 }
988 LOG(VERBOSE) << "+closedir()";
989 }
990 llkTopDirectory.rewind();
991 LOG(VERBOSE) << "closedir()";
992
993 // garbage collection of old process references
994 for (auto p = tids.begin(); p != tids.end();) {
995 if (!p->second.updated) {
996 IF_ALOG(LOG_VERBOSE, LOG_TAG) {
997 std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr);
998 if (ppidCmdline.size()) {
999 ppidCmdline = "(" + ppidCmdline + ")";
1000 }
1001 std::string pidCmdline;
1002 if (p->second.pid != p->second.tid) {
1003 pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline());
1004 if (pidCmdline.size()) {
1005 pidCmdline = "(" + pidCmdline + ")";
1006 }
1007 }
1008 std::string tidCmdline =
1009 llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline());
1010 if (tidCmdline.size()) {
1011 tidCmdline = "(" + tidCmdline + ")";
1012 }
1013 LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid
1014 << pidCmdline << "->" << p->second.tid << tidCmdline << " removed";
1015 }
1016 p = tids.erase(p);
1017 } else {
1018 ++p;
1019 }
1020 }
1021 if (__predict_false(tids.empty())) {
1022 llkTopDirectory.reset();
1023 }
1024
1025 llkCycle = llkCheckMs;
1026
1027 timespec end;
1028 ::clock_gettime(CLOCK_MONOTONIC_COARSE, &end);
1029 auto milli = llkGetTimespecDiffMs(&now, &end);
1030 LOG((milli > 10s) ? ERROR : (milli > 1s) ? WARNING : VERBOSE) << "sample " << llkFormat(milli);
1031
1032 // cap to minimum sleep for 1 second since last cycle
1033 if (llkCycle < (ms + 1s)) {
1034 return 1s;
1035 }
1036 return llkCycle - ms;
1037}
1038
1039unsigned llkCheckMilliseconds() {
1040 return duration_cast<milliseconds>(llkCheck()).count();
1041}
1042
1043bool llkInit(const char* threadname) {
1044 llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false);
Mark Salyzynd035dbb2018-03-26 08:23:00 -07001045 if (!LLK_ENABLE_DEFAULT && android::base::GetBoolProperty("ro.debuggable", false)) {
1046 llkEnable = android::base::GetProperty(LLK_ENABLE_PROPERTY, "eng") == "eng";
1047 khtEnable = android::base::GetProperty(KHT_ENABLE_PROPERTY, "eng") == "eng";
1048 }
Mark Salyzynf089e142018-02-20 10:47:40 -08001049 llkEnable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, llkEnable);
1050 if (llkEnable && !llkTopDirectory.reset(procdir)) {
1051 // Most likely reason we could be here is llkd was started
1052 // incorrectly without the readproc permissions. Keep our
1053 // processing down to a minimum.
1054 llkEnable = false;
1055 }
1056 khtEnable = android::base::GetBoolProperty(KHT_ENABLE_PROPERTY, khtEnable);
1057 llkMlockall = android::base::GetBoolProperty(LLK_MLOCKALL_PROPERTY, llkMlockall);
Mark Salyzynafd66f22018-03-19 15:16:29 -07001058 llkTestWithKill = android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, llkTestWithKill);
Mark Salyzynf089e142018-02-20 10:47:40 -08001059 // if LLK_TIMOUT_MS_PROPERTY was not set, we will use a set
1060 // KHT_TIMEOUT_PROPERTY as co-operative guidance for the default value.
1061 khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout);
1062 if (khtTimeout == 0s) {
1063 khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
1064 LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1065 }
1066 llkTimeoutMs =
1067 khtTimeout * LLK_CHECKS_PER_TIMEOUT_DEFAULT / (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT);
1068 llkTimeoutMs = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1069 llkValidate(); // validate llkTimeoutMs, llkCheckMs and llkCycle
1070 llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1071 llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
1072 llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs);
1073 llkValidate(); // validate all (effectively minus llkTimeoutMs)
1074 std::string defaultBlacklistProcess(
1075 std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
1076 std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," +
1077 std::to_string(::gettid()) + "," LLK_BLACKLIST_PROCESS_DEFAULT);
1078 if (threadname) {
Mark Salyzyn52e54a62018-08-07 08:13:13 -07001079 defaultBlacklistProcess += ","s + threadname;
Mark Salyzynf089e142018-02-20 10:47:40 -08001080 }
1081 for (int cpu = 1; cpu < get_nprocs_conf(); ++cpu) {
1082 defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]";
1083 }
1084 defaultBlacklistProcess =
1085 android::base::GetProperty(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess);
1086 llkBlacklistProcess = llkSplit(defaultBlacklistProcess);
1087 if (!llkSkipName("[khungtaskd]")) { // ALWAYS ignore as special
1088 llkBlacklistProcess.emplace("[khungtaskd]");
1089 }
1090 llkBlacklistParent = llkSplit(android::base::GetProperty(
1091 LLK_BLACKLIST_PARENT_PROPERTY, std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
1092 "," LLK_BLACKLIST_PARENT_DEFAULT));
1093 llkBlacklistUid =
1094 llkSplit(android::base::GetProperty(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT));
1095
1096 // internal watchdog
1097 ::signal(SIGALRM, llkAlarmHandler);
1098
1099 // kernel hung task configuration? Otherwise leave it as-is
1100 if (khtEnable) {
1101 // EUID must be AID_ROOT to write to /proc/sys/kernel/ nodes, there
1102 // are no capability overrides. For security reasons we do not want
1103 // to run as AID_ROOT. We may not be able to write them successfully,
1104 // we will try, but the least we can do is read the values back to
1105 // confirm expectations and report whether configured or not.
1106 auto configured = llkWriteStringToFileConfirm(std::to_string(khtTimeout.count()),
1107 "/proc/sys/kernel/hung_task_timeout_secs");
1108 if (configured) {
1109 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_warnings");
1110 llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_check_count");
1111 configured = llkWriteStringToFileConfirm("1", "/proc/sys/kernel/hung_task_panic");
1112 }
1113 if (configured) {
1114 LOG(INFO) << "[khungtaskd] configured";
1115 } else {
1116 LOG(WARNING) << "[khungtaskd] not configurable";
1117 }
1118 }
1119
1120 bool logConfig = true;
1121 if (llkEnable) {
1122 if (llkMlockall &&
1123 // MCL_ONFAULT pins pages as they fault instead of loading
1124 // everything immediately all at once. (Which would be bad,
1125 // because as of this writing, we have a lot of mapped pages we
1126 // never use.) Old kernels will see MCL_ONFAULT and fail with
1127 // EINVAL; we ignore this failure.
1128 //
1129 // N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT
1130 // pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault
1131 // in pages.
1132
1133 // CAP_IPC_LOCK required
1134 mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) {
1135 PLOG(WARNING) << "mlockall failed ";
1136 }
1137
1138 if (threadname) {
1139 pthread_attr_t attr;
1140
1141 if (!pthread_attr_init(&attr)) {
1142 sched_param param;
1143
1144 memset(&param, 0, sizeof(param));
1145 pthread_attr_setschedparam(&attr, &param);
1146 pthread_attr_setschedpolicy(&attr, SCHED_BATCH);
1147 if (!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) {
1148 pthread_t thread;
1149 if (!pthread_create(&thread, &attr, llkThread, const_cast<char*>(threadname))) {
1150 // wait a second for thread to start
1151 for (auto retry = 50; retry && !llkRunning; --retry) {
1152 ::usleep(20000);
1153 }
1154 logConfig = !llkRunning; // printed in llkd context?
1155 } else {
1156 LOG(ERROR) << "failed to spawn llkd thread";
1157 }
1158 } else {
1159 LOG(ERROR) << "failed to detach llkd thread";
1160 }
1161 pthread_attr_destroy(&attr);
1162 } else {
1163 LOG(ERROR) << "failed to allocate attibutes for llkd thread";
1164 }
1165 }
1166 } else {
1167 LOG(DEBUG) << "[khungtaskd] left unconfigured";
1168 }
1169 if (logConfig) {
1170 llkLogConfig();
1171 }
1172
1173 return llkEnable;
1174}