| /* | 
 |  * Copyright (C) 2018 The Android Open Source Project | 
 |  * | 
 |  * Licensed under the Apache License, Version 2.0 (the "License"); | 
 |  * you may not use this file except in compliance with the License. | 
 |  * You may obtain a copy of the License at | 
 |  * | 
 |  *      http://www.apache.org/licenses/LICENSE-2.0 | 
 |  * | 
 |  * Unless required by applicable law or agreed to in writing, software | 
 |  * distributed under the License is distributed on an "AS IS" BASIS, | 
 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 |  * See the License for the specific language governing permissions and | 
 |  * limitations under the License. | 
 |  */ | 
 |  | 
 | #include "llkd.h" | 
 |  | 
 | #include <ctype.h> | 
 | #include <dirent.h>  // opendir() and readdir() | 
 | #include <errno.h> | 
 | #include <fcntl.h> | 
 | #include <pthread.h> | 
 | #include <pwd.h>  // getpwuid() | 
 | #include <signal.h> | 
 | #include <stdint.h> | 
 | #include <sys/cdefs.h>  // ___STRING, __predict_true() and _predict_false() | 
 | #include <sys/mman.h>   // mlockall() | 
 | #include <sys/prctl.h> | 
 | #include <sys/stat.h>     // lstat() | 
 | #include <sys/syscall.h>  // __NR_getdents64 | 
 | #include <sys/sysinfo.h>  // get_nprocs_conf() | 
 | #include <sys/types.h> | 
 | #include <time.h> | 
 | #include <unistd.h> | 
 |  | 
 | #include <chrono> | 
 | #include <ios> | 
 | #include <sstream> | 
 | #include <string> | 
 | #include <unordered_map> | 
 | #include <unordered_set> | 
 |  | 
 | #include <android-base/file.h> | 
 | #include <android-base/logging.h> | 
 | #include <android-base/parseint.h> | 
 | #include <android-base/properties.h> | 
 | #include <android-base/strings.h> | 
 | #include <cutils/android_get_control_file.h> | 
 | #include <log/log_main.h> | 
 |  | 
 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) | 
 |  | 
 | #define TASK_COMM_LEN 16  // internal kernel, not uapi, from .../linux/include/linux/sched.h | 
 |  | 
 | using namespace std::chrono_literals; | 
 | using namespace std::chrono; | 
 |  | 
 | namespace { | 
 |  | 
 | constexpr pid_t kernelPid = 0; | 
 | constexpr pid_t initPid = 1; | 
 | constexpr pid_t kthreaddPid = 2; | 
 |  | 
 | constexpr char procdir[] = "/proc/"; | 
 |  | 
 | // Configuration and run state, private to this file (anonymous namespace) | 
 | milliseconds llkUpdate;                              // running ms signature, advanced on each check pass | 
 | milliseconds llkCycle;                               // minimum ms between check passes | 
 | bool llkEnable = LLK_ENABLE_DEFAULT;                 // llk daemon enabled | 
 | bool llkRunning = false;                             // llkThread main loop is running | 
 | bool llkMlockall = LLK_MLOCKALL_DEFAULT;             // run mlocked | 
 | bool llkTestWithKill = LLK_KILLTEST_DEFAULT;         // issue test kills | 
 | milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT;  // default timeout | 
 | enum { llkStateD, llkStateZ, llkNumStates };         // indexes into llkStateTimeoutMs | 
 | milliseconds llkStateTimeoutMs[llkNumStates];        // timeout override for each detection state | 
 | milliseconds llkCheckMs;                             // checking interval to inspect any | 
 |                                                      // persistent live-locked states | 
 | bool llkLowRam;                                      // ro.config.low_ram | 
 | bool khtEnable = LLK_ENABLE_DEFAULT;                 // [khungtaskd] panic | 
 | // [khungtaskd] should have a timeout beyond the granularity of llkTimeoutMs. | 
 | // Scaled by (1 + checks) / checks for a wide margin, b/c khtTimeout is also its granularity. | 
 | seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) / | 
 |                                             LLK_CHECKS_PER_TIMEOUT_DEFAULT); | 
 |  | 
 | // Blacklist variables: sets initialized from comma separated lists of | 
 | // references that have a high false positive rate and/or are dangerous to | 
 | // act on (e.g. no self restart), matched by pid, ppid, name and uid: | 
 |  | 
 | // list of pids, tids or names to skip: kernel pid (0), init pid (1), | 
 | // [kthreadd] pid (2), ourselves, "init", "[kthreadd]", "lmkd", "llkd" or | 
 | // combinations of watchdogd in kernel and user space. | 
 | std::unordered_set<std::string> llkBlacklistProcess; | 
 | // list of parent pids, comm or cmdline names to skip. default: | 
 | // kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied | 
 | std::unordered_set<std::string> llkBlacklistParent; | 
 | // list of uids, and uid names, to skip. default: nothing | 
 | std::unordered_set<std::string> llkBlacklistUid; | 
 |  | 
 | class dir { | 
 |   public: | 
 |     enum level { proc, task, numLevels }; | 
 |  | 
 |   private: | 
 |     int fd; | 
 |     size_t available_bytes; | 
 |     dirent* next; | 
 |     // each directory level picked to be just north of 4K in size | 
 |     static constexpr size_t buffEntries = 15; | 
 |     static dirent buff[numLevels][buffEntries]; | 
 |  | 
 |     bool fill(enum level index) { | 
 |         if (index >= numLevels) return false; | 
 |         if (available_bytes != 0) return true; | 
 |         if (__predict_false(fd < 0)) return false; | 
 |         // getdents64 has no libc wrapper | 
 |         auto rc = TEMP_FAILURE_RETRY(syscall(__NR_getdents64, fd, buff[index], sizeof(buff[0]), 0)); | 
 |         if (rc <= 0) return false; | 
 |         available_bytes = rc; | 
 |         next = buff[index]; | 
 |         return true; | 
 |     } | 
 |  | 
 |   public: | 
 |     dir() : fd(-1), available_bytes(0), next(nullptr) {} | 
 |  | 
 |     explicit dir(const char* directory) | 
 |         : fd(__predict_true(directory != nullptr) | 
 |                  ? ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY) | 
 |                  : -1), | 
 |           available_bytes(0), | 
 |           next(nullptr) {} | 
 |  | 
 |     explicit dir(const std::string&& directory) | 
 |         : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)), | 
 |           available_bytes(0), | 
 |           next(nullptr) {} | 
 |  | 
 |     explicit dir(const std::string& directory) | 
 |         : fd(::open(directory.c_str(), O_CLOEXEC | O_DIRECTORY | O_RDONLY)), | 
 |           available_bytes(0), | 
 |           next(nullptr) {} | 
 |  | 
 |     // Don't need any copy or move constructors. | 
 |     explicit dir(const dir& c) = delete; | 
 |     explicit dir(dir& c) = delete; | 
 |     explicit dir(dir&& c) = delete; | 
 |  | 
 |     ~dir() { | 
 |         if (fd >= 0) { | 
 |             ::close(fd); | 
 |         } | 
 |     } | 
 |  | 
 |     operator bool() const { return fd >= 0; } | 
 |  | 
 |     void reset(void) { | 
 |         if (fd >= 0) { | 
 |             ::close(fd); | 
 |             fd = -1; | 
 |             available_bytes = 0; | 
 |             next = nullptr; | 
 |         } | 
 |     } | 
 |  | 
 |     dir& reset(const char* directory) { | 
 |         reset(); | 
 |         // available_bytes will _always_ be zero here as its value is | 
 |         // intimately tied to fd < 0 or not. | 
 |         fd = ::open(directory, O_CLOEXEC | O_DIRECTORY | O_RDONLY); | 
 |         return *this; | 
 |     } | 
 |  | 
 |     void rewind(void) { | 
 |         if (fd >= 0) { | 
 |             ::lseek(fd, off_t(0), SEEK_SET); | 
 |             available_bytes = 0; | 
 |             next = nullptr; | 
 |         } | 
 |     } | 
 |  | 
 |     dirent* read(enum level index = proc, dirent* def = nullptr) { | 
 |         if (!fill(index)) return def; | 
 |         auto ret = next; | 
 |         available_bytes -= next->d_reclen; | 
 |         next = reinterpret_cast<dirent*>(reinterpret_cast<char*>(next) + next->d_reclen); | 
 |         return ret; | 
 |     } | 
 | } llkTopDirectory; | 
 |  | 
 | dirent dir::buff[dir::numLevels][dir::buffEntries]; | 
 |  | 
 | // helper functions | 
 |  | 
 | bool llkIsMissingExeLink(pid_t tid) { | 
 |     char c; | 
 |     // CAP_SYS_PTRACE is required to prevent ret == -1, but ENOENT is signal | 
 |     auto ret = ::readlink((procdir + std::to_string(tid) + "/exe").c_str(), &c, sizeof(c)); | 
 |     return (ret == -1) && (errno == ENOENT); | 
 | } | 
 |  | 
 | // Common routine where caller accepts empty content as error/passthrough. | 
 | // Reduces the churn of reporting read errors in the callers. | 
 | std::string ReadFile(std::string&& path) { | 
 |     std::string content; | 
 |     if (!android::base::ReadFileToString(path, &content)) { | 
 |         PLOG(DEBUG) << "Read " << path << " failed"; | 
 |         content = ""; | 
 |     } | 
 |     return content; | 
 | } | 
 |  | 
 | std::string llkProcGetName(pid_t tid, const char* node = "/cmdline") { | 
 |     std::string content = ReadFile(procdir + std::to_string(tid) + node); | 
 |     static constexpr char needles[] = " \t\r\n";  // including trailing nul | 
 |     auto pos = content.find_first_of(needles, 0, sizeof(needles)); | 
 |     if (pos != std::string::npos) { | 
 |         content.erase(pos); | 
 |     } | 
 |     return content; | 
 | } | 
 |  | 
 | uid_t llkProcGetUid(pid_t tid) { | 
 |     // Get the process' uid.  The following read from /status is admittedly | 
 |     // racy, prone to corruption due to shape-changes.  The consequences are | 
 |     // not catastrophic as we sample a few times before taking action. | 
 |     // | 
 |     // If /loginuid worked on reliably, or on Android (all tasks report -1)... | 
 |     // Android lmkd causes /cgroup to contain memory:/<dom>/uid_<uid>/pid_<pid> | 
 |     // which is tighter, but also not reliable. | 
 |     std::string content = ReadFile(procdir + std::to_string(tid) + "/status"); | 
 |     static constexpr char Uid[] = "\nUid:"; | 
 |     auto pos = content.find(Uid); | 
 |     if (pos == std::string::npos) { | 
 |         return -1; | 
 |     } | 
 |     pos += ::strlen(Uid); | 
 |     while ((pos < content.size()) && ::isblank(content[pos])) { | 
 |         ++pos; | 
 |     } | 
 |     content.erase(0, pos); | 
 |     for (pos = 0; (pos < content.size()) && ::isdigit(content[pos]); ++pos) { | 
 |         ; | 
 |     } | 
 |     // Content of form 'Uid:	0	0	0	0', newline is error | 
 |     if ((pos >= content.size()) || !::isblank(content[pos])) { | 
 |         return -1; | 
 |     } | 
 |     content.erase(pos); | 
 |     uid_t ret; | 
 |     if (!android::base::ParseInt(content, &ret, uid_t(0))) { | 
 |         return -1; | 
 |     } | 
 |     return ret; | 
 | } | 
 |  | 
 | struct proc { | 
 |     pid_t tid;                     // monitored thread id (in Z or D state). | 
 |     nanoseconds schedUpdate;       // /proc/<tid>/sched "se.avg.lastUpdateTime", | 
 |     uint64_t nrSwitches;           // /proc/<tid>/sched "nr_switches" for | 
 |                                    // refined ABA problem detection, determine | 
 |                                    // forward scheduling progress. | 
 |     milliseconds update;           // llkUpdate millisecond signature of last. | 
 |     milliseconds count;            // duration in state. | 
 |     pid_t pid;                     // /proc/<pid> before iterating through | 
 |                                    // /proc/<pid>/task/<tid> for threads. | 
 |     pid_t ppid;                    // /proc/<tid>/stat field 4 parent pid. | 
 |     uid_t uid;                     // /proc/<tid>/status Uid: field. | 
 |     unsigned time;                 // sum of /proc/<tid>/stat field 14 utime & | 
 |                                    // 15 stime for coarse ABA problem detection. | 
 |     std::string cmdline;           // cached /cmdline content | 
 |     char state;                    // /proc/<tid>/stat field 3: Z or D | 
 |                                    // (others we do not monitor: S, R, T or ?) | 
 |     char comm[TASK_COMM_LEN + 3];  // space for adding '[' and ']' | 
 |     bool exeMissingValid;          // exeMissing has been cached | 
 |     bool cmdlineValid;             // cmdline has been cached | 
 |     bool updated;                  // cleared before monitoring pass. | 
 |     bool killed;                   // sent a kill to this thread, next panic... | 
 |  | 
 |     void setComm(const char* _comm) { strncpy(comm + 1, _comm, sizeof(comm) - 2); } | 
 |  | 
 |     proc(pid_t tid, pid_t pid, pid_t ppid, const char* _comm, int time, char state) | 
 |         : tid(tid), | 
 |           schedUpdate(0), | 
 |           nrSwitches(0), | 
 |           update(llkUpdate), | 
 |           count(0), | 
 |           pid(pid), | 
 |           ppid(ppid), | 
 |           uid(-1), | 
 |           time(time), | 
 |           state(state), | 
 |           exeMissingValid(false), | 
 |           cmdlineValid(false), | 
 |           updated(true), | 
 |           killed(!llkTestWithKill) { | 
 |         memset(comm, '\0', sizeof(comm)); | 
 |         setComm(_comm); | 
 |     } | 
 |  | 
 |     const char* getComm(void) { | 
 |         if (comm[1] == '\0') {  // comm Valid? | 
 |             strncpy(comm + 1, llkProcGetName(tid, "/comm").c_str(), sizeof(comm) - 2); | 
 |         } | 
 |         if (!exeMissingValid) { | 
 |             if (llkIsMissingExeLink(tid)) { | 
 |                 comm[0] = '['; | 
 |             } | 
 |             exeMissingValid = true; | 
 |         } | 
 |         size_t len = strlen(comm + 1); | 
 |         if (__predict_true(len < (sizeof(comm) - 1))) { | 
 |             if (comm[0] == '[') { | 
 |                 if ((comm[len] != ']') && __predict_true(len < (sizeof(comm) - 2))) { | 
 |                     comm[++len] = ']'; | 
 |                     comm[++len] = '\0'; | 
 |                 } | 
 |             } else { | 
 |                 if (comm[len] == ']') { | 
 |                     comm[len] = '\0'; | 
 |                 } | 
 |             } | 
 |         } | 
 |         return &comm[comm[0] != '[']; | 
 |     } | 
 |  | 
 |     const char* getCmdline(void) { | 
 |         if (!cmdlineValid) { | 
 |             cmdline = llkProcGetName(tid); | 
 |             cmdlineValid = true; | 
 |         } | 
 |         return cmdline.c_str(); | 
 |     } | 
 |  | 
 |     uid_t getUid(void) { | 
 |         if (uid <= 0) {  // Churn on root user, because most likely to setuid() | 
 |             uid = llkProcGetUid(tid); | 
 |         } | 
 |         return uid; | 
 |     } | 
 |  | 
 |     void reset(void) {  // reset cache, if we detected pid rollover | 
 |         uid = -1; | 
 |         state = '?'; | 
 |         cmdline = ""; | 
 |         comm[0] = '\0'; | 
 |         exeMissingValid = false; | 
 |         cmdlineValid = false; | 
 |     } | 
 | }; | 
 |  | 
 | std::unordered_map<pid_t, proc> tids; | 
 |  | 
 | // Check range and setup defaults, in order of propagation: | 
 | //     llkTimeoutMs | 
 | //     llkCheckMs | 
 | //     ... | 
 | // KISS to keep it all self-contained, and called multiple times as parameters | 
 | // are interpreted so that defaults, llkCheckMs and llkCycle make sense. | 
 | void llkValidate() { | 
 |     if (llkTimeoutMs == 0ms) { | 
 |         llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT; | 
 |     } | 
 |     llkTimeoutMs = std::max(llkTimeoutMs, LLK_TIMEOUT_MS_MINIMUM); | 
 |     if (llkCheckMs == 0ms) { | 
 |         llkCheckMs = llkTimeoutMs / LLK_CHECKS_PER_TIMEOUT_DEFAULT; | 
 |     } | 
 |     llkCheckMs = std::min(llkCheckMs, llkTimeoutMs); | 
 |  | 
 |     for (size_t state = 0; state < ARRAY_SIZE(llkStateTimeoutMs); ++state) { | 
 |         if (llkStateTimeoutMs[state] == 0ms) { | 
 |             llkStateTimeoutMs[state] = llkTimeoutMs; | 
 |         } | 
 |         llkStateTimeoutMs[state] = | 
 |             std::min(std::max(llkStateTimeoutMs[state], LLK_TIMEOUT_MS_MINIMUM), llkTimeoutMs); | 
 |         llkCheckMs = std::min(llkCheckMs, llkStateTimeoutMs[state]); | 
 |     } | 
 |  | 
 |     llkCheckMs = std::max(llkCheckMs, LLK_CHECK_MS_MINIMUM); | 
 |     if (llkCycle == 0ms) { | 
 |         llkCycle = llkCheckMs; | 
 |     } | 
 |     llkCycle = std::min(llkCycle, llkCheckMs); | 
 | } | 
 |  | 
 | milliseconds llkGetTimespecDiffMs(timespec* from, timespec* to) { | 
 |     return duration_cast<milliseconds>(seconds(to->tv_sec - from->tv_sec)) + | 
 |            duration_cast<milliseconds>(nanoseconds(to->tv_nsec - from->tv_nsec)); | 
 | } | 
 |  | 
 | std::string llkProcGetName(pid_t tid, const char* comm, const char* cmdline) { | 
 |     if ((cmdline != nullptr) && (*cmdline != '\0')) { | 
 |         return cmdline; | 
 |     } | 
 |     if ((comm != nullptr) && (*comm != '\0')) { | 
 |         return comm; | 
 |     } | 
 |  | 
 |     // UNLIKELY! Here because killed before we kill it? | 
 |     // Assume change is afoot, do not call llkTidAlloc | 
 |  | 
 |     // cmdline ? | 
 |     std::string content = llkProcGetName(tid); | 
 |     if (content.size() != 0) { | 
 |         return content; | 
 |     } | 
 |     // Comm instead? | 
 |     content = llkProcGetName(tid, "/comm"); | 
 |     if (llkIsMissingExeLink(tid) && (content.size() != 0)) { | 
 |         return '[' + content + ']'; | 
 |     } | 
 |     return content; | 
 | } | 
 |  | 
 | int llkKillOneProcess(pid_t pid, char state, pid_t tid, const char* tcomm = nullptr, | 
 |                       const char* tcmdline = nullptr, const char* pcomm = nullptr, | 
 |                       const char* pcmdline = nullptr) { | 
 |     std::string forTid; | 
 |     if (tid != pid) { | 
 |         forTid = " for '" + llkProcGetName(tid, tcomm, tcmdline) + "' (" + std::to_string(tid) + ")"; | 
 |     } | 
 |     LOG(INFO) << "Killing '" << llkProcGetName(pid, pcomm, pcmdline) << "' (" << pid | 
 |               << ") to check forward scheduling progress in " << state << " state" << forTid; | 
 |     // CAP_KILL required | 
 |     errno = 0; | 
 |     auto r = ::kill(pid, SIGKILL); | 
 |     if (r) { | 
 |         PLOG(ERROR) << "kill(" << pid << ")=" << r << ' '; | 
 |     } | 
 |  | 
 |     return r; | 
 | } | 
 |  | 
 | // Kill one process | 
 | int llkKillOneProcess(pid_t pid, proc* tprocp) { | 
 |     return llkKillOneProcess(pid, tprocp->state, tprocp->tid, tprocp->getComm(), | 
 |                              tprocp->getCmdline()); | 
 | } | 
 |  | 
 | // Kill one process specified by kprocp | 
 | int llkKillOneProcess(proc* kprocp, proc* tprocp) { | 
 |     if (kprocp == nullptr) { | 
 |         return -2; | 
 |     } | 
 |  | 
 |     return llkKillOneProcess(kprocp->tid, tprocp->state, tprocp->tid, tprocp->getComm(), | 
 |                              tprocp->getCmdline(), kprocp->getComm(), kprocp->getCmdline()); | 
 | } | 
 |  | 
 | // Acquire file descriptor from environment, or open and cache it. | 
 | // NB: cache is unnecessary in our current context, pedantically | 
 | //     required to prevent leakage of file descriptors in the future. | 
 | int llkFileToWriteFd(const std::string& file) { | 
 |     static std::unordered_map<std::string, int> cache; | 
 |     auto search = cache.find(file); | 
 |     if (search != cache.end()) return search->second; | 
 |     auto fd = android_get_control_file(file.c_str()); | 
 |     if (fd >= 0) return fd; | 
 |     fd = TEMP_FAILURE_RETRY(::open(file.c_str(), O_WRONLY | O_CLOEXEC)); | 
 |     if (fd >= 0) cache.emplace(std::make_pair(file, fd)); | 
 |     return fd; | 
 | } | 
 |  | 
 | // Wrap android::base::WriteStringToFile to use android_get_control_file. | 
 | bool llkWriteStringToFile(const std::string& string, const std::string& file) { | 
 |     auto fd = llkFileToWriteFd(file); | 
 |     if (fd < 0) return false; | 
 |     return android::base::WriteStringToFd(string, fd); | 
 | } | 
 |  | 
 | bool llkWriteStringToFileConfirm(const std::string& string, const std::string& file) { | 
 |     auto fd = llkFileToWriteFd(file); | 
 |     auto ret = (fd < 0) ? false : android::base::WriteStringToFd(string, fd); | 
 |     std::string content; | 
 |     if (!android::base::ReadFileToString(file, &content)) return ret; | 
 |     return android::base::Trim(content) == string; | 
 | } | 
 |  | 
 | void llkPanicKernel(bool dump, pid_t tid, const char* state) __noreturn; | 
 | void llkPanicKernel(bool dump, pid_t tid, const char* state) { | 
 |     auto sysrqTriggerFd = llkFileToWriteFd("/proc/sysrq-trigger"); | 
 |     if (sysrqTriggerFd < 0) { | 
 |         // DYB | 
 |         llkKillOneProcess(initPid, 'R', tid); | 
 |         // The answer to life, the universe and everything | 
 |         ::exit(42); | 
 |         // NOTREACHED | 
 |     } | 
 |     ::sync(); | 
 |     if (dump) { | 
 |         // Show all locks that are held | 
 |         android::base::WriteStringToFd("d", sysrqTriggerFd); | 
 |         // This can trigger hardware watchdog, that is somewhat _ok_. | 
 |         // But useless if pstore configured for <256KB, low ram devices ... | 
 |         if (!llkLowRam) { | 
 |             android::base::WriteStringToFd("t", sysrqTriggerFd); | 
 |         } | 
 |         ::usleep(200000);  // let everything settle | 
 |     } | 
 |     llkWriteStringToFile(std::string("SysRq : Trigger a crash : 'livelock,") + state + "'\n", | 
 |                          "/dev/kmsg"); | 
 |     android::base::WriteStringToFd("c", sysrqTriggerFd); | 
 |     // NOTREACHED | 
 |     // DYB | 
 |     llkKillOneProcess(initPid, 'R', tid); | 
 |     // I sat at my desk, stared into the garden and thought '42 will do'. | 
 |     // I typed it out. End of story | 
 |     ::exit(42); | 
 |     // NOTREACHED | 
 | } | 
 |  | 
 | void llkAlarmHandler(int) { | 
 |     llkPanicKernel(false, ::getpid(), "alarm"); | 
 | } | 
 |  | 
 | milliseconds GetUintProperty(const std::string& key, milliseconds def) { | 
 |     return milliseconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()), | 
 |                                                        static_cast<uint64_t>(def.max().count()))); | 
 | } | 
 |  | 
 | seconds GetUintProperty(const std::string& key, seconds def) { | 
 |     return seconds(android::base::GetUintProperty(key, static_cast<uint64_t>(def.count()), | 
 |                                                   static_cast<uint64_t>(def.max().count()))); | 
 | } | 
 |  | 
 | proc* llkTidLookup(pid_t tid) { | 
 |     auto search = tids.find(tid); | 
 |     if (search == tids.end()) { | 
 |         return nullptr; | 
 |     } | 
 |     return &search->second; | 
 | } | 
 |  | 
 | void llkTidRemove(pid_t tid) { | 
 |     tids.erase(tid); | 
 | } | 
 |  | 
 | proc* llkTidAlloc(pid_t tid, pid_t pid, pid_t ppid, const char* comm, int time, char state) { | 
 |     auto it = tids.emplace(std::make_pair(tid, proc(tid, pid, ppid, comm, time, state))); | 
 |     return &it.first->second; | 
 | } | 
 |  | 
 | std::string llkFormat(milliseconds ms) { | 
 |     auto sec = duration_cast<seconds>(ms); | 
 |     std::ostringstream s; | 
 |     s << sec.count() << '.'; | 
 |     auto f = s.fill('0'); | 
 |     auto w = s.width(3); | 
 |     s << std::right << (ms - sec).count(); | 
 |     s.width(w); | 
 |     s.fill(f); | 
 |     s << 's'; | 
 |     return s.str(); | 
 | } | 
 |  | 
 | std::string llkFormat(seconds s) { | 
 |     return std::to_string(s.count()) + 's'; | 
 | } | 
 |  | 
 | std::string llkFormat(bool flag) { | 
 |     return flag ? "true" : "false"; | 
 | } | 
 |  | 
 | std::string llkFormat(const std::unordered_set<std::string>& blacklist) { | 
 |     std::string ret; | 
 |     for (auto entry : blacklist) { | 
 |         if (ret.size()) { | 
 |             ret += ","; | 
 |         } | 
 |         ret += entry; | 
 |     } | 
 |     return ret; | 
 | } | 
 |  | 
 | // We only officially support comma separators, but wetware being what they | 
 | // are will take some liberty and I do not believe they should be punished. | 
 | std::unordered_set<std::string> llkSplit(const std::string& s, | 
 |                                          const std::string& delimiters = ", \t:") { | 
 |     std::unordered_set<std::string> result; | 
 |  | 
 |     size_t base = 0; | 
 |     size_t found; | 
 |     while (true) { | 
 |         found = s.find_first_of(delimiters, base); | 
 |         result.emplace(s.substr(base, found - base)); | 
 |         if (found == s.npos) break; | 
 |         base = found + 1; | 
 |     } | 
 |     return result; | 
 | } | 
 |  | 
 | bool llkSkipName(const std::string& name, | 
 |                  const std::unordered_set<std::string>& blacklist = llkBlacklistProcess) { | 
 |     if ((name.size() == 0) || (blacklist.size() == 0)) { | 
 |         return false; | 
 |     } | 
 |  | 
 |     return blacklist.find(name) != blacklist.end(); | 
 | } | 
 |  | 
 | bool llkSkipPid(pid_t pid) { | 
 |     return llkSkipName(std::to_string(pid), llkBlacklistProcess); | 
 | } | 
 |  | 
 | bool llkSkipPpid(pid_t ppid) { | 
 |     return llkSkipName(std::to_string(ppid), llkBlacklistParent); | 
 | } | 
 |  | 
 | bool llkSkipUid(uid_t uid) { | 
 |     // Match by number? | 
 |     if (llkSkipName(std::to_string(uid), llkBlacklistUid)) { | 
 |         return true; | 
 |     } | 
 |  | 
 |     // Match by name? | 
 |     auto pwd = ::getpwuid(uid); | 
 |     return (pwd != nullptr) && __predict_true(pwd->pw_name != nullptr) && | 
 |            __predict_true(pwd->pw_name[0] != '\0') && llkSkipName(pwd->pw_name, llkBlacklistUid); | 
 | } | 
 |  | 
 | bool getValidTidDir(dirent* dp, std::string* piddir) { | 
 |     if (!::isdigit(dp->d_name[0])) { | 
 |         return false; | 
 |     } | 
 |  | 
 |     // Corner case can not happen in reality b/c of above ::isdigit check | 
 |     if (__predict_false(dp->d_type != DT_DIR)) { | 
 |         if (__predict_false(dp->d_type == DT_UNKNOWN)) {  // can't b/c procfs | 
 |             struct stat st; | 
 |             *piddir = procdir; | 
 |             *piddir += dp->d_name; | 
 |             return (lstat(piddir->c_str(), &st) == 0) && (st.st_mode & S_IFDIR); | 
 |         } | 
 |         return false; | 
 |     } | 
 |  | 
 |     *piddir = procdir; | 
 |     *piddir += dp->d_name; | 
 |     return true; | 
 | } | 
 |  | 
 | bool llkIsMonitorState(char state) { | 
 |     return (state == 'Z') || (state == 'D'); | 
 | } | 
 |  | 
 | // returns -1 if not found | 
 | long long getSchedValue(const std::string& schedString, const char* key) { | 
 |     auto pos = schedString.find(key); | 
 |     if (pos == std::string::npos) { | 
 |         return -1; | 
 |     } | 
 |     pos = schedString.find(':', pos); | 
 |     if (__predict_false(pos == std::string::npos)) { | 
 |         return -1; | 
 |     } | 
 |     while ((++pos < schedString.size()) && ::isblank(schedString[pos])) { | 
 |         ; | 
 |     } | 
 |     long long ret; | 
 |     if (!android::base::ParseInt(schedString.substr(pos), &ret, static_cast<long long>(0))) { | 
 |         return -1; | 
 |     } | 
 |     return ret; | 
 | } | 
 |  | 
 | // Primary ABA mitigation watching last time schedule activity happened | 
 | void llkCheckSchedUpdate(proc* procp, const std::string& piddir) { | 
 |     // Audit finds /proc/<tid>/sched is just over 1K, and | 
 |     // is rarely larger than 2K, even less on Android. | 
 |     // For example, the "se.avg.lastUpdateTime" field we are | 
 |     // interested in typically within the primary set in | 
 |     // the first 1K. | 
 |     // | 
 |     // Proc entries can not be read >1K atomically via libbase, | 
 |     // but if there are problems we assume at least a few | 
 |     // samples of reads occur before we take any real action. | 
 |     std::string schedString = ReadFile(piddir + "/sched"); | 
 |     if (schedString.size() == 0) { | 
 |         // /schedstat is not as standardized, but in 3.1+ | 
 |         // Android devices, the third field is nr_switches | 
 |         // from /sched: | 
 |         schedString = ReadFile(piddir + "/schedstat"); | 
 |         if (schedString.size() == 0) { | 
 |             return; | 
 |         } | 
 |         auto val = static_cast<unsigned long long>(-1); | 
 |         if (((::sscanf(schedString.c_str(), "%*d %*d %llu", &val)) == 1) && | 
 |             (val != static_cast<unsigned long long>(-1)) && (val != 0) && | 
 |             (val != procp->nrSwitches)) { | 
 |             procp->nrSwitches = val; | 
 |             procp->count = 0ms; | 
 |             procp->killed = !llkTestWithKill; | 
 |         } | 
 |         return; | 
 |     } | 
 |  | 
 |     auto val = getSchedValue(schedString, "\nse.avg.lastUpdateTime"); | 
 |     if (val == -1) { | 
 |         val = getSchedValue(schedString, "\nse.svg.last_update_time"); | 
 |     } | 
 |     if (val != -1) { | 
 |         auto schedUpdate = nanoseconds(val); | 
 |         if (schedUpdate != procp->schedUpdate) { | 
 |             procp->schedUpdate = schedUpdate; | 
 |             procp->count = 0ms; | 
 |             procp->killed = !llkTestWithKill; | 
 |         } | 
 |     } | 
 |  | 
 |     val = getSchedValue(schedString, "\nnr_switches"); | 
 |     if (val != -1) { | 
 |         if (static_cast<uint64_t>(val) != procp->nrSwitches) { | 
 |             procp->nrSwitches = val; | 
 |             procp->count = 0ms; | 
 |             procp->killed = !llkTestWithKill; | 
 |         } | 
 |     } | 
 | } | 
 |  | 
 | void llkLogConfig(void) { | 
 |     LOG(INFO) << "ro.config.low_ram=" << llkFormat(llkLowRam) << "\n" | 
 |               << LLK_ENABLE_PROPERTY "=" << llkFormat(llkEnable) << "\n" | 
 |               << KHT_ENABLE_PROPERTY "=" << llkFormat(khtEnable) << "\n" | 
 |               << LLK_MLOCKALL_PROPERTY "=" << llkFormat(llkMlockall) << "\n" | 
 |               << LLK_KILLTEST_PROPERTY "=" << llkFormat(llkTestWithKill) << "\n" | 
 |               << KHT_TIMEOUT_PROPERTY "=" << llkFormat(khtTimeout) << "\n" | 
 |               << LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n" | 
 |               << LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n" | 
 |               << LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n" | 
 |               << LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n" | 
 |               << LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n" | 
 |               << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n" | 
 |               << LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid); | 
 | } | 
 |  | 
 | void* llkThread(void* obj) { | 
 |     LOG(INFO) << "started"; | 
 |  | 
 |     std::string name = std::to_string(::gettid()); | 
 |     if (!llkSkipName(name)) { | 
 |         llkBlacklistProcess.emplace(name); | 
 |     } | 
 |     name = static_cast<const char*>(obj); | 
 |     prctl(PR_SET_NAME, name.c_str()); | 
 |     if (__predict_false(!llkSkipName(name))) { | 
 |         llkBlacklistProcess.insert(name); | 
 |     } | 
 |     // No longer modifying llkBlacklistProcess. | 
 |     llkRunning = true; | 
 |     llkLogConfig(); | 
 |     while (llkRunning) { | 
 |         ::usleep(duration_cast<microseconds>(llkCheck(true)).count()); | 
 |     } | 
 |     // NOTREACHED | 
 |     LOG(INFO) << "exiting"; | 
 |     return nullptr; | 
 | } | 
 |  | 
 | }  // namespace | 
 |  | 
 | // Run one scan over procdir and act on threads that have stayed in a | 
 | // monitored state (see llkIsMonitorState()) for too long: first a test kill | 
 | // when allowed, then llkPanicKernel(). | 
 | // | 
 | // checkRunning must match llkRunning (llkThread() passes true), otherwise | 
 | // nothing is done. | 
 | // | 
 | // Returns how long the caller should wait before calling again: | 
 | //   milliseconds::max()  disabled, or checkRunning does not match llkRunning | 
 | //   llkCycle - elapsed   called again before a full cycle has passed | 
 | //   llkTimeoutMs         procdir could not be opened | 
 | //   otherwise            remainder of the next cycle, never less than 1s | 
 | milliseconds llkCheck(bool checkRunning) { | 
 |     if (!llkEnable || (checkRunning != llkRunning)) { | 
 |         return milliseconds::max(); | 
 |     } | 
 |  | 
 |     // Reset internal watchdog, which is a healthy engineering margin of | 
 |     // double the maximum wait or cycle time for the mainloop that calls us. | 
 |     // | 
 |     // This alarm is effectively the live lock detection of llkd, as | 
 |     // we understandably can not monitor ourselves otherwise. | 
 |     ::alarm(duration_cast<seconds>(llkTimeoutMs * 2).count()); | 
 |  | 
 |     // kernel jiffy precision fastest acquisition | 
 |     static timespec last; | 
 |     timespec now; | 
 |     ::clock_gettime(CLOCK_MONOTONIC_COARSE, &now); | 
 |     auto ms = llkGetTimespecDiffMs(&last, &now); | 
 |     if (ms < llkCycle) { | 
 |         // Too early, report the remainder of the current cycle. | 
 |         return llkCycle - ms; | 
 |     } | 
 |     last = now; | 
 |  | 
 |     LOG(VERBOSE) << "opendir(\"" << procdir << "\")"; | 
 |     if (__predict_false(!llkTopDirectory)) { | 
 |         // gid containing AID_READPROC required | 
 |         llkTopDirectory.reset(procdir); | 
 |         if (__predict_false(!llkTopDirectory)) { | 
 |             // Most likely reason we could be here is a resource limit. | 
 |             // Keep our processing down to a minimum, but not so low that | 
 |             // we do not recover in a timely manner should the issue be | 
 |             // transitory. | 
 |             LOG(DEBUG) << "opendir(\"" << procdir << "\") failed"; | 
 |             return llkTimeoutMs; | 
 |         } | 
 |     } | 
 |  | 
 |     // Mark every known tid as not seen; entries that are still unmarked | 
 |     // after the scan are garbage collected further down. | 
 |     for (auto& it : tids) { | 
 |         it.second.updated = false; | 
 |     } | 
 |  | 
 |     auto prevUpdate = llkUpdate; | 
 |     llkUpdate += ms; | 
 |     // From here on ms is the amount by which we overshot the cycle. | 
 |     ms -= llkCycle; | 
 |     auto myPid = ::getpid(); | 
 |     auto myTid = ::gettid(); | 
 |     for (auto dp = llkTopDirectory.read(); dp != nullptr; dp = llkTopDirectory.read()) { | 
 |         std::string piddir; | 
 |  | 
 |         if (!getValidTidDir(dp, &piddir)) { | 
 |             continue; | 
 |         } | 
 |  | 
 |         // Get the process tasks | 
 |         std::string taskdir = piddir + "/task/"; | 
 |         // Set from the first tid parsed in the task loop below. | 
 |         int pid = -1; | 
 |         LOG(VERBOSE) << "+opendir(\"" << taskdir << "\")"; | 
 |         dir taskDirectory(taskdir); | 
 |         if (__predict_false(!taskDirectory)) { | 
 |             LOG(DEBUG) << "+opendir(\"" << taskdir << "\") failed"; | 
 |         } | 
 |         for (auto tp = taskDirectory.read(dir::task, dp); tp != nullptr; | 
 |              tp = taskDirectory.read(dir::task)) { | 
 |             if (!getValidTidDir(tp, &piddir)) { | 
 |                 continue; | 
 |             } | 
 |  | 
 |             // Get the process stat | 
 |             std::string stat = ReadFile(piddir + "/stat"); | 
 |             if (stat.size() == 0) { | 
 |                 continue; | 
 |             } | 
 |             unsigned tid = -1; | 
 |             char pdir[TASK_COMM_LEN + 1]; | 
 |             char state = '?'; | 
 |             unsigned ppid = -1; | 
 |             unsigned utime = -1; | 
 |             unsigned stime = -1; | 
 |             // Only used to confirm a field follows stime.  Initialized | 
 |             // because it is logged below before match is verified. | 
 |             int dummy = -1; | 
 |             pdir[0] = '\0'; | 
 |             // tid should not change value | 
 |             auto match = ::sscanf( | 
 |                 stat.c_str(), | 
 |                 "%u (%" ___STRING( | 
 |                     TASK_COMM_LEN) "[^)]) %c %u %*d %*d %*d %*d %*d %*d %*d %*d %*d %u %u %d", | 
 |                 &tid, pdir, &state, &ppid, &utime, &stime, &dummy); | 
 |             if (pid == -1) { | 
 |                 pid = tid; | 
 |             } | 
 |             LOG(VERBOSE) << "match " << match << ' ' << tid << " (" << pdir << ") " << state << ' ' | 
 |                          << ppid << " ... " << utime << ' ' << stime << ' ' << dummy; | 
 |             if (match != 7) { | 
 |                 continue; | 
 |             } | 
 |  | 
 |             auto procp = llkTidLookup(tid); | 
 |             if (procp == nullptr) { | 
 |                 procp = llkTidAlloc(tid, pid, ppid, pdir, utime + stime, state); | 
 |             } else { | 
 |                 // comm can change ... | 
 |                 procp->setComm(pdir); | 
 |                 procp->updated = true; | 
 |                 // pid/ppid/tid wrap? | 
 |                 if (((procp->update != prevUpdate) && (procp->update != llkUpdate)) || | 
 |                     (procp->ppid != ppid) || (procp->pid != pid)) { | 
 |                     procp->reset(); | 
 |                 } else if (procp->time != (utime + stime)) {  // secondary ABA. | 
 |                     // watching utime+stime granularity jiffy | 
 |                     procp->state = '?'; | 
 |                 } | 
 |                 procp->update = llkUpdate; | 
 |                 procp->pid = pid; | 
 |                 procp->ppid = ppid; | 
 |                 procp->time = utime + stime; | 
 |                 if (procp->state != state) { | 
 |                     // State changed: restart the clock for this tid. | 
 |                     procp->count = 0ms; | 
 |                     procp->killed = !llkTestWithKill; | 
 |                     procp->state = state; | 
 |                 } else { | 
 |                     procp->count += llkCycle; | 
 |                 } | 
 |             } | 
 |  | 
 |             // Filter checks in intuitive order of CPU cost to evaluate | 
 |             // If tid unique continue, if ppid or pid unique break | 
 |  | 
 |             if (pid == myPid) { | 
 |                 break; | 
 |             } | 
 |             if (!llkIsMonitorState(state)) { | 
 |                 continue; | 
 |             } | 
 |             if ((tid == myTid) || llkSkipPid(tid)) { | 
 |                 continue; | 
 |             } | 
 |             if (llkSkipPpid(ppid)) { | 
 |                 break; | 
 |             } | 
 |  | 
 |             if (llkSkipName(procp->getComm())) { | 
 |                 continue; | 
 |             } | 
 |             if (llkSkipName(procp->getCmdline())) { | 
 |                 break; | 
 |             } | 
 |  | 
 |             auto pprocp = llkTidLookup(ppid); | 
 |             if (pprocp == nullptr) { | 
 |                 pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?'); | 
 |             } | 
 |             if ((pprocp != nullptr) && (llkSkipName(pprocp->getComm(), llkBlacklistParent) || | 
 |                                         llkSkipName(pprocp->getCmdline(), llkBlacklistParent))) { | 
 |                 break; | 
 |             } | 
 |  | 
 |             if ((llkBlacklistUid.size() != 0) && llkSkipUid(procp->getUid())) { | 
 |                 continue; | 
 |             } | 
 |  | 
 |             // ABA mitigation watching last time schedule activity happened | 
 |             llkCheckSchedUpdate(procp, piddir); | 
 |  | 
 |             // Can only fall through to here if registered D or Z state !!! | 
 |             if (procp->count < llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) { | 
 |                 LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" | 
 |                              << pid << "->" << tid << ' ' << procp->getComm(); | 
 |                 continue; | 
 |             } | 
 |  | 
 |             // We have to kill it to determine difference between live lock | 
 |             // and persistent state blocked on a resource.  Is there something | 
 |             // wrong with a process that has no forward scheduling progress in | 
 |             // Z or D?  Yes, generally means improper accounting in the | 
 |             // process, but not always ... | 
 |             // | 
 |             // Whomever we hit with a test kill must accept the Android | 
 |             // Aphorism that everything can be burned to the ground and | 
 |             // must survive. | 
 |             if (procp->killed == false) { | 
 |                 procp->killed = true; | 
 |                 // confirm: re-read uid before committing to a panic. | 
 |                 procp->uid = -1; | 
 |                 // Inside the switch, continue moves on to the next task | 
 |                 // (the kill was delivered), break falls through to panic. | 
 |                 switch (state) { | 
 |                     case 'Z':  // kill ppid to free up a Zombie | 
 |                         // Killing init will kernel panic without diagnostics | 
 |                         // so skip right to controlled kernel panic with | 
 |                         // diagnostics. | 
 |                         if (ppid == initPid) { | 
 |                             break; | 
 |                         } | 
 |                         LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->" | 
 |                                      << pid << "->" << tid << ' ' << procp->getComm() << " [kill]"; | 
 |                         if ((llkKillOneProcess(pprocp, procp) >= 0) || | 
 |                             (llkKillOneProcess(ppid, procp) >= 0)) { | 
 |                             continue; | 
 |                         } | 
 |                         break; | 
 |  | 
 |                     case 'D':  // kill tid to free up an uninterruptible D | 
 |                         // If ABA is doing its job, we would not need or | 
 |                         // want the following.  Test kill is a Hail Mary | 
 |                         // to make absolutely sure there is no forward | 
 |                         // scheduling progress.  The cost when ABA is | 
 |                         // not working is we kill a process that likes to | 
 |                         // stay in 'D' state, instead of panicing the | 
 |                         // kernel (worse). | 
 |                         LOG(WARNING) << "D " << llkFormat(procp->count) << ' ' << pid << "->" << tid | 
 |                                      << ' ' << procp->getComm() << " [kill]"; | 
 |                         if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) || | 
 |                             (llkKillOneProcess(pid, 'D', tid) >= 0) || | 
 |                             (llkKillOneProcess(procp, procp) >= 0) || | 
 |                             (llkKillOneProcess(tid, 'D', tid) >= 0)) { | 
 |                             continue; | 
 |                         } | 
 |                         break; | 
 |                 } | 
 |             } | 
 |             // We are here because we have confirmed kernel live-lock | 
 |             LOG(ERROR) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" << pid | 
 |                        << "->" << tid << ' ' << procp->getComm() << " [panic]"; | 
 |             llkPanicKernel(true, tid, (state == 'Z') ? "zombie" : "driver"); | 
 |         } | 
 |         LOG(VERBOSE) << "+closedir()"; | 
 |     } | 
 |     llkTopDirectory.rewind(); | 
 |     LOG(VERBOSE) << "closedir()"; | 
 |  | 
 |     // garbage collection of old process references | 
 |     for (auto p = tids.begin(); p != tids.end();) { | 
 |         if (!p->second.updated) { | 
 |             // The names are only looked up when verbose logging is on. | 
 |             IF_ALOG(LOG_VERBOSE, LOG_TAG) { | 
 |                 std::string ppidCmdline = llkProcGetName(p->second.ppid, nullptr, nullptr); | 
 |                 if (ppidCmdline.size()) { | 
 |                     ppidCmdline = "(" + ppidCmdline + ")"; | 
 |                 } | 
 |                 std::string pidCmdline; | 
 |                 if (p->second.pid != p->second.tid) { | 
 |                     pidCmdline = llkProcGetName(p->second.pid, nullptr, p->second.getCmdline()); | 
 |                     if (pidCmdline.size()) { | 
 |                         pidCmdline = "(" + pidCmdline + ")"; | 
 |                     } | 
 |                 } | 
 |                 std::string tidCmdline = | 
 |                     llkProcGetName(p->second.tid, p->second.getComm(), p->second.getCmdline()); | 
 |                 if (tidCmdline.size()) { | 
 |                     tidCmdline = "(" + tidCmdline + ")"; | 
 |                 } | 
 |                 LOG(VERBOSE) << "thread " << p->second.ppid << ppidCmdline << "->" << p->second.pid | 
 |                              << pidCmdline << "->" << p->second.tid << tidCmdline << " removed"; | 
 |             } | 
 |             p = tids.erase(p); | 
 |         } else { | 
 |             ++p; | 
 |         } | 
 |     } | 
 |     if (__predict_false(tids.empty())) { | 
 |         // Nothing was tracked; drop the directory so that the next call | 
 |         // reopens it. | 
 |         llkTopDirectory.reset(); | 
 |     } | 
 |  | 
 |     llkCycle = llkCheckMs; | 
 |  | 
 |     // Report how long this sample took, louder the longer it was. | 
 |     timespec end; | 
 |     ::clock_gettime(CLOCK_MONOTONIC_COARSE, &end); | 
 |     auto milli = llkGetTimespecDiffMs(&now, &end); | 
 |     LOG((milli > 10s) ? ERROR : (milli > 1s) ? WARNING : VERBOSE) << "sample " << llkFormat(milli); | 
 |  | 
 |     // cap to minimum sleep for 1 second since last cycle | 
 |     if (llkCycle < (ms + 1s)) { | 
 |         return 1s; | 
 |     } | 
 |     return llkCycle - ms; | 
 | } | 
 |  | 
 | // Same as llkCheck() called with its default argument, with the returned | 
 | // wait expressed as a plain count of milliseconds. | 
 | unsigned llkCheckMilliseconds() { | 
 |     const auto untilNext = duration_cast<milliseconds>(llkCheck()); | 
 |     return untilNext.count(); | 
 | } | 
 |  | 
 | // Initialize llkd from system properties. | 
 | // | 
 | // Reads the enable flags, timeouts and blacklists, installs the SIGALRM | 
 | // watchdog handler, tries to configure the kernel hung task detector when | 
 | // khtEnable is set and, when llkEnable is set and threadname is not null, | 
 | // spawns a detached llkThread() that is named threadname. | 
 | // | 
 | // threadname may be nullptr, in which case no thread is created. | 
 | // Returns the resulting value of llkEnable. | 
 | bool llkInit(const char* threadname) { | 
 |     llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false); | 
 |     if (!LLK_ENABLE_DEFAULT && android::base::GetBoolProperty("ro.debuggable", false)) { | 
 |         llkEnable = android::base::GetProperty(LLK_ENABLE_PROPERTY, "eng") == "eng"; | 
 |         khtEnable = android::base::GetProperty(KHT_ENABLE_PROPERTY, "eng") == "eng"; | 
 |     } | 
 |     llkEnable = android::base::GetBoolProperty(LLK_ENABLE_PROPERTY, llkEnable); | 
 |     if (llkEnable && !llkTopDirectory.reset(procdir)) { | 
 |         // Most likely reason we could be here is llkd was started | 
 |         // incorrectly without the readproc permissions.  Keep our | 
 |         // processing down to a minimum. | 
 |         llkEnable = false; | 
 |     } | 
 |     khtEnable = android::base::GetBoolProperty(KHT_ENABLE_PROPERTY, khtEnable); | 
 |     llkMlockall = android::base::GetBoolProperty(LLK_MLOCKALL_PROPERTY, llkMlockall); | 
 |     llkTestWithKill = android::base::GetBoolProperty(LLK_KILLTEST_PROPERTY, llkTestWithKill); | 
 |     // if LLK_TIMEOUT_MS_PROPERTY was not set, we will use a set | 
 |     // KHT_TIMEOUT_PROPERTY as co-operative guidance for the default value. | 
 |     khtTimeout = GetUintProperty(KHT_TIMEOUT_PROPERTY, khtTimeout); | 
 |     if (khtTimeout == 0s) { | 
 |         khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) / | 
 |                                             LLK_CHECKS_PER_TIMEOUT_DEFAULT); | 
 |     } | 
 |     llkTimeoutMs = | 
 |         khtTimeout * LLK_CHECKS_PER_TIMEOUT_DEFAULT / (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT); | 
 |     llkTimeoutMs = GetUintProperty(LLK_TIMEOUT_MS_PROPERTY, llkTimeoutMs); | 
 |     llkValidate();  // validate llkTimeoutMs, llkCheckMs and llkCycle | 
 |     llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs); | 
 |     llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs); | 
 |     llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs); | 
 |     llkValidate();  // validate all (effectively minus llkTimeoutMs) | 
 |     // Default process blacklist: well known pids, ourselves, the compiled in | 
 |     // list, the thread name and the per-cpu watchdogs from cpu 1 upwards. | 
 |     std::string defaultBlacklistProcess( | 
 |         std::to_string(kernelPid) + "," + std::to_string(initPid) + "," + | 
 |         std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," + | 
 |         std::to_string(::gettid()) + "," LLK_BLACKLIST_PROCESS_DEFAULT); | 
 |     if (threadname) { | 
 |         defaultBlacklistProcess += std::string(",") + threadname; | 
 |     } | 
 |     // The cpu count does not need to be queried on every iteration. | 
 |     const int cpus = get_nprocs_conf(); | 
 |     for (int cpu = 1; cpu < cpus; ++cpu) { | 
 |         defaultBlacklistProcess += ",[watchdog/" + std::to_string(cpu) + "]"; | 
 |     } | 
 |     defaultBlacklistProcess = | 
 |         android::base::GetProperty(LLK_BLACKLIST_PROCESS_PROPERTY, defaultBlacklistProcess); | 
 |     llkBlacklistProcess = llkSplit(defaultBlacklistProcess); | 
 |     if (!llkSkipName("[khungtaskd]")) {  // ALWAYS ignore as special | 
 |         llkBlacklistProcess.emplace("[khungtaskd]"); | 
 |     } | 
 |     llkBlacklistParent = llkSplit(android::base::GetProperty( | 
 |         LLK_BLACKLIST_PARENT_PROPERTY, std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) + | 
 |                                            "," LLK_BLACKLIST_PARENT_DEFAULT)); | 
 |     llkBlacklistUid = | 
 |         llkSplit(android::base::GetProperty(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT)); | 
 |  | 
 |     // internal watchdog | 
 |     ::signal(SIGALRM, llkAlarmHandler); | 
 |  | 
 |     // kernel hung task configuration? Otherwise leave it as-is | 
 |     if (khtEnable) { | 
 |         // EUID must be AID_ROOT to write to /proc/sys/kernel/ nodes, there | 
 |         // are no capability overrides.  For security reasons we do not want | 
 |         // to run as AID_ROOT.  We may not be able to write them successfully, | 
 |         // we will try, but the least we can do is read the values back to | 
 |         // confirm expectations and report whether configured or not. | 
 |         auto configured = llkWriteStringToFileConfirm(std::to_string(khtTimeout.count()), | 
 |                                                       "/proc/sys/kernel/hung_task_timeout_secs"); | 
 |         if (configured) { | 
 |             llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_warnings"); | 
 |             llkWriteStringToFile("65535", "/proc/sys/kernel/hung_task_check_count"); | 
 |             configured = llkWriteStringToFileConfirm("1", "/proc/sys/kernel/hung_task_panic"); | 
 |         } | 
 |         if (configured) { | 
 |             LOG(INFO) << "[khungtaskd] configured"; | 
 |         } else { | 
 |             LOG(WARNING) << "[khungtaskd] not configurable"; | 
 |         } | 
 |     } | 
 |  | 
 |     // Cleared when the spawned thread is seen running, as llkThread() then | 
 |     // logs the configuration itself. | 
 |     bool logConfig = true; | 
 |     if (llkEnable) { | 
 |         if (llkMlockall && | 
 |             // MCL_ONFAULT pins pages as they fault instead of loading | 
 |             // everything immediately all at once. (Which would be bad, | 
 |             // because as of this writing, we have a lot of mapped pages we | 
 |             // never use.) Old kernels will see MCL_ONFAULT and fail with | 
 |             // EINVAL; we ignore this failure. | 
 |             // | 
 |             // N.B. read the man page for mlockall. MCL_CURRENT | MCL_ONFAULT | 
 |             // pins ⊆ MCL_CURRENT, converging to just MCL_CURRENT as we fault | 
 |             // in pages. | 
 |  | 
 |             // CAP_IPC_LOCK required | 
 |             mlockall(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT) && (errno != EINVAL)) { | 
 |             PLOG(WARNING) << "mlockall failed "; | 
 |         } | 
 |  | 
 |         if (threadname) { | 
 |             pthread_attr_t attr; | 
 |  | 
 |             if (!pthread_attr_init(&attr)) { | 
 |                 sched_param param; | 
 |  | 
 |                 // All zero scheduling parameters, SCHED_BATCH policy. | 
 |                 memset(&param, 0, sizeof(param)); | 
 |                 pthread_attr_setschedparam(&attr, &param); | 
 |                 pthread_attr_setschedpolicy(&attr, SCHED_BATCH); | 
 |                 if (!pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED)) { | 
 |                     pthread_t thread; | 
 |                     if (!pthread_create(&thread, &attr, llkThread, const_cast<char*>(threadname))) { | 
 |                         // wait a second for thread to start | 
 |                         for (auto retry = 50; retry && !llkRunning; --retry) { | 
 |                             ::usleep(20000); | 
 |                         } | 
 |                         logConfig = !llkRunning;  // printed in llkd context? | 
 |                     } else { | 
 |                         LOG(ERROR) << "failed to spawn llkd thread"; | 
 |                     } | 
 |                 } else { | 
 |                     LOG(ERROR) << "failed to detach llkd thread"; | 
 |                 } | 
 |                 pthread_attr_destroy(&attr); | 
 |             } else { | 
 |                 LOG(ERROR) << "failed to allocate attibutes for llkd thread"; | 
 |             } | 
 |         } | 
 |     } else { | 
 |         LOG(DEBUG) << "[khungtaskd] left unconfigured"; | 
 |     } | 
 |     if (logConfig) { | 
 |         llkLogConfig(); | 
 |     } | 
 |  | 
 |     return llkEnable; | 
 | } | 