llkd: Add stack symbol checking
Feature outlined here is only available on userdebug or eng builds.
Blocked for security reasons because requires ptrace capabilities.
Add ro.llk.stack to list a set of symbols that should rarely happen
but if persistent in multiple checks, indicates a live lock condition.
At ro.llk.stack.timeout_ms the process is sent a kill, if it remains,
then panic the kernel.
There is no ABA detection in the paths, the condition for the
stack symbol being present instantaneously must be its rarity of
being caught. If a livelock occurs in the path of the symbol, then
it is possible more than one path could be stuck in the state, but
the best candidate symbols are found underneath a lock resulting in
only one process being the culprit, and the best aim. There may be
processes that induce a look of persistence, if so the symbol is not
a candidate for checking.
Add ro.llk.blacklist.process.stack to list process names we want
to skip checking. This configuration parameter is also used to
prevent sepolicy noise when trying to acquire stacks from non
ptrace'able services.
Test: gTest llkd_unit_tests
Bug: 33808187
Bug: 111910505
Bug: 80502612
Change-Id: Ie71221e371b189bbdda2a1155d47826997842dcc
diff --git a/llkd/libllkd.cpp b/llkd/libllkd.cpp
index bb55d1f..58c2ba8 100644
--- a/llkd/libllkd.cpp
+++ b/llkd/libllkd.cpp
@@ -73,7 +73,14 @@
bool llkMlockall = LLK_MLOCKALL_DEFAULT; // run mlocked
bool llkTestWithKill = LLK_KILLTEST_DEFAULT; // issue test kills
milliseconds llkTimeoutMs = LLK_TIMEOUT_MS_DEFAULT; // default timeout
-enum { llkStateD, llkStateZ, llkNumStates }; // state indexes
+enum { // enum of state indexes
+ llkStateD, // Persistent 'D' state
+ llkStateZ, // Persistent 'Z' state
+#ifdef __PTRACE_ENABLED__ // Extra privileged states
+ llkStateStack, // stack signature
+#endif // End of extra privilege
+ llkNumStates, // Maxumum number of states
+}; // state indexes
milliseconds llkStateTimeoutMs[llkNumStates]; // timeout override for each detection state
milliseconds llkCheckMs; // checking interval to inspect any
// persistent live-locked states
@@ -83,6 +90,10 @@
// Provides a wide angle of margin b/c khtTimeout is also its granularity.
seconds khtTimeout = duration_cast<seconds>(llkTimeoutMs * (1 + LLK_CHECKS_PER_TIMEOUT_DEFAULT) /
LLK_CHECKS_PER_TIMEOUT_DEFAULT);
+#ifdef __PTRACE_ENABLED__
+// list of stack symbols to search for persistence.
+std::unordered_set<std::string> llkCheckStackSymbols;
+#endif
// Blacklist variables, initialized with comma separated lists of high false
// positive and/or dangerous references, e.g. without self restart, for pid,
@@ -97,6 +108,11 @@
std::unordered_set<std::string> llkBlacklistParent;
// list of uids, and uid names, to skip, default nothing
std::unordered_set<std::string> llkBlacklistUid;
+#ifdef __PTRACE_ENABLED__
+// list of names to skip stack checking. "init", "lmkd", "llkd", "keystore" or
+// "logd" (if not userdebug).
+std::unordered_set<std::string> llkBlacklistStack;
+#endif
class dir {
public:
@@ -263,6 +279,9 @@
// forward scheduling progress.
milliseconds update; // llkUpdate millisecond signature of last.
milliseconds count; // duration in state.
+#ifdef __PTRACE_ENABLED__ // Privileged state checking
+ milliseconds count_stack; // duration where stack is stagnant.
+#endif // End privilege
pid_t pid; // /proc/<pid> before iterating through
// /proc/<pid>/task/<tid> for threads.
pid_t ppid; // /proc/<tid>/stat field 4 parent pid.
@@ -272,6 +291,9 @@
std::string cmdline; // cached /cmdline content
char state; // /proc/<tid>/stat field 3: Z or D
// (others we do not monitor: S, R, T or ?)
+#ifdef __PTRACE_ENABLED__ // Privileged state checking
+ char stack; // index in llkCheckStackSymbols for matches
+#endif // and with maximum index PROP_VALUE_MAX/2.
char comm[TASK_COMM_LEN + 3]; // space for adding '[' and ']'
bool exeMissingValid; // exeMissing has been cached
bool cmdlineValid; // cmdline has been cached
@@ -286,11 +308,17 @@
nrSwitches(0),
update(llkUpdate),
count(0ms),
+#ifdef __PTRACE_ENABLED__
+ count_stack(0ms),
+#endif
pid(pid),
ppid(ppid),
uid(-1),
time(time),
state(state),
+#ifdef __PTRACE_ENABLED__
+ stack(-1),
+#endif
exeMissingValid(false),
cmdlineValid(false),
updated(true),
@@ -343,6 +371,10 @@
void reset(void) { // reset cache, if we detected pid rollover
uid = -1;
state = '?';
+#ifdef __PTRACE_ENABLED__
+ count_stack = 0ms;
+ stack = -1;
+#endif
cmdline = "";
comm[0] = '\0';
exeMissingValid = false;
@@ -667,6 +699,48 @@
return ret;
}
+#ifdef __PTRACE_ENABLED__
+bool llkCheckStack(proc* procp, const std::string& piddir) {
+ if (llkCheckStackSymbols.empty()) return false;
+ if (procp->state == 'Z') { // No brains for Zombies
+ procp->stack = -1;
+ procp->count_stack = 0ms;
+ return false;
+ }
+
+ // Don't check process that are known to block ptrace, save sepolicy noise.
+ if (llkSkipName(std::to_string(procp->pid), llkBlacklistStack)) return false;
+ if (llkSkipName(procp->getComm(), llkBlacklistStack)) return false;
+ if (llkSkipName(procp->getCmdline(), llkBlacklistStack)) return false;
+
+ auto kernel_stack = ReadFile(piddir + "/stack");
+ if (kernel_stack.empty()) {
+ LOG(INFO) << piddir << "/stack empty comm=" << procp->getComm()
+ << " cmdline=" << procp->getCmdline();
+ return false;
+ }
+ // A scheduling incident that should not reset count_stack
+ if (kernel_stack.find(" cpu_worker_pools+0x") != std::string::npos) return false;
+ char idx = -1;
+ char match = -1;
+ for (const auto& stack : llkCheckStackSymbols) {
+ if (++idx < 0) break;
+ if (kernel_stack.find(" "s + stack + "+0x") != std::string::npos) {
+ match = idx;
+ break;
+ }
+ }
+ if (procp->stack != match) {
+ procp->stack = match;
+ procp->count_stack = 0ms;
+ return false;
+ }
+ if (match == char(-1)) return false;
+ procp->count_stack += llkCycle;
+ return procp->count_stack >= llkStateTimeoutMs[llkStateStack];
+}
+#endif
+
// Primary ABA mitigation watching last time schedule activity happened
void llkCheckSchedUpdate(proc* procp, const std::string& piddir) {
// Audit finds /proc/<tid>/sched is just over 1K, and
@@ -731,7 +805,15 @@
<< LLK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkTimeoutMs) << "\n"
<< LLK_D_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateD]) << "\n"
<< LLK_Z_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateZ]) << "\n"
+#ifdef __PTRACE_ENABLED__
+ << LLK_STACK_TIMEOUT_MS_PROPERTY "=" << llkFormat(llkStateTimeoutMs[llkStateStack])
+ << "\n"
+#endif
<< LLK_CHECK_MS_PROPERTY "=" << llkFormat(llkCheckMs) << "\n"
+#ifdef __PTRACE_ENABLED__
+ << LLK_CHECK_STACK_PROPERTY "=" << llkFormat(llkCheckStackSymbols) << "\n"
+ << LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
+#endif
<< LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
<< LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
<< LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
@@ -892,9 +974,14 @@
if (pid == myPid) {
break;
}
- if (!llkIsMonitorState(state)) {
+#ifdef __PTRACE_ENABLED__
+ // if no stack monitoring, we can quickly exit here
+ if (!llkIsMonitorState(state) && llkCheckStackSymbols.empty()) {
continue;
}
+#else
+ if (!llkIsMonitorState(state)) continue;
+#endif
if ((tid == myTid) || llkSkipPid(tid)) {
continue;
}
@@ -925,12 +1012,26 @@
// ABA mitigation watching last time schedule activity happened
llkCheckSchedUpdate(procp, piddir);
- // Can only fall through to here if registered D or Z state !!!
- if (procp->count < llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
- LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
- << pid << "->" << tid << ' ' << procp->getComm();
+#ifdef __PTRACE_ENABLED__
+ auto stuck = llkCheckStack(procp, piddir);
+ if (llkIsMonitorState(state)) {
+ if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
+ stuck = true;
+ } else if (procp->count != 0ms) {
+ LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
+ << pid << "->" << tid << ' ' << procp->getComm();
+ }
+ }
+ if (!stuck) continue;
+#else
+ if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
+ if (procp->count != 0ms) {
+ LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
+ << pid << "->" << tid << ' ' << procp->getComm();
+ }
continue;
}
+#endif
// We have to kill it to determine difference between live lock
// and persistent state blocked on a resource. Is there something
@@ -969,12 +1070,13 @@
// not working is we kill a process that likes to
// stay in 'D' state, instead of panicing the
// kernel (worse).
- LOG(WARNING) << "D " << llkFormat(procp->count) << ' ' << pid << "->" << tid
- << ' ' << procp->getComm() << " [kill]";
+ default:
+ LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
+ << "->" << tid << ' ' << procp->getComm() << " [kill]";
if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
- (llkKillOneProcess(pid, 'D', tid) >= 0) ||
+ (llkKillOneProcess(pid, state, tid) >= 0) ||
(llkKillOneProcess(procp, procp) >= 0) ||
- (llkKillOneProcess(tid, 'D', tid) >= 0)) {
+ (llkKillOneProcess(tid, state, tid) >= 0)) {
continue;
}
break;
@@ -983,7 +1085,8 @@
// We are here because we have confirmed kernel live-lock
LOG(ERROR) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->" << pid
<< "->" << tid << ' ' << procp->getComm() << " [panic]";
- llkPanicKernel(true, tid, (state == 'Z') ? "zombie" : "driver");
+ llkPanicKernel(true, tid,
+ (state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping");
}
LOG(VERBOSE) << "+closedir()";
}
@@ -1041,8 +1144,9 @@
}
bool llkInit(const char* threadname) {
+ auto debuggable = android::base::GetBoolProperty("ro.debuggable", false);
llkLowRam = android::base::GetBoolProperty("ro.config.low_ram", false);
- if (!LLK_ENABLE_DEFAULT && android::base::GetBoolProperty("ro.debuggable", false)) {
+ if (!LLK_ENABLE_DEFAULT && debuggable) {
llkEnable = android::base::GetProperty(LLK_ENABLE_PROPERTY, "eng") == "eng";
khtEnable = android::base::GetProperty(KHT_ENABLE_PROPERTY, "eng") == "eng";
}
@@ -1069,8 +1173,21 @@
llkValidate(); // validate llkTimeoutMs, llkCheckMs and llkCycle
llkStateTimeoutMs[llkStateD] = GetUintProperty(LLK_D_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
llkStateTimeoutMs[llkStateZ] = GetUintProperty(LLK_Z_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
+#ifdef __PTRACE_ENABLED__
+ llkStateTimeoutMs[llkStateStack] = GetUintProperty(LLK_STACK_TIMEOUT_MS_PROPERTY, llkTimeoutMs);
+#endif
llkCheckMs = GetUintProperty(LLK_CHECK_MS_PROPERTY, llkCheckMs);
llkValidate(); // validate all (effectively minus llkTimeoutMs)
+#ifdef __PTRACE_ENABLED__
+ if (debuggable) {
+ llkCheckStackSymbols = llkSplit(
+ android::base::GetProperty(LLK_CHECK_STACK_PROPERTY, LLK_CHECK_STACK_DEFAULT));
+ }
+ std::string defaultBlacklistStack(LLK_BLACKLIST_STACK_DEFAULT);
+ if (!debuggable) defaultBlacklistStack += ",logd,/system/bin/logd";
+ llkBlacklistStack = llkSplit(
+ android::base::GetProperty(LLK_BLACKLIST_STACK_PROPERTY, defaultBlacklistStack));
+#endif
std::string defaultBlacklistProcess(
std::to_string(kernelPid) + "," + std::to_string(initPid) + "," +
std::to_string(kthreaddPid) + "," + std::to_string(::getpid()) + "," +