llkd: handle 'adbd shell setsid' to preserve adbd
A zombie setsid process occurs when adb shell setsid <command> is
issued, however llkd can only detect if it is a result of a kernel
livelock by killing the associated parent, which would be adbd;
resulting in the adb connection(s) being terminated. Will special
case this condition in order to preserve adbd for debugging purposes.
We parse <parent>&<child> in ro.llk.blacklist.parent as this
association, thus adbd&[setsid] covers this special case.
Ampersand was selected because it is never part of a process name,
however a setprop in the shell requires it to be escaped or quoted;
init rc file where this is normally specified does not have issue.
getComm() is effectively pure, so hold on to the return value for
sake of efficiency.
This also reverts commit 599958d1148c1137bee925f4cf08eda7f8050d98
which granted adbd blanket parent immunity from monitoring on
userdebug builds. The new logic is a more refined means of
preserving the live lock checking associated with adbd and allows
the operation to be performed on user builds.
POC: date ; adb shell setsid sleep 900 ; date
Positive for bug, reports less than 15 minutes, otherwise solved.
Test: llkd_unit_test
Bug: 120983740
Change-Id: I6442463a48499d925a3a074423a24a1622905559
diff --git a/llkd/libllkd.cpp b/llkd/libllkd.cpp
index 3a593ec..3c295b5 100644
--- a/llkd/libllkd.cpp
+++ b/llkd/libllkd.cpp
@@ -108,6 +108,9 @@
// list of parent pids, comm or cmdline names to skip. default:
// kernel pid (0), [kthreadd] (2), or ourselves, enforced and implied
std::unordered_set<std::string> llkBlacklistParent;
+// list of parent and target processes to skip. default:
+// adbd *and* [setsid]
+std::unordered_map<std::string, std::unordered_set<std::string>> llkBlacklistParentAndChild;
// list of uids, and uid names, to skip, default nothing
std::unordered_set<std::string> llkBlacklistUid;
#ifdef __PTRACE_ENABLED__
@@ -624,6 +627,19 @@
return ret;
}
+std::string llkFormat(
+ const std::unordered_map<std::string, std::unordered_set<std::string>>& blacklist,
+ bool leading_comma = false) {
+ std::string ret;
+ for (const auto& entry : blacklist) {
+ for (const auto& target : entry.second) {
+ if (leading_comma || !ret.empty()) ret += ",";
+ ret += entry.first + "&" + target;
+ }
+ }
+ return ret;
+}
+
// This function parses the properties as a list, incorporating the supplied
// default. A leading comma separator means preserve the defaults and add
// entries (with an optional leading + sign), or removes entries with a leading
@@ -691,6 +707,27 @@
return false;
}
+const std::unordered_set<std::string>& llkSkipName(
+ const std::string& name,
+ const std::unordered_map<std::string, std::unordered_set<std::string>>& blacklist) {
+ static const std::unordered_set<std::string> empty;
+ if (name.empty() || blacklist.empty()) return empty;
+ auto found = blacklist.find(name);
+ if (found == blacklist.end()) return empty;
+ return found->second;
+}
+
+bool llkSkipPproc(proc* pprocp, proc* procp,
+ const std::unordered_map<std::string, std::unordered_set<std::string>>&
+ blacklist = llkBlacklistParentAndChild) {
+ if (!pprocp || !procp || blacklist.empty()) return false;
+ if (llkSkipProc(procp, llkSkipName(std::to_string(pprocp->pid), blacklist))) return true;
+ if (llkSkipProc(procp, llkSkipName(pprocp->getComm(), blacklist))) return true;
+ if (llkSkipProc(procp, llkSkipName(pprocp->getCmdline(), blacklist))) return true;
+ return llkSkipProc(procp,
+ llkSkipName(android::base::Basename(pprocp->getCmdline()), blacklist));
+}
+
bool llkSkipPid(pid_t pid) {
return llkSkipName(std::to_string(pid), llkBlacklistProcess);
}
@@ -875,7 +912,8 @@
<< LLK_BLACKLIST_STACK_PROPERTY "=" << llkFormat(llkBlacklistStack) << "\n"
#endif
<< LLK_BLACKLIST_PROCESS_PROPERTY "=" << llkFormat(llkBlacklistProcess) << "\n"
- << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent) << "\n"
+ << LLK_BLACKLIST_PARENT_PROPERTY "=" << llkFormat(llkBlacklistParent)
+ << llkFormat(llkBlacklistParentAndChild, true) << "\n"
<< LLK_BLACKLIST_UID_PROPERTY "=" << llkFormat(llkBlacklistUid);
}
@@ -1050,7 +1088,8 @@
break;
}
- if (llkSkipName(procp->getComm())) {
+ auto process_comm = procp->getComm();
+ if (llkSkipName(process_comm)) {
continue;
}
if (llkSkipName(procp->getCmdline())) {
@@ -1065,6 +1104,7 @@
pprocp = llkTidAlloc(ppid, ppid, 0, "", 0, '?');
}
if (pprocp) {
+ if (llkSkipPproc(pprocp, procp)) break;
if (llkSkipProc(pprocp, llkBlacklistParent)) break;
} else {
if (llkSkipName(std::to_string(ppid), llkBlacklistParent)) break;
@@ -1084,7 +1124,7 @@
stuck = true;
} else if (procp->count != 0ms) {
LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
- << pid << "->" << tid << ' ' << procp->getComm();
+ << pid << "->" << tid << ' ' << process_comm;
}
}
if (!stuck) continue;
@@ -1092,7 +1132,7 @@
if (procp->count >= llkStateTimeoutMs[(state == 'Z') ? llkStateZ : llkStateD]) {
if (procp->count != 0ms) {
LOG(VERBOSE) << state << ' ' << llkFormat(procp->count) << ' ' << ppid << "->"
- << pid << "->" << tid << ' ' << procp->getComm();
+ << pid << "->" << tid << ' ' << process_comm;
}
continue;
}
@@ -1120,7 +1160,7 @@
break;
}
LOG(WARNING) << "Z " << llkFormat(procp->count) << ' ' << ppid << "->"
- << pid << "->" << tid << ' ' << procp->getComm() << " [kill]";
+ << pid << "->" << tid << ' ' << process_comm << " [kill]";
if ((llkKillOneProcess(pprocp, procp) >= 0) ||
(llkKillOneProcess(ppid, procp) >= 0)) {
continue;
@@ -1137,7 +1177,7 @@
// kernel (worse).
default:
LOG(WARNING) << state << ' ' << llkFormat(procp->count) << ' ' << pid
- << "->" << tid << ' ' << procp->getComm() << " [kill]";
+ << "->" << tid << ' ' << process_comm << " [kill]";
if ((llkKillOneProcess(llkTidLookup(pid), procp) >= 0) ||
(llkKillOneProcess(pid, state, tid) >= 0) ||
(llkKillOneProcess(procp, procp) >= 0) ||
@@ -1150,7 +1190,7 @@
// We are here because we have confirmed kernel live-lock
const auto message = state + " "s + llkFormat(procp->count) + " " +
std::to_string(ppid) + "->" + std::to_string(pid) + "->" +
- std::to_string(tid) + " " + procp->getComm() + " [panic]";
+ std::to_string(tid) + " " + process_comm + " [panic]";
llkPanicKernel(dump, tid,
(state == 'Z') ? "zombie" : (state == 'D') ? "driver" : "sleeping",
message);
@@ -1274,6 +1314,26 @@
llkBlacklistParent = llkSplit(LLK_BLACKLIST_PARENT_PROPERTY,
std::to_string(kernelPid) + "," + std::to_string(kthreaddPid) +
"," LLK_BLACKLIST_PARENT_DEFAULT);
+ // derive llkBlacklistParentAndChild by moving entries with '&' from above
+ for (auto it = llkBlacklistParent.begin(); it != llkBlacklistParent.end();) {
+ auto pos = it->find('&');
+ if (pos == std::string::npos) {
+ ++it;
+ continue;
+ }
+ auto parent = it->substr(0, pos);
+ auto child = it->substr(pos + 1);
+ it = llkBlacklistParent.erase(it);
+
+ auto found = llkBlacklistParentAndChild.find(parent);
+ if (found == llkBlacklistParentAndChild.end()) {
+ llkBlacklistParentAndChild.emplace(std::make_pair(
+ std::move(parent), std::unordered_set<std::string>({std::move(child)})));
+ } else {
+ found->second.emplace(std::move(child));
+ }
+ }
+
llkBlacklistUid = llkSplit(LLK_BLACKLIST_UID_PROPERTY, LLK_BLACKLIST_UID_DEFAULT);
// internal watchdog