Reapply "libprocessgroup: Poll on cgroup.events"
This reverts commit a72332f9539f73043fe512346535cf595890d5a8.
This change was originally reverted due to a bug in the child commit
which caused process group kills not to occur when using cgroup.kill.
Now that has been fixed, bring back this change with the fixed child.
Bug: 301871933
Change-Id: Ia6c74d9b67a8c88aec4812ac4655646934e0d189
diff --git a/libprocessgroup/processgroup.cpp b/libprocessgroup/processgroup.cpp
index f594f7f..7e27d75 100644
--- a/libprocessgroup/processgroup.cpp
+++ b/libprocessgroup/processgroup.cpp
@@ -22,6 +22,7 @@
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
+#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
@@ -30,6 +31,7 @@
#include <unistd.h>
#include <chrono>
+#include <cstring>
#include <map>
#include <memory>
#include <mutex>
@@ -53,7 +55,8 @@
using namespace std::chrono_literals;
-#define PROCESSGROUP_CGROUP_PROCS_FILE "/cgroup.procs"
+#define PROCESSGROUP_CGROUP_PROCS_FILE "cgroup.procs"
+#define PROCESSGROUP_CGROUP_EVENTS_FILE "cgroup.events"
bool CgroupsAvailable() {
static bool cgroups_available = access("/proc/cgroups", F_OK) == 0;
@@ -213,30 +216,21 @@
return StringPrintf("%s/uid_%u/pid_%d", cgroup, uid, pid);
}
-static int RemoveCgroup(const char* cgroup, uid_t uid, int pid, unsigned int retries) {
- int ret = 0;
- auto uid_pid_path = ConvertUidPidToPath(cgroup, uid, pid);
-
- while (retries--) {
- ret = rmdir(uid_pid_path.c_str());
- // If we get an error 2 'No such file or directory' , that means the
- // cgroup is already removed, treat it as success and return 0 for
- // idempotency.
- if (ret < 0 && errno == ENOENT) {
- ret = 0;
- }
- if (!ret || errno != EBUSY || !retries) break;
- std::this_thread::sleep_for(5ms);
- }
+static int RemoveCgroup(const char* cgroup, uid_t uid, int pid) {
+ auto path = ConvertUidPidToPath(cgroup, uid, pid);
+ int ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
if (!ret && uid >= AID_ISOLATED_START && uid <= AID_ISOLATED_END) {
// Isolated UIDs are unlikely to be reused soon after removal,
// so free up the kernel resources for the UID level cgroup.
- const auto uid_path = ConvertUidToPath(cgroup, uid);
- ret = rmdir(uid_path.c_str());
- if (ret < 0 && errno == ENOENT) {
- ret = 0;
- }
+ path = ConvertUidToPath(cgroup, uid);
+ ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
+ }
+
+ if (ret < 0 && errno == ENOENT) {
+ // This function is idempoetent, but still warn here.
+ LOG(WARNING) << "RemoveCgroup: " << path << " does not exist.";
+ ret = 0;
}
return ret;
@@ -360,38 +354,33 @@
return false;
}
-// Returns number of processes killed on success
-// Returns 0 if there are no processes in the process cgroup left to kill
-// Returns -1 on error
-static int DoKillProcessGroupOnce(const char* cgroup, uid_t uid, int initialPid, int signal) {
- // We separate all of the pids in the cgroup into those pids that are also the leaders of
- // process groups (stored in the pgids set) and those that are not (stored in the pids set).
- std::set<pid_t> pgids;
- pgids.emplace(initialPid);
- std::set<pid_t> pids;
- int processes = 0;
-
- std::unique_ptr<FILE, decltype(&fclose)> fd(nullptr, fclose);
+bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
+ std::set<pid_t> pgids, pids;
if (CgroupsAvailable()) {
- auto path = ConvertUidPidToPath(cgroup, uid, initialPid) + PROCESSGROUP_CGROUP_PROCS_FILE;
- fd.reset(fopen(path.c_str(), "re"));
- if (!fd) {
- if (errno == ENOENT) {
- // This happens when the process is already dead or if, as the result of a bug, it
- // has been migrated to another cgroup. An example of a bug that can cause migration
- // to another cgroup is using the JoinCgroup action with a cgroup controller that
- // has been activated in the v2 cgroup hierarchy.
- goto kill;
- }
- PLOG(WARNING) << __func__ << " failed to open process cgroup uid " << uid << " pid "
- << initialPid;
- return -1;
+ std::string hierarchy_root_path, cgroup_v2_path;
+ CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
+ cgroup_v2_path = ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);
+
+ LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_PROCS_FILE << " to signal (" << signal
+ << ") " << cgroup_v2_path;
+
+ // We separate all of the pids in the cgroup into those pids that are also the leaders of
+ // process groups (stored in the pgids set) and those that are not (stored in the pids set).
+ const auto procsfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
+ std::unique_ptr<FILE, decltype(&fclose)> fp(fopen(procsfilepath.c_str(), "re"), fclose);
+ if (!fp) {
+ // This should only happen if the cgroup has already been removed with a successful call
+ // to killProcessGroup. Callers should only retry sendSignalToProcessGroup or
+ // killProcessGroup calls if they fail without ENOENT.
+ PLOG(ERROR) << "Failed to open " << procsfilepath;
+ kill(-initialPid, signal);
+ return false;
}
+
pid_t pid;
bool file_is_empty = true;
- while (fscanf(fd.get(), "%d\n", &pid) == 1 && pid >= 0) {
- processes++;
+ while (fscanf(fp.get(), "%d\n", &pid) == 1 && pid >= 0) {
file_is_empty = false;
if (pid == 0) {
// Should never happen... but if it does, trying to kill this
@@ -421,7 +410,8 @@
}
}
-kill:
+ pgids.emplace(initialPid);
+
// Kill all process groups.
for (const auto pgid : pgids) {
LOG(VERBOSE) << "Killing process group " << -pgid << " in uid " << uid
@@ -442,101 +432,174 @@
}
}
- return (!fd || feof(fd.get())) ? processes : -1;
+ return true;
}
-static int KillProcessGroup(uid_t uid, int initialPid, int signal, int retries) {
+template <typename T>
+static std::chrono::milliseconds toMillisec(T&& duration) {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(duration);
+}
+
+enum class populated_status
+{
+ populated,
+ not_populated,
+ error
+};
+
+static populated_status cgroupIsPopulated(int events_fd) {
+ const std::string POPULATED_KEY("populated ");
+ const std::string::size_type MAX_EVENTS_FILE_SIZE = 32;
+
+ std::string buf;
+ buf.resize(MAX_EVENTS_FILE_SIZE);
+ ssize_t len = TEMP_FAILURE_RETRY(pread(events_fd, buf.data(), buf.size(), 0));
+ if (len == -1) {
+ PLOG(ERROR) << "Could not read cgroup.events: ";
+ // Potentially ENODEV if the cgroup has been removed since we opened this file, but that
+ // shouldn't have happened yet.
+ return populated_status::error;
+ }
+
+ if (len == 0) {
+ LOG(ERROR) << "cgroup.events EOF";
+ return populated_status::error;
+ }
+
+ buf.resize(len);
+
+ const std::string::size_type pos = buf.find(POPULATED_KEY);
+ if (pos == std::string::npos) {
+ LOG(ERROR) << "Could not find populated key in cgroup.events";
+ return populated_status::error;
+ }
+
+ if (pos + POPULATED_KEY.size() + 1 > len) {
+ LOG(ERROR) << "Partial read of cgroup.events";
+ return populated_status::error;
+ }
+
+ return buf[pos + POPULATED_KEY.size()] == '1' ?
+ populated_status::populated : populated_status::not_populated;
+}
+
+// The default timeout of 2200ms comes from the default number of retries in a previous
+// implementation of this function. The default retry value was 40 for killing and 400 for cgroup
+// removal with 5ms sleeps between each retry.
+static int KillProcessGroup(
+ uid_t uid, int initialPid, int signal, bool once = false,
+ std::chrono::steady_clock::time_point until = std::chrono::steady_clock::now() + 2200ms) {
CHECK_GE(uid, 0);
CHECK_GT(initialPid, 0);
+ // Always attempt to send a kill signal to at least the initialPid, at least once, regardless of
+ // whether its cgroup exists or not. This should only be necessary if a bug results in the
+ // migration of the targeted process out of its cgroup, which we will also attempt to kill.
+ const bool signal_ret = sendSignalToProcessGroup(uid, initialPid, signal);
+
+ if (!CgroupsAvailable() || !signal_ret) return signal_ret ? 0 : -1;
+
std::string hierarchy_root_path;
- if (CgroupsAvailable()) {
- CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
- }
- const char* cgroup = hierarchy_root_path.c_str();
+ CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
- std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
+ const std::string cgroup_v2_path =
+ ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);
- int retry = retries;
- int processes;
- while ((processes = DoKillProcessGroupOnce(cgroup, uid, initialPid, signal)) > 0) {
- LOG(VERBOSE) << "Killed " << processes << " processes for processgroup " << initialPid;
- if (!CgroupsAvailable()) {
- // makes no sense to retry, because there are no cgroup_procs file
- processes = 0; // no remaining processes
- break;
- }
- if (retry > 0) {
- std::this_thread::sleep_for(5ms);
- --retry;
- } else {
- break;
- }
- }
-
- if (processes < 0) {
- PLOG(ERROR) << "Error encountered killing process cgroup uid " << uid << " pid "
- << initialPid;
+ const std::string eventsfile = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_EVENTS_FILE;
+ android::base::unique_fd events_fd(open(eventsfile.c_str(), O_RDONLY));
+ if (events_fd.get() == -1) {
+ PLOG(WARNING) << "Error opening " << eventsfile << " for KillProcessGroup";
return -1;
}
- std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
- auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
+ struct pollfd fds = {
+ .fd = events_fd,
+ .events = POLLPRI,
+ };
- // We only calculate the number of 'processes' when killing the processes.
- // In the retries == 0 case, we only kill the processes once and therefore
- // will not have waited then recalculated how many processes are remaining
- // after the first signals have been sent.
- // Logging anything regarding the number of 'processes' here does not make sense.
+ const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
- if (processes == 0) {
- if (retries > 0) {
- LOG(INFO) << "Successfully killed process cgroup uid " << uid << " pid " << initialPid
- << " in " << static_cast<int>(ms) << "ms";
+ // The primary reason to loop here is to capture any new forks or migrations that could occur
+ // after we send signals to the original set of processes, but before all of those processes
+ // exit and the cgroup becomes unpopulated, or before we remove the cgroup. We try hard to
+ // ensure this completes successfully to avoid permanent memory leaks, but we still place a
+ // large default upper bound on the amount of time we spend in this loop. The amount of CPU
+ // contention, and the amount of work that needs to be done in do_exit for each process
+ // determines how long this will take.
+ int ret;
+ do {
+ populated_status populated;
+ while ((populated = cgroupIsPopulated(events_fd.get())) == populated_status::populated &&
+ std::chrono::steady_clock::now() < until) {
+
+ sendSignalToProcessGroup(uid, initialPid, signal);
+ if (once) {
+ populated = cgroupIsPopulated(events_fd.get());
+ break;
+ }
+
+ const std::chrono::steady_clock::time_point poll_start =
+ std::chrono::steady_clock::now();
+
+ if (poll_start < until)
+ ret = TEMP_FAILURE_RETRY(poll(&fds, 1, toMillisec(until - poll_start).count()));
+
+ if (ret == -1) {
+ // Fallback to 5ms sleeps if poll fails
+ PLOG(ERROR) << "Poll on " << eventsfile << "failed";
+ const std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
+ if (now < until)
+ std::this_thread::sleep_for(std::min(5ms, toMillisec(until - now)));
+ }
+
+ LOG(VERBOSE) << "Waited "
+ << toMillisec(std::chrono::steady_clock::now() - poll_start).count()
+ << " ms for " << eventsfile << " poll";
}
- if (!CgroupsAvailable()) {
- // nothing to do here, if cgroups isn't available
- return 0;
+ const std::chrono::milliseconds kill_duration =
+ toMillisec(std::chrono::steady_clock::now() - start);
+
+ if (populated == populated_status::populated) {
+ LOG(WARNING) << "Still waiting on process(es) to exit for cgroup " << cgroup_v2_path
+ << " after " << kill_duration.count() << " ms";
+ // We'll still try the cgroup removal below which we expect to log an error.
+ } else if (populated == populated_status::not_populated) {
+ LOG(VERBOSE) << "Killed all processes under cgroup " << cgroup_v2_path
+ << " after " << kill_duration.count() << " ms";
}
- // 400 retries correspond to 2 secs max timeout
- int err = RemoveCgroup(cgroup, uid, initialPid, 400);
+ ret = RemoveCgroup(hierarchy_root_path.c_str(), uid, initialPid);
+ if (ret)
+ PLOG(ERROR) << "Unable to remove cgroup " << cgroup_v2_path;
+ else
+ LOG(INFO) << "Removed cgroup " << cgroup_v2_path;
if (isMemoryCgroupSupported() && UsePerAppMemcg()) {
+ // This per-application memcg v1 case should eventually be removed after migration to
+ // memcg v2.
std::string memcg_apps_path;
if (CgroupGetMemcgAppsPath(&memcg_apps_path) &&
- RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid, 400) < 0) {
- return -1;
+ (ret = RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid)) < 0) {
+ const auto memcg_v1_cgroup_path =
+ ConvertUidPidToPath(memcg_apps_path.c_str(), uid, initialPid);
+ PLOG(ERROR) << "Unable to remove memcg v1 cgroup " << memcg_v1_cgroup_path;
}
}
- return err;
- } else {
- if (retries > 0) {
- LOG(ERROR) << "Failed to kill process cgroup uid " << uid << " pid " << initialPid
- << " in " << static_cast<int>(ms) << "ms, " << processes
- << " processes remain";
- }
- return -1;
- }
+ if (once) break;
+ if (std::chrono::steady_clock::now() >= until) break;
+ } while (ret && errno == EBUSY);
+
+ return ret;
}
int killProcessGroup(uid_t uid, int initialPid, int signal) {
- return KillProcessGroup(uid, initialPid, signal, 40 /*retries*/);
+ return KillProcessGroup(uid, initialPid, signal);
}
int killProcessGroupOnce(uid_t uid, int initialPid, int signal) {
- return KillProcessGroup(uid, initialPid, signal, 0 /*retries*/);
-}
-
-int sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
- std::string hierarchy_root_path;
- if (CgroupsAvailable()) {
- CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
- }
- const char* cgroup = hierarchy_root_path.c_str();
- return DoKillProcessGroupOnce(cgroup, uid, initialPid, signal);
+ return KillProcessGroup(uid, initialPid, signal, true);
}
static int createProcessGroupInternal(uid_t uid, int initialPid, std::string cgroup,
@@ -576,7 +639,7 @@
return -errno;
}
- auto uid_pid_procs_file = uid_pid_path + PROCESSGROUP_CGROUP_PROCS_FILE;
+ auto uid_pid_procs_file = uid_pid_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
if (!WriteStringToFile(std::to_string(initialPid), uid_pid_procs_file)) {
ret = -errno;