libprocessgroup: Prevent error spam when tests disable all cpus in a cpuset
UserLifecycleTests test disables all Little cores in the course of the
test, which causes attempts to add a process into /dev/cpuset/restricted
cpuset cgroup to fail with ENOSPC error code, indicating that a process
is joining a cpuset cgroup with no online cpus. Current libprocessgroup
implementation will log an error on each such occurrence, which spams
the logs and makes it hard to analyze test results. Because this
situation does not happen in production environment (we do not offline
cpus), we can prevent flooding the logs by identifying this case,
logging an appropriate error one time and ignore all later similar errors.
Bug: 158766131
Test: adb shell "echo 0 > /sys/devices/system/cpu/cpu[0-3]/online"
Test: start some apps, observe libprocessgroup errors in the logcat
Signed-off-by: Suren Baghdasaryan <surenb@google.com>
Change-Id: Ia91d8839d86787569c255481bde077be51c43d93
diff --git a/libprocessgroup/task_profiles.cpp b/libprocessgroup/task_profiles.cpp
index cf74e65..e935f99 100644
--- a/libprocessgroup/task_profiles.cpp
+++ b/libprocessgroup/task_profiles.cpp
@@ -194,22 +194,39 @@
fd_.reset(FDS_NOT_CACHED);
}
-bool SetCgroupAction::AddTidToCgroup(int tid, int fd) {
+bool SetCgroupAction::AddTidToCgroup(int tid, int fd, const char* controller_name) {
if (tid <= 0) {
return true;
}
std::string value = std::to_string(tid);
- if (TEMP_FAILURE_RETRY(write(fd, value.c_str(), value.length())) < 0) {
- // If the thread is in the process of exiting, don't flag an error
- if (errno != ESRCH) {
- PLOG(ERROR) << "AddTidToCgroup failed to write '" << value << "'; fd=" << fd;
- return false;
- }
+ if (TEMP_FAILURE_RETRY(write(fd, value.c_str(), value.length())) == value.length()) {
+ return true;
}
- return true;
+ // If the thread is in the process of exiting, don't flag an error
+ if (errno == ESRCH) {
+ return true;
+ }
+
+ // ENOSPC is returned when cpuset cgroup that we are joining has no online cpus
+ if (errno == ENOSPC && !strcmp(controller_name, "cpuset")) {
+ // This is an abnormal case happening only in testing, so report it only once
+ static bool empty_cpuset_reported = false;
+
+ if (empty_cpuset_reported) {
+ return true;
+ }
+
+ LOG(ERROR) << "Failed to add task '" << value
+ << "' into cpuset because all cpus in that cpuset are offline";
+ empty_cpuset_reported = true;
+ } else {
+ PLOG(ERROR) << "AddTidToCgroup failed to write '" << value << "'; fd=" << fd;
+ }
+
+ return false;
}
bool SetCgroupAction::ExecuteForProcess(uid_t uid, pid_t pid) const {
@@ -219,7 +236,7 @@
PLOG(WARNING) << "Failed to open " << procs_path;
return false;
}
- if (!AddTidToCgroup(pid, tmp_fd)) {
+ if (!AddTidToCgroup(pid, tmp_fd, controller()->name())) {
LOG(ERROR) << "Failed to add task into cgroup";
return false;
}
@@ -231,7 +248,7 @@
std::lock_guard<std::mutex> lock(fd_mutex_);
if (IsFdValid()) {
// fd is cached, reuse it
- if (!AddTidToCgroup(tid, fd_)) {
+ if (!AddTidToCgroup(tid, fd_, controller()->name())) {
LOG(ERROR) << "Failed to add task into cgroup";
return false;
}
@@ -256,7 +273,7 @@
PLOG(WARNING) << "Failed to open " << tasks_path << ": " << strerror(errno);
return false;
}
- if (!AddTidToCgroup(tid, tmp_fd)) {
+ if (!AddTidToCgroup(tid, tmp_fd, controller()->name())) {
LOG(ERROR) << "Failed to add task into cgroup";
return false;
}