userspace reboot: stop post-data services and wait for them to be killed
* Refactor the code around stopping services a little bit so it can be
reused between full reboot and userspace reboot.
* Add a scope_guard to fall back to a full reboot in case userspace reboot
fails.
* In case of userspace reboot, init will also wait for services to be
terminated/killed and log the ones that didn't react to
SIGTERM/SIGKILL in time.
* If some of the services didn't react to SIGKILL, fail userspace reboot.
Test: adb reboot userspace
Bug: 135984674
Change-Id: I820c7bc406169333b0f929f0eea028d8384eb2ac
diff --git a/init/sigchld_handler.cpp b/init/sigchld_handler.cpp
index 984235d..9b2c7d9 100644
--- a/init/sigchld_handler.cpp
+++ b/init/sigchld_handler.cpp
@@ -28,28 +28,31 @@
#include <android-base/scopeguard.h>
#include <android-base/stringprintf.h>
+#include <thread>
+
#include "init.h"
#include "service.h"
#include "service_list.h"
-using android::base::StringPrintf;
using android::base::boot_clock;
using android::base::make_scope_guard;
+using android::base::StringPrintf;
+using android::base::Timer;
namespace android {
namespace init {
-static bool ReapOneProcess() {
+static pid_t ReapOneProcess() {
siginfo_t siginfo = {};
// This returns a zombie pid or informs us that there are no zombies left to be reaped.
// It does NOT reap the pid; that is done below.
if (TEMP_FAILURE_RETRY(waitid(P_ALL, 0, &siginfo, WEXITED | WNOHANG | WNOWAIT)) != 0) {
PLOG(ERROR) << "waitid failed";
- return false;
+ return 0;
}
auto pid = siginfo.si_pid;
- if (pid == 0) return false;
+ if (pid == 0) return 0;
// At this point we know we have a zombie pid, so we use this scopeguard to reap the pid
// whenever the function returns from this point forward.
@@ -92,7 +95,7 @@
LOG(INFO) << name << " received signal " << siginfo.si_status << wait_string;
}
- if (!service) return true;
+ if (!service) return pid;
service->Reap(siginfo);
@@ -100,13 +103,33 @@
ServiceList::GetInstance().RemoveService(*service);
}
- return true;
+ return pid;
}
+// Keeps calling ReapOneProcess() until it returns 0, i.e. until waitid() fails or reports
+// that no zombie child is left.
void ReapAnyOutstandingChildren() {
- while (ReapOneProcess()) {
+ while (ReapOneProcess() != 0) {
}
}
+// Polls ReapOneProcess() until every pid in |pids| has been reaped or |timeout| has elapsed,
+// sleeping 50ms between polls, then logs how long the wait took and how many of the pids
+// were not seen. Nothing is returned; the outcome is only logged.
+// ReapOneProcess() waits on P_ALL, so zombies that are not listed in |pids| are handled too.
+// NOTE(review): the timeout is only checked between polls, so the call can overshoot it by
+// up to one sleep interval; if the timeout expires during a sleep, children that exited
+// during that sleep are not reaped here and are still counted as running in the log line.
+void WaitToBeReaped(const std::vector<pid_t>& pids, std::chrono::milliseconds timeout) {
+ Timer t;
+ // Work on a copy: |pids| is const and its original size is still needed for the log below.
+ std::vector<pid_t> alive_pids(pids.begin(), pids.end());
+ while (!alive_pids.empty() && t.duration() < timeout) {
+ pid_t pid;
+ // Drain every zombie that is currently available; 0 means there is nothing (more) to reap.
+ while ((pid = ReapOneProcess()) != 0) {
+ auto it = std::find(alive_pids.begin(), alive_pids.end(), pid);
+ if (it != alive_pids.end()) {
+ alive_pids.erase(it);
+ }
+ }
+ // Skip the sleep when the last awaited pid was just reaped.
+ if (alive_pids.empty()) {
+ break;
+ }
+ // NOTE(review): the 50ms literal needs std::chrono literals in scope; the using-directive
+ // is not visible in this diff - confirm.
+ std::this_thread::sleep_for(50ms);
+ }
+ LOG(INFO) << "Waiting for " << pids.size() << " pids to be reaped took " << t << " with "
+ << alive_pids.size() << " of them still running";
+}
+
} // namespace init
} // namespace android