init: Make WaitToBeReaped() return sooner

Reduce the time spent in WaitToBeReaped() by waiting for SIGCHLD instead
of sleeping for 50 ms on every loop iteration.

Bug: 308687042
Change-Id: I5e259fdd22dec68e45d27205def2fc6463c06ca3
Signed-off-by: Bart Van Assche <bvanassche@google.com>
diff --git a/init/init.cpp b/init/init.cpp
index 68701aa..0439d22 100644
--- a/init/init.cpp
+++ b/init/init.cpp
@@ -117,7 +117,7 @@
 
 static int property_triggers_enabled = 0;
 
-static int sigchld_fd = -1;
+int sigchld_fd = -1;
 static int sigterm_fd = -1;
 static int property_fd = -1;
 
@@ -800,6 +800,7 @@
         PLOG(FATAL) << cs_result.error();
     }
     sigchld_fd = cs_result.value();
+    Service::SetSigchldFd(sigchld_fd);
 
     if (sigismember(&mask, SIGTERM)) {
         Result<int> cs_result = CreateAndRegisterSignalFd(epoll, SIGTERM);
diff --git a/init/init.h b/init/init.h
index 9c7e918..b781167 100644
--- a/init/init.h
+++ b/init/init.h
@@ -28,6 +28,8 @@
 namespace android {
 namespace init {
 
+extern int sigchld_fd;
+
 Parser CreateParser(ActionManager& action_manager, ServiceList& service_list);
 Parser CreateApexConfigParser(ActionManager& action_manager, ServiceList& service_list);
 
diff --git a/init/reboot.cpp b/init/reboot.cpp
index 3351c4c..5757922 100644
--- a/init/reboot.cpp
+++ b/init/reboot.cpp
@@ -563,7 +563,7 @@
         }
     }
     if (timeout > 0ms) {
-        WaitToBeReaped(pids, timeout);
+        WaitToBeReaped(sigchld_fd, pids, timeout);
     } else {
         // Even if we don't to wait for services to stop, we still optimistically reap zombies.
         ReapAnyOutstandingChildren();
diff --git a/init/service.cpp b/init/service.cpp
index 5e900ee..66ddc8e 100644
--- a/init/service.cpp
+++ b/init/service.cpp
@@ -136,6 +136,7 @@
 
 unsigned long Service::next_start_order_ = 1;
 bool Service::is_exec_service_running_ = false;
+int Service::sigchld_fd_ = -1;
 
 Service::Service(const std::string& name, Subcontext* subcontext_for_restart_commands,
                  const std::string& filename, const std::vector<std::string>& args)
diff --git a/init/service.h b/init/service.h
index 9f09cef..7e73af6 100644
--- a/init/service.h
+++ b/init/service.h
@@ -156,6 +156,7 @@
     const Subcontext* subcontext() const { return subcontext_; }
     const std::string& filename() const { return filename_; }
     void set_filename(const std::string& name) { filename_ = name; }
+    static void SetSigchldFd(int sigchld_fd) { sigchld_fd_ = sigchld_fd; }
 
   private:
     void NotifyStateChange(const std::string& new_state) const;
@@ -168,8 +169,10 @@
     void RunService(const std::vector<Descriptor>& descriptors, InterprocessFifo cgroups_activated,
                     InterprocessFifo setsid_finished);
     void SetMountNamespace();
+
     static unsigned long next_start_order_;
     static bool is_exec_service_running_;
+    static int sigchld_fd_;
 
     const std::string name_;
     std::set<std::string> classnames_;
diff --git a/init/sigchld_handler.cpp b/init/sigchld_handler.cpp
index 0901a96..9d4c7c8 100644
--- a/init/sigchld_handler.cpp
+++ b/init/sigchld_handler.cpp
@@ -18,6 +18,7 @@
 
 #include <signal.h>
 #include <string.h>
+#include <sys/signalfd.h>
 #include <sys/socket.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -31,6 +32,7 @@
 
 #include <thread>
 
+#include "epoll.h"
 #include "init.h"
 #include "service.h"
 #include "service_list.h"
@@ -121,8 +123,38 @@
     }
 }
 
-void WaitToBeReaped(const std::vector<pid_t>& pids, std::chrono::milliseconds timeout) {
+// Reads one signalfd_siginfo record from |signal_fd| and throws it away. Logs a warning if the
+// read does not return exactly one full record. Used as the epoll handler for the SIGCHLD
+// signalfd so that a delivered signal is consumed after it wakes up the waiter.
+static void DiscardSiginfo(int signal_fd) {
+    signalfd_siginfo siginfo;
+    ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo)));
+    if (bytes_read != sizeof(siginfo)) {
+        LOG(WARNING) << "Unexpected: " << __func__ << " read " << bytes_read << " bytes instead of "
+                     << sizeof(siginfo);
+    }
+}
+
+// Waits for the processes in |pids| to be reaped, for at most |timeout|. If |sigchld_fd| is
+// valid, blocks on it between checks; otherwise sleeps for 50 ms between checks.
+void WaitToBeReaped(int sigchld_fd, const std::vector<pid_t>& pids,
+                    std::chrono::milliseconds timeout) {
     Timer t;
+    Epoll epoll;
+    // The init process passes a valid sigchld_fd argument but unit tests do not.
+    if (sigchld_fd >= 0) {
+        // Epoll must be opened before handlers can be registered. If setup fails, fall back to
+        // the 50 ms sleep path instead of spinning on a failing epoll.Wait().
+        auto result = epoll.Open();
+        if (result.ok()) {
+            result = epoll.RegisterHandler(sigchld_fd,
+                                           [sigchld_fd]() { DiscardSiginfo(sigchld_fd); });
+        }
+        if (!result.ok()) {
+            LOG(WARNING) << __func__ << " failed to set up epoll: " << result.error();
+            sigchld_fd = -1;
+        }
+    }
     std::vector<pid_t> alive_pids(pids.begin(), pids.end());
     while (!alive_pids.empty() && t.duration() < timeout) {
         pid_t pid;
@@ -135,7 +167,11 @@
         if (alive_pids.empty()) {
             break;
         }
-        std::this_thread::sleep_for(50ms);
+        if (sigchld_fd >= 0) {
+            epoll.Wait(std::max(timeout - t.duration(), 0ms));
+        } else {
+            std::this_thread::sleep_for(50ms);
+        }
     }
     LOG(INFO) << "Waiting for " << pids.size() << " pids to be reaped took " << t << " with "
               << alive_pids.size() << " of them still running";
diff --git a/init/sigchld_handler.h b/init/sigchld_handler.h
index fac1020..e07a7d6 100644
--- a/init/sigchld_handler.h
+++ b/init/sigchld_handler.h
@@ -25,7 +25,8 @@
 
 void ReapAnyOutstandingChildren();
 
-void WaitToBeReaped(const std::vector<pid_t>& pids, std::chrono::milliseconds timeout);
+void WaitToBeReaped(int sigchld_fd, const std::vector<pid_t>& pids,
+                    std::chrono::milliseconds timeout);
 
 }  // namespace init
 }  // namespace android