Merge "init: Add diagnostics for snapuserd hangs"
diff --git a/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp b/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp
index ddb1f79..a082742 100644
--- a/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp
@@ -209,6 +209,8 @@
 int main(int argc, char** argv) {
     android::base::InitLogging(argv, &android::base::KernelLogger);
 
+    LOG(INFO) << "snapuserd daemon about to start";
+
     android::snapshot::Daemon& daemon = android::snapshot::Daemon::Instance();
 
     if (!daemon.StartDaemon(argc, argv)) {
diff --git a/init/init.cpp b/init/init.cpp
index e3596cb..1df4c44 100644
--- a/init/init.cpp
+++ b/init/init.cpp
@@ -33,6 +33,7 @@
 #define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
 #include <sys/_system_properties.h>
 
+#include <filesystem>
 #include <functional>
 #include <map>
 #include <memory>
@@ -46,6 +47,7 @@
 #include <android-base/logging.h>
 #include <android-base/parseint.h>
 #include <android-base/properties.h>
+#include <android-base/scopeguard.h>
 #include <android-base/stringprintf.h>
 #include <android-base/strings.h>
 #include <backtrace/Backtrace.h>
@@ -773,6 +775,82 @@
     return {};
 }
 
+// Reads 1 byte at each of 100 offsets, 2MiB apart, from /dev/block/mapper/system<slot suffix>.
+// Returns false if the open or any read fails; a 0-byte read (past the end) still passes.
+static bool SystemReadSmokeTest() {
+    std::string dev = "/dev/block/mapper/system"s + fs_mgr_get_slot_suffix();
+    android::base::unique_fd fd(open(dev.c_str(), O_RDONLY));
+    if (fd < 0) {
+        PLOG(ERROR) << "open " << dev << " failed, will not diagnose snapuserd hangs";
+        return false;
+    }
+    for (size_t i = 1; i <= 100; i++) {
+        // Skip around the partition a bit.
+        size_t offset = i * 4096 * 512;
+        char b;
+        ssize_t n = TEMP_FAILURE_RETRY(pread(fd.get(), &b, 1, offset));
+        if (n < 0) {
+            PLOG(ERROR) << "snapuserd smoke test read failed";
+            return false;
+        }
+    }
+    return true;
+}
+
+// Runs SystemReadSmokeTest() on a helper thread with a 10 second timeout. If it fails or
+// times out, logs the fds open in process |pid| every 10 seconds and never returns.
+static void DiagnoseSnapuserdHang(pid_t pid) {
+    std::mutex m;
+    std::condition_variable cv;
+    bool done = false;       // Guarded by |m|.
+    bool succeeded = false;  // Guarded by |m|.
+    std::thread t([&]() -> void {
+        // Run the test without holding |m|: if the read hangs while the lock is held, the
+        // waiter below can never re-acquire it, so the timeout would never be reported.
+        bool ok = SystemReadSmokeTest();
+        std::lock_guard<std::mutex> lock(m);
+        succeeded = ok;
+        done = true;
+        cv.notify_all();
+    });
+
+    auto join = android::base::make_scope_guard([&]() -> void {
+        // If the smoke test is hung, then this will too. We expect the device to
+        // automatically reboot once the watchdog kicks in.
+        t.join();
+    });
+
+    {
+        std::unique_lock<std::mutex> cv_lock(m);
+        // The predicate handles spurious wakeups and a notify that precedes the wait.
+        if (!cv.wait_for(cv_lock, 10s, [&] { return done; })) {
+            LOG(ERROR) << "snapuserd smoke test timed out";
+        } else if (!succeeded) {
+            LOG(ERROR) << "snapuserd smoke test failed";
+        } else {
+            LOG(INFO) << "snapuserd smoke test succeeded";
+            return;
+        }
+    }
+
+    const std::string proc_dir = "/proc/" + std::to_string(pid) + "/fd";
+    while (true) {
+        LOG(ERROR) << "snapuserd problem detected, printing open fds";
+        // Use the error_code overload so an unreadable directory is logged rather than thrown.
+        std::error_code ec;
+        for (const auto& entry : std::filesystem::directory_iterator(proc_dir, ec)) {
+            std::string target;
+            if (android::base::Readlink(entry.path(), &target)) {
+                LOG(ERROR) << "snapuserd opened: " << target;
+            } else {
+                LOG(ERROR) << "snapuserd opened: " << entry.path();
+            }
+        }
+        if (ec) LOG(ERROR) << "unable to list " << proc_dir << ": " << ec.message();
+        std::this_thread::sleep_for(10s);
+    }
+}
+
 int SecondStageMain(int argc, char** argv) {
     if (REBOOT_BOOTLOADER_ON_PANIC) {
         InstallRebootSignalHandlers();
@@ -786,6 +864,11 @@
     InitKernelLogging(argv);
     LOG(INFO) << "init second stage started!";
 
+    if (auto pid = GetSnapuserdFirstStagePid()) {
+        std::thread t(DiagnoseSnapuserdHang, *pid);
+        t.detach();
+    }
+
     // Update $PATH in the case the second stage init is newer than first stage init, where it is
     // first set.
     if (setenv("PATH", _PATH_DEFPATH, 1) != 0) {