Merge "storageproxyd: Use alternate data path if in DSU state"
diff --git a/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp b/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp
index ddb1f79..a082742 100644
--- a/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp
@@ -209,6 +209,8 @@
 int main(int argc, char** argv) {
     android::base::InitLogging(argv, &android::base::KernelLogger);
 
+    LOG(INFO) << "snapuserd daemon about to start";
+
     android::snapshot::Daemon& daemon = android::snapshot::Daemon::Instance();
 
     if (!daemon.StartDaemon(argc, argv)) {
diff --git a/init/init.cpp b/init/init.cpp
index e3596cb..1df4c44 100644
--- a/init/init.cpp
+++ b/init/init.cpp
@@ -33,6 +33,7 @@
 #define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
 #include <sys/_system_properties.h>
 
+#include <filesystem>
 #include <functional>
 #include <map>
 #include <memory>
@@ -46,6 +47,7 @@
 #include <android-base/logging.h>
 #include <android-base/parseint.h>
 #include <android-base/properties.h>
+#include <android-base/scopeguard.h>
 #include <android-base/stringprintf.h>
 #include <android-base/strings.h>
 #include <backtrace/Backtrace.h>
@@ -773,6 +775,82 @@
     return {};
 }
 
+static bool SystemReadSmokeTest() {
+    std::string dev = "/dev/block/mapper/system"s + fs_mgr_get_slot_suffix();
+    android::base::unique_fd fd(open(dev.c_str(), O_RDONLY));
+    if (fd < 0) {
+        PLOG(ERROR) << "open " << dev << " failed, will not diangose snapuserd hangs";
+        return false;
+    }
+
+    for (size_t i = 1; i <= 100; i++) {
+        // Skip around the partition a bit.
+        size_t offset = i * 4096 * 512;
+
+        char b;
+        ssize_t n = TEMP_FAILURE_RETRY(pread(fd.get(), &b, 1, offset));
+        if (n < 0) {
+            PLOG(ERROR) << "snapuserd smoke test read failed";
+            return false;
+        }
+    }
+    return true;
+}
+
+static void DiagnoseSnapuserdHang(pid_t pid) {
+    bool succeeded = false;
+
+    std::mutex m;
+    std::condition_variable cv;
+
+    // Enforce an ordering between this and the thread startup, by taking the
+    // lock before we lanuch the thread.
+    std::unique_lock<std::mutex> cv_lock(m);
+
+    std::thread t([&]() -> void {
+        std::lock_guard<std::mutex> lock(m);
+        succeeded = SystemReadSmokeTest();
+        cv.notify_all();
+    });
+
+    auto join = android::base::make_scope_guard([&]() -> void {
+        // If the smoke test is hung, then this will too. We expect the device to
+        // automatically reboot once the watchdog kicks in.
+        t.join();
+    });
+
+    auto now = std::chrono::system_clock::now();
+    auto deadline = now + 10s;
+    auto status = cv.wait_until(cv_lock, deadline);
+    if (status == std::cv_status::timeout) {
+        LOG(ERROR) << "snapuserd smoke test timed out";
+    } else if (!succeeded) {
+        LOG(ERROR) << "snapuserd smoke test failed";
+    }
+
+    if (succeeded) {
+        LOG(INFO) << "snapuserd smoke test succeeded";
+        return;
+    }
+
+    while (true) {
+        LOG(ERROR) << "snapuserd problem detected, printing open fds";
+
+        std::error_code ec;
+        std::string proc_dir = "/proc/" + std::to_string(pid) + "/fd";
+        for (const auto& entry : std::filesystem::directory_iterator(proc_dir)) {
+            std::string target;
+            if (android::base::Readlink(entry.path(), &target)) {
+                LOG(ERROR) << "snapuserd opened: " << target;
+            } else {
+                LOG(ERROR) << "snapuserd opened: " << entry.path();
+            }
+        }
+
+        std::this_thread::sleep_for(10s);
+    }
+}
+
 int SecondStageMain(int argc, char** argv) {
     if (REBOOT_BOOTLOADER_ON_PANIC) {
         InstallRebootSignalHandlers();
@@ -786,6 +864,11 @@
     InitKernelLogging(argv);
     LOG(INFO) << "init second stage started!";
 
+    if (auto pid = GetSnapuserdFirstStagePid()) {
+        std::thread t(DiagnoseSnapuserdHang, *pid);
+        t.detach();
+    }
+
     // Update $PATH in the case the second stage init is newer than first stage init, where it is
     // first set.
     if (setenv("PATH", _PATH_DEFPATH, 1) != 0) {
diff --git a/init/perfboot.py b/init/perfboot.py
index 4b23ad2..968df38 100755
--- a/init/perfboot.py
+++ b/init/perfboot.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # Copyright (C) 2015 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -39,7 +39,7 @@
 
 import argparse
 import atexit
-import cStringIO
+import io
 import glob
 import inspect
 import logging
@@ -102,7 +102,7 @@
             self._wait_cpu_cool_down(self._product, self._temp_paths)
         else:
             if self._waited:
-                print 'Waiting for %d seconds' % self._interval
+                print('Waiting for %d seconds' % self._interval)
                 time.sleep(self._interval)
         self._waited = True
 
@@ -119,9 +119,9 @@
         threshold = IntervalAdjuster._CPU_COOL_DOWN_THRESHOLDS.get(
             self._product)
         if threshold is None:
-            print 'No CPU temperature threshold is set for ' + self._product
-            print ('Just wait %d seconds' %
-                   IntervalAdjuster._CPU_COOL_DOWN_WAIT_TIME_DEFAULT)
+            print('No CPU temperature threshold is set for ' + self._product)
+            print(('Just wait %d seconds' %
+                   IntervalAdjuster._CPU_COOL_DOWN_WAIT_TIME_DEFAULT))
             time.sleep(IntervalAdjuster._CPU_COOL_DOWN_WAIT_TIME_DEFAULT)
             return
         while True:
@@ -129,8 +129,8 @@
             if temp < threshold:
                 logging.info('Current CPU temperature %s' % temp)
                 return
-            print 'Waiting until CPU temperature (%d) falls below %d' % (
-                temp, threshold)
+            print('Waiting until CPU temperature (%d) falls below %d' % (
+                temp, threshold))
             time.sleep(IntervalAdjuster._CPU_COOL_DOWN_WAIT_INTERVAL)
 
 
@@ -260,7 +260,7 @@
 
 def get_values(record, tag):
     """Gets values that matches |tag| from |record|."""
-    keys = [key for key in record.keys() if key[0] == tag]
+    keys = [key for key in list(record.keys()) if key[0] == tag]
     return [record[k] for k in sorted(keys)]
 
 
@@ -304,7 +304,7 @@
     with open(filename, 'w') as f:
         f.write('\t'.join(labels) + '\n')
         for record in record_list:
-            line = cStringIO.StringIO()
+            line = io.StringIO()
             invalid_line = False
             for i, tag in enumerate(tags):
                 if i != 0:
@@ -319,7 +319,7 @@
                 logging.error('Invalid record found: ' + line.getvalue())
             line.write('\n')
             f.write(line.getvalue())
-    print 'Wrote: ' + filename
+    print(('Wrote: ' + filename))
 
 
 def median(data):
@@ -349,9 +349,9 @@
     # Filter out invalid data.
     end_times = [get_last_value(record, end_tag) for record in record_list
                  if get_last_value(record, end_tag) != 0]
-    print 'mean:', int(round(mean(end_times))), 'ms'
-    print 'median:', int(round(median(end_times))), 'ms'
-    print 'standard deviation:', int(round(stddev(end_times))), 'ms'
+    print(('mean:', int(round(mean(end_times))), 'ms'))
+    print(('median:', int(round(median(end_times))), 'ms'))
+    print(('standard deviation:', int(round(stddev(end_times))), 'ms'))
 
 
 def do_iteration(device, interval_adjuster, event_tags_re, end_tag):
@@ -359,7 +359,7 @@
     device.wait()
     interval_adjuster.wait()
     device.reboot()
-    print 'Rebooted the device'
+    print('Rebooted the device, waiting for tag', end_tag)
     record = {}
     booted = False
     while not booted:
@@ -372,7 +372,7 @@
                 stdout=subprocess.PIPE)
         for line in readlines_unbuffered(p):
             if t.is_timedout():
-                print '*** Timed out ***'
+                print('*** Timed out ***')
                 return record
             m = event_tags_re.search(line)
             if not m:
@@ -381,8 +381,8 @@
             event_time = int(m.group('time'))
             pid = m.group('pid')
             record[(tag, pid)] = event_time
-            print 'Event log recorded: %s (%s) - %d ms' % (
-                tag, pid, event_time)
+            print(('Event log recorded: %s (%s) - %d ms' % (
+                tag, pid, event_time)))
             if tag == end_tag:
                 booted = True
                 t.cancel()
@@ -420,7 +420,7 @@
 
 def install_apks(device, apk_dir):
     for apk in glob.glob(os.path.join(apk_dir, '*.apk')):
-        print 'Installing: ' + apk
+        print('Installing: ' + apk)
         device.install(apk, replace=True)
 
 
@@ -452,7 +452,7 @@
     event_tags_re = make_event_tags_re(event_tags)
 
     for i in range(args.iterations):
-        print 'Run #%d ' % i
+        print('Run #%d ' % i)
         record = do_iteration(
             device, interval_adjuster, event_tags_re, end_tag)
         record_list.append(record)