Merge "Add support for optional cgroup attributes"
diff --git a/init/README.md b/init/README.md
index c82dbfb..13c6ebd 100644
--- a/init/README.md
+++ b/init/README.md
@@ -804,13 +804,18 @@
 `init.svc.<name>`
 > State of a named service ("stopped", "stopping", "running", "restarting")
 
-`dev.mnt.blk.<mount_point>`
+`dev.mnt.dev.<mount_point>`, `dev.mnt.blk.<mount_point>`, `dev.mnt.rootdisk.<mount_point>`
 > Block device base name associated with a *mount_point*.
   The *mount_point* has / replaced by . and if referencing the root mount point
-  "/", it will use "/root", specifically `dev.mnt.blk.root`.
-  Meant for references to `/sys/device/block/${dev.mnt.blk.<mount_point>}/` and
-  `/sys/fs/ext4/${dev.mnt.blk.<mount_point>}/` to tune the block device
-  characteristics in a device agnostic manner.
+  "/", it will use "/root".
+  `dev.mnt.dev.<mount_point>` indicates a block device attached to filesystems.
+    (e.g., dm-N or sdaN/mmcblk0pN to access `/sys/fs/ext4/${dev.mnt.dev.<mount_point>}/`)
+
+  `dev.mnt.blk.<mount_point>` indicates the disk partition to the above block device.
+    (e.g., sdaN / mmcblk0pN to access `/sys/class/block/${dev.mnt.blk.<mount_point>}/`)
+
+  `dev.mnt.rootdisk.<mount_point>` indicates the root disk to contain the above disk partition.
+    (e.g., sda / mmcblk0 to access `/sys/class/block/${dev.mnt.rootdisk.<mount_point>}/queue`)
 
 Init responds to properties that begin with `ctl.`.  These properties take the format of
 `ctl.[<target>_]<command>` and the _value_ of the system property is used as a parameter.  The
diff --git a/init/init.cpp b/init/init.cpp
index eca7bc5..5a0b3a6 100644
--- a/init/init.cpp
+++ b/init/init.cpp
@@ -578,12 +578,29 @@
     HandlePowerctlMessage("shutdown,container");
 }
 
+static constexpr std::chrono::milliseconds kDiagnosticTimeout = 10s;
+
 static void HandleSignalFd() {
     signalfd_siginfo siginfo;
-    ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo)));
-    if (bytes_read != sizeof(siginfo)) {
-        PLOG(ERROR) << "Failed to read siginfo from signal_fd";
-        return;
+    auto started = std::chrono::steady_clock::now();
+    for (;;) {
+        ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo)));
+        if (bytes_read < 0 && errno == EAGAIN) {
+            auto now = std::chrono::steady_clock::now();
+            std::chrono::duration<double> waited = now - started;
+            if (waited >= kDiagnosticTimeout) {
+                LOG(ERROR) << "epoll() woke us up, but we waited with no SIGCHLD!";
+                started = now;
+            }
+
+            std::this_thread::sleep_for(100ms);
+            continue;
+        }
+        if (bytes_read != sizeof(siginfo)) {
+            PLOG(ERROR) << "Failed to read siginfo from signal_fd";
+            return;
+        }
+        break;
     }
 
     switch (siginfo.ssi_signo) {
@@ -639,7 +656,7 @@
         LOG(FATAL) << "Failed to register a fork handler: " << strerror(result);
     }
 
-    signal_fd = signalfd(-1, &mask, SFD_CLOEXEC);
+    signal_fd = signalfd(-1, &mask, SFD_CLOEXEC | SFD_NONBLOCK);
     if (signal_fd == -1) {
         PLOG(FATAL) << "failed to create signalfd";
     }
@@ -938,7 +955,7 @@
     setpriority(PRIO_PROCESS, 0, 0);
     while (true) {
         // By default, sleep until something happens.
-        auto epoll_timeout = std::optional<std::chrono::milliseconds>{};
+        auto epoll_timeout = std::optional<std::chrono::milliseconds>{kDiagnosticTimeout};
 
         auto shutdown_command = shutdown_state.CheckShutdown();
         if (shutdown_command) {
@@ -978,6 +995,13 @@
             for (const auto& function : *pending_functions) {
                 (*function)();
             }
+        } else if (Service::is_exec_service_running()) {
+            std::chrono::duration<double> waited =
+                    std::chrono::steady_clock::now() - Service::exec_service_started();
+            if (waited >= kDiagnosticTimeout) {
+                LOG(ERROR) << "Exec service is hung? Waited " << waited.count()
+                           << " without SIGCHLD";
+            }
         }
         if (!IsShuttingDown()) {
             HandleControlMessages();
diff --git a/init/mount_handler.cpp b/init/mount_handler.cpp
index f0d8d45..227ce2f 100644
--- a/init/mount_handler.cpp
+++ b/init/mount_handler.cpp
@@ -25,6 +25,7 @@
 #include <unistd.h>
 
 #include <algorithm>
+#include <filesystem>
 #include <string>
 #include <utility>
 #include <vector>
@@ -32,6 +33,7 @@
 #include <android-base/file.h>
 #include <android-base/logging.h>
 #include <android-base/properties.h>
+#include <android-base/stringprintf.h>
 #include <android-base/strings.h>
 #include <fs_mgr.h>
 #include <fstab/fstab.h>
@@ -39,6 +41,9 @@
 
 #include "epoll.h"
 
+using android::base::Basename;
+using android::base::StringPrintf;
+
 namespace android {
 namespace init {
 
@@ -67,41 +72,82 @@
     return MountHandlerEntry(fields[0], fields[1], fields[2]);
 }
 
+// return sda25 for dm-4, sda25 for sda25, or mmcblk0p24 for mmcblk0p24
+std::string GetDiskPart(std::string blockdev) {
+    if (blockdev.find('/') != std::string::npos) return {};
+
+    while (android::base::StartsWith(blockdev, "dm-")) {
+        auto& dm = dm::DeviceMapper::Instance();
+        std::optional<std::string> parent = dm.GetParentBlockDeviceByPath("/dev/block/" + blockdev);
+        if (parent) {
+            blockdev = android::base::Basename(*parent);
+        } else {
+            return {};
+        }
+    }
+    return blockdev;
+}
+
+// return sda for sda25, or mmcblk0 for mmcblk0p24
+std::string GetRootDisk(std::string blockdev) {
+    if (blockdev.empty()) return {};
+    if (blockdev.find('/') != std::string::npos) return {};
+
+    std::error_code ec;
+    for (const auto& entry : std::filesystem::directory_iterator("/sys/block", ec)) {
+        const std::string path = entry.path().string();
+        if (std::filesystem::exists(StringPrintf("%s/%s", path.c_str(), blockdev.c_str()))) {
+            return Basename(path);
+        }
+    }
+    return {};
+}
+
 void SetMountProperty(const MountHandlerEntry& entry, bool add) {
     static constexpr char devblock[] = "/dev/block/";
     if (!android::base::StartsWith(entry.blk_device, devblock)) return;
-    std::string value;
+    auto target = entry.blk_device.substr(strlen(devblock));
+    std::string diskpart, rootdisk;
     if (add) {
-        value = entry.blk_device.substr(strlen(devblock));
-        if (android::base::StartsWith(value, "sd")) {
-            // All sd partitions inherit their queue characteristics
-            // from the whole device reference.  Strip partition number.
-            auto it = std::find_if(value.begin(), value.end(), [](char c) { return isdigit(c); });
-            if (it != value.end()) value.erase(it, value.end());
-        }
-        auto queue = "/sys/block/" + value + "/queue";
+        diskpart = GetDiskPart(target);
+        rootdisk = GetRootDisk(diskpart);
+
         struct stat sb;
-        if (stat(queue.c_str(), &sb) || !S_ISDIR(sb.st_mode)) value = "";
-        if (stat(entry.mount_point.c_str(), &sb) || !S_ISDIR(sb.st_mode)) value = "";
+        if (stat(entry.mount_point.c_str(), &sb) || !S_ISDIR(sb.st_mode)) rootdisk = "";
         // Clear the noise associated with loopback and APEX.
-        if (android::base::StartsWith(value, "loop")) value = "";
-        if (android::base::StartsWith(entry.mount_point, "/apex/")) value = "";
+        if (android::base::StartsWith(target, "loop")) rootdisk = "";
+        if (android::base::StartsWith(entry.mount_point, "/apex/")) rootdisk = "";
     }
     auto mount_prop = entry.mount_point;
     if (mount_prop == "/") mount_prop = "/root";
     std::replace(mount_prop.begin(), mount_prop.end(), '/', '.');
     auto blk_mount_prop = "dev.mnt.blk" + mount_prop;
     auto dev_mount_prop = "dev.mnt.dev" + mount_prop;
-    // Set property even if its value does not change to trigger 'on property:'
+    auto rootdisk_mount_prop = "dev.mnt.rootdisk" + mount_prop;
+    // Set property even if its rootdisk does not change to trigger 'on property:'
     // handling, except for clearing non-existent or already clear property.
     // Goal is reduction of empty properties and associated triggers.
-    if (value.empty() && android::base::GetProperty(blk_mount_prop, "").empty()) return;
-    android::base::SetProperty(blk_mount_prop, value);
-    if (!value.empty()) {
-        android::base::SetProperty(dev_mount_prop, entry.blk_device.substr(strlen(devblock)));
-    } else {
+    if (rootdisk.empty() && android::base::GetProperty(blk_mount_prop, "").empty()) return;
+
+    if (rootdisk.empty()) {
+        android::base::SetProperty(blk_mount_prop, "");
         android::base::SetProperty(dev_mount_prop, "");
+        android::base::SetProperty(rootdisk_mount_prop, "");
+        return;
     }
+
+    // 1. dm-N
+    //  dev.mnt.dev.data = dm-N
+    //  dev.mnt.blk.data = sdaN or mmcblk0pN
+    //  dev.mnt.rootdisk.data = sda or mmcblk0
+    //
+    // 2. sdaN or mmcblk0pN
+    //  dev.mnt.dev.data = sdaN or mmcblk0pN
+    //  dev.mnt.blk.data = sdaN or mmcblk0pN
+    //  dev.mnt.rootdisk.data = sda or mmcblk0
+    android::base::SetProperty(dev_mount_prop, target);
+    android::base::SetProperty(blk_mount_prop, diskpart);
+    android::base::SetProperty(rootdisk_mount_prop, rootdisk);
 }
 
 }  // namespace
diff --git a/init/service.cpp b/init/service.cpp
index 8a9cc0a..2ebf87e 100644
--- a/init/service.cpp
+++ b/init/service.cpp
@@ -127,6 +127,7 @@
 
 unsigned long Service::next_start_order_ = 1;
 bool Service::is_exec_service_running_ = false;
+std::chrono::time_point<std::chrono::steady_clock> Service::exec_service_started_;
 
 Service::Service(const std::string& name, Subcontext* subcontext_for_restart_commands,
                  const std::vector<std::string>& args, bool from_apex)
@@ -388,6 +389,7 @@
 
     flags_ |= SVC_EXEC;
     is_exec_service_running_ = true;
+    exec_service_started_ = std::chrono::steady_clock::now();
 
     LOG(INFO) << "SVC_EXEC service '" << name_ << "' pid " << pid_ << " (uid " << proc_attr_.uid
               << " gid " << proc_attr_.gid << "+" << proc_attr_.supp_gids.size() << " context "
diff --git a/init/service.h b/init/service.h
index 3f12aa2..d233cbf 100644
--- a/init/service.h
+++ b/init/service.h
@@ -102,6 +102,9 @@
     size_t CheckAllCommands() const { return onrestart_.CheckAllCommands(); }
 
     static bool is_exec_service_running() { return is_exec_service_running_; }
+    static std::chrono::time_point<std::chrono::steady_clock> exec_service_started() {
+        return exec_service_started_;
+    }
 
     const std::string& name() const { return name_; }
     const std::set<std::string>& classnames() const { return classnames_; }
@@ -154,6 +157,8 @@
 
     static unsigned long next_start_order_;
     static bool is_exec_service_running_;
+    static std::chrono::time_point<std::chrono::steady_clock> exec_service_started_;
+    static pid_t exec_service_pid_;
 
     std::string name_;
     std::set<std::string> classnames_;
diff --git a/libutils/RefBase.cpp b/libutils/RefBase.cpp
index 99fefee..4ddac3d 100644
--- a/libutils/RefBase.cpp
+++ b/libutils/RefBase.cpp
@@ -50,7 +50,7 @@
 // log all reference counting operations
 #define PRINT_REFS 0
 
-#if !defined(_WIN32) && !defined(__APPLE__)
+#if defined(__linux__)
 // CallStack is only supported on linux type platforms.
 #define CALLSTACK_ENABLED 1
 #else
diff --git a/libutils/include/utils/Compat.h b/libutils/include/utils/Compat.h
index 6002567..3221899 100644
--- a/libutils/include/utils/Compat.h
+++ b/libutils/include/utils/Compat.h
@@ -71,19 +71,17 @@
 #define CONSTEXPR
 #endif
 
-/*
- * TEMP_FAILURE_RETRY is defined by some, but not all, versions of
- * <unistd.h>. (Alas, it is not as standard as we'd hoped!) So, if it's
- * not already defined, then define it here.
- */
+/* TEMP_FAILURE_RETRY is not available on macOS, but still useful there. */
 #ifndef TEMP_FAILURE_RETRY
 /* Used to retry syscalls that can return EINTR. */
-#define TEMP_FAILURE_RETRY(exp) ({         \
-    typeof (exp) _rc;                      \
-    do {                                   \
-        _rc = (exp);                       \
-    } while (_rc == -1 && errno == EINTR); \
-    _rc; })
+#define TEMP_FAILURE_RETRY(exp)                \
+    ({                                         \
+        __typeof__(exp) _rc;                   \
+        do {                                   \
+            _rc = (exp);                       \
+        } while (_rc == -1 && errno == EINTR); \
+        _rc;                                   \
+    })
 #endif
 
 #if defined(_WIN32)
diff --git a/libutils/include/utils/Errors.h b/libutils/include/utils/Errors.h
index d14d223..22fb36d 100644
--- a/libutils/include/utils/Errors.h
+++ b/libutils/include/utils/Errors.h
@@ -34,15 +34,13 @@
  * All error codes are negative values.
  */
 
-// Win32 #defines NO_ERROR as well.  It has the same value, so there's no
-// real conflict, though it's a bit awkward.
-#ifdef _WIN32
-# undef NO_ERROR
-#endif
-
 enum {
     OK                = 0,    // Preferred constant for checking success.
+#ifndef NO_ERROR
+    // Win32 #defines NO_ERROR as well.  It has the same value, so there's no
+    // real conflict, though it's a bit awkward.
     NO_ERROR          = OK,   // Deprecated synonym for `OK`. Prefer `OK` because it doesn't conflict with Windows.
+#endif
 
     UNKNOWN_ERROR       = (-2147483647-1), // INT32_MIN value
 
@@ -76,10 +74,4 @@
 // Human readable name of error
 std::string statusToString(status_t status);
 
-// Restore define; enumeration is in "android" namespace, so the value defined
-// there won't work for Win32 code in a different namespace.
-#ifdef _WIN32
-# define NO_ERROR 0L
-#endif
-
 }  // namespace android
diff --git a/rootdir/init.rc b/rootdir/init.rc
index c4c9eca..404d7ae 100644
--- a/rootdir/init.rc
+++ b/rootdir/init.rc
@@ -1085,9 +1085,11 @@
     mkdir /dev/sys/fs/by-name 0755 system system
     symlink /sys/fs/f2fs/${dev.mnt.dev.data} /dev/sys/fs/by-name/userdata
 
-    # to access dm-<num> sysfs
+    # dev.mnt.dev.data=dm-N, dev.mnt.blk.data=sdaN/mmcblk0pN, dev.mnt.rootdisk.data=sda/mmcblk0, or
+    # dev.mnt.dev.data=sdaN/mmcblk0pN, dev.mnt.blk.data=sdaN/mmcblk0pN, dev.mnt.rootdisk.data=sda/mmcblk0
     mkdir /dev/sys/block/by-name 0755 system system
-    symlink /sys/devices/virtual/block/${dev.mnt.dev.data} /dev/sys/block/by-name/userdata
+    symlink /sys/class/block/${dev.mnt.dev.data} /dev/sys/block/by-name/userdata
+    symlink /sys/class/block/${dev.mnt.rootdisk.data} /dev/sys/block/by-name/rootdisk
 
     # F2FS tuning. Set cp_interval larger than dirty_expire_centisecs, 30 secs,
     # to avoid power consumption when system becomes mostly idle. Be careful
@@ -1099,8 +1101,9 @@
 
     # limit discard size to 128MB in order to avoid long IO latency
     # for filesystem tuning first (dm or sda)
-    # Note that, if dm-<num> is used, sda/mmcblk0 should be tuned in vendor/init.rc
+    # this requires enabling selinux entry for sda/mmcblk0 in vendor side
     write /dev/sys/block/by-name/userdata/queue/discard_max_bytes 134217728
+    write /dev/sys/block/by-name/rootdisk/queue/discard_max_bytes 134217728
 
     # Permissions for System Server and daemons.
     chown system system /sys/power/autosleep