Merge "Support batching ops across Add*Blocks() call" into main

commit: 26cb9dbfefe3b954b19b6fd046a8ea4cae32f517 [log] [tgz]
author: Treehugger Robot <android-test-infra-autosubmit@system.gserviceaccount.com> Wed Dec 20 22:49:38 2023 +0000
committer: Gerrit Code Review <noreply-gerritcodereview@google.com> Wed Dec 20 22:49:38 2023 +0000
tree: 9735e85a19bc2f53c63b5ba25e178a0cdf783598
parent: c4b984045639beb91eeb7eee98e123463ca955f5 [diff]
parent: a008c9c1a4ff5f8666fefb57d9ced1c60b81a1a5 [diff]
diff --git a/fastboot/fastboot.cpp b/fastboot/fastboot.cpp
index 1bc7b75..235d723 100644
--- a/fastboot/fastboot.cpp
+++ b/fastboot/fastboot.cpp

@@ -1675,7 +1675,7 @@
     }
     for (size_t i = 0; i < tasks->size(); i++) {
         if (auto flash_task = tasks->at(i)->AsFlashTask()) {
-            if (FlashTask::IsDynamicParitition(fp->source.get(), flash_task)) {
+            if (FlashTask::IsDynamicPartition(fp->source.get(), flash_task)) {
                 if (!loc) {
                     loc = i;
                 }

diff --git a/fastboot/task.cpp b/fastboot/task.cpp
index 0947ff9..ea78a01 100644
--- a/fastboot/task.cpp
+++ b/fastboot/task.cpp

@@ -30,7 +30,7 @@
                      const bool apply_vbmeta, const FlashingPlan* fp)
     : pname_(pname), fname_(fname), slot_(slot), apply_vbmeta_(apply_vbmeta), fp_(fp) {}
 
-bool FlashTask::IsDynamicParitition(const ImageSource* source, const FlashTask* task) {
+bool FlashTask::IsDynamicPartition(const ImageSource* source, const FlashTask* task) {
     std::vector<char> contents;
     if (!source->ReadFile("super_empty.img", &contents)) {
         return false;
@@ -152,7 +152,7 @@
             continue;
         }
         auto flash_task = tasks[i + 2]->AsFlashTask();
-        if (!FlashTask::IsDynamicParitition(source, flash_task)) {
+        if (!FlashTask::IsDynamicPartition(source, flash_task)) {
             continue;
         }
         return true;

diff --git a/fastboot/task.h b/fastboot/task.h
index a98c874..7a713cf 100644
--- a/fastboot/task.h
+++ b/fastboot/task.h

@@ -52,7 +52,7 @@
               const bool apply_vbmeta, const FlashingPlan* fp);
     virtual FlashTask* AsFlashTask() override { return this; }
 
-    static bool IsDynamicParitition(const ImageSource* source, const FlashTask* task);
+    static bool IsDynamicPartition(const ImageSource* source, const FlashTask* task);
     void Run() override;
     std::string ToString() const override;
     std::string GetPartition() const { return pname_; }

diff --git a/fastboot/task_test.cpp b/fastboot/task_test.cpp
index 81154c6..519d4ed 100644
--- a/fastboot/task_test.cpp
+++ b/fastboot/task_test.cpp

@@ -233,7 +233,7 @@
             << "size of fastboot-info task list: " << fastboot_info_tasks.size()
             << " size of hardcoded task list: " << hardcoded_tasks.size();
 }
-TEST_F(ParseTest, IsDynamicParitiontest) {
+TEST_F(ParseTest, IsDynamicPartitiontest) {
     if (!get_android_product_out()) {
         GTEST_SKIP();
     }
@@ -258,7 +258,7 @@
                 ParseFastbootInfoLine(fp.get(), android::base::Tokenize(test.first, " "));
         auto flash_task = task->AsFlashTask();
         ASSERT_FALSE(flash_task == nullptr);
-        ASSERT_EQ(FlashTask::IsDynamicParitition(fp->source.get(), flash_task), test.second);
+        ASSERT_EQ(FlashTask::IsDynamicPartition(fp->source.get(), flash_task), test.second);
     }
 }
 
@@ -358,7 +358,7 @@
                     contains_optimized_task = true;
                 }
                 if (auto flash_task = task->AsFlashTask()) {
-                    if (FlashTask::IsDynamicParitition(fp->source.get(), flash_task)) {
+                    if (FlashTask::IsDynamicPartition(fp->source.get(), flash_task)) {
                         return false;
                     }
                 }

diff --git a/fs_mgr/fs_mgr_overlayfs_control.cpp b/fs_mgr/fs_mgr_overlayfs_control.cpp
index 06214ef..08ad80c 100644
--- a/fs_mgr/fs_mgr_overlayfs_control.cpp
+++ b/fs_mgr/fs_mgr_overlayfs_control.cpp

@@ -219,6 +219,35 @@
     return OverlayfsTeardownResult::Ok;
 }
 
+bool GetOverlaysActiveFlag() {
+    auto slot_number = fs_mgr_overlayfs_slot_number();
+    const auto super_device = kPhysicalDevice + fs_mgr_get_super_partition_name();
+
+    auto metadata = ReadMetadata(super_device, slot_number);
+    if (!metadata) {
+        return false;
+    }
+    return !!(metadata->header.flags & LP_HEADER_FLAG_OVERLAYS_ACTIVE);
+}
+
+bool SetOverlaysActiveFlag(bool flag) {
+    // Mark overlays as active in the partition table, to detect re-flash.
+    auto slot_number = fs_mgr_overlayfs_slot_number();
+    const auto super_device = kPhysicalDevice + fs_mgr_get_super_partition_name();
+    auto builder = MetadataBuilder::New(super_device, slot_number);
+    if (!builder) {
+        LERROR << "open " << super_device << " metadata";
+        return false;
+    }
+    builder->SetOverlaysActiveFlag(flag);
+    auto metadata = builder->Export();
+    if (!metadata || !UpdatePartitionTable(super_device, *metadata.get(), slot_number)) {
+        LERROR << "update super metadata";
+        return false;
+    }
+    return true;
+}
+
 OverlayfsTeardownResult fs_mgr_overlayfs_teardown_scratch(const std::string& overlay,
                                                           bool* change) {
     // umount and delete kScratchMountPoint storage if we have logical partitions
@@ -232,6 +261,10 @@
         return OverlayfsTeardownResult::Error;
     }
 
+    // Note: we don't care if SetOverlaysActiveFlag fails, since
+    // the overlays are removed no matter what.
+    SetOverlaysActiveFlag(false);
+
     bool was_mounted = fs_mgr_overlayfs_already_mounted(kScratchMountPoint, false);
     if (was_mounted) {
         fs_mgr_overlayfs_umount_scratch();
@@ -448,6 +481,7 @@
             }
         }
     }
+
     // land the update back on to the partition
     if (changed) {
         auto metadata = builder->Export();
@@ -592,6 +626,12 @@
         return false;
     }
 
+    if (!SetOverlaysActiveFlag(true)) {
+        LOG(ERROR) << "Failed to update dynamic partition data";
+        fs_mgr_overlayfs_teardown_scratch(kScratchMountPoint, nullptr);
+        return false;
+    }
+
     // If the partition exists, assume first that it can be mounted.
     if (partition_exists) {
         if (MountScratch(scratch_device)) {
@@ -856,6 +896,9 @@
         return;
     }
 
+    if (!GetOverlaysActiveFlag()) {
+        return;
+    }
     if (ScratchIsOnData()) {
         if (auto images = IImageManager::Open("remount", 0ms)) {
             images->MapAllImages(init);
@@ -879,6 +922,9 @@
     }
     if (auto images = IImageManager::Open("remount", 0ms)) {
         images->RemoveDisabledImages();
+        if (!GetOverlaysActiveFlag()) {
+            fs_mgr_overlayfs_teardown_scratch(kScratchMountPoint, nullptr);
+        }
     }
 }
 

diff --git a/fs_mgr/libfstab/fstab.cpp b/fs_mgr/libfstab/fstab.cpp
index 32460b1..443017a 100644
--- a/fs_mgr/libfstab/fstab.cpp
+++ b/fs_mgr/libfstab/fstab.cpp

@@ -716,7 +716,7 @@
     if (!ReadFstabFromFileCommon(path, fstab)) {
         return false;
     }
-    if (path != kProcMountsPath) {
+    if (path != kProcMountsPath && !InRecovery()) {
         if (!access(android::gsi::kGsiBootedIndicatorFile, F_OK)) {
             std::string dsu_slot;
             if (!android::gsi::GetActiveDsu(&dsu_slot)) {

diff --git a/fs_mgr/liblp/builder.cpp b/fs_mgr/liblp/builder.cpp
index 6cb2c51..4e6e97b 100644
--- a/fs_mgr/liblp/builder.cpp
+++ b/fs_mgr/liblp/builder.cpp

@@ -1211,6 +1211,15 @@
     header_.flags |= LP_HEADER_FLAG_VIRTUAL_AB_DEVICE;
 }
 
+void MetadataBuilder::SetOverlaysActiveFlag(bool flag) {
+    RequireExpandedMetadataHeader();
+    if (flag) {
+        header_.flags |= LP_HEADER_FLAG_OVERLAYS_ACTIVE;
+    } else {
+        header_.flags &= ~LP_HEADER_FLAG_OVERLAYS_ACTIVE;
+    }
+}
+
 bool MetadataBuilder::IsABDevice() {
     return !IPropertyFetcher::GetInstance()->GetProperty("ro.boot.slot_suffix", "").empty();
 }

diff --git a/fs_mgr/liblp/include/liblp/builder.h b/fs_mgr/liblp/include/liblp/builder.h
index 54f31bc..957b96b 100644
--- a/fs_mgr/liblp/include/liblp/builder.h
+++ b/fs_mgr/liblp/include/liblp/builder.h

@@ -346,6 +346,8 @@
     void SetAutoSlotSuffixing();
     // Set the LP_HEADER_FLAG_VIRTUAL_AB_DEVICE flag.
     void SetVirtualABDeviceFlag();
+    // Set or unset the LP_HEADER_FLAG_OVERLAYS_ACTIVE flag.
+    void SetOverlaysActiveFlag(bool flag);
 
     bool GetBlockDeviceInfo(const std::string& partition_name, BlockDeviceInfo* info) const;
     bool UpdateBlockDeviceInfo(const std::string& partition_name, const BlockDeviceInfo& info);

diff --git a/fs_mgr/liblp/include/liblp/metadata_format.h b/fs_mgr/liblp/include/liblp/metadata_format.h
index 41d8b0c..8d77097 100644
--- a/fs_mgr/liblp/include/liblp/metadata_format.h
+++ b/fs_mgr/liblp/include/liblp/metadata_format.h

@@ -240,6 +240,9 @@
  */
 #define LP_HEADER_FLAG_VIRTUAL_AB_DEVICE 0x1
 
+/* This device has overlays activated via "adb remount". */
+#define LP_HEADER_FLAG_OVERLAYS_ACTIVE 0x2
+
 /* This struct defines a logical partition entry, similar to what would be
  * present in a GUID Partition Table.
  */

diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp
index c0c3eaf..e538d50 100644
--- a/fs_mgr/libsnapshot/snapshot_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_test.cpp

@@ -2864,15 +2864,23 @@
 }
 
 void KillSnapuserd() {
-    auto status = android::base::GetProperty("init.svc.snapuserd", "stopped");
-    if (status == "stopped") {
-        return;
+    // Detach the daemon if it's alive
+    auto snapuserd_client = SnapuserdClient::TryConnect(kSnapuserdSocket, 5s);
+    if (snapuserd_client) {
+        snapuserd_client->DetachSnapuserd();
     }
-    auto snapuserd_client = SnapuserdClient::Connect(kSnapuserdSocket, 5s);
-    if (!snapuserd_client) {
-        return;
+
+    // Now stop the service - Init will send a SIGKILL to the daemon. However,
+    // process state will move from "running" to "stopping". Only after the
+    // process is reaped by init, the service state is moved to "stopped".
+    //
+    // Since the tests involve starting the daemon immediately, wait for the
+    // process to completely stop (aka. wait until init reaps the terminated
+    // process).
+    android::base::SetProperty("ctl.stop", "snapuserd");
+    if (!android::base::WaitForProperty("init.svc.snapuserd", "stopped", 10s)) {
+        LOG(ERROR) << "Timed out waiting for snapuserd to stop.";
     }
-    snapuserd_client->DetachSnapuserd();
 }
 
 }  // namespace snapshot

diff --git a/libprocessgroup/include/processgroup/processgroup.h b/libprocessgroup/include/processgroup/processgroup.h
index 9107838..ca6868c 100644
--- a/libprocessgroup/include/processgroup/processgroup.h
+++ b/libprocessgroup/include/processgroup/processgroup.h

@@ -65,9 +65,8 @@
 // should be active again. E.g. Zygote specialization for child process.
 void DropTaskProfilesResourceCaching();
 
-// Return 0 and removes the cgroup if there are no longer any processes in it.
-// Returns -1 in the case of an error occurring or if there are processes still running
-// even after retrying for up to 200ms.
+// Return 0 if all processes were killed and the cgroup was successfully removed.
+// Returns -1 in the case of an error occurring or if there are processes still running.
 int killProcessGroup(uid_t uid, int initialPid, int signal);
 
 // Returns the same as killProcessGroup(), however it does not retry, which means
@@ -76,8 +75,9 @@
 
 // Sends the provided signal to all members of a process group, but does not wait for processes to
 // exit, or for the cgroup to be removed. Callers should also ensure that killProcessGroup is called
-// later to ensure the cgroup is fully removed, otherwise system resources may leak.
-int sendSignalToProcessGroup(uid_t uid, int initialPid, int signal);
+// later to ensure the cgroup is fully removed, otherwise system resources will leak.
+// Returns true if no errors are encountered sending signals, otherwise false.
+bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal);
 
 int createProcessGroup(uid_t uid, int initialPid, bool memControl = false);
 

diff --git a/libprocessgroup/processgroup.cpp b/libprocessgroup/processgroup.cpp
index f594f7f..3209adf 100644
--- a/libprocessgroup/processgroup.cpp
+++ b/libprocessgroup/processgroup.cpp

@@ -22,6 +22,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <inttypes.h>
+#include <poll.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -30,6 +31,7 @@
 #include <unistd.h>
 
 #include <chrono>
+#include <cstring>
 #include <map>
 #include <memory>
 #include <mutex>
@@ -53,7 +55,9 @@
 
 using namespace std::chrono_literals;
 
-#define PROCESSGROUP_CGROUP_PROCS_FILE "/cgroup.procs"
+#define PROCESSGROUP_CGROUP_PROCS_FILE "cgroup.procs"
+#define PROCESSGROUP_CGROUP_KILL_FILE "cgroup.kill"
+#define PROCESSGROUP_CGROUP_EVENTS_FILE "cgroup.events"
 
 bool CgroupsAvailable() {
     static bool cgroups_available = access("/proc/cgroups", F_OK) == 0;
@@ -74,6 +78,29 @@
     return true;
 }
 
+static std::string ConvertUidToPath(const char* cgroup, uid_t uid) {
+    return StringPrintf("%s/uid_%u", cgroup, uid);
+}
+
+static std::string ConvertUidPidToPath(const char* cgroup, uid_t uid, int pid) {
+    return StringPrintf("%s/uid_%u/pid_%d", cgroup, uid, pid);
+}
+
+static bool CgroupKillAvailable() {
+    static std::once_flag f;
+    static bool cgroup_kill_available = false;
+    std::call_once(f, []() {
+        std::string cg_kill;
+        CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &cg_kill);
+        // cgroup.kill is not on the root cgroup, so check a non-root cgroup that should always
+        // exist
+        cg_kill = ConvertUidToPath(cg_kill.c_str(), AID_ROOT) + '/' + PROCESSGROUP_CGROUP_KILL_FILE;
+        cgroup_kill_available = access(cg_kill.c_str(), F_OK) == 0;
+    });
+
+    return cgroup_kill_available;
+}
+
 static bool CgroupGetMemcgAppsPath(std::string* path) {
     CgroupController controller = CgroupMap::GetInstance().FindController("memory");
 
@@ -205,38 +232,21 @@
                                                        false);
 }
 
-static std::string ConvertUidToPath(const char* cgroup, uid_t uid) {
-    return StringPrintf("%s/uid_%u", cgroup, uid);
-}
-
-static std::string ConvertUidPidToPath(const char* cgroup, uid_t uid, int pid) {
-    return StringPrintf("%s/uid_%u/pid_%d", cgroup, uid, pid);
-}
-
-static int RemoveCgroup(const char* cgroup, uid_t uid, int pid, unsigned int retries) {
-    int ret = 0;
-    auto uid_pid_path = ConvertUidPidToPath(cgroup, uid, pid);
-
-    while (retries--) {
-        ret = rmdir(uid_pid_path.c_str());
-        // If we get an error 2 'No such file or directory' , that means the
-        // cgroup is already removed, treat it as success and return 0 for
-        // idempotency.
-        if (ret < 0 && errno == ENOENT) {
-            ret = 0;
-        }
-        if (!ret || errno != EBUSY || !retries) break;
-        std::this_thread::sleep_for(5ms);
-    }
+static int RemoveCgroup(const char* cgroup, uid_t uid, int pid) {
+    auto path = ConvertUidPidToPath(cgroup, uid, pid);
+    int ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
 
     if (!ret && uid >= AID_ISOLATED_START && uid <= AID_ISOLATED_END) {
         // Isolated UIDs are unlikely to be reused soon after removal,
         // so free up the kernel resources for the UID level cgroup.
-        const auto uid_path = ConvertUidToPath(cgroup, uid);
-        ret = rmdir(uid_path.c_str());
-        if (ret < 0 && errno == ENOENT) {
-            ret = 0;
-        }
+        path = ConvertUidToPath(cgroup, uid);
+        ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
+    }
+
+    if (ret < 0 && errno == ENOENT) {
+        // This function is idempoetent, but still warn here.
+        LOG(WARNING) << "RemoveCgroup: " << path << " does not exist.";
+        ret = 0;
     }
 
     return ret;
@@ -360,38 +370,55 @@
     return false;
 }
 
-// Returns number of processes killed on success
-// Returns 0 if there are no processes in the process cgroup left to kill
-// Returns -1 on error
-static int DoKillProcessGroupOnce(const char* cgroup, uid_t uid, int initialPid, int signal) {
-    // We separate all of the pids in the cgroup into those pids that are also the leaders of
-    // process groups (stored in the pgids set) and those that are not (stored in the pids set).
-    std::set<pid_t> pgids;
-    pgids.emplace(initialPid);
-    std::set<pid_t> pids;
-    int processes = 0;
-
-    std::unique_ptr<FILE, decltype(&fclose)> fd(nullptr, fclose);
+bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
+    std::set<pid_t> pgids, pids;
 
     if (CgroupsAvailable()) {
-        auto path = ConvertUidPidToPath(cgroup, uid, initialPid) + PROCESSGROUP_CGROUP_PROCS_FILE;
-        fd.reset(fopen(path.c_str(), "re"));
-        if (!fd) {
-            if (errno == ENOENT) {
-                // This happens when the process is already dead or if, as the result of a bug, it
-                // has been migrated to another cgroup. An example of a bug that can cause migration
-                // to another cgroup is using the JoinCgroup action with a cgroup controller that
-                // has been activated in the v2 cgroup hierarchy.
-                goto kill;
+        std::string hierarchy_root_path, cgroup_v2_path;
+        CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
+        cgroup_v2_path = ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);
+
+        if (signal == SIGKILL && CgroupKillAvailable()) {
+            LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_KILL_FILE << " to SIGKILL "
+                         << cgroup_v2_path;
+
+            // We need to kill the process group in addition to the cgroup. For normal apps they
+            // should completely overlap, but system_server kills depend on process group kills to
+            // take down apps which are in their own cgroups and not individually targeted.
+            if (kill(-initialPid, signal) == -1 && errno != ESRCH) {
+                PLOG(WARNING) << "kill(" << -initialPid << ", " << signal << ") failed";
             }
-            PLOG(WARNING) << __func__ << " failed to open process cgroup uid " << uid << " pid "
-                          << initialPid;
-            return -1;
+
+            const std::string killfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_KILL_FILE;
+            if (WriteStringToFile("1", killfilepath)) {
+                return true;
+            } else {
+                PLOG(ERROR) << "Failed to write 1 to " << killfilepath;
+                // Fallback to cgroup.procs below
+            }
         }
+
+        // Since cgroup.kill only sends SIGKILLs, we read cgroup.procs to find each process to
+        // signal individually. This is more costly than using cgroup.kill for SIGKILLs.
+        LOG(VERBOSE) << "Using " << PROCESSGROUP_CGROUP_PROCS_FILE << " to signal (" << signal
+                     << ") " << cgroup_v2_path;
+
+        // We separate all of the pids in the cgroup into those pids that are also the leaders of
+        // process groups (stored in the pgids set) and those that are not (stored in the pids set).
+        const auto procsfilepath = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
+        std::unique_ptr<FILE, decltype(&fclose)> fp(fopen(procsfilepath.c_str(), "re"), fclose);
+        if (!fp) {
+            // This should only happen if the cgroup has already been removed with a successful call
+            // to killProcessGroup. Callers should only retry sendSignalToProcessGroup or
+            // killProcessGroup calls if they fail without ENOENT.
+            PLOG(ERROR) << "Failed to open " << procsfilepath;
+            kill(-initialPid, signal);
+            return false;
+        }
+
         pid_t pid;
         bool file_is_empty = true;
-        while (fscanf(fd.get(), "%d\n", &pid) == 1 && pid >= 0) {
-            processes++;
+        while (fscanf(fp.get(), "%d\n", &pid) == 1 && pid >= 0) {
             file_is_empty = false;
             if (pid == 0) {
                 // Should never happen...  but if it does, trying to kill this
@@ -421,7 +448,8 @@
         }
     }
 
-kill:
+    pgids.emplace(initialPid);
+
     // Kill all process groups.
     for (const auto pgid : pgids) {
         LOG(VERBOSE) << "Killing process group " << -pgid << " in uid " << uid
@@ -442,101 +470,174 @@
         }
     }
 
-    return (!fd || feof(fd.get())) ? processes : -1;
+    return true;
 }
 
-static int KillProcessGroup(uid_t uid, int initialPid, int signal, int retries) {
+template <typename T>
+static std::chrono::milliseconds toMillisec(T&& duration) {
+    return std::chrono::duration_cast<std::chrono::milliseconds>(duration);
+}
+
+enum class populated_status
+{
+    populated,
+    not_populated,
+    error
+};
+
+static populated_status cgroupIsPopulated(int events_fd) {
+    const std::string POPULATED_KEY("populated ");
+    const std::string::size_type MAX_EVENTS_FILE_SIZE = 32;
+
+    std::string buf;
+    buf.resize(MAX_EVENTS_FILE_SIZE);
+    ssize_t len = TEMP_FAILURE_RETRY(pread(events_fd, buf.data(), buf.size(), 0));
+    if (len == -1) {
+        PLOG(ERROR) << "Could not read cgroup.events: ";
+        // Potentially ENODEV if the cgroup has been removed since we opened this file, but that
+        // shouldn't have happened yet.
+        return populated_status::error;
+    }
+
+    if (len == 0) {
+        LOG(ERROR) << "cgroup.events EOF";
+        return populated_status::error;
+    }
+
+    buf.resize(len);
+
+    const std::string::size_type pos = buf.find(POPULATED_KEY);
+    if (pos == std::string::npos) {
+        LOG(ERROR) << "Could not find populated key in cgroup.events";
+        return populated_status::error;
+    }
+
+    if (pos + POPULATED_KEY.size() + 1 > len) {
+        LOG(ERROR) << "Partial read of cgroup.events";
+        return populated_status::error;
+    }
+
+    return buf[pos + POPULATED_KEY.size()] == '1' ?
+        populated_status::populated : populated_status::not_populated;
+}
+
+// The default timeout of 2200ms comes from the default number of retries in a previous
+// implementation of this function. The default retry value was 40 for killing and 400 for cgroup
+// removal with 5ms sleeps between each retry.
+static int KillProcessGroup(
+        uid_t uid, int initialPid, int signal, bool once = false,
+        std::chrono::steady_clock::time_point until = std::chrono::steady_clock::now() + 2200ms) {
     CHECK_GE(uid, 0);
     CHECK_GT(initialPid, 0);
 
+    // Always attempt to send a kill signal to at least the initialPid, at least once, regardless of
+    // whether its cgroup exists or not. This should only be necessary if a bug results in the
+    // migration of the targeted process out of its cgroup, which we will also attempt to kill.
+    const bool signal_ret = sendSignalToProcessGroup(uid, initialPid, signal);
+
+    if (!CgroupsAvailable() || !signal_ret) return signal_ret ? 0 : -1;
+
     std::string hierarchy_root_path;
-    if (CgroupsAvailable()) {
-        CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
-    }
-    const char* cgroup = hierarchy_root_path.c_str();
+    CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
 
-    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
+    const std::string cgroup_v2_path =
+            ConvertUidPidToPath(hierarchy_root_path.c_str(), uid, initialPid);
 
-    int retry = retries;
-    int processes;
-    while ((processes = DoKillProcessGroupOnce(cgroup, uid, initialPid, signal)) > 0) {
-        LOG(VERBOSE) << "Killed " << processes << " processes for processgroup " << initialPid;
-        if (!CgroupsAvailable()) {
-            // makes no sense to retry, because there are no cgroup_procs file
-            processes = 0;  // no remaining processes
-            break;
-        }
-        if (retry > 0) {
-            std::this_thread::sleep_for(5ms);
-            --retry;
-        } else {
-            break;
-        }
-    }
-
-    if (processes < 0) {
-        PLOG(ERROR) << "Error encountered killing process cgroup uid " << uid << " pid "
-                    << initialPid;
+    const std::string eventsfile = cgroup_v2_path + '/' + PROCESSGROUP_CGROUP_EVENTS_FILE;
+    android::base::unique_fd events_fd(open(eventsfile.c_str(), O_RDONLY));
+    if (events_fd.get() == -1) {
+        PLOG(WARNING) << "Error opening " << eventsfile << " for KillProcessGroup";
         return -1;
     }
 
-    std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
-    auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
+    struct pollfd fds = {
+        .fd = events_fd,
+        .events = POLLPRI,
+    };
 
-    // We only calculate the number of 'processes' when killing the processes.
-    // In the retries == 0 case, we only kill the processes once and therefore
-    // will not have waited then recalculated how many processes are remaining
-    // after the first signals have been sent.
-    // Logging anything regarding the number of 'processes' here does not make sense.
+    const std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
 
-    if (processes == 0) {
-        if (retries > 0) {
-            LOG(INFO) << "Successfully killed process cgroup uid " << uid << " pid " << initialPid
-                      << " in " << static_cast<int>(ms) << "ms";
+    // The primary reason to loop here is to capture any new forks or migrations that could occur
+    // after we send signals to the original set of processes, but before all of those processes
+    // exit and the cgroup becomes unpopulated, or before we remove the cgroup. We try hard to
+    // ensure this completes successfully to avoid permanent memory leaks, but we still place a
+    // large default upper bound on the amount of time we spend in this loop. The amount of CPU
+    // contention, and the amount of work that needs to be done in do_exit for each process
+    // determines how long this will take.
+    int ret;
+    do {
+        populated_status populated;
+        while ((populated = cgroupIsPopulated(events_fd.get())) == populated_status::populated &&
+               std::chrono::steady_clock::now() < until) {
+
+            sendSignalToProcessGroup(uid, initialPid, signal);
+            if (once) {
+                populated = cgroupIsPopulated(events_fd.get());
+                break;
+            }
+
+            const std::chrono::steady_clock::time_point poll_start =
+                    std::chrono::steady_clock::now();
+
+            if (poll_start < until)
+                ret = TEMP_FAILURE_RETRY(poll(&fds, 1, toMillisec(until - poll_start).count()));
+
+            if (ret == -1) {
+                // Fallback to 5ms sleeps if poll fails
+                PLOG(ERROR) << "Poll on " << eventsfile << "failed";
+                const std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now();
+                if (now < until)
+                    std::this_thread::sleep_for(std::min(5ms, toMillisec(until - now)));
+            }
+
+            LOG(VERBOSE) << "Waited "
+                         << toMillisec(std::chrono::steady_clock::now() - poll_start).count()
+                         << " ms for " << eventsfile << " poll";
         }
 
-        if (!CgroupsAvailable()) {
-            // nothing to do here, if cgroups isn't available
-            return 0;
+        const std::chrono::milliseconds kill_duration =
+                toMillisec(std::chrono::steady_clock::now() - start);
+
+        if (populated == populated_status::populated) {
+            LOG(WARNING) << "Still waiting on process(es) to exit for cgroup " << cgroup_v2_path
+                         << " after " << kill_duration.count() << " ms";
+            // We'll still try the cgroup removal below which we expect to log an error.
+        } else if (populated == populated_status::not_populated) {
+            LOG(VERBOSE) << "Killed all processes under cgroup " << cgroup_v2_path
+                         << " after " << kill_duration.count() << " ms";
         }
 
-        // 400 retries correspond to 2 secs max timeout
-        int err = RemoveCgroup(cgroup, uid, initialPid, 400);
+        ret = RemoveCgroup(hierarchy_root_path.c_str(), uid, initialPid);
+        if (ret)
+            PLOG(ERROR) << "Unable to remove cgroup " << cgroup_v2_path;
+        else
+            LOG(INFO) << "Removed cgroup " << cgroup_v2_path;
 
         if (isMemoryCgroupSupported() && UsePerAppMemcg()) {
+            // This per-application memcg v1 case should eventually be removed after migration to
+            // memcg v2.
             std::string memcg_apps_path;
             if (CgroupGetMemcgAppsPath(&memcg_apps_path) &&
-                RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid, 400) < 0) {
-                return -1;
+                (ret = RemoveCgroup(memcg_apps_path.c_str(), uid, initialPid)) < 0) {
+                const auto memcg_v1_cgroup_path =
+                        ConvertUidPidToPath(memcg_apps_path.c_str(), uid, initialPid);
+                PLOG(ERROR) << "Unable to remove memcg v1 cgroup " << memcg_v1_cgroup_path;
             }
         }
 
-        return err;
-    } else {
-        if (retries > 0) {
-            LOG(ERROR) << "Failed to kill process cgroup uid " << uid << " pid " << initialPid
-                       << " in " << static_cast<int>(ms) << "ms, " << processes
-                       << " processes remain";
-        }
-        return -1;
-    }
+        if (once) break;
+        if (std::chrono::steady_clock::now() >= until) break;
+    } while (ret && errno == EBUSY);
+
+    return ret;
 }
 
 int killProcessGroup(uid_t uid, int initialPid, int signal) {
-    return KillProcessGroup(uid, initialPid, signal, 40 /*retries*/);
+    return KillProcessGroup(uid, initialPid, signal);
 }
 
 int killProcessGroupOnce(uid_t uid, int initialPid, int signal) {
-    return KillProcessGroup(uid, initialPid, signal, 0 /*retries*/);
-}
-
-int sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
-    std::string hierarchy_root_path;
-    if (CgroupsAvailable()) {
-        CgroupGetControllerPath(CGROUPV2_HIERARCHY_NAME, &hierarchy_root_path);
-    }
-    const char* cgroup = hierarchy_root_path.c_str();
-    return DoKillProcessGroupOnce(cgroup, uid, initialPid, signal);
+    return KillProcessGroup(uid, initialPid, signal, true);
 }
 
 static int createProcessGroupInternal(uid_t uid, int initialPid, std::string cgroup,
@@ -576,7 +677,7 @@
         return -errno;
     }
 
-    auto uid_pid_procs_file = uid_pid_path + PROCESSGROUP_CGROUP_PROCS_FILE;
+    auto uid_pid_procs_file = uid_pid_path + '/' + PROCESSGROUP_CGROUP_PROCS_FILE;
 
     if (!WriteStringToFile(std::to_string(initialPid), uid_pid_procs_file)) {
         ret = -errno;

diff --git a/rootdir/Android.mk b/rootdir/Android.mk
index 7deb173..7444f96 100644
--- a/rootdir/Android.mk
+++ b/rootdir/Android.mk

@@ -97,7 +97,7 @@
 # create some directories (some are mount points) and symlinks
 LOCAL_POST_INSTALL_CMD := mkdir -p $(addprefix $(TARGET_ROOT_OUT)/, \
     dev proc sys system data data_mirror odm oem acct config storage mnt apex bootstrap-apex debug_ramdisk \
-    linkerconfig second_stage_resources postinstall $(BOARD_ROOT_EXTRA_FOLDERS)); \
+    linkerconfig second_stage_resources postinstall tmp $(BOARD_ROOT_EXTRA_FOLDERS)); \
     ln -sf /system/bin $(TARGET_ROOT_OUT)/bin; \
     ln -sf /system/etc $(TARGET_ROOT_OUT)/etc; \
     ln -sf /data/user_de/0/com.android.shell/files/bugreports $(TARGET_ROOT_OUT)/bugreports; \

diff --git a/rootdir/init.rc b/rootdir/init.rc
index fb64736..12c46eb 100644
--- a/rootdir/init.rc
+++ b/rootdir/init.rc

@@ -92,6 +92,12 @@
     # checker programs.
     mkdir /dev/fscklogs 0770 root system
 
+    # Create tmpfs for use by the shell user.
+    mount tmpfs tmpfs /tmp
+    restorecon /tmp
+    chown shell shell /tmp
+    chmod 0771 /tmp
+
 on init
     sysclktz 0
 

diff --git a/trusty/storage/proxy/storage.c b/trusty/storage/proxy/storage.c
index 2299481..8c8edb7 100644
--- a/trusty/storage/proxy/storage.c
+++ b/trusty/storage/proxy/storage.c

@@ -353,7 +353,6 @@
     if (open_flags & O_CREAT) {
         sync_parent(path, watcher);
     }
-    free(path);
 
     /* at this point rc contains storage file fd */
     msg->result = STORAGE_NO_ERROR;
@@ -361,6 +360,9 @@
     ALOGV("%s: \"%s\": fd = %u: handle = %d\n",
           __func__, path, rc, resp.handle);
 
+    free(path);
+    path = NULL;
+
     /* a backing file has been opened, notify any waiting init steps */
     if (!fs_ready_initialized) {
         rc = property_set(FS_READY_PROPERTY, "1");
commit	26cb9dbfefe3b954b19b6fd046a8ea4cae32f517	[log] [tgz]
author	Treehugger Robot <android-test-infra-autosubmit@system.gserviceaccount.com>	Wed Dec 20 22:49:38 2023 +0000
committer	Gerrit Code Review <noreply-gerritcodereview@google.com>	Wed Dec 20 22:49:38 2023 +0000
tree	9735e85a19bc2f53c63b5ba25e178a0cdf783598
parent	c4b984045639beb91eeb7eee98e123463ca955f5 [diff]
parent	a008c9c1a4ff5f8666fefb57d9ced1c60b81a1a5 [diff]