Merge "fs_mgr: IWYU include <algorithm> for std::sort" into main
diff --git a/fs_mgr/fs_mgr.cpp b/fs_mgr/fs_mgr.cpp
index faea5eb..35c8c63 100644
--- a/fs_mgr/fs_mgr.cpp
+++ b/fs_mgr/fs_mgr.cpp
@@ -1388,6 +1388,8 @@
         return {FS_MGR_MNTALL_FAIL, userdata_mounted};
     }
 
+    bool scratch_can_be_mounted = true;
+
     // Keep i int to prevent unsigned integer overflow from (i = top_idx - 1),
     // where top_idx is 0. It will give SIGABRT
     for (int i = 0; i < static_cast<int>(fstab->size()); i++) {
@@ -1520,6 +1522,9 @@
             if (current_entry.mount_point == "/data") {
                 userdata_mounted = true;
             }
+
+            MountOverlayfs(attempted_entry, &scratch_can_be_mounted);
+
             // Success!  Go get the next one.
             continue;
         }
@@ -1604,10 +1609,6 @@
 
     set_type_property(encryptable);
 
-#if ALLOW_ADBD_DISABLE_VERITY == 1  // "userdebug" build
-    fs_mgr_overlayfs_mount_all(fstab);
-#endif
-
     if (error_count) {
         return {FS_MGR_MNTALL_FAIL, userdata_mounted};
     } else {
diff --git a/fs_mgr/fs_mgr_format.cpp b/fs_mgr/fs_mgr_format.cpp
index 622f181..8e76150 100644
--- a/fs_mgr/fs_mgr_format.cpp
+++ b/fs_mgr/fs_mgr_format.cpp
@@ -136,6 +136,7 @@
     /* Format the partition using the calculated length */
 
     const auto size_str = std::to_string(dev_sz / getpagesize());
+    std::string block_size = std::to_string(getpagesize());
 
     std::vector<const char*> args = {"/system/bin/make_f2fs", "-g", "android"};
     if (needs_projid) {
@@ -154,6 +155,10 @@
         args.push_back("-O");
         args.push_back("extra_attr");
     }
+    args.push_back("-w");
+    args.push_back(block_size.c_str());
+    args.push_back("-b");
+    args.push_back(block_size.c_str());
     if (!zoned_device.empty()) {
         args.push_back("-c");
         args.push_back(zoned_device.c_str());
diff --git a/fs_mgr/fs_mgr_overlayfs_control.cpp b/fs_mgr/fs_mgr_overlayfs_control.cpp
index 50d8280..fa2c5fe 100644
--- a/fs_mgr/fs_mgr_overlayfs_control.cpp
+++ b/fs_mgr/fs_mgr_overlayfs_control.cpp
@@ -347,33 +347,6 @@
     return "";
 }
 
-// This returns the scratch device that was detected during early boot (first-
-// stage init). If the device was created later, for example during setup for
-// the adb remount command, it can return an empty string since it does not
-// query ImageManager. (Note that ImageManager in first-stage init will always
-// use device-mapper, since /data is not available to use loop devices.)
-static std::string GetBootScratchDevice() {
-    // Note: fs_mgr_is_dsu_running() always returns false in recovery or fastbootd.
-    if (fs_mgr_is_dsu_running()) {
-        return GetDsuScratchDevice();
-    }
-
-    auto& dm = DeviceMapper::Instance();
-
-    // If there is a scratch partition allocated in /data or on super, we
-    // automatically prioritize that over super_other or system_other.
-    // Some devices, for example, have a write-protected eMMC and the
-    // super partition cannot be used even if it exists.
-    std::string device;
-    auto partition_name = android::base::Basename(kScratchMountPoint);
-    if (dm.GetState(partition_name) != DmDeviceState::INVALID &&
-        dm.GetDmDevicePathByName(partition_name, &device)) {
-        return device;
-    }
-
-    return "";
-}
-
 bool MakeScratchFilesystem(const std::string& scratch_device) {
     // Force mkfs by design for overlay support of adb remount, simplify and
     // thus do not rely on fsck to correct problems that could creep in.
@@ -383,6 +356,8 @@
         fs_type = "f2fs";
         command = kMkF2fs + " -w "s;
         command += std::to_string(getpagesize());
+        command += " -b "s;  // append: plain '=' here would throw away the -w option above
+        command += std::to_string(getpagesize());
         command += " -f -d1 -l" + android::base::Basename(kScratchMountPoint);
     } else if (!access(kMkExt4, X_OK) && fs_mgr_filesystem_available("ext4")) {
         fs_type = "ext4";
@@ -922,6 +897,33 @@
     }
 }
 
+// This returns the scratch device that was detected during early boot (first-
+// stage init). If the device was created later, for example during setup for
+// the adb remount command, it can return an empty string since it does not
+// query ImageManager. (Note that ImageManager in first-stage init will always
+// use device-mapper, since /data is not available to use loop devices.)
+std::string GetBootScratchDevice() {
+    // Note: fs_mgr_is_dsu_running() always returns false in recovery or fastbootd.
+    if (fs_mgr_is_dsu_running()) {
+        return GetDsuScratchDevice();
+    }
+
+    auto& dm = DeviceMapper::Instance();
+
+    // If there is a scratch partition allocated in /data or on super, we
+    // automatically prioritize that over super_other or system_other.
+    // Some devices, for example, have a write-protected eMMC and the
+    // super partition cannot be used even if it exists.
+    std::string device;
+    auto partition_name = android::base::Basename(kScratchMountPoint);
+    if (dm.GetState(partition_name) != DmDeviceState::INVALID &&
+        dm.GetDmDevicePathByName(partition_name, &device)) {
+        return device;
+    }
+
+    return "";
+}
+
 void TeardownAllOverlayForMountPoint(const std::string& mount_point) {
     if (!OverlayfsTeardownAllowed()) {
         return;
diff --git a/fs_mgr/fs_mgr_overlayfs_control.h b/fs_mgr/fs_mgr_overlayfs_control.h
index b175101..3516c46 100644
--- a/fs_mgr/fs_mgr_overlayfs_control.h
+++ b/fs_mgr/fs_mgr_overlayfs_control.h
@@ -38,5 +38,7 @@
 
 void CleanupOldScratchFiles();
 
+std::string GetBootScratchDevice();
+
 }  // namespace fs_mgr
 }  // namespace android
diff --git a/fs_mgr/fs_mgr_overlayfs_mount.cpp b/fs_mgr/fs_mgr_overlayfs_mount.cpp
index ae7ed4c..e168436 100644
--- a/fs_mgr/fs_mgr_overlayfs_mount.cpp
+++ b/fs_mgr/fs_mgr_overlayfs_mount.cpp
@@ -34,6 +34,7 @@
 #include <android-base/file.h>
 #include <android-base/macros.h>
 #include <android-base/properties.h>
+#include <android-base/scopeguard.h>
 #include <android-base/strings.h>
 #include <android-base/unique_fd.h>
 #include <ext4_utils/ext4_utils.h>
@@ -41,15 +42,14 @@
 #include <fs_mgr/file_wait.h>
 #include <fs_mgr_overlayfs.h>
 #include <fstab/fstab.h>
-#include <libdm/dm.h>
 #include <libgsi/libgsi.h>
 #include <storage_literals/storage_literals.h>
 
+#include "fs_mgr_overlayfs_control.h"
 #include "fs_mgr_overlayfs_mount.h"
 #include "fs_mgr_priv.h"
 
 using namespace std::literals;
-using namespace android::dm;
 using namespace android::fs_mgr;
 using namespace android::storage_literals;
 
@@ -58,6 +58,9 @@
 constexpr char kCacheMountPoint[] = "/cache";
 constexpr char kPhysicalDevice[] = "/dev/block/by-name/";
 
+// Mount tree to temporarily hold references to submounts.
+constexpr char kMoveMountTempDir[] = "/dev/remount";
+
 constexpr char kLowerdirOption[] = "lowerdir=";
 constexpr char kUpperdirOption[] = "upperdir=";
 constexpr char kWorkdirOption[] = "workdir=";
@@ -284,10 +287,6 @@
     if (ret) {
         PERROR << "__mount(target=" << mount_point
                << ",flag=" << (shared_flag ? "MS_SHARED" : "MS_PRIVATE") << ")=" << ret;
-        // If "/system" doesn't look like a mountpoint, retry with "/".
-        if (errno == EINVAL && mount_point == "/system") {
-            return fs_mgr_overlayfs_set_shared_mount("/", shared_flag);
-        }
         return false;
     }
     return true;
@@ -302,6 +301,25 @@
     return true;
 }
 
+// Mounts overlayfs read-only on |mount_point| with |options| and logs the result.
+static bool fs_mgr_overlayfs_mount(const std::string& mount_point, const std::string& options) {
+    std::string log_prefix = "__mount(source=overlay,target="s + mount_point + ",type=overlay";
+    for (const auto& option : android::base::Split(options, ",")) {
+        if (!android::base::StartsWith(option, kUpperdirOption)) continue;
+        log_prefix += "," + option;
+        break;
+    }
+    log_prefix += ")=";
+    const auto rc = mount("overlay", mount_point.c_str(), "overlay", MS_RDONLY | MS_NOATIME,
+                          options.c_str());
+    if (rc == 0) {
+        LINFO << log_prefix << rc;
+    } else {
+        PERROR << log_prefix << rc;
+    }
+    return rc == 0;
+}
+
 struct mount_info {
     std::string mount_point;
     bool shared_flag;
@@ -374,24 +392,23 @@
     return info;
 }
 
-static bool fs_mgr_overlayfs_mount(const FstabEntry& entry) {
-    const auto mount_point = fs_mgr_mount_point(entry.mount_point);
-    const auto options = fs_mgr_get_overlayfs_options(entry);
+static bool fs_mgr_overlayfs_mount_one(const FstabEntry& fstab_entry) {
+    const auto mount_point = fs_mgr_mount_point(fstab_entry.mount_point);
+    const auto options = fs_mgr_get_overlayfs_options(fstab_entry);
     if (options.empty()) return false;
 
-    auto retval = true;
-
     struct MoveEntry {
         std::string mount_point;
         std::string dir;
         bool shared_flag;
     };
-
     std::vector<MoveEntry> moved_mounts;
-    auto parent_private = false;
-    auto parent_made_private = false;
-    auto dev_private = false;
-    auto dev_made_private = false;
+
+    bool retval = true;
+    bool move_dir_shared = true;
+    bool parent_shared = true;
+    bool root_shared = true;
+    bool root_made_private = false;
 
     // There could be multiple mount entries with the same mountpoint.
     // Group these entries together with stable_sort, and keep only the last entry of a group.
@@ -411,18 +428,32 @@
     // mountinfo is reversed twice, so still is in lexical sorted order.
 
     for (const auto& entry : mountinfo) {
-        if ((entry.mount_point == mount_point) && !entry.shared_flag) {
-            parent_private = true;
+        if (entry.mount_point == kMoveMountTempDir) {
+            move_dir_shared = entry.shared_flag;
         }
-        if ((entry.mount_point == "/dev") && !entry.shared_flag) {
-            dev_private = true;
+        if (entry.mount_point == mount_point ||
+            (mount_point == "/system" && entry.mount_point == "/")) {
+            parent_shared = entry.shared_flag;
         }
+        if (entry.mount_point == "/") {
+            root_shared = entry.shared_flag;
+        }
+    }
+
+    // Precondition is that kMoveMountTempDir is MS_PRIVATE, otherwise don't try to move any
+    // submount in to or out of it.
+    if (move_dir_shared) {
+        mountinfo.clear();
     }
 
     // Need to make the original mountpoint MS_PRIVATE, so that the overlayfs can be MS_MOVE.
     // This could happen if its parent mount is remounted later.
-    if (!parent_private) {
-        parent_made_private = fs_mgr_overlayfs_set_shared_mount(mount_point, false);
+    if (!fs_mgr_overlayfs_set_shared_mount(mount_point, false)) {
+        // If failed to set "/system" mount type, it might be due to "/system" not being a valid
+        // mountpoint after switch root. Retry with "/" in this case.
+        if (errno == EINVAL && mount_point == "/system") {
+            root_made_private = fs_mgr_overlayfs_set_shared_mount("/", false);
+        }
     }
 
     for (const auto& entry : mountinfo) {
@@ -440,8 +471,8 @@
         // mountinfo is in lexical order, so no need to worry about |entry| being a parent mount of
         // entries of |moved_mounts|.
 
-        // use as the bound directory in /dev.
-        MoveEntry new_entry{entry.mount_point, "/dev/TemporaryDir-XXXXXX", entry.shared_flag};
+        MoveEntry new_entry{entry.mount_point, kMoveMountTempDir + "/TemporaryDir-XXXXXX"s,
+                            entry.shared_flag};
         {
             AutoSetFsCreateCon createcon;
             auto new_context = fs_mgr_get_context(entry.mount_point);
@@ -475,32 +506,10 @@
         moved_mounts.push_back(std::move(new_entry));
     }
 
-    // hijack __mount() report format to help triage
-    auto report = "__mount(source=overlay,target="s + mount_point + ",type=overlay";
-    const auto opt_list = android::base::Split(options, ",");
-    for (const auto& opt : opt_list) {
-        if (android::base::StartsWith(opt, kUpperdirOption)) {
-            report = report + "," + opt;
-            break;
-        }
-    }
-    report = report + ")=";
-
-    auto ret = mount("overlay", mount_point.c_str(), "overlay", MS_RDONLY | MS_NOATIME,
-                     options.c_str());
-    if (ret) {
-        retval = false;
-        PERROR << report << ret;
-    } else {
-        LINFO << report << ret;
-    }
+    retval &= fs_mgr_overlayfs_mount(mount_point, options);
 
     // Move submounts back.
     for (const auto& entry : moved_mounts) {
-        if (!dev_private && !dev_made_private) {
-            dev_made_private = fs_mgr_overlayfs_set_shared_mount("/dev", false);
-        }
-
         if (!fs_mgr_overlayfs_move_mount(entry.dir, entry.mount_point)) {
             retval = false;
         } else if (entry.shared_flag &&
@@ -509,12 +518,13 @@
         }
         rmdir(entry.dir.c_str());
     }
-    if (dev_made_private) {
-        fs_mgr_overlayfs_set_shared_mount("/dev", true);
-    }
-    if (parent_made_private) {
+    // If the original (overridden) mount was MS_SHARED, then set the overlayfs mount to MS_SHARED.
+    if (parent_shared) {
         fs_mgr_overlayfs_set_shared_mount(mount_point, true);
     }
+    if (root_shared && root_made_private) {
+        fs_mgr_overlayfs_set_shared_mount("/", true);
+    }
 
     return retval;
 }
@@ -582,45 +592,6 @@
     return true;
 }
 
-// Note: The scratch partition of DSU is managed by gsid, and should be initialized during
-// first-stage-mount. Just check if the DM device for DSU scratch partition is created or not.
-static std::string GetDsuScratchDevice() {
-    auto& dm = DeviceMapper::Instance();
-    std::string device;
-    if (dm.GetState(android::gsi::kDsuScratch) != DmDeviceState::INVALID &&
-        dm.GetDmDevicePathByName(android::gsi::kDsuScratch, &device)) {
-        return device;
-    }
-    return "";
-}
-
-// This returns the scratch device that was detected during early boot (first-
-// stage init). If the device was created later, for example during setup for
-// the adb remount command, it can return an empty string since it does not
-// query ImageManager. (Note that ImageManager in first-stage init will always
-// use device-mapper, since /data is not available to use loop devices.)
-static std::string GetBootScratchDevice() {
-    // Note: fs_mgr_is_dsu_running() always returns false in recovery or fastbootd.
-    if (fs_mgr_is_dsu_running()) {
-        return GetDsuScratchDevice();
-    }
-
-    auto& dm = DeviceMapper::Instance();
-
-    // If there is a scratch partition allocated in /data or on super, we
-    // automatically prioritize that over super_other or system_other.
-    // Some devices, for example, have a write-protected eMMC and the
-    // super partition cannot be used even if it exists.
-    std::string device;
-    auto partition_name = android::base::Basename(kScratchMountPoint);
-    if (dm.GetState(partition_name) != DmDeviceState::INVALID &&
-        dm.GetDmDevicePathByName(partition_name, &device)) {
-        return device;
-    }
-
-    return "";
-}
-
 // NOTE: OverlayfsSetupAllowed() must be "stricter" than OverlayfsTeardownAllowed().
 // Setup is allowed only if teardown is also allowed.
 bool OverlayfsSetupAllowed(bool verbose) {
@@ -730,6 +701,25 @@
     if (!OverlayfsSetupAllowed()) {
         return false;
     }
+
+    // Ensure kMoveMountTempDir is standalone mount tree with 'private' propagation by bind mounting
+    // to itself and set to MS_PRIVATE.
+    // Otherwise mounts moved in to it would have their propagation type changed unintentionally.
+    // Section 5d, https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
+    if (!fs_mgr_overlayfs_already_mounted(kMoveMountTempDir, false)) {
+        if (mkdir(kMoveMountTempDir, 0755) && errno != EEXIST) {
+            PERROR << "mkdir " << kMoveMountTempDir;
+        }
+        if (mount(kMoveMountTempDir, kMoveMountTempDir, nullptr, MS_BIND, nullptr)) {
+            PERROR << "bind mount " << kMoveMountTempDir;
+        }
+    }
+    fs_mgr_overlayfs_set_shared_mount(kMoveMountTempDir, false);
+    android::base::ScopeGuard umountDir([]() {
+        umount(kMoveMountTempDir);
+        rmdir(kMoveMountTempDir);
+    });
+
     auto ret = true;
     auto scratch_can_be_mounted = !fs_mgr_overlayfs_already_mounted(kScratchMountPoint, false);
     for (const auto& entry : fs_mgr_overlayfs_candidate_list(*fstab)) {
@@ -742,7 +732,7 @@
             scratch_can_be_mounted = false;
             TryMountScratch();
         }
-        ret &= fs_mgr_overlayfs_mount(entry);
+        ret &= fs_mgr_overlayfs_mount_one(entry);
     }
     return ret;
 }
@@ -785,3 +775,38 @@
     }
     return false;
 }
+
+namespace android {
+namespace fs_mgr {
+
+// Mounts the overlayfs override for |fstab_entry| when overlayfs setup is allowed, the entry is
+// an overlay candidate without verity, and its mount point is not already overlaid.
+// |*scratch_can_be_mounted| is reset to false once consumed, so TryMountScratch() runs at most once.
+void MountOverlayfs(const FstabEntry& fstab_entry, bool* scratch_can_be_mounted) {
+    if (!OverlayfsSetupAllowed()) return;
+
+    const auto overlay_candidates = fs_mgr_overlayfs_candidate_list({fstab_entry});
+    if (overlay_candidates.empty()) return;
+
+    const auto& candidate = overlay_candidates.front();
+    if (fs_mgr_is_verity_enabled(candidate)) return;
+
+    const auto target = fs_mgr_mount_point(candidate.mount_point);
+    if (fs_mgr_overlayfs_already_mounted(target)) return;
+
+    if (*scratch_can_be_mounted) {
+        *scratch_can_be_mounted = false;
+        const bool scratch_mounted = fs_mgr_overlayfs_already_mounted(kScratchMountPoint, false);
+        if (!scratch_mounted) {
+            TryMountScratch();
+        }
+    }
+
+    const auto overlay_options = fs_mgr_get_overlayfs_options(candidate);
+    if (!overlay_options.empty()) {
+        fs_mgr_overlayfs_mount(target, overlay_options);
+    }
+}
+
+}  // namespace fs_mgr
+}  // namespace android
diff --git a/fs_mgr/fs_mgr_roots.cpp b/fs_mgr/fs_mgr_roots.cpp
index 2ad8125..a697fb3 100644
--- a/fs_mgr/fs_mgr_roots.cpp
+++ b/fs_mgr/fs_mgr_roots.cpp
@@ -115,8 +115,8 @@
         return true;
     }
 
-    static const std::vector<std::string> supported_fs{"ext4", "squashfs", "vfat", "f2fs", "erofs",
-                                                       "none"};
+    static const std::vector<std::string> supported_fs{"ext4", "squashfs", "vfat", "exfat", "f2fs",
+                                                       "erofs", "none"};
     if (std::find(supported_fs.begin(), supported_fs.end(), rec->fs_type) == supported_fs.end()) {
         LERROR << "unknown fs_type \"" << rec->fs_type << "\" for " << mount_point;
         return false;
diff --git a/fs_mgr/include/fs_mgr_overlayfs.h b/fs_mgr/include/fs_mgr_overlayfs.h
index bdaabbf..bf68b2c 100644
--- a/fs_mgr/include/fs_mgr_overlayfs.h
+++ b/fs_mgr/include/fs_mgr_overlayfs.h
@@ -30,6 +30,9 @@
 namespace android {
 namespace fs_mgr {
 
+// Mount the overlayfs override for |fstab_entry|.
+void MountOverlayfs(const FstabEntry& fstab_entry, bool* scratch_can_be_mounted);
+
 void MapScratchPartitionIfNeeded(Fstab* fstab,
                                  const std::function<bool(const std::set<std::string>&)>& init);
 
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h
index 75467cb..debe87e 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h
@@ -16,6 +16,7 @@
 
 #include <stdint.h>
 
+#include <limits>
 #include <optional>
 #include <string_view>
 
@@ -105,11 +106,11 @@
 static constexpr uint8_t kNumResumePoints = 4;
 
 struct CowHeaderV3 : public CowHeader {
-    // Location of sequence buffer in COW.
-    uint64_t sequence_buffer_offset;
-    // number of currently written resume points
+    // Number of sequence data stored (each of which is a 32-bit integer)
+    uint64_t sequence_data_count;
+    // Number of currently written resume points
     uint32_t resume_point_count;
-    // Size, in bytes, of the CowResumePoint buffer.
+    // Number of max resume points that can be written
     uint32_t resume_point_max;
     // Number of CowOperationV3 structs in the operation buffer, currently and total
     // region size.
@@ -119,10 +120,30 @@
     uint32_t compression_algorithm;
 } __attribute__((packed));
 
+enum class CowOperationType : uint8_t {
+    kCowCopyOp = 1,
+    kCowReplaceOp = 2,
+    kCowZeroOp = 3,
+    kCowLabelOp = 4,
+    kCowClusterOp = 5,
+    kCowXorOp = 6,
+    kCowSequenceOp = 7,
+    kCowFooterOp = std::numeric_limits<uint8_t>::max(),
+};
+
+static constexpr CowOperationType kCowCopyOp = CowOperationType::kCowCopyOp;
+static constexpr CowOperationType kCowReplaceOp = CowOperationType::kCowReplaceOp;
+static constexpr CowOperationType kCowZeroOp = CowOperationType::kCowZeroOp;
+static constexpr CowOperationType kCowLabelOp = CowOperationType::kCowLabelOp;
+static constexpr CowOperationType kCowClusterOp = CowOperationType::kCowClusterOp;
+static constexpr CowOperationType kCowXorOp = CowOperationType::kCowXorOp;
+static constexpr CowOperationType kCowSequenceOp = CowOperationType::kCowSequenceOp;
+static constexpr CowOperationType kCowFooterOp = CowOperationType::kCowFooterOp;
+
 // This structure is the same size of a normal Operation, but is repurposed for the footer.
 struct CowFooterOperation {
     // The operation code (always kCowFooterOp).
-    uint8_t type;
+    CowOperationType type;
 
     // If this operation reads from the data section of the COW, this contains
     // the compression type of that data (see constants below).
@@ -141,7 +162,7 @@
 // V2 version of COW. On disk format for older devices
 struct CowOperationV2 {
     // The operation code (see the constants and structures below).
-    uint8_t type;
+    CowOperationType type;
 
     // If this operation reads from the data section of the COW, this contains
     // the compression type of that data (see constants below).
@@ -176,7 +197,7 @@
 // The on disk format of cow (currently ==  CowOperation)
 struct CowOperationV3 {
     // The operation code (see the constants and structures below).
-    uint8_t type;
+    CowOperationType type;
 
     // If this operation reads from the data section of the COW, this contains
     // the length.
@@ -201,15 +222,6 @@
 
 static_assert(sizeof(CowOperationV2) == sizeof(CowFooterOperation));
 
-static constexpr uint8_t kCowCopyOp = 1;
-static constexpr uint8_t kCowReplaceOp = 2;
-static constexpr uint8_t kCowZeroOp = 3;
-static constexpr uint8_t kCowLabelOp = 4;
-static constexpr uint8_t kCowClusterOp = 5;
-static constexpr uint8_t kCowXorOp = 6;
-static constexpr uint8_t kCowSequenceOp = 7;
-static constexpr uint8_t kCowFooterOp = -1;
-
 enum CowCompressionAlgorithm : uint8_t {
     kCowCompressNone = 0,
     kCowCompressGz = 1,
@@ -232,19 +244,23 @@
     return op.source_info & kCowOpSourceInfoDataMask;
 }
 
-static constexpr off_t GetOpOffset(uint32_t op_index, const CowHeaderV3 header) {
-    return header.prefix.header_size + header.buffer_size +
-           (header.resume_point_max * sizeof(ResumePoint)) + (op_index * sizeof(CowOperationV3));
-}
-static constexpr off_t GetDataOffset(const CowHeaderV3 header) {
-    return header.prefix.header_size + header.buffer_size +
-           (header.resume_point_max * sizeof(ResumePoint)) +
-           header.op_count_max * sizeof(CowOperation);
-}
-static constexpr off_t GetResumeOffset(const CowHeaderV3 header) {
+static constexpr off_t GetSequenceOffset(const CowHeaderV3& header) {
     return header.prefix.header_size + header.buffer_size;
 }
 
+static constexpr off_t GetResumeOffset(const CowHeaderV3& header) {
+    return GetSequenceOffset(header) + (header.sequence_data_count * sizeof(uint32_t));
+}
+
+static constexpr off_t GetOpOffset(uint32_t op_index, const CowHeaderV3& header) {
+    return GetResumeOffset(header) + (header.resume_point_max * sizeof(ResumePoint)) +
+           (op_index * sizeof(CowOperationV3));
+}
+
+static constexpr off_t GetDataOffset(const CowHeaderV3& header) {
+    return GetOpOffset(header.op_count_max, header);
+}
+
 struct CowFooter {
     CowFooterOperation op;
     uint8_t unused[64];
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
index c87b32d..bf4c79f 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
@@ -16,8 +16,6 @@
 
 #include <stdint.h>
 
-#include <deque>
-#include <functional>
 #include <memory>
 #include <optional>
 #include <unordered_map>
@@ -169,6 +167,12 @@
   private:
     bool ParseV2(android::base::borrowed_fd fd, std::optional<uint64_t> label);
     bool PrepMergeOps();
+    // sequence data is stored as an operation with actual data residing in the data offset.
+    bool GetSequenceDataV2(std::vector<uint32_t>* merge_op_blocks, std::vector<int>* other_ops,
+                           std::unordered_map<uint32_t, int>* block_map);
+    // v3 of the cow writes sequence data within its own separate sequence buffer.
+    bool GetSequenceData(std::vector<uint32_t>* merge_op_blocks, std::vector<int>* other_ops,
+                         std::unordered_map<uint32_t, int>* block_map);
     uint64_t FindNumCopyops();
     uint8_t GetCompressionType();
 
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/cow_format.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/cow_format.cpp
index 937065d..4afd026 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/cow_format.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/cow_format.cpp
@@ -30,7 +30,7 @@
 
 using android::base::unique_fd;
 
-std::ostream& EmitCowTypeString(std::ostream& os, uint8_t cow_type) {
+std::ostream& EmitCowTypeString(std::ostream& os, CowOperationType cow_type) {
     switch (cow_type) {
         case kCowCopyOp:
             return os << "kCowCopyOp";
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/cow_reader.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/cow_reader.cpp
index 8412879..7b5370c 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/cow_reader.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/cow_reader.cpp
@@ -24,6 +24,7 @@
 
 #include <android-base/file.h>
 #include <android-base/logging.h>
+#include <libsnapshot/cow_format.h>
 #include <libsnapshot/cow_reader.h>
 #include <zlib.h>
 
@@ -265,52 +266,31 @@
 //                        Replace-op-4, Zero-op-9, Replace-op-5 }
 //==============================================================
 bool CowReader::PrepMergeOps() {
-    auto merge_op_blocks = std::make_unique<std::vector<uint32_t>>();
     std::vector<int> other_ops;
-    auto seq_ops_set = std::unordered_set<uint32_t>();
-    auto block_map = std::make_unique<std::unordered_map<uint32_t, int>>();
-    size_t num_seqs = 0;
-    size_t read;
+    std::vector<uint32_t> merge_op_blocks;
+    std::unordered_map<uint32_t, int> block_map;
 
-    for (size_t i = 0; i < ops_->size(); i++) {
-        auto& current_op = ops_->data()[i];
-
-        if (current_op.type == kCowSequenceOp) {
-            size_t seq_len = current_op.data_length / sizeof(uint32_t);
-
-            merge_op_blocks->resize(merge_op_blocks->size() + seq_len);
-            if (!GetRawBytes(&current_op, &merge_op_blocks->data()[num_seqs],
-                             current_op.data_length, &read)) {
-                PLOG(ERROR) << "Failed to read sequence op!";
-                return false;
-            }
-            for (size_t j = num_seqs; j < num_seqs + seq_len; j++) {
-                seq_ops_set.insert(merge_op_blocks->data()[j]);
-            }
-            num_seqs += seq_len;
-        }
-
-        if (IsMetadataOp(current_op)) {
-            continue;
-        }
-
-        // Sequence ops must be the first ops in the stream.
-        if (seq_ops_set.empty() && IsOrderedOp(current_op)) {
-            merge_op_blocks->emplace_back(current_op.new_block);
-        } else if (seq_ops_set.count(current_op.new_block) == 0) {
-            other_ops.push_back(current_op.new_block);
-        }
-        block_map->insert({current_op.new_block, i});
+    switch (header_.prefix.major_version) {
+        case 1:
+        case 2:
+            if (!GetSequenceDataV2(&merge_op_blocks, &other_ops, &block_map)) return false;
+            break;
+        case 3:
+            if (!GetSequenceData(&merge_op_blocks, &other_ops, &block_map)) return false;
+            break;
+        default:
+            break;
     }
-    for (auto block : *merge_op_blocks) {
-        if (block_map->count(block) == 0) {
+
+    for (auto block : merge_op_blocks) {
+        if (block_map.count(block) == 0) {
             LOG(ERROR) << "Invalid Sequence Ops. Could not find Cow Op for new block " << block;
             return false;
         }
     }
 
-    if (merge_op_blocks->size() > header_.num_merge_ops) {
-        num_ordered_ops_to_merge_ = merge_op_blocks->size() - header_.num_merge_ops;
+    if (merge_op_blocks.size() > header_.num_merge_ops) {
+        num_ordered_ops_to_merge_ = merge_op_blocks.size() - header_.num_merge_ops;
     } else {
         num_ordered_ops_to_merge_ = 0;
     }
@@ -326,9 +306,9 @@
         std::sort(other_ops.begin(), other_ops.end(), std::greater<int>());
     }
 
-    merge_op_blocks->insert(merge_op_blocks->end(), other_ops.begin(), other_ops.end());
+    merge_op_blocks.insert(merge_op_blocks.end(), other_ops.begin(), other_ops.end());
 
-    num_total_data_ops_ = merge_op_blocks->size();
+    num_total_data_ops_ = merge_op_blocks.size();
     if (header_.num_merge_ops > 0) {
         merge_op_start_ = header_.num_merge_ops;
     }
@@ -338,24 +318,94 @@
         // the ops vector as required for merge operations.
         auto merge_ops_buffer = std::make_shared<std::vector<CowOperation>>();
         merge_ops_buffer->reserve(num_total_data_ops_);
-        for (auto block : *merge_op_blocks) {
-            merge_ops_buffer->emplace_back(ops_->data()[block_map->at(block)]);
+        for (auto block : merge_op_blocks) {
+            merge_ops_buffer->emplace_back(ops_->data()[block_map.at(block)]);
         }
         ops_->clear();
         ops_ = merge_ops_buffer;
         ops_->shrink_to_fit();
     } else {
-        for (auto block : *merge_op_blocks) {
-            block_pos_index_->push_back(block_map->at(block));
+        for (auto block : merge_op_blocks) {
+            block_pos_index_->push_back(block_map.at(block));
         }
     }
 
-    block_map->clear();
-    merge_op_blocks->clear();
+    block_map.clear();
+    merge_op_blocks.clear();
 
     return true;
 }
 
+bool CowReader::GetSequenceDataV2(std::vector<uint32_t>* merge_op_blocks,
+                                  std::vector<int>* other_ops,
+                                  std::unordered_map<uint32_t, int>* block_map) {
+    auto seq_ops_set = std::unordered_set<uint32_t>();
+    size_t num_seqs = 0;
+    size_t read;
+    for (size_t i = 0; i < ops_->size(); i++) {
+        auto& current_op = ops_->data()[i];
+
+        if (current_op.type == kCowSequenceOp) {
+            size_t seq_len = current_op.data_length / sizeof(uint32_t);
+
+            merge_op_blocks->resize(merge_op_blocks->size() + seq_len);
+            if (!GetRawBytes(&current_op, &merge_op_blocks->data()[num_seqs],
+                             current_op.data_length, &read)) {
+                PLOG(ERROR) << "Failed to read sequence op!";
+                return false;
+            }
+            for (size_t j = num_seqs; j < num_seqs + seq_len; j++) {
+                seq_ops_set.insert(merge_op_blocks->at(j));
+            }
+            num_seqs += seq_len;
+        }
+
+        if (IsMetadataOp(current_op)) {
+            continue;
+        }
+
+        // Sequence ops must be the first ops in the stream.
+        if (seq_ops_set.empty() && IsOrderedOp(current_op)) {
+            merge_op_blocks->emplace_back(current_op.new_block);
+        } else if (seq_ops_set.count(current_op.new_block) == 0) {
+            other_ops->push_back(current_op.new_block);
+        }
+        block_map->insert({current_op.new_block, i});
+    }
+    return true;  // every op was scanned without a read failure
+}
+
+bool CowReader::GetSequenceData(std::vector<uint32_t>* merge_op_blocks, std::vector<int>* other_ops,
+                                std::unordered_map<uint32_t, int>* block_map) {
+    std::unordered_set<uint32_t> seq_ops_set;
+    // Read header_.sequence_data_count uint32_t entries from the sequence buffer.
+    merge_op_blocks->resize(header_.sequence_data_count);
+    if (!android::base::ReadFullyAtOffset(
+                fd_, merge_op_blocks->data(),
+                header_.sequence_data_count * sizeof(merge_op_blocks->at(0)),
+                GetSequenceOffset(header_))) {
+        PLOG(ERROR) << "failed to read sequence buffer. seq_data_count: "
+                    << header_.sequence_data_count << " at offset: " << GetSequenceOffset(header_);
+        return false;
+    }
+    seq_ops_set.reserve(merge_op_blocks->size());
+    for (auto& i : *merge_op_blocks) {
+        seq_ops_set.insert(i);
+    }
+    // Classify every op: ordered ops (when no sequence data exists) vs. ops outside the sequence.
+    for (size_t i = 0; i < ops_->size(); i++) {
+        auto& current_op = ops_->data()[i];
+        // Without sequence data only ordered ops define the merge order, as in GetSequenceDataV2.
+        if (seq_ops_set.empty() && IsOrderedOp(current_op)) {
+            merge_op_blocks->emplace_back(current_op.new_block);
+        } else if (seq_ops_set.count(current_op.new_block) == 0) {
+            other_ops->push_back(current_op.new_block);
+        }
+        block_map->insert({current_op.new_block, i});
+    }
+    return true;
+}
+
 bool CowReader::VerifyMergeOps() {
     auto itr = GetMergeOpIter(true);
     std::unordered_map<uint64_t, const CowOperation*> overwritten_blocks;
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/test_v3.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/test_v3.cpp
index c41e07c..9ac1448 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/test_v3.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/test_v3.cpp
@@ -482,5 +482,144 @@
     header = reader.header_v3();
     ASSERT_EQ(header.op_count, 15);
 }
+
+TEST_F(CowTestV3, BufferMetadataSyncTest) {
+    CowOptions options;
+    options.op_count_max = 100;
+    auto writer = CreateCowWriter(3, options, GetCowFd());
+    /*
+    Header metadata fields
+    sequence_data_count = 0;
+    resume_point_count = 0;
+    resume_point_max = 4;
+    */
+    ASSERT_TRUE(writer->Finalize());
+    // Parse the file back after each writer step and compare the header fields.
+    CowReader reader;
+    ASSERT_TRUE(reader.Parse(cow_->fd));
+
+    auto header = reader.header_v3();
+    ASSERT_EQ(header.sequence_data_count, 0);
+    ASSERT_EQ(header.resume_point_count, 0);
+    ASSERT_EQ(header.resume_point_max, 4);
+    // A failed AddLabel() is reported here instead of as a confusing count mismatch below.
+    ASSERT_TRUE(writer->AddLabel(0));
+    ASSERT_TRUE(reader.Parse(cow_->fd));
+    header = reader.header_v3();
+    ASSERT_EQ(header.sequence_data_count, 0);
+    ASSERT_EQ(header.resume_point_count, 1);
+    ASSERT_EQ(header.resume_point_max, 4);
+
+    ASSERT_TRUE(reader.Parse(cow_->fd));
+    header = reader.header_v3();
+    // NOTE(review): the values listed below are not asserted anywhere in this test.
+    /*
+    Header metadata fields
+    sequence_data_count = 1;
+    resume_point_count = 0;
+    resume_point_max = 4;
+    */
+}
+
+TEST_F(CowTestV3, SequenceTest) {
+    CowOptions options;
+    options.op_count_max = std::numeric_limits<uint32_t>::max();
+    auto writer = CreateCowWriter(3, options, GetCowFd());
+    // Sequence data: an arbitrary set of integers specifying the merge order. The real values
+    // are computed by update_engine and passed to the writer; here we only check that they are
+    // written correctly. seq_len is the smallest count whose byte size exceeds UINT16_MAX.
+    const int seq_len = std::numeric_limits<uint16_t>::max() / sizeof(uint32_t) + 1;
+    uint32_t sequence[seq_len];
+    for (int i = 0; i < seq_len; i++) {
+        sequence[i] = i + 1;
+    }
+    // The sequence is 1..seq_len; seq_len zero-block ops starting at block 1 are added to match.
+    ASSERT_TRUE(writer->AddSequenceData(seq_len, sequence));
+    ASSERT_TRUE(writer->AddZeroBlocks(1, seq_len));
+    ASSERT_TRUE(writer->Finalize());
+    // Rewind the COW fd before parsing what was just written.
+    ASSERT_EQ(lseek(cow_->fd, 0, SEEK_SET), 0);
+
+    CowReader reader;
+    ASSERT_TRUE(reader.Parse(cow_->fd));
+    auto iter = reader.GetRevMergeOpIter();
+    // The reverse merge iterator must yield new_block = seq_len, seq_len - 1, ..., 1.
+    for (int i = 0; i < seq_len; i++) {
+        ASSERT_TRUE(!iter->AtEnd());
+        const auto& op = iter->Get();
+
+        ASSERT_EQ(op->new_block, seq_len - i);
+
+        iter->Next();
+    }
+    ASSERT_TRUE(iter->AtEnd());
+}
+
+TEST_F(CowTestV3, MissingSeqOp) {
+    CowOptions options;
+    options.op_count_max = std::numeric_limits<uint32_t>::max();
+    auto writer = CreateCowWriter(3, options, GetCowFd());
+    const int seq_len = 10;
+    uint32_t sequence[seq_len];
+    for (int i = 0; i < seq_len; i++) {
+        sequence[i] = i + 1;
+    }
+    ASSERT_TRUE(writer->AddSequenceData(seq_len, sequence));
+    ASSERT_TRUE(writer->AddZeroBlocks(1, seq_len - 1));
+    ASSERT_TRUE(writer->Finalize());
+    // The sequence has seq_len entries but only seq_len - 1 zero blocks were added above.
+    ASSERT_EQ(lseek(cow_->fd, 0, SEEK_SET), 0);
+
+    CowReader reader;
+    ASSERT_FALSE(reader.Parse(cow_->fd));  // Parsing this file is expected to fail.
+}
+
+TEST_F(CowTestV3, ResumeSeqOp) {
+    CowOptions options;
+    options.op_count_max = std::numeric_limits<uint32_t>::max();
+    auto writer = std::make_unique<CowWriterV3>(options, GetCowFd());
+    const int seq_len = 10;
+    uint32_t sequence[seq_len];
+    for (int i = 0; i < seq_len; i++) {
+        sequence[i] = i + 1;
+    }
+    ASSERT_TRUE(writer->Initialize());
+    // First writer: sequence data, half the zero blocks, label 1, then one more zero block.
+    ASSERT_TRUE(writer->AddSequenceData(seq_len, sequence));
+    ASSERT_TRUE(writer->AddZeroBlocks(1, seq_len / 2));
+    ASSERT_TRUE(writer->AddLabel(1));
+    ASSERT_TRUE(writer->AddZeroBlocks(1 + seq_len / 2, 1));
+    // Parse(cow_->fd, 1) on the unfinalized file must give an empty reverse merge iterator.
+    ASSERT_EQ(lseek(cow_->fd, 0, SEEK_SET), 0);
+    auto reader = std::make_unique<CowReader>();
+    ASSERT_TRUE(reader->Parse(cow_->fd, 1));
+    auto itr = reader->GetRevMergeOpIter();
+    ASSERT_TRUE(itr->AtEnd());
+    // Second writer: Initialize({1}) (presumably resuming at label 1), remaining blocks, Finalize.
+    writer = std::make_unique<CowWriterV3>(options, GetCowFd());
+    ASSERT_TRUE(writer->Initialize({1}));
+    ASSERT_TRUE(writer->AddZeroBlocks(1 + seq_len / 2, seq_len / 2));
+    ASSERT_TRUE(writer->Finalize());
+
+    ASSERT_EQ(lseek(cow_->fd, 0, SEEK_SET), 0);
+
+    reader = std::make_unique<CowReader>();
+    ASSERT_TRUE(reader->Parse(cow_->fd));
+
+    auto iter = reader->GetRevMergeOpIter();
+    // All ten ops must now come back in reverse order: new_block 10 down to 1.
+    uint64_t expected_block = 10;
+    while (!iter->AtEnd() && expected_block > 0) {
+        ASSERT_FALSE(iter->AtEnd());
+        const auto& op = iter->Get();
+
+        ASSERT_EQ(op->new_block, expected_block);
+
+        iter->Next();
+        expected_block--;
+    }
+    ASSERT_EQ(expected_block, 0);
+    ASSERT_TRUE(iter->AtEnd());
+}
 }  // namespace snapshot
 }  // namespace android
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.cpp
index 37324c7..f9a4e47 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.cpp
@@ -369,7 +369,7 @@
 }
 
 bool CowWriterV2::EmitBlocks(uint64_t new_block_start, const void* data, size_t size,
-                             uint64_t old_block, uint16_t offset, uint8_t type) {
+                             uint64_t old_block, uint16_t offset, CowOperationType type) {
     CHECK(!merge_in_progress_);
     const uint8_t* iter = reinterpret_cast<const uint8_t*>(data);
 
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.h b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.h
index 24170eb..50e635f 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.h
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.h
@@ -42,7 +42,7 @@
     bool EmitCluster();
     bool EmitClusterIfNeeded();
     bool EmitBlocks(uint64_t new_block_start, const void* data, size_t size, uint64_t old_block,
-                    uint16_t offset, uint8_t type);
+                    uint16_t offset, CowOperationType type);
     void SetupHeaders();
     void SetupWriteOptions();
     bool ParseOptions();
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.cpp
index 6883c5e..767f3d5 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.cpp
@@ -77,7 +77,7 @@
     // v3 specific fields
     // WIP: not quite sure how some of these are calculated yet, assuming buffer_size is determined
     // during COW size estimation
-    header_.sequence_buffer_offset = 0;
+    header_.sequence_data_count = 0;
     header_.resume_point_count = 0;
     header_.resume_point_max = kNumResumePoints;
     header_.op_count = 0;
@@ -100,6 +100,7 @@
         return false;
     }
     header_.compression_algorithm = *algorithm;
+    header_.op_count_max = options_.op_count_max;
 
     if (parts.size() > 1) {
         if (!android::base::ParseUint(parts[1], &compression_.compression_level)) {
@@ -163,7 +164,7 @@
             return false;
         }
     }
-    header_.op_count_max = options_.op_count_max;
+
     resume_points_ = std::make_shared<std::vector<ResumePoint>>();
 
     if (!Sync()) {
@@ -229,7 +230,7 @@
 }
 
 bool CowWriterV3::EmitBlocks(uint64_t new_block_start, const void* data, size_t size,
-                             uint64_t old_block, uint16_t offset, uint8_t type) {
+                             uint64_t old_block, uint16_t offset, CowOperationType type) {
     const size_t num_blocks = (size / header_.block_size);
     for (size_t i = 0; i < num_blocks; i++) {
         const uint8_t* const iter =
@@ -311,13 +312,18 @@
         PLOG(ERROR) << "writing resume buffer failed";
         return false;
     }
-    return Sync();
+    return Finalize();
 }
 
 bool CowWriterV3::EmitSequenceData(size_t num_ops, const uint32_t* data) {
-    LOG(ERROR) << __LINE__ << " " << __FILE__ << " <- function here should never be called";
-    if (num_ops && data) return false;
-    return false;
+    // TODO: size sequence buffer based on options
+    header_.sequence_data_count = num_ops;  // Recorded before the write; kept even if it fails.
+    if (!android::base::WriteFullyAtOffset(fd_, data, sizeof(data[0]) * num_ops,
+                                           GetSequenceOffset(header_))) {
+        PLOG(ERROR) << "writing sequence buffer failed";
+        return false;
+    }
+    return true;
 }
 
 bool CowWriterV3::WriteOperation(const CowOperationV3& op, const void* data, size_t size) {
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.h b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.h
index 3dfc33c..340218f 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.h
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.h
@@ -46,7 +46,7 @@
     bool OpenForAppend(uint64_t label);
     bool WriteOperation(const CowOperationV3& op, const void* data = nullptr, size_t size = 0);
     bool EmitBlocks(uint64_t new_block_start, const void* data, size_t size, uint64_t old_block,
-                    uint16_t offset, uint8_t type);
+                    uint16_t offset, CowOperationType type);
     bool CompressBlocks(size_t num_blocks, const void* data);
 
   private:
diff --git a/fs_mgr/libsnapshot/snapshotctl.cpp b/fs_mgr/libsnapshot/snapshotctl.cpp
index ebaca2d..0396a55 100644
--- a/fs_mgr/libsnapshot/snapshotctl.cpp
+++ b/fs_mgr/libsnapshot/snapshotctl.cpp
@@ -227,8 +227,12 @@
         if (file_offset >= dev_sz) {
             break;
         }
+
+        if (fsync(cfd.get()) < 0) {
+            PLOG(ERROR) << "Fsync failed at offset: " << file_offset << " size: " << to_read;
+            return false;
+        }
     }
-    fsync(cfd.get());
     return true;
 }
 
diff --git a/fs_mgr/libsnapshot/snapuserd/dm-snapshot-merge/snapuserd_worker.cpp b/fs_mgr/libsnapshot/snapuserd/dm-snapshot-merge/snapuserd_worker.cpp
index 571b352..b24844d 100644
--- a/fs_mgr/libsnapshot/snapuserd/dm-snapshot-merge/snapuserd_worker.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/dm-snapshot-merge/snapuserd_worker.cpp
@@ -191,7 +191,8 @@
         }
 
         default: {
-            SNAP_LOG(ERROR) << "Unsupported operation-type found: " << cow_op->type;
+            SNAP_LOG(ERROR) << "Unsupported operation-type found: "
+                            << static_cast<uint8_t>(cow_op->type);
         }
     }
     return false;
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp
index 5cb13e8..906316e 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp
@@ -183,7 +183,8 @@
         }
 
         default: {
-            SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
+            SNAP_LOG(ERROR) << "Unknown operation-type found: "
+                            << static_cast<uint8_t>(cow_op->type);
         }
     }
     return false;
diff --git a/init/service_test.cpp b/init/service_test.cpp
index 87a2ce5..a3590b5 100644
--- a/init/service_test.cpp
+++ b/init/service_test.cpp
@@ -17,18 +17,45 @@
 #include "service.h"
 
 #include <algorithm>
+#include <fstream>
 #include <memory>
 #include <type_traits>
 #include <vector>
 
 #include <gtest/gtest.h>
 
+#include <android-base/file.h>
+#include <android-base/stringprintf.h>
+#include <android-base/strings.h>
+#include <selinux/selinux.h>
+#include <sys/signalfd.h>
 #include "lmkd_service.h"
+#include "reboot.h"
+#include "service.h"
+#include "service_list.h"
+#include "service_parser.h"
 #include "util.h"
 
+using ::android::base::ReadFileToString;
+using ::android::base::StringPrintf;
+using ::android::base::StringReplace;
+using ::android::base::unique_fd;
+using ::android::base::WriteStringToFd;
+using ::android::base::WriteStringToFile;
+
 namespace android {
 namespace init {
 
+static std::string GetSecurityContext() {
+    char* ctx = nullptr;  // Initialized so a failed getcon() cannot leave it indeterminate.
+    if (getcon(&ctx) == -1) {
+        ADD_FAILURE() << "Failed to call getcon : " << strerror(errno);
+    }
+    std::string result{ctx != nullptr ? ctx : ""};  // Empty if no context was obtained.
+    if (ctx != nullptr) freecon(ctx);
+    return result;
+}
+
 TEST(service, pod_initialized) {
     constexpr auto memory_size = sizeof(Service);
     alignas(alignof(Service)) unsigned char old_memory[memory_size];
@@ -190,5 +217,69 @@
     Test_make_temporary_oneshot_service(false, false, false, false, false);
 }
 
+// Looks up the cgroup v2 entry ("0::<path>") in /proc/<pid>/cgroup and returns <path>, which
+// has the format /uid_%d/pid_%d. Returns an empty string if no such entry could be read.
+static std::string CgroupPath(pid_t pid) {
+    static constexpr std::string_view kV2Prefix = "0::";
+    std::ifstream cgroup_file(StringPrintf("/proc/%d/cgroup", pid), std::ios::in);
+    for (std::string entry; std::getline(cgroup_file, entry);) {
+        if (entry.compare(0, kV2Prefix.size(), kV2Prefix) == 0) {
+            return entry.substr(kV2Prefix.size());
+        }
+    }
+    return {};
+}
+
+class ServiceStopTest : public testing::TestWithParam<bool> {};
+
+// Before November 2023, processes that were migrated to another v2 cgroup were ignored by
+// Service::Stop() if their uid_%d/pid_%d cgroup directory got removed. This test, if run with the
+// parameter set to 'true', verifies that such services are stopped.
+TEST_P(ServiceStopTest, stop) {
+    if (getuid() != 0) {
+        GTEST_SKIP() << "Must be run as root.";
+        return;
+    }
+
+    static constexpr std::string_view kServiceName = "ServiceA";
+    static constexpr std::string_view kScriptTemplate = R"init(
+service $name /system/bin/yes
+    user shell
+    group shell
+    seclabel $selabel
+)init";
+    // Substitute the service name and the result of GetSecurityContext() into the template.
+    std::string script = StringReplace(StringReplace(kScriptTemplate, "$name", kServiceName, false),
+                                       "$selabel", GetSecurityContext(), false);
+    ServiceList& service_list = ServiceList::GetInstance();
+    Parser parser;
+    parser.AddSectionParser("service",
+                            std::make_unique<ServiceParser>(&service_list, nullptr, std::nullopt));
+    // Hand the script to the parser through a temporary file.
+    TemporaryFile tf;
+    ASSERT_GE(tf.fd, 0);
+    ASSERT_TRUE(WriteStringToFd(script, tf.fd));
+    ASSERT_TRUE(parser.ParseConfig(tf.path));
+    // Start it; for 'true', migrate the process to the root cgroup and rmdir its old cgroup.
+    Service* const service = ServiceList::GetInstance().FindService(kServiceName);
+    ASSERT_NE(service, nullptr);
+    ASSERT_RESULT_OK(service->Start());
+    ASSERT_TRUE(service->IsRunning());
+    if (GetParam()) {
+        const pid_t pid = service->pid();
+        const std::string cgroup_path = CgroupPath(pid);
+        EXPECT_NE(cgroup_path, "");
+        EXPECT_NE(cgroup_path, "/");
+        const std::string pid_str = std::to_string(pid);
+        EXPECT_TRUE(WriteStringToFile(pid_str, "/sys/fs/cgroup/cgroup.procs"));
+        EXPECT_EQ(CgroupPath(pid), "/");
+        EXPECT_EQ(rmdir(("/sys/fs/cgroup" + cgroup_path).c_str()), 0);
+    }
+    EXPECT_EQ(0, StopServicesAndLogViolations({service->name()}, 10s, /*terminate=*/true));
+    ServiceList::GetInstance().RemoveService(*service);
+}
+// Run once without (false) and once with (true) the cgroup migration step.
+INSTANTIATE_TEST_SUITE_P(service, ServiceStopTest, testing::Values(false, true));
+
 }  // namespace init
 }  // namespace android
diff --git a/init/sigchld_handler.cpp b/init/sigchld_handler.cpp
index 9d4c7c8..8e9e713 100644
--- a/init/sigchld_handler.cpp
+++ b/init/sigchld_handler.cpp
@@ -118,12 +118,27 @@
     return pid;
 }
 
-void ReapAnyOutstandingChildren() {
-    while (ReapOneProcess() != 0) {
+std::set<pid_t> ReapAnyOutstandingChildren() {
+    std::set<pid_t> reaped_pids;  // Every positive pid returned by ReapOneProcess() below.
+    for (;;) {
+        const pid_t pid = ReapOneProcess();
+        if (pid <= 0) {  // Non-positive result ends the loop (presumably nothing left to reap).
+            return reaped_pids;
+        }
+        reaped_pids.emplace(pid);
     }
 }
 
-static void DiscardSiginfo(int signal_fd) {
+static void ReapAndRemove(std::vector<pid_t>& alive_pids) {
+    // Reap outstanding children, then drop each reaped pid (first match) from |alive_pids|.
+    for (const pid_t reaped_pid : ReapAnyOutstandingChildren()) {
+        const auto end = alive_pids.end();
+        const auto match = std::find(alive_pids.begin(), end, reaped_pid);
+        if (match != end) alive_pids.erase(match);
+    }
+}
+
+static void HandleSignal(int signal_fd) {
     signalfd_siginfo siginfo;
     ssize_t bytes_read = TEMP_FAILURE_RETRY(read(signal_fd, &siginfo, sizeof(siginfo)));
     if (bytes_read != sizeof(siginfo)) {
@@ -136,27 +151,36 @@
                     std::chrono::milliseconds timeout) {
     Timer t;
     Epoll epoll;
-    // The init process passes a valid sigchld_fd argument but unit tests do not.
     if (sigchld_fd >= 0) {
-        epoll.RegisterHandler(sigchld_fd, [sigchld_fd]() { DiscardSiginfo(sigchld_fd); });
+        if (auto result = epoll.Open(); result.ok()) {
+            result =
+                    epoll.RegisterHandler(sigchld_fd, [sigchld_fd]() { HandleSignal(sigchld_fd); });
+            if (!result.ok()) {
+                LOG(WARNING) << __func__
+                             << " RegisterHandler() failed. Falling back to sleep_for(): "
+                             << result.error();
+                sigchld_fd = -1;
+            }
+        } else {
+            LOG(WARNING) << __func__ << " Epoll::Open() failed. Falling back to sleep_for(): "
+                         << result.error();
+            sigchld_fd = -1;
+        }
     }
-    std::vector<pid_t> alive_pids(pids.begin(), pids.end());
+    std::vector<pid_t> alive_pids(pids);
+    ReapAndRemove(alive_pids);
     while (!alive_pids.empty() && t.duration() < timeout) {
-        pid_t pid;
-        while ((pid = ReapOneProcess()) != 0) {
-            auto it = std::find(alive_pids.begin(), alive_pids.end(), pid);
-            if (it != alive_pids.end()) {
-                alive_pids.erase(it);
+        if (sigchld_fd >= 0) {
+            auto result = epoll.Wait(std::max(timeout - t.duration(), 0ms));
+            if (result.ok()) {
+                ReapAndRemove(alive_pids);
+                continue;
+            } else {
+                LOG(WARNING) << "Epoll::Wait() failed " << result.error();
             }
         }
-        if (alive_pids.empty()) {
-            break;
-        }
-        if (sigchld_fd >= 0) {
-            epoll.Wait(std::max(timeout - t.duration(), 0ms));
-        } else {
-            std::this_thread::sleep_for(50ms);
-        }
+        std::this_thread::sleep_for(50ms);
+        ReapAndRemove(alive_pids);
     }
     LOG(INFO) << "Waiting for " << pids.size() << " pids to be reaped took " << t << " with "
               << alive_pids.size() << " of them still running";
diff --git a/init/sigchld_handler.h b/init/sigchld_handler.h
index e07a7d6..5351302 100644
--- a/init/sigchld_handler.h
+++ b/init/sigchld_handler.h
@@ -18,12 +18,13 @@
 #define _INIT_SIGCHLD_HANDLER_H_
 
 #include <chrono>
+#include <set>
 #include <vector>
 
 namespace android {
 namespace init {
 
-void ReapAnyOutstandingChildren();
+std::set<pid_t> ReapAnyOutstandingChildren();
 
 void WaitToBeReaped(int sigchld_fd, const std::vector<pid_t>& pids,
                     std::chrono::milliseconds timeout);
diff --git a/init/ueventd.cpp b/init/ueventd.cpp
index 586e2cf..3f0d0e9 100644
--- a/init/ueventd.cpp
+++ b/init/ueventd.cpp
@@ -297,6 +297,10 @@
 }
 
 static UeventdConfiguration GetConfiguration() {
+    if (IsMicrodroid()) {
+        return ParseConfig({"/system/etc/ueventd.rc", "/vendor/etc/ueventd.rc"});
+    }
+
     auto hardware = android::base::GetProperty("ro.hardware", "");
 
     struct LegacyPathInfo {
diff --git a/libprocessgroup/processgroup.cpp b/libprocessgroup/processgroup.cpp
index b4482d0..f594f7f 100644
--- a/libprocessgroup/processgroup.cpp
+++ b/libprocessgroup/processgroup.cpp
@@ -219,6 +219,12 @@
 
     while (retries--) {
         ret = rmdir(uid_pid_path.c_str());
+        // If we get an error 2 'No such file or directory' , that means the
+        // cgroup is already removed, treat it as success and return 0 for
+        // idempotency.
+        if (ret < 0 && errno == ENOENT) {
+            ret = 0;
+        }
         if (!ret || errno != EBUSY || !retries) break;
         std::this_thread::sleep_for(5ms);
     }
@@ -228,6 +234,9 @@
         // so free up the kernel resources for the UID level cgroup.
         const auto uid_path = ConvertUidToPath(cgroup, uid);
         ret = rmdir(uid_path.c_str());
+        if (ret < 0 && errno == ENOENT) {
+            ret = 0;
+        }
     }
 
     return ret;
@@ -369,8 +378,11 @@
         fd.reset(fopen(path.c_str(), "re"));
         if (!fd) {
             if (errno == ENOENT) {
-                // This happens when process is already dead
-                return 0;
+                // This happens when the process is already dead or if, as the result of a bug, it
+                // has been migrated to another cgroup. An example of a bug that can cause migration
+                // to another cgroup is using the JoinCgroup action with a cgroup controller that
+                // has been activated in the v2 cgroup hierarchy.
+                goto kill;
             }
             PLOG(WARNING) << __func__ << " failed to open process cgroup uid " << uid << " pid "
                           << initialPid;
@@ -409,6 +421,7 @@
         }
     }
 
+kill:
     // Kill all process groups.
     for (const auto pgid : pgids) {
         LOG(VERBOSE) << "Killing process group " << -pgid << " in uid " << uid
diff --git a/libprocessgroup/profiles/task_profiles.json b/libprocessgroup/profiles/task_profiles.json
index 2c08b0b..f2ef316 100644
--- a/libprocessgroup/profiles/task_profiles.json
+++ b/libprocessgroup/profiles/task_profiles.json
@@ -91,6 +91,11 @@
       "Name": "CfqWeight",
       "Controller": "io",
       "File": "io.weight"
+    },
+    {
+      "Name": "IoPrioClass",
+      "Controller": "io",
+      "File": "io.prio.class"
     }
   ],
 
@@ -479,6 +484,15 @@
             "Value": "200",
             "Optional": "true"
           }
+        },
+        {
+          "Name": "SetAttribute",
+          "Params":
+          {
+            "Name": "IoPrioClass",
+            "Value": "restrict-to-be",
+            "Optional": "true"
+          }
         }
       ]
     },
@@ -511,6 +525,15 @@
             "Value": "1000",
             "Optional": "true"
           }
+        },
+        {
+          "Name": "SetAttribute",
+          "Params":
+          {
+            "Name": "IoPrioClass",
+            "Value": "restrict-to-be",
+            "Optional": "true"
+          }
         }
       ]
     },
@@ -543,6 +566,15 @@
             "Value": "1000",
             "Optional": "true"
           }
+        },
+        {
+          "Name": "SetAttribute",
+          "Params":
+          {
+            "Name": "IoPrioClass",
+            "Value": "promote-to-rt",
+            "Optional": "true"
+          }
         }
       ]
     },
@@ -575,6 +607,15 @@
             "Value": "1000",
             "Optional": "true"
           }
+        },
+        {
+          "Name": "SetAttribute",
+          "Params":
+          {
+            "Name": "IoPrioClass",
+            "Value": "promote-to-rt",
+            "Optional": "true"
+          }
         }
       ]
     },
diff --git a/libutils/Android.bp b/libutils/Android.bp
index 4d4294b..85a0fd2 100644
--- a/libutils/Android.bp
+++ b/libutils/Android.bp
@@ -203,6 +203,7 @@
     defaults: ["libutils_impl_defaults"],
 
     cflags: [
+        "-DDEBUG_CALLBACKS=1",
         "-DDEBUG_POLL_AND_WAKE=1",
         "-DDEBUG_REFS=1",
         "-DDEBUG_TOKENIZER=1",
diff --git a/libutils/Looper.cpp b/libutils/Looper.cpp
index 402e43c..576c61d 100644
--- a/libutils/Looper.cpp
+++ b/libutils/Looper.cpp
@@ -534,7 +534,7 @@
 
 int Looper::removeSequenceNumberLocked(SequenceNumber seq) {
 #if DEBUG_CALLBACKS
-    ALOGD("%p ~ removeFd - fd=%d, seq=%u", this, fd, seq);
+    ALOGD("%p ~ removeFd - seq=%" PRIu64, this, seq);
 #endif
 
     const auto& request_it = mRequests.find(seq);
diff --git a/rootdir/Android.mk b/rootdir/Android.mk
index cc6b64a..7deb173 100644
--- a/rootdir/Android.mk
+++ b/rootdir/Android.mk
@@ -72,6 +72,11 @@
   endif
 endif
 
+EXPORT_GLOBAL_SCUDO_ALLOCATION_RING_BUFFER_SIZE :=
+ifneq ($(PRODUCT_SCUDO_ALLOCATION_RING_BUFFER_SIZE),)
+  EXPORT_GLOBAL_SCUDO_ALLOCATION_RING_BUFFER_SIZE := export SCUDO_ALLOCATION_RING_BUFFER_SIZE $(PRODUCT_SCUDO_ALLOCATION_RING_BUFFER_SIZE)
+endif
+
 EXPORT_GLOBAL_GCOV_OPTIONS :=
 ifeq ($(NATIVE_COVERAGE),true)
   EXPORT_GLOBAL_GCOV_OPTIONS := export GCOV_PREFIX /data/misc/trace
@@ -216,6 +221,7 @@
 	$(hide) sed -i -e 's?%EXPORT_GLOBAL_GCOV_OPTIONS%?$(EXPORT_GLOBAL_GCOV_OPTIONS)?g' $@
 	$(hide) sed -i -e 's?%EXPORT_GLOBAL_CLANG_COVERAGE_OPTIONS%?$(EXPORT_GLOBAL_CLANG_COVERAGE_OPTIONS)?g' $@
 	$(hide) sed -i -e 's?%EXPORT_GLOBAL_HWASAN_OPTIONS%?$(EXPORT_GLOBAL_HWASAN_OPTIONS)?g' $@
+	$(hide) sed -i -e 's?%EXPORT_GLOBAL_SCUDO_ALLOCATION_RING_BUFFER_SIZE%?$(EXPORT_GLOBAL_SCUDO_ALLOCATION_RING_BUFFER_SIZE)?g' $@
 
 # Append PLATFORM_VNDK_VERSION to base name.
 define append_vndk_version
diff --git a/rootdir/init.environ.rc.in b/rootdir/init.environ.rc.in
index bf6e986..7ba1f46 100644
--- a/rootdir/init.environ.rc.in
+++ b/rootdir/init.environ.rc.in
@@ -14,3 +14,4 @@
     %EXPORT_GLOBAL_GCOV_OPTIONS%
     %EXPORT_GLOBAL_CLANG_COVERAGE_OPTIONS%
     %EXPORT_GLOBAL_HWASAN_OPTIONS%
+    %EXPORT_GLOBAL_SCUDO_ALLOCATION_RING_BUFFER_SIZE%
diff --git a/rootdir/ueventd.rc b/rootdir/ueventd.rc
index 60dcc2a..3927501 100644
--- a/rootdir/ueventd.rc
+++ b/rootdir/ueventd.rc
@@ -71,6 +71,7 @@
 /dev/mtp_usb              0660   root       mtp
 /dev/usb_accessory        0660   root       usb
 /dev/tun                  0660   system     vpn
+/dev/hidraw*              0660   system     system
 
 # CDMA radio interface MUX
 /dev/ppp                  0660   radio      vpn
diff --git a/trusty/OWNERS b/trusty/OWNERS
index bf16912..4016792 100644
--- a/trusty/OWNERS
+++ b/trusty/OWNERS
@@ -2,7 +2,6 @@
 arve@android.com
 danielangell@google.com
 gmar@google.com
-marcone@google.com
 mikemcternan@google.com
 mmaurer@google.com
 ncbray@google.com