Merge "ashmem: Remove hardcoded PAGE_SIZE usage in ashmem_test" into main
diff --git a/bootstat/bootstat.cpp b/bootstat/bootstat.cpp
index 2d55e5a..d402bf1 100644
--- a/bootstat/bootstat.cpp
+++ b/bootstat/bootstat.cpp
@@ -830,7 +830,7 @@
   return subReason;
 }
 
-bool addKernelPanicSubReason(const pstoreConsole& console, std::string& ret) {
+void addKernelPanicSubReason(const pstoreConsole& console, std::string& ret) {
   // Check for kernel panic types to refine information
   if ((console.rfind("SysRq : Trigger a crash") != std::string::npos) ||
       (console.rfind("PC is at sysrq_handle_crash+") != std::string::npos)) {
@@ -842,63 +842,61 @@
     if (pos != std::string::npos) {
       ret += "," + getSubreason(console, pos + strlen(sysrqSubreason), /* quoted */ true);
     }
-    return true;
+    return;
   }
   if (console.rfind("Unable to handle kernel NULL pointer dereference at virtual address") !=
       std::string::npos) {
     ret = "kernel_panic,null";
-    return true;
+    return;
   }
   if (console.rfind("Kernel BUG at ") != std::string::npos) {
     ret = "kernel_panic,bug";
-    return true;
+    return;
   }
 
   std::string panic("Kernel panic - not syncing: ");
   auto pos = console.rfind(panic);
-  if (pos != std::string::npos) {
-    static const std::vector<std::pair<const std::string, const std::string>> panicReasons = {
-        {"Out of memory", "oom"},
-        {"out of memory", "oom"},
-        {"Oh boy, that early out of memory", "oom"},  // omg
-        {"BUG!", "bug"},
-        {"hung_task: blocked tasks", "hung"},
-        {"audit: ", "audit"},
-        {"scheduling while atomic", "atomic"},
-        {"Attempted to kill init!", "init"},
-        {"Requested init", "init"},
-        {"No working init", "init"},
-        {"Could not decompress init", "init"},
-        {"RCU Stall", "hung,rcu"},
-        {"stack-protector", "stack"},
-        {"kernel stack overflow", "stack"},
-        {"Corrupt kernel stack", "stack"},
-        {"low stack detected", "stack"},
-        {"corrupted stack end", "stack"},
-        {"subsys-restart: Resetting the SoC - modem crashed.", "modem"},
-        {"subsys-restart: Resetting the SoC - adsp crashed.", "adsp"},
-        {"subsys-restart: Resetting the SoC - dsps crashed.", "dsps"},
-        {"subsys-restart: Resetting the SoC - wcnss crashed.", "wcnss"},
-    };
+  if (pos == std::string::npos) return;
 
-    ret = "kernel_panic";
-    for (auto& s : panicReasons) {
-      if (console.find(panic + s.first, pos) != std::string::npos) {
-        ret += "," + s.second;
-        return true;
-      }
+  static const std::vector<std::pair<const std::string, const std::string>> panicReasons = {
+      {"Out of memory", "oom"},
+      {"out of memory", "oom"},
+      {"Oh boy, that early out of memory", "oom"},  // omg
+      {"BUG!", "bug"},
+      {"hung_task: blocked tasks", "hung"},
+      {"audit: ", "audit"},
+      {"scheduling while atomic", "atomic"},
+      {"Attempted to kill init!", "init"},
+      {"Requested init", "init"},
+      {"No working init", "init"},
+      {"Could not decompress init", "init"},
+      {"RCU Stall", "hung,rcu"},
+      {"stack-protector", "stack"},
+      {"kernel stack overflow", "stack"},
+      {"Corrupt kernel stack", "stack"},
+      {"low stack detected", "stack"},
+      {"corrupted stack end", "stack"},
+      {"subsys-restart: Resetting the SoC - modem crashed.", "modem"},
+      {"subsys-restart: Resetting the SoC - adsp crashed.", "adsp"},
+      {"subsys-restart: Resetting the SoC - dsps crashed.", "dsps"},
+      {"subsys-restart: Resetting the SoC - wcnss crashed.", "wcnss"},
+  };
+
+  ret = "kernel_panic";
+  for (auto& s : panicReasons) {
+    if (console.find(panic + s.first, pos) != std::string::npos) {
+      ret += "," + s.second;
+      return;
     }
-    auto reason = getSubreason(console, pos + panic.length(), /* newline */ false);
-    if (reason.length() > 3) {
-      ret += "," + reason;
-    }
-    return true;
   }
-  return false;
+  auto reason = getSubreason(console, pos + panic.length(), /* newline */ false);
+  if (reason.length() > 3) {
+    ret += "," + reason;
+  }
 }
 
-bool addKernelPanicSubReason(const std::string& content, std::string& ret) {
-  return addKernelPanicSubReason(pstoreConsole(content), ret);
+void addKernelPanicSubReason(const std::string& content, std::string& ret) {
+  addKernelPanicSubReason(pstoreConsole(content), ret);
 }
 
 const char system_reboot_reason_property[] = "sys.boot.reason";
@@ -1079,12 +1077,7 @@
       }
 
       // Check for kernel panics, allowed to override reboot command.
-      if (!addKernelPanicSubReason(console, ret) &&
-          // check for long-press power down
-          ((console.rfind("Power held for ") != std::string::npos) ||
-           (console.rfind("charger: [") != std::string::npos))) {
-        ret = "cold";
-      }
+      (void)addKernelPanicSubReason(console, ret);
     }
 
     // TODO: use the HAL to get battery level (http://b/77725702).
diff --git a/debuggerd/client/debuggerd_client.cpp b/debuggerd/client/debuggerd_client.cpp
index bd1e91d..af1bb81 100644
--- a/debuggerd/client/debuggerd_client.cpp
+++ b/debuggerd/client/debuggerd_client.cpp
@@ -116,7 +116,6 @@
 
 bool debuggerd_trigger_dump(pid_t tid, DebuggerdDumpType dump_type, unsigned int timeout_ms,
                             unique_fd output_fd) {
-  pid_t pid = tid;
   if (dump_type == kDebuggerdJavaBacktrace) {
     // Java dumps always get sent to the tgid, so we need to resolve our tid to a tgid.
     android::procinfo::ProcessInfo procinfo;
@@ -125,10 +124,10 @@
       log_error(output_fd, 0, "failed to get process info: %s", error.c_str());
       return false;
     }
-    pid = procinfo.pid;
+    tid = procinfo.pid;
   }
 
-  LOG(INFO) << TAG "started dumping process " << pid;
+  LOG(INFO) << TAG "started dumping process " << tid;
 
   // Rather than try to deal with poll() all the way through the flow, we update
   // the socket timeout between each step (and only use poll() during the final
@@ -172,7 +171,7 @@
 
   InterceptRequest req = {
       .dump_type = dump_type,
-      .pid = pid,
+      .pid = tid,
   };
 
   // Create an intermediate pipe to pass to the other end.
@@ -235,8 +234,8 @@
   // Send the signal.
   const int signal = (dump_type == kDebuggerdJavaBacktrace) ? SIGQUIT : BIONIC_SIGNAL_DEBUGGER;
   sigval val = {.sival_int = (dump_type == kDebuggerdNativeBacktrace) ? 1 : 0};
-  if (sigqueue(pid, signal, val) != 0) {
-    log_error(output_fd, errno, "failed to send signal to pid %d", pid);
+  if (sigqueue(tid, signal, val) != 0) {
+    log_error(output_fd, errno, "failed to send signal to pid %d", tid);
     return false;
   }
 
@@ -299,7 +298,7 @@
     }
   }
 
-  LOG(INFO) << TAG "done dumping process " << pid;
+  LOG(INFO) << TAG "done dumping process " << tid;
 
   return true;
 }
diff --git a/debuggerd/include/debuggerd/client.h b/debuggerd/include/debuggerd/client.h
index b7284b0..e7401cc 100644
--- a/debuggerd/include/debuggerd/client.h
+++ b/debuggerd/include/debuggerd/client.h
@@ -26,7 +26,7 @@
 
 // Trigger a dump of specified process to output_fd.
 // output_fd is consumed, timeout of 0 will wait forever.
-bool debuggerd_trigger_dump(pid_t pid, enum DebuggerdDumpType dump_type, unsigned int timeout_ms,
+bool debuggerd_trigger_dump(pid_t tid, enum DebuggerdDumpType dump_type, unsigned int timeout_ms,
                             android::base::unique_fd output_fd);
 
 int dump_backtrace_to_file(pid_t tid, enum DebuggerdDumpType dump_type, int output_fd);
diff --git a/fastboot/usb_linux.cpp b/fastboot/usb_linux.cpp
index 37bb304..72e326a 100644
--- a/fastboot/usb_linux.cpp
+++ b/fastboot/usb_linux.cpp
@@ -269,6 +269,9 @@
         auto path = android::base::StringPrintf("/sys/bus/usb/devices/%s/%s:1.%d/interface",
                                                 sysfs_name, sysfs_name, ifc->bInterfaceNumber);
         if (android::base::ReadFileToString(path, &interface)) {
+            if (!interface.empty() && interface.back() == '\n') {
+                interface.pop_back();
+            }
             snprintf(info.interface, sizeof(info.interface), "%s", interface.c_str());
         }
 
@@ -404,34 +407,84 @@
 {
     unsigned char *data = (unsigned char*) _data;
     unsigned count = 0;
-    struct usbdevfs_bulktransfer bulk;
-    int n;
+    struct usbdevfs_urb urb[2] = {};
+    bool pending[2] = {};
 
     if (handle_->ep_out == 0 || handle_->desc == -1) {
         return -1;
     }
 
-    do {
-        int xfer;
-        xfer = (len > MAX_USBFS_BULK_SIZE) ? MAX_USBFS_BULK_SIZE : len;
+    auto submit_urb = [&](size_t i) {
+        int xfer = (len > MAX_USBFS_BULK_SIZE) ? MAX_USBFS_BULK_SIZE : len;
 
-        bulk.ep = handle_->ep_out;
-        bulk.len = xfer;
-        bulk.data = data;
-        bulk.timeout = ms_timeout_;
+        urb[i].type = USBDEVFS_URB_TYPE_BULK;
+        urb[i].endpoint = handle_->ep_out;
+        urb[i].buffer_length = xfer;
+        urb[i].buffer = data;
+        urb[i].usercontext = (void *)i;
 
-        n = ioctl(handle_->desc, USBDEVFS_BULK, &bulk);
-        if(n != xfer) {
-            DBG("ERROR: n = %d, errno = %d (%s)\n",
-                n, errno, strerror(errno));
-            return -1;
+        int n = ioctl(handle_->desc, USBDEVFS_SUBMITURB, &urb[i]);
+        if (n != 0) {
+            DBG("ioctl(USBDEVFS_SUBMITURB) failed\n");
+            return false;
         }
 
+        pending[i] = true;
         count += xfer;
         len -= xfer;
         data += xfer;
-    } while(len > 0);
 
+        return true;
+    };
+
+    auto reap_urb = [&](size_t i) {
+        while (pending[i]) {
+            struct usbdevfs_urb *urbp;
+            int res = ioctl(handle_->desc, USBDEVFS_REAPURB, &urbp);
+            if (res != 0) {
+                DBG("ioctl(USBDEVFS_REAPURB) failed\n");
+                return false;
+            }
+            size_t done = (size_t)urbp->usercontext;
+            if (done >= 2 || !pending[done]) {
+                DBG("unexpected urb\n");
+                return false;
+            }
+            if (urbp->status != 0 || urbp->actual_length != urbp->buffer_length) {
+                DBG("urb returned error\n");
+                return false;
+            }
+            pending[done] = false;
+        }
+        return true;
+    };
+
+    if (!submit_urb(0)) {
+        return -1;
+    }
+    while (len > 0) {
+        if (!submit_urb(1)) {
+            return -1;
+        }
+        if (!reap_urb(0)) {
+            return -1;
+        }
+        if (len <= 0) {
+            if (!reap_urb(1)) {
+                return -1;
+            }
+            return count;
+        }
+        if (!submit_urb(0)) {
+            return -1;
+        }
+        if (!reap_urb(1)) {
+            return -1;
+        }
+    }
+    if (!reap_urb(0)) {
+        return -1;
+    }
     return count;
 }
 
diff --git a/fastboot/util.cpp b/fastboot/util.cpp
index e03012a..5966aea 100644
--- a/fastboot/util.cpp
+++ b/fastboot/util.cpp
@@ -31,7 +31,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/stat.h>
-#include <sys/time.h>
+#include <time.h>
 
 #include <android-base/parseint.h>
 #include <android-base/strings.h>
@@ -43,9 +43,9 @@
 static bool g_verbose = false;
 
 double now() {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-    return (double)tv.tv_sec + (double)tv.tv_usec / 1000000;
+    struct timespec ts;
+    clock_gettime(CLOCK_MONOTONIC, &ts);
+    return (double)ts.tv_sec + (double)ts.tv_nsec / 1000000000;
 }
 
 void die(const char* fmt, ...) {
diff --git a/fs_mgr/fs_mgr_remount.cpp b/fs_mgr/fs_mgr_remount.cpp
index 733ba2f..79c0b6d 100644
--- a/fs_mgr/fs_mgr_remount.cpp
+++ b/fs_mgr/fs_mgr_remount.cpp
@@ -42,6 +42,7 @@
 #include <fstab/fstab.h>
 #include <libavb_user/libavb_user.h>
 #include <libgsi/libgsid.h>
+#include <private/android_filesystem_config.h>
 
 #include "fs_mgr_overlayfs_control.h"
 #include "fs_mgr_overlayfs_mount.h"
@@ -608,7 +609,19 @@
     }
 
     // Make sure we are root.
-    if (::getuid() != 0) {
+    if (const auto uid = ::getuid(); uid != AID_ROOT) {
+        // If requesting auto reboot, also try to auto gain root.
+        if (auto_reboot && uid == AID_SHELL && access("/system/xbin/su", F_OK) == 0) {
+            std::vector<char*> args{const_cast<char*>("/system/xbin/su"),
+                                    const_cast<char*>("root")};
+            for (int i = 0; i < argc; ++i) {
+                args.push_back(argv[i]);
+            }
+            args.push_back(nullptr);
+            LOG(INFO) << "Attempting to gain root with \"su root\"";
+            execv(args[0], args.data());
+            PLOG(ERROR) << "Failed to execute \"su root\"";
+        }
         LOG(ERROR) << "Not running as root. Try \"adb root\" first.";
         return EXIT_FAILURE;
     }
diff --git a/fs_mgr/libsnapshot/Android.bp b/fs_mgr/libsnapshot/Android.bp
index a8a7716..5ceaf28 100644
--- a/fs_mgr/libsnapshot/Android.bp
+++ b/fs_mgr/libsnapshot/Android.bp
@@ -108,7 +108,7 @@
     ],
     srcs: [":libsnapshot_sources"],
     static_libs: [
-        "libfs_mgr_binder"
+        "libfs_mgr_binder",
     ],
 }
 
@@ -128,12 +128,12 @@
     static_libs: [
         "libc++fs",
         "libsnapshot_cow",
-    ]
+    ],
 }
 
 cc_library_static {
     name: "libsnapshot_init",
-    native_coverage : true,
+    native_coverage: true,
     defaults: ["libsnapshot_defaults"],
     srcs: [":libsnapshot_sources"],
     ramdisk_available: true,
@@ -204,6 +204,10 @@
         "libsnapshot_cow/writer_v2.cpp",
         "libsnapshot_cow/writer_v3.cpp",
     ],
+
+    header_libs: [
+        "libstorage_literals_headers",
+    ],
     export_include_dirs: ["include"],
     host_supported: true,
     recovery_available: true,
@@ -243,7 +247,10 @@
 
 cc_defaults {
     name: "libsnapshot_test_defaults",
-    defaults: ["libsnapshot_defaults", "libsnapshot_cow_defaults"],
+    defaults: [
+        "libsnapshot_defaults",
+        "libsnapshot_cow_defaults",
+    ],
     srcs: [
         "partition_cow_creator_test.cpp",
         "snapshot_metadata_updater_test.cpp",
@@ -283,10 +290,13 @@
 
 cc_test {
     name: "vts_libsnapshot_test",
-    defaults: ["libsnapshot_test_defaults", "libsnapshot_hal_deps"],
+    defaults: [
+        "libsnapshot_test_defaults",
+        "libsnapshot_hal_deps",
+    ],
     test_suites: [
         "vts",
-        "device-tests"
+        "device-tests",
     ],
     test_options: {
         min_shipping_api_level: 30,
@@ -295,12 +305,15 @@
 
 cc_test {
     name: "vab_legacy_tests",
-    defaults: ["libsnapshot_test_defaults", "libsnapshot_hal_deps"],
+    defaults: [
+        "libsnapshot_test_defaults",
+        "libsnapshot_hal_deps",
+    ],
     cppflags: [
         "-DLIBSNAPSHOT_TEST_VAB_LEGACY",
     ],
     test_suites: [
-        "device-tests"
+        "device-tests",
     ],
     test_options: {
         // Legacy VAB launched in Android R.
@@ -310,12 +323,15 @@
 
 cc_test {
     name: "vabc_legacy_tests",
-    defaults: ["libsnapshot_test_defaults", "libsnapshot_hal_deps"],
+    defaults: [
+        "libsnapshot_test_defaults",
+        "libsnapshot_hal_deps",
+    ],
     cppflags: [
         "-DLIBSNAPSHOT_TEST_VABC_LEGACY",
     ],
     test_suites: [
-        "device-tests"
+        "device-tests",
     ],
     test_options: {
         // Legacy VABC launched in Android S.
@@ -343,7 +359,10 @@
 
 cc_binary {
     name: "snapshotctl",
-    defaults: ["libsnapshot_cow_defaults", "libsnapshot_hal_deps"],
+    defaults: [
+        "libsnapshot_cow_defaults",
+        "libsnapshot_hal_deps",
+    ],
     srcs: [
         "snapshotctl.cpp",
     ],
@@ -412,8 +431,11 @@
         "libgtest",
         "libsnapshot_cow",
     ],
+    header_libs: [
+        "libstorage_literals_headers",
+    ],
     test_suites: [
-        "device-tests"
+        "device-tests",
     ],
     test_options: {
         min_shipping_api_level: 30,
diff --git a/fs_mgr/libsnapshot/android/snapshot/snapshot.proto b/fs_mgr/libsnapshot/android/snapshot/snapshot.proto
index 7e97dc0..f2f7fc1 100644
--- a/fs_mgr/libsnapshot/android/snapshot/snapshot.proto
+++ b/fs_mgr/libsnapshot/android/snapshot/snapshot.proto
@@ -103,7 +103,7 @@
     // The old partition size (if none existed, this will be zero).
     uint64 old_partition_size = 10;
 
-    // Compression algorithm (none, gz, or brotli).
+    // Compression algorithm (none, gz, lz4, zstd, or brotli).
     string compression_algorithm = 11;
 
     // Estimated COW size from OTA manifest.
@@ -117,6 +117,9 @@
 
     // Size of v3 operation buffer. Needs to be determined during writer initialization
     uint64 estimated_ops_buffer_size = 15;
+
+    // Max bytes to be compressed at once (4k, 8k, 16k, 32k, 64k, 128k)
+    uint64 compression_factor = 16;
 }
 
 // Next: 8
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_compress.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_compress.h
index 8191d61..ac04245 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_compress.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_compress.h
@@ -17,6 +17,7 @@
 #pragma once
 
 #include <memory>
+#include <vector>
 #include "libsnapshot/cow_format.h"
 
 namespace android {
@@ -40,8 +41,7 @@
 
     uint32_t GetCompressionLevel() const { return compression_level_; }
     uint32_t GetBlockSize() const { return block_size_; }
-    [[nodiscard]] virtual std::basic_string<uint8_t> Compress(const void* data,
-                                                              size_t length) const = 0;
+    [[nodiscard]] virtual std::vector<uint8_t> Compress(const void* data, size_t length) const = 0;
 
   private:
     uint32_t compression_level_;
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h
index 9401c66..6865b19 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h
@@ -101,7 +101,7 @@
     // monotonically increasing value used by update_engine
     uint64_t label;
     // Index of last CowOperation guaranteed to be resumable
-    uint32_t op_index;
+    uint64_t op_index;
 } __attribute__((packed));
 
 static constexpr uint8_t kNumResumePoints = 4;
@@ -115,10 +115,12 @@
     uint32_t resume_point_max;
     // Number of CowOperationV3 structs in the operation buffer, currently and total
     // region size.
-    uint32_t op_count;
-    uint32_t op_count_max;
+    uint64_t op_count;
+    uint64_t op_count_max;
     // Compression Algorithm
     uint32_t compression_algorithm;
+    // Max compression size supported
+    uint32_t max_compression_size;
 } __attribute__((packed));
 
 enum class CowOperationType : uint8_t {
@@ -199,18 +201,24 @@
 static constexpr uint64_t kCowOpSourceInfoTypeBit = 60;
 static constexpr uint64_t kCowOpSourceInfoTypeNumBits = 4;
 static constexpr uint64_t kCowOpSourceInfoTypeMask = (1ULL << kCowOpSourceInfoTypeNumBits) - 1;
+
+static constexpr uint64_t kCowOpSourceInfoCompressionBit = 57;
+static constexpr uint64_t kCowOpSourceInfoCompressionNumBits = 3;
+static constexpr uint64_t kCowOpSourceInfoCompressionMask =
+        ((1ULL << kCowOpSourceInfoCompressionNumBits) - 1);
+
 // The on disk format of cow (currently ==  CowOperation)
 struct CowOperationV3 {
     // If this operation reads from the data section of the COW, this contains
     // the length.
-    uint16_t data_length;
+    uint32_t data_length;
 
     // The block of data in the new image that this operation modifies.
     uint32_t new_block;
 
     // source_info with have the following layout
-    // |---4 bits ---| ---12 bits---| --- 48 bits ---|
-    // |--- type --- | -- unused -- | --- source --- |
+    // |--- 4 bits -- | --------- 3 bits ------ | --- 9 bits --- | --- 48 bits ---|
+    // |--- type ---  | -- compression factor --| --- unused --- | --- source --- |
     //
     // The value of |source| depends on the operation code.
     //
@@ -223,6 +231,17 @@
     // For ops other than Label:
     //  Bits 47-62 are reserved and must be zero.
     // A block is compressed if it’s data is < block_sz
+    //
+    // Bits [57-59] represents the compression factor.
+    //
+    //       Compression - factor
+    // ==========================
+    // 000 -  4k
+    // 001 -  8k
+    // 010 -  16k
+    // ...
+    // 110 -  256k
+    //
     uint64_t source_info_;
     constexpr uint64_t source() const { return source_info_ & kCowOpSourceInfoDataMask; }
     constexpr void set_source(uint64_t source) {
@@ -243,6 +262,20 @@
         source_info_ |= (static_cast<uint64_t>(type) & kCowOpSourceInfoTypeMask)
                         << kCowOpSourceInfoTypeBit;
     }
+    constexpr void set_compression_bits(uint8_t compression_factor) {
+        // Clear the 3 bits from bit 57 - [57-59]
+        source_info_ &= ~(kCowOpSourceInfoCompressionMask << kCowOpSourceInfoCompressionBit);
+        // Set the actual compression factor
+        source_info_ |=
+                (static_cast<uint64_t>(compression_factor) & kCowOpSourceInfoCompressionMask)
+                << kCowOpSourceInfoCompressionBit;
+    }
+    constexpr uint8_t compression_bits() const {
+        // Grab the 3 bits from [57-59]
+        const auto compression_factor =
+                (source_info_ >> kCowOpSourceInfoCompressionBit) & kCowOpSourceInfoCompressionMask;
+        return static_cast<uint8_t>(compression_factor);
+    }
 } __attribute__((packed));
 
 // Ensure that getters/setters added to CowOperationV3 does not increases size
@@ -324,5 +357,11 @@
 // Convert compression name to internal value.
 std::optional<CowCompressionAlgorithm> CompressionAlgorithmFromString(std::string_view name);
 
+// Return block size used for compression
+size_t CowOpCompressionSize(const CowOperation* op, size_t block_size);
+
+// Return the relative offset of the I/O block which the CowOperation
+// multi-block compression
+bool GetBlockOffset(const CowOperation* op, uint64_t io_block, size_t block_size, off_t* offset);
 }  // namespace snapshot
 }  // namespace android
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
index bf4c79f..3f49c69 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
@@ -162,6 +162,9 @@
     // Creates a clone of the current CowReader without the file handlers
     std::unique_ptr<CowReader> CloneCowReader();
 
+    // Get the max compression size
+    uint32_t GetMaxCompressionSize();
+
     void UpdateMergeOpsCompleted(int num_merge_ops) { header_.num_merge_ops += num_merge_ops; }
 
   private:
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h
index 7df976d..651083f 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h
@@ -53,13 +53,16 @@
     uint64_t num_merge_ops = 0;
 
     // Number of threads for compression
-    int num_compress_threads = 0;
+    uint16_t num_compress_threads = 0;
 
     // Batch write cluster ops
     bool batch_write = false;
 
     // Size of the cow operation buffer; used in v3 only.
     uint64_t op_count_max = 0;
+
+    // Compression factor
+    uint64_t compression_factor = 4096;
 };
 
 // Interface for writing to a snapuserd COW. All operations are ordered; merges
@@ -116,36 +119,38 @@
 
 class CompressWorker {
   public:
-    CompressWorker(std::unique_ptr<ICompressor>&& compressor, uint32_t block_size);
+    CompressWorker(std::unique_ptr<ICompressor>&& compressor);
     bool RunThread();
-    void EnqueueCompressBlocks(const void* buffer, size_t num_blocks);
-    bool GetCompressedBuffers(std::vector<std::basic_string<uint8_t>>* compressed_buf);
+    void EnqueueCompressBlocks(const void* buffer, size_t block_size, size_t num_blocks);
+    bool GetCompressedBuffers(std::vector<std::vector<uint8_t>>* compressed_buf);
     void Finalize();
     static uint32_t GetDefaultCompressionLevel(CowCompressionAlgorithm compression);
 
     static bool CompressBlocks(ICompressor* compressor, size_t block_size, const void* buffer,
                                size_t num_blocks,
-                               std::vector<std::basic_string<uint8_t>>* compressed_data);
+                               std::vector<std::vector<uint8_t>>* compressed_data);
 
   private:
     struct CompressWork {
         const void* buffer;
         size_t num_blocks;
+        size_t block_size;
         bool compression_status = false;
-        std::vector<std::basic_string<uint8_t>> compressed_data;
+        std::vector<std::vector<uint8_t>> compressed_data;
     };
 
     std::unique_ptr<ICompressor> compressor_;
-    uint32_t block_size_;
 
     std::queue<CompressWork> work_queue_;
     std::queue<CompressWork> compressed_queue_;
     std::mutex lock_;
     std::condition_variable cv_;
     bool stopped_ = false;
+    size_t total_submitted_ = 0;
+    size_t total_processed_ = 0;
 
-    bool CompressBlocks(const void* buffer, size_t num_blocks,
-                        std::vector<std::basic_string<uint8_t>>* compressed_data);
+    bool CompressBlocks(const void* buffer, size_t num_blocks, size_t block_size,
+                        std::vector<std::vector<uint8_t>>* compressed_data);
 };
 
 // Create an ICowWriter not backed by any file. This is useful for estimating
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/cow_compress.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/cow_compress.cpp
index abc7d33..0205f50 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/cow_compress.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/cow_compress.cpp
@@ -20,6 +20,7 @@
 #include <limits>
 #include <memory>
 #include <queue>
+#include <vector>
 
 #include <android-base/file.h>
 #include <android-base/logging.h>
@@ -103,9 +104,9 @@
     GzCompressor(uint32_t compression_level, const uint32_t block_size)
         : ICompressor(compression_level, block_size){};
 
-    std::basic_string<uint8_t> Compress(const void* data, size_t length) const override {
+    std::vector<uint8_t> Compress(const void* data, size_t length) const override {
         const auto bound = compressBound(length);
-        std::basic_string<uint8_t> buffer(bound, '\0');
+        std::vector<uint8_t> buffer(bound, '\0');
 
         uLongf dest_len = bound;
         auto rv = compress2(buffer.data(), &dest_len, reinterpret_cast<const Bytef*>(data), length,
@@ -124,13 +125,13 @@
     Lz4Compressor(uint32_t compression_level, const uint32_t block_size)
         : ICompressor(compression_level, block_size){};
 
-    std::basic_string<uint8_t> Compress(const void* data, size_t length) const override {
+    std::vector<uint8_t> Compress(const void* data, size_t length) const override {
         const auto bound = LZ4_compressBound(length);
         if (!bound) {
             LOG(ERROR) << "LZ4_compressBound returned 0";
             return {};
         }
-        std::basic_string<uint8_t> buffer(bound, '\0');
+        std::vector<uint8_t> buffer(bound, '\0');
 
         const auto compressed_size =
                 LZ4_compress_default(static_cast<const char*>(data),
@@ -156,13 +157,13 @@
     BrotliCompressor(uint32_t compression_level, const uint32_t block_size)
         : ICompressor(compression_level, block_size){};
 
-    std::basic_string<uint8_t> Compress(const void* data, size_t length) const override {
+    std::vector<uint8_t> Compress(const void* data, size_t length) const override {
         const auto bound = BrotliEncoderMaxCompressedSize(length);
         if (!bound) {
             LOG(ERROR) << "BrotliEncoderMaxCompressedSize returned 0";
             return {};
         }
-        std::basic_string<uint8_t> buffer(bound, '\0');
+        std::vector<uint8_t> buffer(bound, '\0');
 
         size_t encoded_size = bound;
         auto rv = BrotliEncoderCompress(
@@ -186,8 +187,8 @@
         ZSTD_CCtx_setParameter(zstd_context_.get(), ZSTD_c_windowLog, log2(GetBlockSize()));
     };
 
-    std::basic_string<uint8_t> Compress(const void* data, size_t length) const override {
-        std::basic_string<uint8_t> buffer(ZSTD_compressBound(length), '\0');
+    std::vector<uint8_t> Compress(const void* data, size_t length) const override {
+        std::vector<uint8_t> buffer(ZSTD_compressBound(length), '\0');
         const auto compressed_size =
                 ZSTD_compress2(zstd_context_.get(), buffer.data(), buffer.size(), data, length);
         if (compressed_size <= 0) {
@@ -208,14 +209,14 @@
     std::unique_ptr<ZSTD_CCtx, decltype(&ZSTD_freeCCtx)> zstd_context_;
 };
 
-bool CompressWorker::CompressBlocks(const void* buffer, size_t num_blocks,
-                                    std::vector<std::basic_string<uint8_t>>* compressed_data) {
-    return CompressBlocks(compressor_.get(), block_size_, buffer, num_blocks, compressed_data);
+bool CompressWorker::CompressBlocks(const void* buffer, size_t num_blocks, size_t block_size,
+                                    std::vector<std::vector<uint8_t>>* compressed_data) {
+    return CompressBlocks(compressor_.get(), block_size, buffer, num_blocks, compressed_data);
 }
 
 bool CompressWorker::CompressBlocks(ICompressor* compressor, size_t block_size, const void* buffer,
                                     size_t num_blocks,
-                                    std::vector<std::basic_string<uint8_t>>* compressed_data) {
+                                    std::vector<std::vector<uint8_t>>* compressed_data) {
     const uint8_t* iter = reinterpret_cast<const uint8_t*>(buffer);
     while (num_blocks) {
         auto data = compressor->Compress(iter, block_size);
@@ -223,7 +224,7 @@
             PLOG(ERROR) << "CompressBlocks: Compression failed";
             return false;
         }
-        if (data.size() > std::numeric_limits<uint16_t>::max()) {
+        if (data.size() > std::numeric_limits<uint32_t>::max()) {
             LOG(ERROR) << "Compressed block is too large: " << data.size();
             return false;
         }
@@ -254,7 +255,8 @@
         }
 
         // Compress blocks
-        bool ret = CompressBlocks(blocks.buffer, blocks.num_blocks, &blocks.compressed_data);
+        bool ret = CompressBlocks(blocks.buffer, blocks.num_blocks, blocks.block_size,
+                                  &blocks.compressed_data);
         blocks.compression_status = ret;
         {
             std::lock_guard<std::mutex> lock(lock_);
@@ -273,35 +275,31 @@
     return true;
 }
 
-void CompressWorker::EnqueueCompressBlocks(const void* buffer, size_t num_blocks) {
+void CompressWorker::EnqueueCompressBlocks(const void* buffer, size_t block_size,
+                                           size_t num_blocks) {
     {
         std::lock_guard<std::mutex> lock(lock_);
 
         CompressWork blocks = {};
         blocks.buffer = buffer;
+        blocks.block_size = block_size;
         blocks.num_blocks = num_blocks;
         work_queue_.push(std::move(blocks));
+        total_submitted_ += 1;
     }
     cv_.notify_all();
 }
 
-bool CompressWorker::GetCompressedBuffers(std::vector<std::basic_string<uint8_t>>* compressed_buf) {
-    {
+bool CompressWorker::GetCompressedBuffers(std::vector<std::vector<uint8_t>>* compressed_buf) {
+    while (true) {
         std::unique_lock<std::mutex> lock(lock_);
-        while (compressed_queue_.empty() && !stopped_) {
+        while ((total_submitted_ != total_processed_) && compressed_queue_.empty() && !stopped_) {
             cv_.wait(lock);
         }
-
-        if (stopped_) {
-            return true;
-        }
-    }
-
-    {
-        std::lock_guard<std::mutex> lock(lock_);
         while (compressed_queue_.size() > 0) {
             CompressWork blocks = std::move(compressed_queue_.front());
             compressed_queue_.pop();
+            total_processed_ += 1;
 
             if (blocks.compression_status) {
                 compressed_buf->insert(compressed_buf->end(),
@@ -312,9 +310,12 @@
                 return false;
             }
         }
+        if ((total_submitted_ == total_processed_) || stopped_) {
+            total_submitted_ = 0;
+            total_processed_ = 0;
+            return true;
+        }
     }
-
-    return true;
 }
 
 std::unique_ptr<ICompressor> ICompressor::Brotli(uint32_t compression_level,
@@ -344,8 +345,8 @@
     cv_.notify_all();
 }
 
-CompressWorker::CompressWorker(std::unique_ptr<ICompressor>&& compressor, uint32_t block_size)
-    : compressor_(std::move(compressor)), block_size_(block_size) {}
+CompressWorker::CompressWorker(std::unique_ptr<ICompressor>&& compressor)
+    : compressor_(std::move(compressor)) {}
 
 }  // namespace snapshot
 }  // namespace android
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/cow_format.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/cow_format.cpp
index 8d1786c..19014c0 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/cow_format.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/cow_format.cpp
@@ -21,6 +21,7 @@
 #include <android-base/logging.h>
 #include <android-base/stringprintf.h>
 #include <libsnapshot/cow_format.h>
+#include <storage_literals/storage_literals.h>
 #include "writer_v2.h"
 #include "writer_v3.h"
 
@@ -28,6 +29,7 @@
 namespace snapshot {
 
 using android::base::unique_fd;
+using namespace android::storage_literals;
 
 std::ostream& EmitCowTypeString(std::ostream& os, CowOperationType cow_type) {
     switch (cow_type) {
@@ -174,5 +176,36 @@
     return CreateCowWriter(version, options, unique_fd{-1}, std::nullopt);
 }
 
+size_t CowOpCompressionSize(const CowOperation* op, size_t block_size) {
+    uint8_t compression_bits = op->compression_bits();
+    return (block_size << compression_bits);
+}
+
+bool GetBlockOffset(const CowOperation* op, uint64_t io_block, size_t block_size, off_t* offset) {
+    const uint64_t new_block = op->new_block;
+
+    if (op->type() != kCowReplaceOp || io_block < new_block) {
+        LOG(VERBOSE) << "Invalid IO request for block: " << io_block
+                     << " CowOperation: new_block: " << new_block;
+        return false;
+    }
+
+    // Get the actual compression size
+    const size_t compression_size = CowOpCompressionSize(op, block_size);
+    // Find the number of blocks spanned
+    const size_t num_blocks = compression_size / block_size;
+    // Find the distance of the I/O block which this
+    // CowOperation encompasses
+    const size_t block_distance = io_block - new_block;
+    // Check if this block is within this range;
+    // if so, return the relative offset
+    if (block_distance < num_blocks) {
+        *offset = block_distance * block_size;
+        return true;
+    }
+
+    return false;
+}
+
 }  // namespace snapshot
 }  // namespace android
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/cow_reader.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/cow_reader.cpp
index 1b4a971..6516499 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/cow_reader.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/cow_reader.cpp
@@ -26,6 +26,7 @@
 #include <android-base/logging.h>
 #include <libsnapshot/cow_format.h>
 #include <libsnapshot/cow_reader.h>
+#include <storage_literals/storage_literals.h>
 #include <zlib.h>
 
 #include "cow_decompress.h"
@@ -35,6 +36,8 @@
 namespace android {
 namespace snapshot {
 
+using namespace android::storage_literals;
+
 bool ReadCowHeader(android::base::borrowed_fd fd, CowHeaderV3* header) {
     if (lseek(fd.get(), 0, SEEK_SET) < 0) {
         PLOG(ERROR) << "lseek header failed";
@@ -161,6 +164,21 @@
     return PrepMergeOps();
 }
 
+uint32_t CowReader::GetMaxCompressionSize() {
+    switch (header_.prefix.major_version) {
+        case 1:
+        case 2:
+            // Old versions supports only 4KB compression.
+            return header_.block_size;
+            ;
+        case 3:
+            return header_.max_compression_size;
+        default:
+            LOG(ERROR) << "Unknown version: " << header_.prefix.major_version;
+            return 0;
+    }
+}
+
 //
 // This sets up the data needed for MergeOpIter. MergeOpIter presents
 // data in the order we intend to merge in.
@@ -705,6 +723,11 @@
 ssize_t CowReader::ReadData(const CowOperation* op, void* buffer, size_t buffer_size,
                             size_t ignore_bytes) {
     std::unique_ptr<IDecompressor> decompressor;
+    const size_t op_buf_size = CowOpCompressionSize(op, header_.block_size);
+    if (!op_buf_size) {
+        LOG(ERROR) << "Compression size is zero. op: " << *op;
+        return -1;
+    }
     switch (GetCompressionType()) {
         case kCowCompressNone:
             break;
@@ -715,12 +738,12 @@
             decompressor = IDecompressor::Brotli();
             break;
         case kCowCompressZstd:
-            if (header_.block_size != op->data_length) {
+            if (op_buf_size != op->data_length) {
                 decompressor = IDecompressor::Zstd();
             }
             break;
         case kCowCompressLz4:
-            if (header_.block_size != op->data_length) {
+            if (op_buf_size != op->data_length) {
                 decompressor = IDecompressor::Lz4();
             }
             break;
@@ -736,14 +759,14 @@
         offset = op->source();
     }
     if (!decompressor ||
-        ((op->data_length == header_.block_size) && (header_.prefix.major_version == 3))) {
+        ((op->data_length == op_buf_size) && (header_.prefix.major_version == 3))) {
         CowDataStream stream(this, offset + ignore_bytes, op->data_length - ignore_bytes);
         return stream.ReadFully(buffer, buffer_size);
     }
 
     CowDataStream stream(this, offset, op->data_length);
     decompressor->set_stream(&stream);
-    return decompressor->Decompress(buffer, buffer_size, header_.block_size, ignore_bytes);
+    return decompressor->Decompress(buffer, buffer_size, op_buf_size, ignore_bytes);
 }
 
 bool CowReader::GetSourceOffset(const CowOperation* op, uint64_t* source_offset) {
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/parser_v2.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/parser_v2.cpp
index fe977b7..a35b614 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/parser_v2.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/parser_v2.cpp
@@ -206,6 +206,8 @@
 
         auto& new_op = out->ops->at(i);
         new_op.set_type(v2_op.type);
+        // v2 ops always have 4k compression
+        new_op.set_compression_bits(0);
         new_op.data_length = v2_op.data_length;
 
         if (v2_op.new_block > std::numeric_limits<uint32_t>::max()) {
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/snapshot_reader.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/snapshot_reader.cpp
index 4e90a0f..12073fc 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/snapshot_reader.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/snapshot_reader.cpp
@@ -42,10 +42,21 @@
             op_iter_->Next();
             continue;
         }
-        if (op->new_block >= ops_.size()) {
-            ops_.resize(op->new_block + 1, nullptr);
+
+        size_t num_blocks = 1;
+        if (op->type() == kCowReplaceOp) {
+            num_blocks = (CowOpCompressionSize(op, block_size_) / block_size_);
         }
-        ops_[op->new_block] = op;
+        if (op->new_block >= ops_.size()) {
+            ops_.resize(op->new_block + num_blocks, nullptr);
+        }
+
+        size_t vec_index = op->new_block;
+        while (num_blocks) {
+            ops_[vec_index] = op;
+            num_blocks -= 1;
+            vec_index += 1;
+        }
         op_iter_->Next();
     }
 }
@@ -172,11 +183,20 @@
     } else if (op->type() == kCowZeroOp) {
         memset(buffer, 0, bytes_to_read);
     } else if (op->type() == kCowReplaceOp) {
-        if (cow_->ReadData(op, buffer, bytes_to_read, start_offset) < bytes_to_read) {
-            LOG(ERROR) << "CompressedSnapshotReader failed to read replace op";
+        size_t buffer_size = CowOpCompressionSize(op, block_size_);
+        uint8_t temp_buffer[buffer_size];
+        if (cow_->ReadData(op, temp_buffer, buffer_size, 0) < buffer_size) {
+            LOG(ERROR) << "CompressedSnapshotReader failed to read replace op: buffer_size: "
+                       << buffer_size << "start_offset: " << start_offset;
             errno = EIO;
             return -1;
         }
+        off_t block_offset{};
+        if (!GetBlockOffset(op, chunk, block_size_, &block_offset)) {
+            LOG(ERROR) << "GetBlockOffset failed";
+            return -1;
+        }
+        std::memcpy(buffer, (char*)temp_buffer + block_offset + start_offset, bytes_to_read);
     } else if (op->type() == kCowXorOp) {
         borrowed_fd fd = GetSourceFd();
         if (fd < 0) {
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/test_v2.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/test_v2.cpp
index 1d1d24c..ce80cd7 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/test_v2.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/test_v2.cpp
@@ -18,6 +18,7 @@
 #include <iostream>
 #include <memory>
 #include <string_view>
+#include <vector>
 
 #include <android-base/file.h>
 #include <android-base/logging.h>
@@ -429,7 +430,7 @@
 template <typename T>
 class HorribleStream : public IByteStream {
   public:
-    HorribleStream(const std::basic_string<T>& input) : input_(input) {}
+    HorribleStream(const std::vector<T>& input) : input_(input) {}
 
     ssize_t Read(void* buffer, size_t length) override {
         if (pos_ >= input_.size()) {
@@ -444,16 +445,17 @@
     size_t Size() const override { return input_.size(); }
 
   private:
-    std::basic_string<T> input_;
+    std::vector<T> input_;
     size_t pos_ = 0;
 };
 
 TEST(HorribleStream, ReadFully) {
-    std::string expected = "this is some data";
+    std::string expected_str = "this is some data";
+    std::vector<char> expected{expected_str.begin(), expected_str.end()};
 
     HorribleStream<char> stream(expected);
 
-    std::string buffer(expected.size(), '\0');
+    std::vector<char> buffer(expected.size(), '\0');
     ASSERT_TRUE(stream.ReadFully(buffer.data(), buffer.size()));
     ASSERT_EQ(buffer, expected);
 }
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/test_v3.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/test_v3.cpp
index 8cf46f4..3c5b394 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/test_v3.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/test_v3.cpp
@@ -18,6 +18,7 @@
 #include <libsnapshot/cow_format.h>
 #include <libsnapshot/cow_reader.h>
 #include <libsnapshot/cow_writer.h>
+#include <storage_literals/storage_literals.h>
 #include "writer_v2.h"
 #include "writer_v3.h"
 
@@ -29,6 +30,9 @@
 namespace android {
 namespace snapshot {
 
+using namespace android::storage_literals;
+using ::testing::TestWithParam;
+
 class CowTestV3 : public ::testing::Test {
   protected:
     virtual void SetUp() override {
@@ -72,6 +76,7 @@
 
 TEST_F(CowTestV3, Header) {
     CowOptions options;
+    options.op_count_max = 15;
     auto writer = CreateCowWriter(3, options, GetCowFd());
     ASSERT_TRUE(writer->Finalize());
 
@@ -483,14 +488,14 @@
     ASSERT_TRUE(reader.Parse(cow_->fd));
 
     auto header = reader.header_v3();
-    ASSERT_EQ(header.sequence_data_count, 0);
+    ASSERT_EQ(header.sequence_data_count, static_cast<uint64_t>(0));
     ASSERT_EQ(header.resume_point_count, 0);
     ASSERT_EQ(header.resume_point_max, 4);
 
     writer->AddLabel(0);
     ASSERT_TRUE(reader.Parse(cow_->fd));
     header = reader.header_v3();
-    ASSERT_EQ(header.sequence_data_count, 0);
+    ASSERT_EQ(header.sequence_data_count, static_cast<uint64_t>(0));
     ASSERT_EQ(header.resume_point_count, 1);
     ASSERT_EQ(header.resume_point_max, 4);
 
@@ -698,5 +703,189 @@
     ASSERT_FALSE(writer->AddZeroBlocks(0, 19));
 }
 
+struct TestParam {
+    std::string compression;
+    int block_size;
+    int num_threads;
+    size_t cluster_ops;
+};
+
+class VariableBlockTest : public ::testing::TestWithParam<TestParam> {
+  protected:
+    virtual void SetUp() override {
+        cow_ = std::make_unique<TemporaryFile>();
+        ASSERT_GE(cow_->fd, 0) << strerror(errno);
+    }
+
+    virtual void TearDown() override { cow_ = nullptr; }
+
+    unique_fd GetCowFd() { return unique_fd{dup(cow_->fd)}; }
+
+    std::unique_ptr<TemporaryFile> cow_;
+};
+
+// Helper to check read sizes.
+static inline void ReadBlockData(CowReader& reader, const CowOperation* op, void* buffer,
+                                 size_t size) {
+    size_t block_size = CowOpCompressionSize(op, 4096);
+    std::string data(block_size, '\0');
+    size_t value = reader.ReadData(op, data.data(), block_size);
+    ASSERT_TRUE(value == block_size);
+    std::memcpy(buffer, data.data(), size);
+}
+
+TEST_P(VariableBlockTest, VariableBlockCompressionTest) {
+    const TestParam params = GetParam();
+
+    CowOptions options;
+    options.op_count_max = 100000;
+    options.compression = params.compression;
+    options.num_compress_threads = params.num_threads;
+    options.batch_write = true;
+    options.compression_factor = params.block_size;
+    options.cluster_ops = params.cluster_ops;
+
+    CowWriterV3 writer(options, GetCowFd());
+
+    ASSERT_TRUE(writer.Initialize());
+
+    std::string xor_data = "This is test data-1. Testing xor";
+    xor_data.resize(options.block_size, '\0');
+    ASSERT_TRUE(writer.AddXorBlocks(50, xor_data.data(), xor_data.size(), 24, 10));
+
+    // Large number of blocks
+    std::string data = "This is test data-2. Testing replace ops";
+    data.resize(options.block_size * 2048, '\0');
+    ASSERT_TRUE(writer.AddRawBlocks(100, data.data(), data.size()));
+
+    std::string data2 = "This is test data-3. Testing replace ops";
+    data2.resize(options.block_size * 259, '\0');
+    ASSERT_TRUE(writer.AddRawBlocks(6000, data2.data(), data2.size()));
+
+    // Test data size is smaller than block size
+
+    // 4k block
+    std::string data3 = "This is test data-4. Testing replace ops";
+    data3.resize(options.block_size, '\0');
+    ASSERT_TRUE(writer.AddRawBlocks(9000, data3.data(), data3.size()));
+
+    // 8k block
+    std::string data4;
+    data4.resize(options.block_size * 2, '\0');
+    for (size_t i = 0; i < data4.size(); i++) {
+        data4[i] = static_cast<char>('A' + i / options.block_size);
+    }
+    ASSERT_TRUE(writer.AddRawBlocks(10000, data4.data(), data4.size()));
+
+    // 16k block
+    std::string data5;
+    data.resize(options.block_size * 4, '\0');
+    for (int i = 0; i < data5.size(); i++) {
+        data5[i] = static_cast<char>('C' + i / options.block_size);
+    }
+    ASSERT_TRUE(writer.AddRawBlocks(11000, data5.data(), data5.size()));
+
+    // 64k Random buffer which cannot be compressed
+    unique_fd rnd_fd(open("/dev/random", O_RDONLY));
+    ASSERT_GE(rnd_fd, 0);
+    std::string random_buffer;
+    random_buffer.resize(65536, '\0');
+    ASSERT_EQ(android::base::ReadFullyAtOffset(rnd_fd, random_buffer.data(), 65536, 0), true);
+    ASSERT_TRUE(writer.AddRawBlocks(12000, random_buffer.data(), 65536));
+
+    ASSERT_TRUE(writer.Finalize());
+
+    ASSERT_EQ(lseek(cow_->fd, 0, SEEK_SET), 0);
+
+    CowReader reader;
+    ASSERT_TRUE(reader.Parse(cow_->fd));
+
+    auto iter = reader.GetOpIter();
+    ASSERT_NE(iter, nullptr);
+
+    while (!iter->AtEnd()) {
+        auto op = iter->Get();
+
+        if (op->type() == kCowXorOp) {
+            std::string sink(xor_data.size(), '\0');
+            ASSERT_EQ(op->new_block, 50);
+            ASSERT_EQ(op->source(), 98314);  // 4096 * 24 + 10
+            ReadBlockData(reader, op, sink.data(), sink.size());
+            ASSERT_EQ(sink, xor_data);
+        }
+        if (op->type() == kCowReplaceOp) {
+            if (op->new_block == 100) {
+                data.resize(options.block_size);
+                std::string sink(data.size(), '\0');
+                ReadBlockData(reader, op, sink.data(), sink.size());
+                ASSERT_EQ(sink.size(), data.size());
+                ASSERT_EQ(sink, data);
+            }
+            if (op->new_block == 6000) {
+                data2.resize(options.block_size);
+                std::string sink(data2.size(), '\0');
+                ReadBlockData(reader, op, sink.data(), sink.size());
+                ASSERT_EQ(sink, data2);
+            }
+            if (op->new_block == 9000) {
+                std::string sink(data3.size(), '\0');
+                ReadBlockData(reader, op, sink.data(), sink.size());
+                ASSERT_EQ(sink, data3);
+            }
+            if (op->new_block == 10000) {
+                data4.resize(options.block_size);
+                std::string sink(options.block_size, '\0');
+                ReadBlockData(reader, op, sink.data(), sink.size());
+                ASSERT_EQ(sink, data4);
+            }
+            if (op->new_block == 11000) {
+                data5.resize(options.block_size);
+                std::string sink(options.block_size, '\0');
+                ReadBlockData(reader, op, sink.data(), sink.size());
+                ASSERT_EQ(sink, data5);
+            }
+            if (op->new_block == 12000) {
+                random_buffer.resize(options.block_size);
+                std::string sink(options.block_size, '\0');
+                ReadBlockData(reader, op, sink.data(), sink.size());
+                ASSERT_EQ(sink, random_buffer);
+            }
+        }
+
+        iter->Next();
+    }
+}
+
+std::vector<TestParam> GetTestConfigs() {
+    std::vector<TestParam> testParams;
+
+    std::vector<int> block_sizes = {4_KiB, 8_KiB, 16_KiB, 32_KiB, 64_KiB, 128_KiB, 256_KiB};
+    std::vector<std::string> compression_algo = {"none", "lz4", "zstd", "gz"};
+    std::vector<int> threads = {1, 2};
+    // This will also test batch size
+    std::vector<size_t> cluster_ops = {1, 256};
+
+    // This should test 112 combination
+    for (auto block : block_sizes) {
+        for (auto compression : compression_algo) {
+            for (auto thread : threads) {
+                for (auto cluster : cluster_ops) {
+                    TestParam param;
+                    param.block_size = block;
+                    param.compression = compression;
+                    param.num_threads = thread;
+                    param.cluster_ops = cluster;
+                    testParams.push_back(std::move(param));
+                }
+            }
+        }
+    }
+
+    return testParams;
+}
+
+INSTANTIATE_TEST_SUITE_P(CompressorsWithVariableBlocks, VariableBlockTest,
+                         ::testing::ValuesIn(GetTestConfigs()));
+
 }  // namespace snapshot
 }  // namespace android
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.cpp
index 75cd111..d0864e0 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.cpp
@@ -185,7 +185,7 @@
     for (int i = 0; i < num_compress_threads_; i++) {
         std::unique_ptr<ICompressor> compressor =
                 ICompressor::Create(compression_, header_.block_size);
-        auto wt = std::make_unique<CompressWorker>(std::move(compressor), header_.block_size);
+        auto wt = std::make_unique<CompressWorker>(std::move(compressor));
         threads_.emplace_back(std::async(std::launch::async, &CompressWorker::RunThread, wt.get()));
         compress_threads_.push_back(std::move(wt));
     }
@@ -353,7 +353,7 @@
         if (i == num_threads - 1) {
             num_blocks_per_thread = num_blocks;
         }
-        worker->EnqueueCompressBlocks(iter, num_blocks_per_thread);
+        worker->EnqueueCompressBlocks(iter, header_.block_size, num_blocks_per_thread);
         iter += (num_blocks_per_thread * header_.block_size);
         num_blocks -= num_blocks_per_thread;
     }
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.h b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.h
index 05de2ad..6a37aa7 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.h
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v2.h
@@ -81,8 +81,8 @@
     int num_compress_threads_ = 1;
     std::vector<std::unique_ptr<CompressWorker>> compress_threads_;
     std::vector<std::future<bool>> threads_;
-    std::vector<std::basic_string<uint8_t>> compressed_buf_;
-    std::vector<std::basic_string<uint8_t>>::iterator buf_iter_;
+    std::vector<std::vector<uint8_t>> compressed_buf_;
+    std::vector<std::vector<uint8_t>>::iterator buf_iter_;
 
     std::vector<std::unique_ptr<CowOperationV2>> opbuffer_vec_;
     std::vector<std::unique_ptr<uint8_t[]>> databuffer_vec_;
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.cpp b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.cpp
index be6b6da..22e6f2c 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.cpp
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.cpp
@@ -37,6 +37,7 @@
 #include <libsnapshot/cow_compress.h>
 #include <libsnapshot_cow/parser_v3.h>
 #include <linux/fs.h>
+#include <storage_literals/storage_literals.h>
 #include <sys/ioctl.h>
 #include <unistd.h>
 #include <numeric>
@@ -54,6 +55,7 @@
 
 static_assert(sizeof(off_t) == sizeof(uint64_t));
 
+using namespace android::storage_literals;
 using android::base::unique_fd;
 
 // Divide |x| by |y| and round up to the nearest integer.
@@ -77,9 +79,9 @@
     threads_.clear();
     for (size_t i = 0; i < num_compress_threads_; i++) {
         std::unique_ptr<ICompressor> compressor =
-                ICompressor::Create(compression_, header_.block_size);
+                ICompressor::Create(compression_, header_.max_compression_size);
         auto&& wt = compress_threads_.emplace_back(
-                std::make_unique<CompressWorker>(std::move(compressor), header_.block_size));
+                std::make_unique<CompressWorker>(std::move(compressor)));
         threads_.emplace_back(std::thread([wt = wt.get()]() { wt->RunThread(); }));
     }
     LOG(INFO) << num_compress_threads_ << " thread used for compression";
@@ -110,11 +112,17 @@
     header_.op_count = 0;
     header_.op_count_max = 0;
     header_.compression_algorithm = kCowCompressNone;
-    return;
+    header_.max_compression_size = options_.compression_factor;
 }
 
 bool CowWriterV3::ParseOptions() {
-    num_compress_threads_ = std::max(options_.num_compress_threads, 1);
+    if (!header_.max_compression_size || !IsBlockAligned(header_.max_compression_size)) {
+        LOG(ERROR) << "Invalid compression factor: " << header_.max_compression_size;
+        return false;
+    }
+
+    LOG(INFO) << "Compression factor: " << header_.max_compression_size;
+    num_compress_threads_ = std::max(int(options_.num_compress_threads), 1);
     auto parts = android::base::Split(options_.compression, ",");
     if (parts.size() > 2) {
         LOG(ERROR) << "failed to parse compression parameters: invalid argument count: "
@@ -129,6 +137,18 @@
     header_.compression_algorithm = *algorithm;
     header_.op_count_max = options_.op_count_max;
 
+    if (!IsEstimating() && header_.op_count_max == 0) {
+        if (!options_.max_blocks.has_value()) {
+            LOG(ERROR) << "can't size op buffer size since op_count_max is 0 and max_blocks is not "
+                          "set.";
+            return false;
+        }
+        LOG(INFO) << "op count max is read in as 0. Setting to "
+                     "num blocks in partition "
+                  << options_.max_blocks.value();
+        header_.op_count_max = options_.max_blocks.value();
+    }
+
     if (parts.size() > 1) {
         if (!android::base::ParseUint(parts[1], &compression_.compression_level)) {
             LOG(ERROR) << "failed to parse compression level invalid type: " << parts[1];
@@ -141,20 +161,22 @@
 
     compression_.algorithm = *algorithm;
     if (compression_.algorithm != kCowCompressNone) {
-        compressor_ = ICompressor::Create(compression_, header_.block_size);
+        compressor_ = ICompressor::Create(compression_, header_.max_compression_size);
         if (compressor_ == nullptr) {
             LOG(ERROR) << "Failed to create compressor for " << compression_.algorithm;
             return false;
         }
-        if (options_.cluster_ops &&
-            (android::base::GetBoolProperty("ro.virtual_ab.batch_writes", false) ||
-             options_.batch_write)) {
-            batch_size_ = std::max<size_t>(options_.cluster_ops, 1);
-            data_vec_.reserve(batch_size_);
-            cached_data_.reserve(batch_size_);
-            cached_ops_.reserve(batch_size_);
-        }
     }
+
+    if (options_.cluster_ops &&
+        (android::base::GetBoolProperty("ro.virtual_ab.batch_writes", false) ||
+         options_.batch_write)) {
+        batch_size_ = std::max<size_t>(options_.cluster_ops, 1);
+        data_vec_.reserve(batch_size_);
+        cached_data_.reserve(batch_size_);
+        cached_ops_.reserve(batch_size_);
+    }
+
     if (batch_size_ > 1) {
         LOG(INFO) << "Batch writes: enabled with batch size " << batch_size_;
     } else {
@@ -165,6 +187,7 @@
         num_compress_threads_ = options_.num_compress_threads;
     }
     InitWorkers();
+
     return true;
 }
 
@@ -193,6 +216,14 @@
         }
     }
 
+    // TODO: b/322279333
+    // Set compression factor to 4k during estimation.
+    // Once COW estimator is ready to support variable
+    // block size, this check has to be removed.
+    if (IsEstimating()) {
+        header_.max_compression_size = header_.block_size;
+    }
+
     return true;
 }
 
@@ -300,17 +331,11 @@
 }
 
 bool CowWriterV3::EmitRawBlocks(uint64_t new_block_start, const void* data, size_t size) {
-    if (!CheckOpCount(size / header_.block_size)) {
-        return false;
-    }
     return EmitBlocks(new_block_start, data, size, 0, 0, kCowReplaceOp);
 }
 
 bool CowWriterV3::EmitXorBlocks(uint32_t new_block_start, const void* data, size_t size,
                                 uint32_t old_block, uint16_t offset) {
-    if (!CheckOpCount(size / header_.block_size)) {
-        return false;
-    }
     return EmitBlocks(new_block_start, data, size, old_block, offset, kCowXorOp);
 }
 
@@ -321,6 +346,46 @@
     return cached_data_.size() >= batch_size_ || cached_ops_.size() >= batch_size_ * 16;
 }
 
+bool CowWriterV3::ConstructCowOpCompressedBuffers(uint64_t new_block_start, const void* data,
+                                                  uint64_t old_block, uint16_t offset,
+                                                  CowOperationType type, size_t blocks_to_write) {
+    size_t compressed_bytes = 0;
+    auto&& blocks = CompressBlocks(blocks_to_write, data, type);
+    if (blocks.empty()) {
+        LOG(ERROR) << "Failed to compress blocks " << new_block_start << ", " << blocks_to_write
+                   << ", actual number of blocks received from compressor " << blocks.size();
+        return false;
+    }
+    size_t blocks_written = 0;
+    for (size_t blk_index = 0; blk_index < blocks.size(); blk_index++) {
+        CowOperation& op = cached_ops_.emplace_back();
+        auto& vec = data_vec_.emplace_back();
+        CompressedBuffer buffer = std::move(blocks[blk_index]);
+        auto& compressed_data = cached_data_.emplace_back(std::move(buffer.compressed_data));
+        op.new_block = new_block_start + blocks_written;
+
+        op.set_type(type);
+        op.set_compression_bits(std::log2(buffer.compression_factor / header_.block_size));
+
+        if (type == kCowXorOp) {
+            op.set_source((old_block + blocks_written) * header_.block_size + offset);
+        } else {
+            op.set_source(next_data_pos_ + compressed_bytes);
+        }
+
+        vec = {.iov_base = compressed_data.data(), .iov_len = compressed_data.size()};
+        op.data_length = vec.iov_len;
+        compressed_bytes += op.data_length;
+        blocks_written += (buffer.compression_factor / header_.block_size);
+    }
+    if (blocks_written != blocks_to_write) {
+        LOG(ERROR) << "Total compressed blocks: " << blocks_written
+                   << " Expected: " << blocks_to_write;
+        return false;
+    }
+    return true;
+}
+
 bool CowWriterV3::EmitBlocks(uint64_t new_block_start, const void* data, size_t size,
                              uint64_t old_block, uint16_t offset, CowOperationType type) {
     if (compression_.algorithm != kCowCompressNone && compressor_ == nullptr) {
@@ -330,40 +395,25 @@
     }
     const auto bytes = reinterpret_cast<const uint8_t*>(data);
     const size_t num_blocks = (size / header_.block_size);
-
+    if (!CheckOpCount(num_blocks)) {
+        return false;
+    }
     for (size_t i = 0; i < num_blocks;) {
-        const auto blocks_to_write =
+        const size_t blocks_to_write =
                 std::min<size_t>(batch_size_ - cached_data_.size(), num_blocks - i);
-        size_t compressed_bytes = 0;
-        auto&& blocks = CompressBlocks(blocks_to_write, bytes + header_.block_size * i);
-        if (blocks.size() != blocks_to_write) {
-            LOG(ERROR) << "Failed to compress blocks " << new_block_start + i << ", "
-                       << blocks_to_write << ", actual number of blocks received from compressor "
-                       << blocks.size();
+
+        if (!ConstructCowOpCompressedBuffers(new_block_start + i, bytes + header_.block_size * i,
+                                             old_block + i, offset, type, blocks_to_write)) {
             return false;
         }
-        for (size_t j = 0; j < blocks_to_write; j++) {
-            CowOperation& op = cached_ops_.emplace_back();
-            auto& vec = data_vec_.emplace_back();
-            auto& compressed_data = cached_data_.emplace_back(std::move(blocks[j]));
-            op.new_block = new_block_start + i + j;
 
-            op.set_type(type);
-            if (type == kCowXorOp) {
-                op.set_source((old_block + i + j) * header_.block_size + offset);
-            } else {
-                op.set_source(next_data_pos_ + compressed_bytes);
-            }
-            vec = {.iov_base = compressed_data.data(), .iov_len = compressed_data.size()};
-            op.data_length = vec.iov_len;
-            compressed_bytes += op.data_length;
-        }
         if (NeedsFlush() && !FlushCacheOps()) {
             LOG(ERROR) << "EmitBlocks with compression: write failed. new block: "
                        << new_block_start << " compression: " << compression_.algorithm
                        << ", op type: " << type;
             return false;
         }
+
         i += blocks_to_write;
     }
 
@@ -462,8 +512,7 @@
         }
         bytes_written += op.data_length;
     }
-    if (!WriteOperation({cached_ops_.data(), cached_ops_.size()},
-                        {data_vec_.data(), data_vec_.size()})) {
+    if (!WriteOperation(cached_ops_, data_vec_)) {
         LOG(ERROR) << "Failed to flush " << cached_ops_.size() << " ops to disk";
         return false;
     }
@@ -473,59 +522,169 @@
     return true;
 }
 
-std::vector<std::basic_string<uint8_t>> CowWriterV3::CompressBlocks(const size_t num_blocks,
-                                                                    const void* data) {
-    const size_t num_threads = (num_blocks == 1) ? 1 : num_compress_threads_;
-    const size_t blocks_per_thread = DivRoundUp(num_blocks, num_threads);
-    std::vector<std::basic_string<uint8_t>> compressed_buf;
-    compressed_buf.clear();
-    const uint8_t* const iter = reinterpret_cast<const uint8_t*>(data);
-    if (compression_.algorithm == kCowCompressNone) {
-        for (size_t i = 0; i < num_blocks; i++) {
-            auto& buf = compressed_buf.emplace_back();
-            buf.resize(header_.block_size);
-            std::memcpy(buf.data(), iter + i * header_.block_size, header_.block_size);
-        }
-        return compressed_buf;
-    }
-    if (num_threads <= 1) {
-        if (!CompressWorker::CompressBlocks(compressor_.get(), header_.block_size, data, num_blocks,
-                                            &compressed_buf)) {
-            return {};
-        }
-    } else {
-        // Submit the blocks per thread. The retrieval of
-        // compressed buffers has to be done in the same order.
-        // We should not poll for completed buffers in a different order as the
-        // buffers are tightly coupled with block ordering.
-        for (size_t i = 0; i < num_threads; i++) {
-            CompressWorker* worker = compress_threads_[i].get();
-            const auto blocks_in_batch =
-                    std::min(num_blocks - i * blocks_per_thread, blocks_per_thread);
-            worker->EnqueueCompressBlocks(iter + i * blocks_per_thread * header_.block_size,
-                                          blocks_in_batch);
-        }
-
-        for (size_t i = 0; i < num_threads; i++) {
-            CompressWorker* worker = compress_threads_[i].get();
-            if (!worker->GetCompressedBuffers(&compressed_buf)) {
-                return {};
-            }
-        }
-    }
-    for (size_t i = 0; i < num_blocks; i++) {
-        auto& block = compressed_buf[i];
-        if (block.size() >= header_.block_size) {
-            block.resize(header_.block_size);
-            std::memcpy(block.data(), iter + header_.block_size * i, header_.block_size);
-        }
+size_t CowWriterV3::GetCompressionFactor(const size_t blocks_to_compress,
+                                         CowOperationType type) const {
+    // For XOR ops, we don't support bigger block size compression yet.
+    // For bigger block size support, snapshot-merge also has to changed. We
+    // aren't there yet; hence, just stick to 4k for now until
+    // snapshot-merge is ready for XOR operation.
+    if (type == kCowXorOp) {
+        return header_.block_size;
     }
 
-    return compressed_buf;
+    size_t compression_factor = header_.max_compression_size;
+    while (compression_factor > header_.block_size) {
+        size_t num_blocks = compression_factor / header_.block_size;
+        if (blocks_to_compress >= num_blocks) {
+            return compression_factor;
+        }
+        compression_factor >>= 1;
+    }
+    return header_.block_size;
 }
 
-bool CowWriterV3::WriteOperation(std::basic_string_view<CowOperationV3> ops,
-                                 std::basic_string_view<struct iovec> data) {
+std::vector<CowWriterV3::CompressedBuffer> CowWriterV3::ProcessBlocksWithNoCompression(
+        const size_t num_blocks, const void* data, CowOperationType type) {
+    size_t blocks_to_compress = num_blocks;
+    const uint8_t* iter = reinterpret_cast<const uint8_t*>(data);
+    std::vector<CompressedBuffer> compressed_vec;
+
+    while (blocks_to_compress) {
+        CompressedBuffer buffer;
+
+        const size_t compression_factor = GetCompressionFactor(blocks_to_compress, type);
+        size_t num_blocks = compression_factor / header_.block_size;
+
+        buffer.compression_factor = compression_factor;
+        buffer.compressed_data.resize(compression_factor);
+
+        // No compression. Just copy the data as-is.
+        std::memcpy(buffer.compressed_data.data(), iter, compression_factor);
+
+        compressed_vec.push_back(std::move(buffer));
+        blocks_to_compress -= num_blocks;
+        iter += compression_factor;
+    }
+    return compressed_vec;
+}
+
+std::vector<CowWriterV3::CompressedBuffer> CowWriterV3::ProcessBlocksWithCompression(
+        const size_t num_blocks, const void* data, CowOperationType type) {
+    size_t blocks_to_compress = num_blocks;
+    const uint8_t* iter = reinterpret_cast<const uint8_t*>(data);
+    std::vector<CompressedBuffer> compressed_vec;
+
+    while (blocks_to_compress) {
+        CompressedBuffer buffer;
+
+        const size_t compression_factor = GetCompressionFactor(blocks_to_compress, type);
+        size_t num_blocks = compression_factor / header_.block_size;
+
+        buffer.compression_factor = compression_factor;
+        // Compress the blocks
+        buffer.compressed_data = compressor_->Compress(iter, compression_factor);
+        if (buffer.compressed_data.empty()) {
+            PLOG(ERROR) << "Compression failed";
+            return {};
+        }
+
+        // Check if the buffer was indeed compressed
+        if (buffer.compressed_data.size() >= compression_factor) {
+            buffer.compressed_data.resize(compression_factor);
+            std::memcpy(buffer.compressed_data.data(), iter, compression_factor);
+        }
+
+        compressed_vec.push_back(std::move(buffer));
+        blocks_to_compress -= num_blocks;
+        iter += compression_factor;
+    }
+    return compressed_vec;
+}
+
+std::vector<CowWriterV3::CompressedBuffer> CowWriterV3::ProcessBlocksWithThreadedCompression(
+        const size_t num_blocks, const void* data, CowOperationType type) {
+    const size_t num_threads = num_compress_threads_;
+    const size_t blocks_per_thread = DivRoundUp(num_blocks, num_threads);
+    const uint8_t* iter = reinterpret_cast<const uint8_t*>(data);
+
+    std::vector<CompressedBuffer> compressed_vec;
+    // Submit the blocks per thread. The retrieval of
+    // compressed buffers has to be done in the same order.
+    // We should not poll for completed buffers in a different order as the
+    // buffers are tightly coupled with block ordering.
+    for (size_t i = 0; i < num_threads; i++) {
+        CompressWorker* worker = compress_threads_[i].get();
+        auto blocks_in_batch = std::min(num_blocks - i * blocks_per_thread, blocks_per_thread);
+        // Enqueue the blocks to be compressed for each thread.
+        while (blocks_in_batch) {
+            CompressedBuffer buffer;
+
+            const size_t compression_factor = GetCompressionFactor(blocks_in_batch, type);
+            size_t num_blocks = compression_factor / header_.block_size;
+
+            buffer.compression_factor = compression_factor;
+            worker->EnqueueCompressBlocks(iter, compression_factor, 1);
+            compressed_vec.push_back(std::move(buffer));
+            blocks_in_batch -= num_blocks;
+            iter += compression_factor;
+        }
+    }
+
+    // Fetch compressed buffers from the threads
+    std::vector<std::vector<uint8_t>> compressed_buf;
+    compressed_buf.clear();
+    for (size_t i = 0; i < num_threads; i++) {
+        CompressWorker* worker = compress_threads_[i].get();
+        if (!worker->GetCompressedBuffers(&compressed_buf)) {
+            return {};
+        }
+    }
+
+    if (compressed_vec.size() != compressed_buf.size()) {
+        LOG(ERROR) << "Compressed buffer size: " << compressed_buf.size()
+                   << " - Expected: " << compressed_vec.size();
+        return {};
+    }
+
+    iter = reinterpret_cast<const uint8_t*>(data);
+    // Walk through all the compressed buffers
+    for (size_t i = 0; i < compressed_buf.size(); i++) {
+        auto& buffer = compressed_vec[i];
+        auto& block = compressed_buf[i];
+        size_t block_size = buffer.compression_factor;
+        // Check if the blocks was indeed compressed
+        if (block.size() >= block_size) {
+            buffer.compressed_data.resize(block_size);
+            std::memcpy(buffer.compressed_data.data(), iter, block_size);
+        } else {
+            // Compressed block
+            buffer.compressed_data.resize(block.size());
+            std::memcpy(buffer.compressed_data.data(), block.data(), block.size());
+        }
+        iter += block_size;
+    }
+    return compressed_vec;
+}
+
+std::vector<CowWriterV3::CompressedBuffer> CowWriterV3::CompressBlocks(const size_t num_blocks,
+                                                                       const void* data,
+                                                                       CowOperationType type) {
+    if (compression_.algorithm == kCowCompressNone) {
+        return ProcessBlocksWithNoCompression(num_blocks, data, type);
+    }
+
+    const size_t num_threads = (num_blocks == 1) ? 1 : num_compress_threads_;
+
+    // If no threads are required, just compress the blocks inline.
+    if (num_threads <= 1) {
+        return ProcessBlocksWithCompression(num_blocks, data, type);
+    }
+
+    return ProcessBlocksWithThreadedCompression(num_blocks, data, type);
+}
+
+bool CowWriterV3::WriteOperation(std::span<const CowOperationV3> ops,
+                                 std::span<const struct iovec> data) {
     const auto total_data_size =
             std::transform_reduce(data.begin(), data.end(), 0, std::plus<size_t>{},
                                   [](const struct iovec& a) { return a.iov_len; });
diff --git a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.h b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.h
index b19af60..48eb67b 100644
--- a/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.h
+++ b/fs_mgr/libsnapshot/libsnapshot_cow/writer_v3.h
@@ -15,15 +15,20 @@
 #pragma once
 
 #include <android-base/logging.h>
+#include <span>
 #include <string_view>
 #include <thread>
 #include <vector>
 
+#include <libsnapshot/cow_format.h>
+#include <storage_literals/storage_literals.h>
 #include "writer_base.h"
 
 namespace android {
 namespace snapshot {
 
+using namespace android::storage_literals;
+
 class CowWriterV3 : public CowWriterBase {
   public:
     explicit CowWriterV3(const CowOptions& options, android::base::unique_fd&& fd);
@@ -43,20 +48,50 @@
     virtual bool EmitSequenceData(size_t num_ops, const uint32_t* data) override;
 
   private:
+    struct CompressedBuffer {
+        size_t compression_factor;
+        std::vector<uint8_t> compressed_data;
+    };
     void SetupHeaders();
     bool NeedsFlush() const;
     bool ParseOptions();
     bool OpenForWrite();
     bool OpenForAppend(uint64_t label);
-    bool WriteOperation(std::basic_string_view<CowOperationV3> op,
-                        std::basic_string_view<struct iovec> data);
+    bool WriteOperation(std::span<const CowOperationV3> op, std::span<const struct iovec> data);
     bool EmitBlocks(uint64_t new_block_start, const void* data, size_t size, uint64_t old_block,
                     uint16_t offset, CowOperationType type);
+    bool ConstructCowOpCompressedBuffers(uint64_t new_block_start, const void* data,
+                                         uint64_t old_block, uint16_t offset, CowOperationType type,
+                                         size_t blocks_to_write);
     bool CheckOpCount(size_t op_count);
 
   private:
-    std::vector<std::basic_string<uint8_t>> CompressBlocks(const size_t num_blocks,
-                                                           const void* data);
+    std::vector<CompressedBuffer> ProcessBlocksWithNoCompression(const size_t num_blocks,
+                                                                 const void* data,
+                                                                 CowOperationType type);
+    std::vector<CompressedBuffer> ProcessBlocksWithCompression(const size_t num_blocks,
+                                                               const void* data,
+                                                               CowOperationType type);
+    std::vector<CompressedBuffer> ProcessBlocksWithThreadedCompression(const size_t num_blocks,
+                                                                       const void* data,
+                                                                       CowOperationType type);
+    std::vector<CompressedBuffer> CompressBlocks(const size_t num_blocks, const void* data,
+                                                 CowOperationType type);
+    size_t GetCompressionFactor(const size_t blocks_to_compress, CowOperationType type) const;
+
+    constexpr bool IsBlockAligned(const size_t size) {
+        // These are the only block size supported. Block size beyond 256k
+        // may impact random read performance post OTA boot.
+        const size_t values[] = {4_KiB, 8_KiB, 16_KiB, 32_KiB, 64_KiB, 128_KiB, 256_KiB};
+
+        auto it = std::lower_bound(std::begin(values), std::end(values), size);
+
+        if (it != std::end(values) && *it == size) {
+            return true;
+        }
+        return false;
+    }
+
     bool ReadBackVerification();
     bool FlushCacheOps();
     void InitWorkers();
@@ -76,7 +111,7 @@
     int num_compress_threads_ = 1;
     size_t batch_size_ = 1;
     std::vector<CowOperationV3> cached_ops_;
-    std::vector<std::basic_string<uint8_t>> cached_data_;
+    std::vector<std::vector<uint8_t>> cached_data_;
     std::vector<struct iovec> data_vec_;
 
     std::vector<std::thread> threads_;
diff --git a/fs_mgr/libsnapshot/partition_cow_creator.h b/fs_mgr/libsnapshot/partition_cow_creator.h
index bd5c8cb..cbd664f 100644
--- a/fs_mgr/libsnapshot/partition_cow_creator.h
+++ b/fs_mgr/libsnapshot/partition_cow_creator.h
@@ -59,6 +59,7 @@
     // True if snapuserd COWs are enabled.
     bool using_snapuserd = false;
     std::string compression_algorithm;
+    uint64_t compression_factor;
 
     // True if multi-threaded compression should be enabled
     bool enable_threading;
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index 9eb41b2..ba5fb88 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -420,6 +420,7 @@
     status->set_metadata_sectors(0);
     status->set_using_snapuserd(cow_creator->using_snapuserd);
     status->set_compression_algorithm(cow_creator->compression_algorithm);
+    status->set_compression_factor(cow_creator->compression_factor);
     if (cow_creator->enable_threading) {
         status->set_enable_threading(cow_creator->enable_threading);
     }
@@ -3233,8 +3234,10 @@
     }
 
     std::string compression_algorithm;
+    uint64_t compression_factor{};
     if (using_snapuserd) {
         compression_algorithm = dap_metadata.vabc_compression_param();
+        compression_factor = dap_metadata.compression_factor();
         if (compression_algorithm.empty()) {
             // Older OTAs don't set an explicit compression type, so default to gz.
             compression_algorithm = "gz";
@@ -3251,7 +3254,9 @@
             .extra_extents = {},
             .using_snapuserd = using_snapuserd,
             .compression_algorithm = compression_algorithm,
+            .compression_factor = compression_factor,
     };
+
     if (dap_metadata.vabc_feature_set().has_threaded()) {
         cow_creator.enable_threading = dap_metadata.vabc_feature_set().threaded();
     }
@@ -3553,6 +3558,7 @@
             options.compression = it->second.compression_algorithm();
             if (cow_version >= 3) {
                 options.op_count_max = it->second.estimated_ops_buffer_size();
+                options.max_blocks = {it->second.device_size() / options.block_size};
             }
 
             auto writer = CreateCowWriter(cow_version, options, std::move(fd));
@@ -3666,6 +3672,7 @@
     cow_options.batch_write = status.batched_writes();
     cow_options.num_compress_threads = status.enable_threading() ? 2 : 1;
     cow_options.op_count_max = status.estimated_ops_buffer_size();
+    cow_options.compression_factor = status.compression_factor();
     // Disable scratch space for vts tests
     if (device()->IsTestDevice()) {
         cow_options.scratch_space = false;
@@ -3793,6 +3800,7 @@
         ss << "    allocated sectors: " << status.sectors_allocated() << std::endl;
         ss << "    metadata sectors: " << status.metadata_sectors() << std::endl;
         ss << "    compression: " << status.compression_algorithm() << std::endl;
+        ss << "    compression factor: " << status.compression_factor() << std::endl;
         ss << "    merge phase: " << DecideMergePhase(status) << std::endl;
     }
     os << ss.rdbuf();
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/merge_worker.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/merge_worker.cpp
index 1e7d0c0..bd7eaca 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/merge_worker.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/merge_worker.cpp
@@ -13,10 +13,11 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "merge_worker.h"
 
+#include <libsnapshot/cow_format.h>
 #include <pthread.h>
 
+#include "merge_worker.h"
 #include "snapuserd_core.h"
 #include "utility.h"
 
@@ -37,6 +38,7 @@
     int num_ops = *pending_ops;
     int nr_consecutive = 0;
     bool checkOrderedOp = (replace_zero_vec == nullptr);
+    size_t num_blocks = 1;
 
     do {
         if (!cowop_iter_->AtEnd() && num_ops) {
@@ -48,11 +50,15 @@
             *source_offset = cow_op->new_block * BLOCK_SZ;
             if (!checkOrderedOp) {
                 replace_zero_vec->push_back(cow_op);
+                if (cow_op->type() == kCowReplaceOp) {
+                    // Get the number of blocks this op has compressed
+                    num_blocks = (CowOpCompressionSize(cow_op, BLOCK_SZ) / BLOCK_SZ);
+                }
             }
 
             cowop_iter_->Next();
-            num_ops -= 1;
-            nr_consecutive = 1;
+            num_ops -= num_blocks;
+            nr_consecutive = num_blocks;
 
             while (!cowop_iter_->AtEnd() && num_ops) {
                 const CowOperation* op = cowop_iter_->Get();
@@ -66,11 +72,20 @@
                 }
 
                 if (!checkOrderedOp) {
+                    if (op->type() == kCowReplaceOp) {
+                        num_blocks = (CowOpCompressionSize(op, BLOCK_SZ) / BLOCK_SZ);
+                        if (num_ops < num_blocks) {
+                            break;
+                        }
+                    } else {
+                        // zero op
+                        num_blocks = 1;
+                    }
                     replace_zero_vec->push_back(op);
                 }
 
-                nr_consecutive += 1;
-                num_ops -= 1;
+                nr_consecutive += num_blocks;
+                num_ops -= num_blocks;
                 cowop_iter_->Next();
             }
         }
@@ -108,18 +123,24 @@
 
         for (size_t i = 0; i < replace_zero_vec.size(); i++) {
             const CowOperation* cow_op = replace_zero_vec[i];
-
-            void* buffer = bufsink_.AcquireBuffer(BLOCK_SZ);
-            if (!buffer) {
-                SNAP_LOG(ERROR) << "AcquireBuffer failed in MergeReplaceOps";
-                return false;
-            }
             if (cow_op->type() == kCowReplaceOp) {
-                if (!reader_->ReadData(cow_op, buffer, BLOCK_SZ)) {
+                size_t buffer_size = CowOpCompressionSize(cow_op, BLOCK_SZ);
+                void* buffer = bufsink_.AcquireBuffer(buffer_size);
+                if (!buffer) {
+                    SNAP_LOG(ERROR) << "AcquireBuffer failed in MergeReplaceOps";
+                    return false;
+                }
+                // Read the entire compressed buffer spanning multiple blocks
+                if (!reader_->ReadData(cow_op, buffer, buffer_size)) {
                     SNAP_LOG(ERROR) << "Failed to read COW in merge";
                     return false;
                 }
             } else {
+                void* buffer = bufsink_.AcquireBuffer(BLOCK_SZ);
+                if (!buffer) {
+                    SNAP_LOG(ERROR) << "AcquireBuffer failed in MergeReplaceOps";
+                    return false;
+                }
                 CHECK(cow_op->type() == kCowZeroOp);
                 memset(buffer, 0, BLOCK_SZ);
             }
@@ -137,7 +158,7 @@
             return false;
         }
 
-        num_ops_merged += linear_blocks;
+        num_ops_merged += replace_zero_vec.size();
 
         if (num_ops_merged >= total_ops_merged_per_commit) {
             // Flush the data
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp
index f1d4065..d40b6d1 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#include "read_worker.h"
-
+#include <libsnapshot/cow_format.h>
 #include <pthread.h>
 
+#include "read_worker.h"
 #include "snapuserd_core.h"
 #include "utility.h"
 
@@ -48,9 +48,10 @@
 // Start the replace operation. This will read the
 // internal COW format and if the block is compressed,
 // it will be de-compressed.
-bool ReadWorker::ProcessReplaceOp(const CowOperation* cow_op, void* buffer) {
-    if (!reader_->ReadData(cow_op, buffer, BLOCK_SZ)) {
-        SNAP_LOG(ERROR) << "ProcessReplaceOp failed for block " << cow_op->new_block;
+bool ReadWorker::ProcessReplaceOp(const CowOperation* cow_op, void* buffer, size_t buffer_size) {
+    if (!reader_->ReadData(cow_op, buffer, buffer_size)) {
+        SNAP_LOG(ERROR) << "ProcessReplaceOp failed for block " << cow_op->new_block
+                        << " buffer_size: " << buffer_size;
         return false;
     }
     return true;
@@ -183,7 +184,13 @@
 
     switch (cow_op->type()) {
         case kCowReplaceOp: {
-            return ProcessReplaceOp(cow_op, buffer);
+            size_t buffer_size = CowOpCompressionSize(cow_op, BLOCK_SZ);
+            uint8_t chunk[buffer_size];
+            if (!ProcessReplaceOp(cow_op, chunk, buffer_size)) {
+                return false;
+            }
+            std::memcpy(buffer, chunk, BLOCK_SZ);
+            return true;
         }
 
         case kCowZeroOp: {
@@ -209,6 +216,13 @@
         return false;
     }
 
+    const size_t compression_factor = reader_->GetMaxCompressionSize();
+    if (!compression_factor) {
+        SNAP_LOG(ERROR) << "Compression factor is set to 0 which is invalid.";
+        return false;
+    }
+    decompressed_buffer_ = std::make_unique<uint8_t[]>(compression_factor);
+
     backing_store_fd_.reset(open(backing_store_device_.c_str(), O_RDONLY));
     if (backing_store_fd_ < 0) {
         SNAP_PLOG(ERROR) << "Open Failed: " << backing_store_device_;
@@ -276,6 +290,20 @@
     return true;
 }
 
+bool ReadWorker::GetCowOpBlockOffset(const CowOperation* cow_op, uint64_t io_block,
+                                     off_t* block_offset) {
+    // If this is a replace op, get the block offset of this I/O
+    // block. Multi-block compression is supported only for
+    // Replace ops.
+    //
+    // Note: This can be extended when we support COPY and XOR ops down the
+    // line as the blocks are mostly contiguous.
+    if (cow_op && cow_op->type() == kCowReplaceOp) {
+        return GetBlockOffset(cow_op, io_block, BLOCK_SZ, block_offset);
+    }
+    return false;
+}
+
 bool ReadWorker::ReadAlignedSector(sector_t sector, size_t sz) {
     size_t remaining_size = sz;
     std::vector<std::pair<sector_t, const CowOperation*>>& chunk_vec = snapuserd_->GetChunkVec();
@@ -286,7 +314,7 @@
         size_t read_size = std::min(PAYLOAD_BUFFER_SZ, remaining_size);
 
         size_t total_bytes_read = 0;
-
+        const CowOperation* prev_op = nullptr;
         while (read_size) {
             // We need to check every 4k block to verify if it is
             // present in the mapping.
@@ -294,7 +322,7 @@
 
             auto it = std::lower_bound(chunk_vec.begin(), chunk_vec.end(),
                                        std::make_pair(sector, nullptr), SnapshotHandler::compare);
-            bool not_found = (it == chunk_vec.end() || it->first != sector);
+            const bool sector_not_found = (it == chunk_vec.end() || it->first != sector);
 
             void* buffer = block_server_->GetResponseBuffer(BLOCK_SZ, size);
             if (!buffer) {
@@ -302,15 +330,88 @@
                 return false;
             }
 
-            if (not_found) {
-                // Block not found in map - which means this block was not
-                // changed as per the OTA. Just route the I/O to the base
-                // device.
-                if (!ReadDataFromBaseDevice(sector, buffer, size)) {
-                    SNAP_LOG(ERROR) << "ReadDataFromBaseDevice failed";
-                    return false;
+            if (sector_not_found) {
+                // Find the 4k block
+                uint64_t io_block = SectorToChunk(sector);
+                // Get the previous iterator. Since the vector is sorted, the
+                // lookup of this sector can fall in a range of blocks if
+                // CowOperation has compressed multiple blocks.
+                if (it != chunk_vec.begin()) {
+                    std::advance(it, -1);
                 }
 
+                bool is_mapping_present = true;
+
+                // Vector itself is empty. This can happen if the block was not
+                // changed per the OTA or if the merge was already complete but
+                // snapshot table was not yet collapsed.
+                if (it == chunk_vec.end()) {
+                    is_mapping_present = false;
+                }
+
+                const CowOperation* cow_op = nullptr;
+                // Relative offset within the compressed multiple blocks
+                off_t block_offset = 0;
+                if (is_mapping_present) {
+                    // Get the nearest operation found in the vector
+                    cow_op = it->second;
+                    is_mapping_present = GetCowOpBlockOffset(cow_op, io_block, &block_offset);
+                }
+
+                // Thus, we have a case wherein sector was not found in the sorted
+                // vector; however, we indeed have a mapping of this sector
+                // embedded in one of the CowOperation which spans multiple
+                // block size.
+                if (is_mapping_present) {
+                    // block_offset = 0 would mean that the CowOperation should
+                    // already be in the sorted vector. Hence, lookup should
+                    // have already found it. If not, this is a bug.
+                    if (block_offset == 0) {
+                        SNAP_LOG(ERROR)
+                                << "GetBlockOffset returned offset 0 for io_block: " << io_block;
+                        return false;
+                    }
+
+                    // Get the CowOperation actual compression size
+                    size_t compression_size = CowOpCompressionSize(cow_op, BLOCK_SZ);
+                    // Offset cannot be greater than the compression size
+                    if (block_offset > compression_size) {
+                        SNAP_LOG(ERROR) << "Invalid I/O block found. io_block: " << io_block
+                                        << " CowOperation-new-block: " << cow_op->new_block
+                                        << " compression-size: " << compression_size;
+                        return false;
+                    }
+
+                    // Cached copy of the previous iteration. Just retrieve the
+                    // data
+                    if (prev_op && prev_op->new_block == cow_op->new_block) {
+                        std::memcpy(buffer, (char*)decompressed_buffer_.get() + block_offset, size);
+                    } else {
+                        // Get the data from the disk based on the compression
+                        // size
+                        if (!ProcessReplaceOp(cow_op, decompressed_buffer_.get(),
+                                              compression_size)) {
+                            return false;
+                        }
+                        // Copy the data from the decompressed buffer relative
+                        // to the i/o block offset.
+                        std::memcpy(buffer, (char*)decompressed_buffer_.get() + block_offset, size);
+                        // Cache this CowOperation pointer for successive I/O
+                        // operation. Since the request is sequential and the
+                        // block is already decompressed, subsequest I/O blocks
+                        // can fetch the data directly from this decompressed
+                        // buffer.
+                        prev_op = cow_op;
+                    }
+                } else {
+                    // Block not found in map - which means this block was not
+                    // changed as per the OTA. Just route the I/O to the base
+                    // device.
+                    if (!ReadDataFromBaseDevice(sector, buffer, size)) {
+                        SNAP_LOG(ERROR) << "ReadDataFromBaseDevice failed";
+                        return false;
+                    }
+                }
                 ret = size;
             } else {
                 // We found the sector in mapping. Check the type of COW OP and
@@ -341,12 +442,50 @@
     return true;
 }
 
+bool ReadWorker::IsMappingPresent(const CowOperation* cow_op, loff_t requested_offset,
+                                  loff_t cow_op_offset) {
+    const bool replace_op = (cow_op->type() == kCowReplaceOp);
+    if (replace_op) {
+        size_t max_compressed_size = CowOpCompressionSize(cow_op, BLOCK_SZ);
+        if ((requested_offset >= cow_op_offset) &&
+            (requested_offset < (cow_op_offset + max_compressed_size))) {
+            return true;
+        }
+    }
+    return false;
+}
+
 int ReadWorker::ReadUnalignedSector(
         sector_t sector, size_t size,
         std::vector<std::pair<sector_t, const CowOperation*>>::iterator& it) {
     SNAP_LOG(DEBUG) << "ReadUnalignedSector: sector " << sector << " size: " << size
                     << " Aligned sector: " << it->first;
 
+    loff_t requested_offset = sector << SECTOR_SHIFT;
+    loff_t final_offset = (it->first) << SECTOR_SHIFT;
+
+    const CowOperation* cow_op = it->second;
+    if (IsMappingPresent(cow_op, requested_offset, final_offset)) {
+        size_t buffer_size = CowOpCompressionSize(cow_op, BLOCK_SZ);
+        uint8_t chunk[buffer_size];
+        // Read the entire decompressed buffer based on the block-size
+        if (!ProcessReplaceOp(cow_op, chunk, buffer_size)) {
+            return -1;
+        }
+        size_t skip_offset = (requested_offset - final_offset);
+        size_t write_sz = std::min(size, buffer_size - skip_offset);
+
+        auto buffer =
+                reinterpret_cast<uint8_t*>(block_server_->GetResponseBuffer(BLOCK_SZ, write_sz));
+        if (!buffer) {
+            SNAP_LOG(ERROR) << "ReadUnalignedSector failed to allocate buffer";
+            return -1;
+        }
+
+        std::memcpy(buffer, (char*)chunk + skip_offset, write_sz);
+        return write_sz;
+    }
+
     int num_sectors_skip = sector - it->first;
     size_t skip_size = num_sectors_skip << SECTOR_SHIFT;
     size_t write_size = std::min(size, BLOCK_SZ - skip_size);
@@ -445,8 +584,11 @@
 
     size_t remaining_size = size;
     int ret = 0;
+
+    const CowOperation* cow_op = it->second;
     if (!merge_complete && (requested_offset >= final_offset) &&
-        (requested_offset - final_offset) < BLOCK_SZ) {
+        (((requested_offset - final_offset) < BLOCK_SZ) ||
+         IsMappingPresent(cow_op, requested_offset, final_offset))) {
         // Read the partial un-aligned data
         ret = ReadUnalignedSector(sector, remaining_size, it);
         if (ret < 0) {
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.h b/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.h
index 1aff50c..04b2736 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.h
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.h
@@ -44,9 +44,12 @@
     bool ProcessXorOp(const CowOperation* cow_op, void* buffer);
     bool ProcessOrderedOp(const CowOperation* cow_op, void* buffer);
     bool ProcessCopyOp(const CowOperation* cow_op, void* buffer);
-    bool ProcessReplaceOp(const CowOperation* cow_op, void* buffer);
+    bool ProcessReplaceOp(const CowOperation* cow_op, void* buffer, size_t buffer_size);
     bool ProcessZeroOp(void* buffer);
 
+    bool IsMappingPresent(const CowOperation* cow_op, loff_t requested_offset,
+                          loff_t cow_op_offset);
+    bool GetCowOpBlockOffset(const CowOperation* cow_op, uint64_t io_block, off_t* block_offset);
     bool ReadAlignedSector(sector_t sector, size_t sz);
     bool ReadUnalignedSector(sector_t sector, size_t size);
     int ReadUnalignedSector(sector_t sector, size_t size,
@@ -56,6 +59,7 @@
 
     constexpr bool IsBlockAligned(size_t size) { return ((size & (BLOCK_SZ - 1)) == 0); }
     constexpr sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
+    constexpr chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
 
     std::string backing_store_device_;
     unique_fd backing_store_fd_;
@@ -65,8 +69,9 @@
     std::shared_ptr<IBlockServerOpener> block_server_opener_;
     std::unique_ptr<IBlockServer> block_server_;
 
-    std::basic_string<uint8_t> xor_buffer_;
+    std::vector<uint8_t> xor_buffer_;
     std::unique_ptr<void, decltype(&::free)> aligned_buffer_;
+    std::unique_ptr<uint8_t[]> decompressed_buffer_;
 };
 
 }  // namespace snapshot
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_test.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_test.cpp
index 8ddb0f4..76b44b4 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_test.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_test.cpp
@@ -64,6 +64,9 @@
 struct TestParam {
     bool io_uring;
     bool o_direct;
+    std::string compression;
+    int block_size;
+    int num_threads;
 };
 
 class SnapuserdTestBase : public ::testing::TestWithParam<TestParam> {
@@ -74,6 +77,7 @@
     void CreateCowDevice();
     void SetDeviceControlName();
     std::unique_ptr<ICowWriter> CreateCowDeviceInternal();
+    std::unique_ptr<ICowWriter> CreateV3Cow();
 
     std::unique_ptr<ITestHarness> harness_;
     size_t size_ = 10_MiB;
@@ -133,6 +137,24 @@
     return CreateCowWriter(kDefaultCowVersion, options, std::move(fd));
 }
 
+std::unique_ptr<ICowWriter> SnapuserdTestBase::CreateV3Cow() {
+    const TestParam params = GetParam();
+
+    CowOptions options;
+    options.op_count_max = 100000;
+    options.compression = params.compression;
+    options.num_compress_threads = params.num_threads;
+    options.batch_write = true;
+    options.compression_factor = params.block_size;
+
+    cow_system_ = std::make_unique<TemporaryFile>();
+
+    unique_fd fd(cow_system_->fd);
+    cow_system_->fd = -1;
+
+    return CreateCowWriter(3, options, std::move(fd));
+}
+
 void SnapuserdTestBase::CreateCowDevice() {
     unique_fd rnd_fd;
     loff_t offset = 0;
@@ -236,6 +258,7 @@
     void SetupOrderedOpsInverted();
     void SetupCopyOverlap_1();
     void SetupCopyOverlap_2();
+    void SetupDeviceForPassthrough();
     bool Merge();
     void ValidateMerge();
     void ReadSnapshotDeviceAndValidate();
@@ -258,6 +281,9 @@
 
     void SimulateDaemonRestart();
 
+    void CreateCowDeviceWithNoBlockChanges();
+    void ValidateDeviceWithNoBlockChanges();
+
     void CreateCowDeviceOrderedOps();
     void CreateCowDeviceOrderedOpsInverted();
     void CreateCowDeviceWithCopyOverlap_1();
@@ -307,6 +333,12 @@
     ASSERT_NO_FATAL_FAILURE(SetupDaemon());
 }
 
+void SnapuserdTest::SetupDeviceForPassthrough() {
+    ASSERT_NO_FATAL_FAILURE(CreateBaseDevice());
+    ASSERT_NO_FATAL_FAILURE(CreateCowDeviceWithNoBlockChanges());
+    ASSERT_NO_FATAL_FAILURE(SetupDaemon());
+}
+
 void SnapuserdTest::SetupOrderedOpsInverted() {
     ASSERT_NO_FATAL_FAILURE(CreateBaseDevice());
     ASSERT_NO_FATAL_FAILURE(CreateCowDeviceOrderedOpsInverted());
@@ -480,6 +512,47 @@
     }
 }
 
+void SnapuserdTest::CreateCowDeviceWithNoBlockChanges() {
+    auto writer = CreateCowDeviceInternal();
+    ASSERT_NE(writer, nullptr);
+
+    std::unique_ptr<uint8_t[]> buffer = std::make_unique<uint8_t[]>(BLOCK_SZ);
+    std::memset(buffer.get(), 'A', BLOCK_SZ);
+
+    // This test focusses on not changing all the blocks thereby validating
+    // the pass-through I/O
+
+    // Replace the first block
+    ASSERT_TRUE(writer->AddRawBlocks(1, buffer.get(), BLOCK_SZ));
+
+    // Set zero block of Block 3
+    ASSERT_TRUE(writer->AddZeroBlocks(3, 1));
+
+    ASSERT_TRUE(writer->Finalize());
+    orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
+
+    // Read the entire base device
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
+              true);
+
+    off_t offset = BLOCK_SZ;
+    std::memcpy(orig_buffer_.get() + offset, buffer.get(), BLOCK_SZ);
+    offset = 3 * BLOCK_SZ;
+    std::memset(orig_buffer_.get() + offset, 0, BLOCK_SZ);
+}
+
+void SnapuserdTest::ValidateDeviceWithNoBlockChanges() {
+    unique_fd fd(open(dmuser_dev_->GetPath().c_str(), O_RDONLY));
+    ASSERT_GE(fd, 0);
+    std::unique_ptr<uint8_t[]> snapshot_buffer = std::make_unique<uint8_t[]>(size_);
+    std::memset(snapshot_buffer.get(), 'B', size_);
+
+    // All the I/O request should be a pass through to base device except for
+    // Block 1 and Block 3.
+    ASSERT_EQ(ReadFullyAtOffset(fd, snapshot_buffer.get(), size_, 0), true);
+    ASSERT_EQ(memcmp(snapshot_buffer.get(), orig_buffer_.get(), size_), 0);
+}
+
 void SnapuserdTest::CreateCowDeviceWithCopyOverlap_1() {
     auto writer = CreateCowDeviceInternal();
     ASSERT_NE(writer, nullptr);
@@ -781,6 +854,20 @@
     ASSERT_TRUE(Merge());
 }
 
+TEST_P(SnapuserdTest, Snapshot_Passthrough) {
+    if (!harness_->HasUserDevice()) {
+        GTEST_SKIP() << "Skipping snapshot read; not supported";
+    }
+    ASSERT_NO_FATAL_FAILURE(SetupDeviceForPassthrough());
+    // I/O before merge
+    ASSERT_NO_FATAL_FAILURE(ValidateDeviceWithNoBlockChanges());
+    ASSERT_TRUE(Merge());
+    ValidateMerge();
+    // I/O after merge - daemon should read directly
+    // from base device
+    ASSERT_NO_FATAL_FAILURE(ValidateDeviceWithNoBlockChanges());
+}
+
 TEST_P(SnapuserdTest, Snapshot_IO_TEST) {
     if (!harness_->HasUserDevice()) {
         GTEST_SKIP() << "Skipping snapshot read; not supported";
@@ -853,7 +940,7 @@
         GTEST_SKIP() << "Skipping snapshot read; not supported";
     }
     ASSERT_NO_FATAL_FAILURE(SetupCopyOverlap_2());
-    ASSERT_NO_FATAL_FAILURE(MergeInterruptAndValidate(2));
+    ASSERT_NO_FATAL_FAILURE(MergeInterruptFixed(300));
     ValidateMerge();
 }
 
@@ -881,11 +968,243 @@
     ValidateMerge();
 }
 
+class SnapuserdVariableBlockSizeTest : public SnapuserdTest {
+  public:
+    void SetupCowV3ForVariableBlockSize();
+    void ReadSnapshotWithVariableBlockSize();
+
+  protected:
+    void SetUp() override;
+    void TearDown() override;
+
+    void CreateV3CowDeviceForVariableBlockSize();
+};
+
+void SnapuserdVariableBlockSizeTest::SetupCowV3ForVariableBlockSize() {
+    ASSERT_NO_FATAL_FAILURE(CreateBaseDevice());
+    ASSERT_NO_FATAL_FAILURE(CreateV3CowDeviceForVariableBlockSize());
+    ASSERT_NO_FATAL_FAILURE(SetupDaemon());
+}
+
+void SnapuserdVariableBlockSizeTest::CreateV3CowDeviceForVariableBlockSize() {
+    auto writer = CreateV3Cow();
+
+    size_t total_data_to_write = size_;
+
+    size_t total_blocks_to_write = total_data_to_write / BLOCK_SZ;
+    size_t num_blocks_per_op = total_blocks_to_write / 4;
+    size_t source_block = 0;
+
+    size_t seq_len = num_blocks_per_op;
+    uint32_t sequence[seq_len];
+    size_t xor_block_start = seq_len * 3;
+    for (size_t i = 0; i < seq_len; i++) {
+        sequence[i] = xor_block_start + i;
+    }
+    ASSERT_TRUE(writer->AddSequenceData(seq_len, sequence));
+
+    size_t total_replace_blocks = num_blocks_per_op;
+    // Write some data which can be compressed
+    std::string data;
+    data.resize(total_replace_blocks * BLOCK_SZ, '\0');
+    for (size_t i = 0; i < data.size(); i++) {
+        data[i] = static_cast<char>('A' + i / BLOCK_SZ);
+    }
+    // REPLACE ops
+    ASSERT_TRUE(writer->AddRawBlocks(source_block, data.data(), data.size()));
+
+    total_blocks_to_write -= total_replace_blocks;
+    source_block = source_block + total_replace_blocks;
+
+    // ZERO ops
+    size_t total_zero_blocks = total_blocks_to_write / 3;
+    ASSERT_TRUE(writer->AddZeroBlocks(source_block, total_zero_blocks));
+
+    total_blocks_to_write -= total_zero_blocks;
+    source_block = source_block + total_zero_blocks;
+
+    // Generate some random data wherein few blocks cannot be compressed.
+    // This is to test the I/O path for those blocks which aren't compressed.
+    size_t total_random_data_blocks = total_blocks_to_write / 2;
+    unique_fd rnd_fd(open("/dev/random", O_RDONLY));
+
+    ASSERT_GE(rnd_fd, 0);
+    std::string random_buffer;
+    random_buffer.resize(total_random_data_blocks * BLOCK_SZ, '\0');
+    ASSERT_EQ(
+            android::base::ReadFullyAtOffset(rnd_fd, random_buffer.data(), random_buffer.size(), 0),
+            true);
+    // REPLACE ops
+    ASSERT_TRUE(writer->AddRawBlocks(source_block, random_buffer.data(), random_buffer.size()));
+
+    total_blocks_to_write -= total_random_data_blocks;
+    source_block = source_block + total_random_data_blocks;
+
+    // XOR ops will always be 4k blocks
+    std::string xor_buffer;
+    xor_buffer.resize(total_blocks_to_write * BLOCK_SZ, '\0');
+    for (size_t i = 0; i < xor_buffer.size(); i++) {
+        xor_buffer[i] = static_cast<char>('C' + i / BLOCK_SZ);
+    }
+    size_t xor_offset = 21;
+    std::string source_buffer;
+    source_buffer.resize(total_blocks_to_write * BLOCK_SZ, '\0');
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, source_buffer.data(), source_buffer.size(),
+                                               size_ + xor_offset),
+              true);
+    for (size_t i = 0; i < xor_buffer.size(); i++) {
+        xor_buffer[i] ^= source_buffer[i];
+    }
+
+    ASSERT_EQ(xor_block_start, source_block);
+
+    ASSERT_TRUE(writer->AddXorBlocks(source_block, xor_buffer.data(), xor_buffer.size(),
+                                     (size_ / BLOCK_SZ), xor_offset));
+    // Flush operations
+    ASSERT_TRUE(writer->Finalize());
+
+    // Construct the buffer required for validation
+    orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
+
+    // Read the entire base device
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
+              true);
+
+    // REPLACE ops which are compressed
+    std::memcpy(orig_buffer_.get(), data.data(), data.size());
+    size_t offset = data.size();
+
+    // ZERO ops
+    std::string zero_buffer(total_zero_blocks * BLOCK_SZ, 0);
+    std::memcpy((char*)orig_buffer_.get() + offset, (void*)zero_buffer.c_str(), zero_buffer.size());
+    offset += zero_buffer.size();
+
+    // REPLACE ops - Random buffers which aren't compressed
+    std::memcpy((char*)orig_buffer_.get() + offset, random_buffer.c_str(), random_buffer.size());
+    offset += random_buffer.size();
+
+    // XOR Ops which default to 4k block size compression irrespective of
+    // compression factor
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, (char*)orig_buffer_.get() + offset,
+                                               xor_buffer.size(), size_ + xor_offset),
+              true);
+    for (size_t i = 0; i < xor_buffer.size(); i++) {
+        orig_buffer_.get()[offset + i] = (uint8_t)(orig_buffer_.get()[offset + i] ^ xor_buffer[i]);
+    }
+}
+
+void SnapuserdVariableBlockSizeTest::ReadSnapshotWithVariableBlockSize() {
+    unique_fd fd(open(dmuser_dev_->GetPath().c_str(), O_RDONLY | O_DIRECT));
+    ASSERT_GE(fd, 0);
+
+    void* addr;
+    ssize_t page_size = getpagesize();
+    ASSERT_EQ(posix_memalign(&addr, page_size, size_), 0);
+    std::unique_ptr<void, decltype(&::free)> snapshot_buffer(addr, ::free);
+
+    const TestParam params = GetParam();
+
+    // Issue I/O request with various block sizes
+    size_t num_blocks = size_ / params.block_size;
+    off_t offset = 0;
+    for (size_t i = 0; i < num_blocks; i++) {
+        ASSERT_EQ(ReadFullyAtOffset(fd, (char*)snapshot_buffer.get() + offset, params.block_size,
+                                    offset),
+                  true);
+        offset += params.block_size;
+    }
+    // Validate buffer
+    ASSERT_EQ(memcmp(snapshot_buffer.get(), orig_buffer_.get(), size_), 0);
+
+    // Reset the buffer
+    std::memset(snapshot_buffer.get(), 0, size_);
+
+    // Read one full chunk in a single shot and re-validate.
+    ASSERT_EQ(ReadFullyAtOffset(fd, snapshot_buffer.get(), size_, 0), true);
+    ASSERT_EQ(memcmp(snapshot_buffer.get(), orig_buffer_.get(), size_), 0);
+
+    // Reset the buffer
+    std::memset(snapshot_buffer.get(), 0, size_);
+
+    // Buffered I/O test
+    fd.reset(open(dmuser_dev_->GetPath().c_str(), O_RDONLY));
+    ASSERT_GE(fd, 0);
+
+    // Try not to cache
+    posix_fadvise(fd.get(), 0, size_, POSIX_FADV_DONTNEED);
+
+    size_t num_blocks_per_op = (size_ / BLOCK_SZ) / 4;
+    offset = num_blocks_per_op * BLOCK_SZ;
+    size_t read_size = 1019;  // bytes
+    offset -= 111;
+
+    // Issue a un-aligned read which crosses the boundary between a REPLACE block and a ZERO
+    // block.
+    ASSERT_EQ(ReadFullyAtOffset(fd, snapshot_buffer.get(), read_size, offset), true);
+
+    // Validate the data
+    ASSERT_EQ(std::memcmp(snapshot_buffer.get(), (char*)orig_buffer_.get() + offset, read_size), 0);
+
+    offset = (num_blocks_per_op * 3) * BLOCK_SZ;
+    offset -= (BLOCK_SZ - 119);
+    read_size = 8111;
+
+    // Issue an un-aligned read which crosses the boundary between a REPLACE block of random
+    // un-compressed data and a XOR block
+    ASSERT_EQ(ReadFullyAtOffset(fd, snapshot_buffer.get(), read_size, offset), true);
+
+    // Validate the data
+    ASSERT_EQ(std::memcmp(snapshot_buffer.get(), (char*)orig_buffer_.get() + offset, read_size), 0);
+
+    // Reset the buffer
+    std::memset(snapshot_buffer.get(), 0, size_);
+
+    // Read just one byte at an odd offset which is a REPLACE op
+    offset = 19;
+    read_size = 1;
+    ASSERT_EQ(ReadFullyAtOffset(fd, snapshot_buffer.get(), read_size, offset), true);
+    // Validate the data
+    ASSERT_EQ(std::memcmp(snapshot_buffer.get(), (char*)orig_buffer_.get() + offset, read_size), 0);
+
+    // Reset the buffer
+    std::memset(snapshot_buffer.get(), 0, size_);
+
+    // Read a block which has no mapping to a COW operation. This read should be
+    // a pass-through to the underlying base device.
+    offset = size_ + 9342;
+    read_size = 30;
+    ASSERT_EQ(ReadFullyAtOffset(fd, snapshot_buffer.get(), read_size, offset), true);
+    // Validate the data
+    ASSERT_EQ(std::memcmp(snapshot_buffer.get(), (char*)orig_buffer_.get() + offset, read_size), 0);
+}
+
+void SnapuserdVariableBlockSizeTest::SetUp() {
+    ASSERT_NO_FATAL_FAILURE(SnapuserdTest::SetUp());
+}
+
+void SnapuserdVariableBlockSizeTest::TearDown() {
+    SnapuserdTest::TearDown();
+}
+
+TEST_P(SnapuserdVariableBlockSizeTest, Snapshot_Test_Variable_Block_Size) {
+    if (!harness_->HasUserDevice()) {
+        GTEST_SKIP() << "Skipping snapshot read; not supported";
+    }
+    ASSERT_NO_FATAL_FAILURE(SetupCowV3ForVariableBlockSize());
+    ASSERT_NO_FATAL_FAILURE(ReadSnapshotWithVariableBlockSize());
+    ASSERT_TRUE(StartMerge());
+    CheckMergeCompletion();
+    ValidateMerge();
+    ASSERT_NO_FATAL_FAILURE(ReadSnapshotWithVariableBlockSize());
+}
+
 class HandlerTest : public SnapuserdTestBase {
   protected:
     void SetUp() override;
     void TearDown() override;
 
+    void SetUpV2Cow();
+    void InitializeDevice();
     AssertionResult ReadSectors(sector_t sector, uint64_t size, void* buffer);
 
     TestBlockServerFactory factory_;
@@ -896,10 +1215,11 @@
     std::future<bool> handler_thread_;
 };
 
-void HandlerTest::SetUp() {
-    ASSERT_NO_FATAL_FAILURE(SnapuserdTestBase::SetUp());
-    ASSERT_NO_FATAL_FAILURE(CreateBaseDevice());
+void HandlerTest::SetUpV2Cow() {
     ASSERT_NO_FATAL_FAILURE(CreateCowDevice());
+}
+
+void HandlerTest::InitializeDevice() {
     ASSERT_NO_FATAL_FAILURE(SetDeviceControlName());
 
     opener_ = factory_.CreateTestOpener(system_device_ctrl_name_);
@@ -921,6 +1241,13 @@
     handler_thread_ = std::async(std::launch::async, &SnapshotHandler::Start, handler_.get());
 }
 
+void HandlerTest::SetUp() {
+    ASSERT_NO_FATAL_FAILURE(SnapuserdTestBase::SetUp());
+    ASSERT_NO_FATAL_FAILURE(CreateBaseDevice());
+    ASSERT_NO_FATAL_FAILURE(SetUpV2Cow());
+    ASSERT_NO_FATAL_FAILURE(InitializeDevice());
+}
+
 void HandlerTest::TearDown() {
     ASSERT_TRUE(factory_.DeleteQueue(system_device_ctrl_name_));
     ASSERT_TRUE(handler_thread_.get());
@@ -986,6 +1313,147 @@
     ASSERT_EQ(memcmp(snapuserd_buffer.get(), orig_buffer_.get(), SECTOR_SIZE), 0);
 }
 
+class HandlerTestV3 : public HandlerTest {
+  public:
+    void ReadSnapshotWithVariableBlockSize();
+
+  protected:
+    void SetUp() override;
+    void TearDown() override;
+    void SetUpV3Cow();
+};
+
+void HandlerTestV3::SetUp() {
+    ASSERT_NO_FATAL_FAILURE(SnapuserdTestBase::SetUp());
+    ASSERT_NO_FATAL_FAILURE(CreateBaseDevice());
+    ASSERT_NO_FATAL_FAILURE(SetUpV3Cow());
+    ASSERT_NO_FATAL_FAILURE(InitializeDevice());
+}
+
+void HandlerTestV3::TearDown() {
+    ASSERT_NO_FATAL_FAILURE(HandlerTest::TearDown());
+}
+
+void HandlerTestV3::SetUpV3Cow() {
+    auto writer = CreateV3Cow();
+
+    size_t total_data_to_write = size_;
+
+    size_t total_blocks_to_write = total_data_to_write / BLOCK_SZ;
+    size_t num_blocks_per_op = total_blocks_to_write / 4;
+    size_t source_block = 0;
+
+    size_t total_replace_blocks = num_blocks_per_op;
+    // Write some data which can be compressed
+    std::string data;
+    data.resize(total_replace_blocks * BLOCK_SZ, '\0');
+    for (size_t i = 0; i < data.size(); i++) {
+        data[i] = static_cast<char>('A' + i / BLOCK_SZ);
+    }
+    // REPLACE ops
+    ASSERT_TRUE(writer->AddRawBlocks(source_block, data.data(), data.size()));
+
+    total_blocks_to_write -= total_replace_blocks;
+    source_block = source_block + total_replace_blocks;
+
+    // ZERO ops
+    size_t total_zero_blocks = total_blocks_to_write / 3;
+    ASSERT_TRUE(writer->AddZeroBlocks(source_block, total_zero_blocks));
+
+    total_blocks_to_write -= total_zero_blocks;
+    source_block = source_block + total_zero_blocks;
+
+    // Generate some random data wherein few blocks cannot be compressed.
+    // This is to test the I/O path for those blocks which aren't compressed.
+    size_t total_random_data_blocks = total_blocks_to_write;
+    unique_fd rnd_fd(open("/dev/random", O_RDONLY));
+
+    ASSERT_GE(rnd_fd, 0);
+    std::string random_buffer;
+    random_buffer.resize(total_random_data_blocks * BLOCK_SZ, '\0');
+    ASSERT_EQ(
+            android::base::ReadFullyAtOffset(rnd_fd, random_buffer.data(), random_buffer.size(), 0),
+            true);
+    // REPLACE ops
+    ASSERT_TRUE(writer->AddRawBlocks(source_block, random_buffer.data(), random_buffer.size()));
+    // Flush operations
+    ASSERT_TRUE(writer->Finalize());
+
+    // Construct the buffer required for validation
+    orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
+
+    // Read the entire base device
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
+              true);
+
+    // REPLACE ops which are compressed
+    std::memcpy(orig_buffer_.get(), data.data(), data.size());
+    size_t offset = data.size();
+
+    // ZERO ops
+    std::string zero_buffer(total_zero_blocks * BLOCK_SZ, 0);
+    std::memcpy((char*)orig_buffer_.get() + offset, (void*)zero_buffer.c_str(), zero_buffer.size());
+    offset += zero_buffer.size();
+
+    // REPLACE ops - Random buffers which aren't compressed
+    std::memcpy((char*)orig_buffer_.get() + offset, random_buffer.c_str(), random_buffer.size());
+}
+
+TEST_P(HandlerTestV3, Read) {
+    std::unique_ptr<uint8_t[]> snapuserd_buffer = std::make_unique<uint8_t[]>(size_);
+
+    size_t read_size = SECTOR_SIZE;
+    off_t offset = 0;
+    // Read the first sector
+    ASSERT_TRUE(ReadSectors(1, read_size, snapuserd_buffer.get()));
+    // Validate the data
+    ASSERT_EQ(std::memcmp(snapuserd_buffer.get(), orig_buffer_.get(), read_size), 0);
+
+    // Read the second block at offset 7680 (Sector 15). This will map to the
+    // first COW operation for variable block size
+    offset += (((BLOCK_SZ * 2) - SECTOR_SIZE));
+    read_size = BLOCK_SZ;  // Span across two REPLACE ops
+    ASSERT_TRUE(ReadSectors(offset / SECTOR_SIZE, read_size, snapuserd_buffer.get()));
+    // Validate the data
+    ASSERT_EQ(std::memcmp(snapuserd_buffer.get(), (char*)orig_buffer_.get() + offset, read_size),
+              0);
+
+    // Fill some other data since we are going to read zero blocks
+    std::memset(snapuserd_buffer.get(), 'Z', size_);
+
+    size_t num_blocks_per_op = (size_ / BLOCK_SZ) / 4;
+    offset = num_blocks_per_op * BLOCK_SZ;
+    // Issue read spanning between a REPLACE op and ZERO ops. The starting point
+    // is the last REPLACE op at sector 5118
+    offset -= (SECTOR_SIZE * 2);
+    // This will make sure it falls back to aligned reads after reading the
+    // first unaligned block
+    read_size = BLOCK_SZ * 6;
+    ASSERT_TRUE(ReadSectors(offset / SECTOR_SIZE, read_size, snapuserd_buffer.get()));
+    // Validate the data
+    ASSERT_EQ(std::memcmp(snapuserd_buffer.get(), (char*)orig_buffer_.get() + offset, read_size),
+              0);
+
+    // Issue I/O request at the last block. The first chunk of (SECTOR_SIZE * 2)
+    // will be from REPLACE op which has random buffers
+    offset = (size_ - (SECTOR_SIZE * 2));
+    // Request will span beyond the COW mapping, thereby fetching data from base
+    // device.
+    read_size = BLOCK_SZ * 8;
+    ASSERT_TRUE(ReadSectors(offset / SECTOR_SIZE, read_size, snapuserd_buffer.get()));
+    // Validate the data
+    ASSERT_EQ(std::memcmp(snapuserd_buffer.get(), (char*)orig_buffer_.get() + offset, read_size),
+              0);
+
+    // Issue I/O request which are not mapped to any COW operations
+    offset = (size_ + (SECTOR_SIZE * 3));
+    read_size = BLOCK_SZ * 3;
+    ASSERT_TRUE(ReadSectors(offset / SECTOR_SIZE, read_size, snapuserd_buffer.get()));
+    // Validate the data
+    ASSERT_EQ(std::memcmp(snapuserd_buffer.get(), (char*)orig_buffer_.get() + offset, read_size),
+              0);
+}
+
 std::vector<bool> GetIoUringConfigs() {
 #if __ANDROID__
     if (!android::base::GetBoolProperty("ro.virtual_ab.io_uring.enabled", false)) {
@@ -1018,6 +1486,37 @@
     return testParams;
 }
 
+std::vector<TestParam> GetVariableBlockTestConfigs() {
+    std::vector<TestParam> testParams;
+
+    std::vector<int> block_sizes = {4096, 8192, 16384, 32768, 65536, 131072};
+    std::vector<std::string> compression_algo = {"none", "lz4", "zstd", "gz"};
+    std::vector<int> threads = {1, 2};
+    std::vector<bool> uring_configs = GetIoUringConfigs();
+
+    // This should test 96 combination and validates the I/O path
+    for (auto block : block_sizes) {
+        for (auto compression : compression_algo) {
+            for (auto thread : threads) {
+                for (auto io_uring : uring_configs) {
+                    TestParam param;
+                    param.block_size = block;
+                    param.compression = compression;
+                    param.num_threads = thread;
+                    param.io_uring = io_uring;
+                    param.o_direct = false;
+                    testParams.push_back(std::move(param));
+                }
+            }
+        }
+    }
+
+    return testParams;
+}
+
+INSTANTIATE_TEST_SUITE_P(Io, SnapuserdVariableBlockSizeTest,
+                         ::testing::ValuesIn(GetVariableBlockTestConfigs()));
+INSTANTIATE_TEST_SUITE_P(Io, HandlerTestV3, ::testing::ValuesIn(GetVariableBlockTestConfigs()));
 INSTANTIATE_TEST_SUITE_P(Io, SnapuserdTest, ::testing::ValuesIn(GetTestConfigs()));
 INSTANTIATE_TEST_SUITE_P(Io, HandlerTest, ::testing::ValuesIn(GetTestConfigs()));
 
diff --git a/fs_mgr/libsnapshot/tools/cow_benchmark.cpp b/fs_mgr/libsnapshot/tools/cow_benchmark.cpp
index 4d5e346..fb463c8 100644
--- a/fs_mgr/libsnapshot/tools/cow_benchmark.cpp
+++ b/fs_mgr/libsnapshot/tools/cow_benchmark.cpp
@@ -78,7 +78,7 @@
 
     for (size_t i = 0; i < compressors.size(); i++) {
         const auto start = std::chrono::steady_clock::now();
-        std::basic_string<uint8_t> compressed_data =
+        std::vector<uint8_t> compressed_data =
                 compressors[i]->Compress(buffer.data(), buffer.size());
         const auto end = std::chrono::steady_clock::now();
         const auto latency =
@@ -141,13 +141,13 @@
     std::vector<std::pair<double, std::string>> ratios;
 
     for (size_t i = 0; i < compressors.size(); i++) {
-        std::vector<std::basic_string<uint8_t>> compressed_data_vec;
+        std::vector<std::vector<uint8_t>> compressed_data_vec;
         int num_blocks = buffer.size() / BLOCK_SZ;
         const uint8_t* iter = reinterpret_cast<const uint8_t*>(buffer.data());
 
         const auto start = std::chrono::steady_clock::now();
         while (num_blocks > 0) {
-            std::basic_string<uint8_t> compressed_data = compressors[i]->Compress(iter, BLOCK_SZ);
+            std::vector<uint8_t> compressed_data = compressors[i]->Compress(iter, BLOCK_SZ);
             compressed_data_vec.emplace_back(compressed_data);
             num_blocks--;
             iter += BLOCK_SZ;
diff --git a/healthd/BatteryMonitor.cpp b/healthd/BatteryMonitor.cpp
index b8bb586..fbdf5fe 100644
--- a/healthd/BatteryMonitor.cpp
+++ b/healthd/BatteryMonitor.cpp
@@ -530,7 +530,7 @@
              props.chargerAcOnline ? "a" : "", props.chargerUsbOnline ? "u" : "",
              props.chargerWirelessOnline ? "w" : "", props.chargerDockOnline ? "d" : "");
 
-    KLOG_WARNING(LOG_TAG, "%s\n", dmesgline);
+    KLOG_DEBUG(LOG_TAG, "%s\n", dmesgline);
 }
 
 void BatteryMonitor::logValues(const HealthInfo_2_1& health_info,
diff --git a/init/Android.bp b/init/Android.bp
index a7278d6..181de2e 100644
--- a/init/Android.bp
+++ b/init/Android.bp
@@ -475,6 +475,7 @@
 // ------------------------------------------------------------------------------
 
 cc_test {
+    // Note: This is NOT a CTS test. See b/320800872
     name: "CtsInitTestCases",
     defaults: ["init_defaults"],
     require_root: true,
@@ -507,7 +508,6 @@
     ],
 
     test_suites: [
-        "cts",
         "device-tests",
     ],
 }
diff --git a/libcutils/include/private/android_filesystem_config.h b/libcutils/include/private/android_filesystem_config.h
index 8c6e548..ea61cc2 100644
--- a/libcutils/include/private/android_filesystem_config.h
+++ b/libcutils/include/private/android_filesystem_config.h
@@ -170,6 +170,7 @@
 #define AID_WAKELOCK 3010     /* Allow system wakelock read/write access */
 #define AID_UHID 3011         /* Allow read/write to /dev/uhid node */
 #define AID_READTRACEFS 3012  /* Allow tracefs read */
+#define AID_VIRTUALMACHINE 3013 /* Allows VMs to tune for performance*/
 
 /* The range 5000-5999 is also reserved for vendor partition. */
 #define AID_OEM_RESERVED_2_START 5000
diff --git a/libprocessgroup/TEST_MAPPING b/libprocessgroup/TEST_MAPPING
new file mode 100644
index 0000000..29a9ff0
--- /dev/null
+++ b/libprocessgroup/TEST_MAPPING
@@ -0,0 +1,7 @@
+{
+  "postsubmit": [
+    {
+      "name": "StagedRollbackTest"
+    }
+  ]
+}
diff --git a/libprocessgroup/cgroup_map.cpp b/libprocessgroup/cgroup_map.cpp
index ce7f10b..c8ae216 100644
--- a/libprocessgroup/cgroup_map.cpp
+++ b/libprocessgroup/cgroup_map.cpp
@@ -104,7 +104,7 @@
     return proc_path.append(CGROUP_PROCS_FILE);
 }
 
-bool CgroupController::GetTaskGroup(int tid, std::string* group) const {
+bool CgroupController::GetTaskGroup(pid_t tid, std::string* group) const {
     std::string file_name = StringPrintf("/proc/%d/cgroup", tid);
     std::string content;
     if (!android::base::ReadFileToString(file_name, &content)) {
diff --git a/libprocessgroup/cgroup_map.h b/libprocessgroup/cgroup_map.h
index 5cdf8b2..5c6d3e2 100644
--- a/libprocessgroup/cgroup_map.h
+++ b/libprocessgroup/cgroup_map.h
@@ -43,7 +43,8 @@
 
     std::string GetTasksFilePath(const std::string& path) const;
     std::string GetProcsFilePath(const std::string& path, uid_t uid, pid_t pid) const;
-    bool GetTaskGroup(int tid, std::string* group) const;
+    bool GetTaskGroup(pid_t tid, std::string* group) const;
+
   private:
     enum ControllerState {
         UNKNOWN = 0,
diff --git a/libprocessgroup/include/processgroup/processgroup.h b/libprocessgroup/include/processgroup/processgroup.h
index ca6868c..ffffeb4 100644
--- a/libprocessgroup/include/processgroup/processgroup.h
+++ b/libprocessgroup/include/processgroup/processgroup.h
@@ -33,19 +33,20 @@
 bool CgroupGetControllerPath(const std::string& cgroup_name, std::string* path);
 bool CgroupGetControllerFromPath(const std::string& path, std::string* cgroup_name);
 bool CgroupGetAttributePath(const std::string& attr_name, std::string* path);
-bool CgroupGetAttributePathForTask(const std::string& attr_name, int tid, std::string* path);
+bool CgroupGetAttributePathForTask(const std::string& attr_name, pid_t tid, std::string* path);
 
-bool SetTaskProfiles(int tid, const std::vector<std::string>& profiles, bool use_fd_cache = false);
+bool SetTaskProfiles(pid_t tid, const std::vector<std::string>& profiles,
+                     bool use_fd_cache = false);
 bool SetProcessProfiles(uid_t uid, pid_t pid, const std::vector<std::string>& profiles);
 bool SetUserProfiles(uid_t uid, const std::vector<std::string>& profiles);
 
 __END_DECLS
 
-bool SetTaskProfiles(int tid, std::initializer_list<std::string_view> profiles,
+bool SetTaskProfiles(pid_t tid, std::initializer_list<std::string_view> profiles,
                      bool use_fd_cache = false);
 bool SetProcessProfiles(uid_t uid, pid_t pid, std::initializer_list<std::string_view> profiles);
 #if _LIBCPP_STD_VER > 17
-bool SetTaskProfiles(int tid, std::span<const std::string_view> profiles,
+bool SetTaskProfiles(pid_t tid, std::span<const std::string_view> profiles,
                      bool use_fd_cache = false);
 bool SetProcessProfiles(uid_t uid, pid_t pid, std::span<const std::string_view> profiles);
 #endif
@@ -67,35 +68,35 @@
 
 // Return 0 if all processes were killed and the cgroup was successfully removed.
 // Returns -1 in the case of an error occurring or if there are processes still running.
-int killProcessGroup(uid_t uid, int initialPid, int signal);
+int killProcessGroup(uid_t uid, pid_t initialPid, int signal);
 
 // Returns the same as killProcessGroup(), however it does not retry, which means
 // that it only returns 0 in the case that the cgroup exists and it contains no processes.
-int killProcessGroupOnce(uid_t uid, int initialPid, int signal);
+int killProcessGroupOnce(uid_t uid, pid_t initialPid, int signal);
 
 // Sends the provided signal to all members of a process group, but does not wait for processes to
 // exit, or for the cgroup to be removed. Callers should also ensure that killProcessGroup is called
 // later to ensure the cgroup is fully removed, otherwise system resources will leak.
 // Returns true if no errors are encountered sending signals, otherwise false.
-bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal);
+bool sendSignalToProcessGroup(uid_t uid, pid_t initialPid, int signal);
 
-int createProcessGroup(uid_t uid, int initialPid, bool memControl = false);
+int createProcessGroup(uid_t uid, pid_t initialPid, bool memControl = false);
 
 // Set various properties of a process group. For these functions to work, the process group must
 // have been created by passing memControl=true to createProcessGroup.
-bool setProcessGroupSwappiness(uid_t uid, int initialPid, int swappiness);
-bool setProcessGroupSoftLimit(uid_t uid, int initialPid, int64_t softLimitInBytes);
-bool setProcessGroupLimit(uid_t uid, int initialPid, int64_t limitInBytes);
+bool setProcessGroupSwappiness(uid_t uid, pid_t initialPid, int swappiness);
+bool setProcessGroupSoftLimit(uid_t uid, pid_t initialPid, int64_t softLimitInBytes);
+bool setProcessGroupLimit(uid_t uid, pid_t initialPid, int64_t limitInBytes);
 
 void removeAllEmptyProcessGroups(void);
 
 // Provides the path for an attribute in a specific process group
 // Returns false in case of error, true in case of success
-bool getAttributePathForTask(const std::string& attr_name, int tid, std::string* path);
+bool getAttributePathForTask(const std::string& attr_name, pid_t tid, std::string* path);
 
 // Check if a profile can be applied without failing.
 // Returns true if it can be applied without failing, false otherwise
-bool isProfileValidForProcess(const std::string& profile_name, int uid, int pid);
+bool isProfileValidForProcess(const std::string& profile_name, uid_t uid, pid_t pid);
 
 #endif // __ANDROID_VNDK__
 
diff --git a/libprocessgroup/processgroup.cpp b/libprocessgroup/processgroup.cpp
index 3209adf..94d9502 100644
--- a/libprocessgroup/processgroup.cpp
+++ b/libprocessgroup/processgroup.cpp
@@ -82,7 +82,7 @@
     return StringPrintf("%s/uid_%u", cgroup, uid);
 }
 
-static std::string ConvertUidPidToPath(const char* cgroup, uid_t uid, int pid) {
+static std::string ConvertUidPidToPath(const char* cgroup, uid_t uid, pid_t pid) {
     return StringPrintf("%s/uid_%u/pid_%d", cgroup, uid, pid);
 }
 
@@ -147,7 +147,7 @@
     return true;
 }
 
-bool CgroupGetAttributePathForTask(const std::string& attr_name, int tid, std::string* path) {
+bool CgroupGetAttributePathForTask(const std::string& attr_name, pid_t tid, std::string* path) {
     const TaskProfiles& tp = TaskProfiles::GetInstance();
     const IProfileAttribute* attr = tp.GetAttribute(attr_name);
 
@@ -198,17 +198,18 @@
             uid, pid, std::span<const std::string>(profiles), true);
 }
 
-bool SetTaskProfiles(int tid, const std::vector<std::string>& profiles, bool use_fd_cache) {
+bool SetTaskProfiles(pid_t tid, const std::vector<std::string>& profiles, bool use_fd_cache) {
     return TaskProfiles::GetInstance().SetTaskProfiles(tid, std::span<const std::string>(profiles),
                                                        use_fd_cache);
 }
 
-bool SetTaskProfiles(int tid, std::initializer_list<std::string_view> profiles, bool use_fd_cache) {
+bool SetTaskProfiles(pid_t tid, std::initializer_list<std::string_view> profiles,
+                     bool use_fd_cache) {
     return TaskProfiles::GetInstance().SetTaskProfiles(
             tid, std::span<const std::string_view>(profiles), use_fd_cache);
 }
 
-bool SetTaskProfiles(int tid, std::span<const std::string_view> profiles, bool use_fd_cache) {
+bool SetTaskProfiles(pid_t tid, std::span<const std::string_view> profiles, bool use_fd_cache) {
     return TaskProfiles::GetInstance().SetTaskProfiles(tid, profiles, use_fd_cache);
 }
 
@@ -232,7 +233,7 @@
                                                        false);
 }
 
-static int RemoveCgroup(const char* cgroup, uid_t uid, int pid) {
+static int RemoveCgroup(const char* cgroup, uid_t uid, pid_t pid) {
     auto path = ConvertUidPidToPath(cgroup, uid, pid);
     int ret = TEMP_FAILURE_RETRY(rmdir(path.c_str()));
 
@@ -370,7 +371,7 @@
     return false;
 }
 
-bool sendSignalToProcessGroup(uid_t uid, int initialPid, int signal) {
+bool sendSignalToProcessGroup(uid_t uid, pid_t initialPid, int signal) {
     std::set<pid_t> pgids, pids;
 
     if (CgroupsAvailable()) {
@@ -525,7 +526,7 @@
 // implementation of this function. The default retry value was 40 for killing and 400 for cgroup
 // removal with 5ms sleeps between each retry.
 static int KillProcessGroup(
-        uid_t uid, int initialPid, int signal, bool once = false,
+        uid_t uid, pid_t initialPid, int signal, bool once = false,
         std::chrono::steady_clock::time_point until = std::chrono::steady_clock::now() + 2200ms) {
     CHECK_GE(uid, 0);
     CHECK_GT(initialPid, 0);
@@ -632,15 +633,15 @@
     return ret;
 }
 
-int killProcessGroup(uid_t uid, int initialPid, int signal) {
+int killProcessGroup(uid_t uid, pid_t initialPid, int signal) {
     return KillProcessGroup(uid, initialPid, signal);
 }
 
-int killProcessGroupOnce(uid_t uid, int initialPid, int signal) {
+int killProcessGroupOnce(uid_t uid, pid_t initialPid, int signal) {
     return KillProcessGroup(uid, initialPid, signal, true);
 }
 
-static int createProcessGroupInternal(uid_t uid, int initialPid, std::string cgroup,
+static int createProcessGroupInternal(uid_t uid, pid_t initialPid, std::string cgroup,
                                       bool activate_controllers) {
     auto uid_path = ConvertUidToPath(cgroup.c_str(), uid);
 
@@ -687,7 +688,7 @@
     return ret;
 }
 
-int createProcessGroup(uid_t uid, int initialPid, bool memControl) {
+int createProcessGroup(uid_t uid, pid_t initialPid, bool memControl) {
     CHECK_GE(uid, 0);
     CHECK_GT(initialPid, 0);
 
@@ -712,7 +713,7 @@
     return createProcessGroupInternal(uid, initialPid, cgroup, true);
 }
 
-static bool SetProcessGroupValue(int tid, const std::string& attr_name, int64_t value) {
+static bool SetProcessGroupValue(pid_t tid, const std::string& attr_name, int64_t value) {
     if (!isMemoryCgroupSupported()) {
         LOG(ERROR) << "Memcg is not mounted.";
         return false;
@@ -731,23 +732,23 @@
     return true;
 }
 
-bool setProcessGroupSwappiness(uid_t, int pid, int swappiness) {
+bool setProcessGroupSwappiness(uid_t, pid_t pid, int swappiness) {
     return SetProcessGroupValue(pid, "MemSwappiness", swappiness);
 }
 
-bool setProcessGroupSoftLimit(uid_t, int pid, int64_t soft_limit_in_bytes) {
+bool setProcessGroupSoftLimit(uid_t, pid_t pid, int64_t soft_limit_in_bytes) {
     return SetProcessGroupValue(pid, "MemSoftLimit", soft_limit_in_bytes);
 }
 
-bool setProcessGroupLimit(uid_t, int pid, int64_t limit_in_bytes) {
+bool setProcessGroupLimit(uid_t, pid_t pid, int64_t limit_in_bytes) {
     return SetProcessGroupValue(pid, "MemLimit", limit_in_bytes);
 }
 
-bool getAttributePathForTask(const std::string& attr_name, int tid, std::string* path) {
+bool getAttributePathForTask(const std::string& attr_name, pid_t tid, std::string* path) {
     return CgroupGetAttributePathForTask(attr_name, tid, path);
 }
 
-bool isProfileValidForProcess(const std::string& profile_name, int uid, int pid) {
+bool isProfileValidForProcess(const std::string& profile_name, uid_t uid, pid_t pid) {
     const TaskProfile* tp = TaskProfiles::GetInstance().GetProfile(profile_name);
 
     if (tp == nullptr) {
diff --git a/libprocessgroup/sched_policy.cpp b/libprocessgroup/sched_policy.cpp
index 169b1d3..1005b1e 100644
--- a/libprocessgroup/sched_policy.cpp
+++ b/libprocessgroup/sched_policy.cpp
@@ -38,7 +38,7 @@
 
 #if defined(__ANDROID__)
 
-int set_cpuset_policy(int tid, SchedPolicy policy) {
+int set_cpuset_policy(pid_t tid, SchedPolicy policy) {
     if (tid == 0) {
         tid = GetThreadId();
     }
@@ -64,7 +64,7 @@
     return 0;
 }
 
-int set_sched_policy(int tid, SchedPolicy policy) {
+int set_sched_policy(pid_t tid, SchedPolicy policy) {
     if (tid == 0) {
         tid = GetThreadId();
     }
@@ -154,7 +154,7 @@
     return enabled;
 }
 
-static int getCGroupSubsys(int tid, const char* subsys, std::string& subgroup) {
+static int getCGroupSubsys(pid_t tid, const char* subsys, std::string& subgroup) {
     auto controller = CgroupMap::GetInstance().FindController(subsys);
 
     if (!controller.IsUsable()) return -1;
@@ -185,7 +185,7 @@
     return 0;
 }
 
-int get_sched_policy(int tid, SchedPolicy* policy) {
+int get_sched_policy(pid_t tid, SchedPolicy* policy) {
     if (tid == 0) {
         tid = GetThreadId();
     }
diff --git a/libprocessgroup/task_profiles.cpp b/libprocessgroup/task_profiles.cpp
index d5bd47c..2353cf1 100644
--- a/libprocessgroup/task_profiles.cpp
+++ b/libprocessgroup/task_profiles.cpp
@@ -136,7 +136,7 @@
     return GetPathForTask(pid, path);
 }
 
-bool ProfileAttribute::GetPathForTask(int tid, std::string* path) const {
+bool ProfileAttribute::GetPathForTask(pid_t tid, std::string* path) const {
     std::string subgroup;
     if (!controller()->GetTaskGroup(tid, &subgroup)) {
         return false;
@@ -179,13 +179,13 @@
 // To avoid issues in sdk_mac build
 #if defined(__ANDROID__)
 
-bool SetTimerSlackAction::IsTimerSlackSupported(int tid) {
+bool SetTimerSlackAction::IsTimerSlackSupported(pid_t tid) {
     auto file = StringPrintf("/proc/%d/timerslack_ns", tid);
 
     return (access(file.c_str(), W_OK) == 0);
 }
 
-bool SetTimerSlackAction::ExecuteForTask(int tid) const {
+bool SetTimerSlackAction::ExecuteForTask(pid_t tid) const {
     static bool sys_supports_timerslack = IsTimerSlackSupported(tid);
 
     // v4.6+ kernels support the /proc/<tid>/timerslack_ns interface.
@@ -250,7 +250,7 @@
     return WriteValueToFile(path);
 }
 
-bool SetAttributeAction::ExecuteForTask(int tid) const {
+bool SetAttributeAction::ExecuteForTask(pid_t tid) const {
     std::string path;
 
     if (!attribute_->GetPathForTask(tid, &path)) {
@@ -288,7 +288,7 @@
     return IsValidForTask(pid);
 }
 
-bool SetAttributeAction::IsValidForTask(int tid) const {
+bool SetAttributeAction::IsValidForTask(pid_t tid) const {
     std::string path;
 
     if (!attribute_->GetPathForTask(tid, &path)) {
@@ -316,7 +316,7 @@
     FdCacheHelper::Init(controller_.GetProcsFilePath(path_, 0, 0), fd_[ProfileAction::RCT_PROCESS]);
 }
 
-bool SetCgroupAction::AddTidToCgroup(int tid, int fd, ResourceCacheType cache_type) const {
+bool SetCgroupAction::AddTidToCgroup(pid_t tid, int fd, ResourceCacheType cache_type) const {
     if (tid <= 0) {
         return true;
     }
@@ -401,7 +401,7 @@
     return true;
 }
 
-bool SetCgroupAction::ExecuteForTask(int tid) const {
+bool SetCgroupAction::ExecuteForTask(pid_t tid) const {
     CacheUseResult result = UseCachedFd(ProfileAction::RCT_TASK, tid);
     if (result != ProfileAction::UNUSED) {
         return result == ProfileAction::SUCCESS;
@@ -489,7 +489,7 @@
 }
 
 bool WriteFileAction::WriteValueToFile(const std::string& value_, ResourceCacheType cache_type,
-                                       int uid, int pid, bool logfailures) const {
+                                       uid_t uid, pid_t pid, bool logfailures) const {
     std::string value(value_);
 
     value = StringReplace(value, "<uid>", std::to_string(uid), true);
@@ -564,7 +564,7 @@
     DIR* d;
     struct dirent* de;
     char proc_path[255];
-    int t_pid;
+    pid_t t_pid;
 
     sprintf(proc_path, "/proc/%d/task", pid);
     if (!(d = opendir(proc_path))) {
@@ -590,7 +590,7 @@
     return true;
 }
 
-bool WriteFileAction::ExecuteForTask(int tid) const {
+bool WriteFileAction::ExecuteForTask(pid_t tid) const {
     return WriteValueToFile(value_, ProfileAction::RCT_TASK, getuid(), tid, logfailures_);
 }
 
@@ -655,7 +655,7 @@
     return true;
 }
 
-bool ApplyProfileAction::ExecuteForTask(int tid) const {
+bool ApplyProfileAction::ExecuteForTask(pid_t tid) const {
     for (const auto& profile : profiles_) {
         profile->ExecuteForTask(tid);
     }
@@ -683,7 +683,7 @@
     return true;
 }
 
-bool ApplyProfileAction::IsValidForTask(int tid) const {
+bool ApplyProfileAction::IsValidForTask(pid_t tid) const {
     for (const auto& profile : profiles_) {
         if (!profile->IsValidForTask(tid)) {
             return false;
@@ -707,7 +707,7 @@
     return true;
 }
 
-bool TaskProfile::ExecuteForTask(int tid) const {
+bool TaskProfile::ExecuteForTask(pid_t tid) const {
     if (tid == 0) {
         tid = GetThreadId();
     }
@@ -761,7 +761,7 @@
     return true;
 }
 
-bool TaskProfile::IsValidForTask(int tid) const {
+bool TaskProfile::IsValidForTask(pid_t tid) const {
     for (const auto& element : elements_) {
         if (!element->IsValidForTask(tid)) return false;
     }
@@ -1043,7 +1043,7 @@
 }
 
 template <typename T>
-bool TaskProfiles::SetTaskProfiles(int tid, std::span<const T> profiles, bool use_fd_cache) {
+bool TaskProfiles::SetTaskProfiles(pid_t tid, std::span<const T> profiles, bool use_fd_cache) {
     bool success = true;
     for (const auto& name : profiles) {
         TaskProfile* profile = GetProfile(name);
@@ -1069,9 +1069,9 @@
 template bool TaskProfiles::SetProcessProfiles(uid_t uid, pid_t pid,
                                                std::span<const std::string_view> profiles,
                                                bool use_fd_cache);
-template bool TaskProfiles::SetTaskProfiles(int tid, std::span<const std::string> profiles,
+template bool TaskProfiles::SetTaskProfiles(pid_t tid, std::span<const std::string> profiles,
                                             bool use_fd_cache);
-template bool TaskProfiles::SetTaskProfiles(int tid, std::span<const std::string_view> profiles,
+template bool TaskProfiles::SetTaskProfiles(pid_t tid, std::span<const std::string_view> profiles,
                                             bool use_fd_cache);
 template bool TaskProfiles::SetUserProfiles(uid_t uid, std::span<const std::string> profiles,
                                             bool use_fd_cache);
diff --git a/libprocessgroup/task_profiles.h b/libprocessgroup/task_profiles.h
index 16ffe63..2fa1931 100644
--- a/libprocessgroup/task_profiles.h
+++ b/libprocessgroup/task_profiles.h
@@ -37,7 +37,7 @@
     virtual const CgroupController* controller() const = 0;
     virtual const std::string& file_name() const = 0;
     virtual bool GetPathForProcess(uid_t uid, pid_t pid, std::string* path) const = 0;
-    virtual bool GetPathForTask(int tid, std::string* path) const = 0;
+    virtual bool GetPathForTask(pid_t tid, std::string* path) const = 0;
     virtual bool GetPathForUID(uid_t uid, std::string* path) const = 0;
 };
 
@@ -57,7 +57,7 @@
                const std::string& file_v2_name) override;
 
     bool GetPathForProcess(uid_t uid, pid_t pid, std::string* path) const override;
-    bool GetPathForTask(int tid, std::string* path) const override;
+    bool GetPathForTask(pid_t tid, std::string* path) const override;
     bool GetPathForUID(uid_t uid, std::string* path) const override;
 
   private:
@@ -83,7 +83,7 @@
     virtual void EnableResourceCaching(ResourceCacheType) {}
     virtual void DropResourceCaching(ResourceCacheType) {}
     virtual bool IsValidForProcess(uid_t uid, pid_t pid) const { return false; }
-    virtual bool IsValidForTask(int tid) const { return false; }
+    virtual bool IsValidForTask(pid_t tid) const { return false; }
 
   protected:
     enum CacheUseResult { SUCCESS, FAIL, UNUSED };
@@ -96,7 +96,7 @@
 
     const char* Name() const override { return "SetClamps"; }
     bool ExecuteForProcess(uid_t uid, pid_t pid) const override;
-    bool ExecuteForTask(int tid) const override;
+    bool ExecuteForTask(pid_t tid) const override;
 
   protected:
     int boost_;
@@ -108,14 +108,14 @@
     SetTimerSlackAction(unsigned long slack) noexcept : slack_(slack) {}
 
     const char* Name() const override { return "SetTimerSlack"; }
-    bool ExecuteForTask(int tid) const override;
+    bool ExecuteForTask(pid_t tid) const override;
     bool IsValidForProcess(uid_t uid, pid_t pid) const override { return true; }
-    bool IsValidForTask(int tid) const override { return true; }
+    bool IsValidForTask(pid_t tid) const override { return true; }
 
   private:
     unsigned long slack_;
 
-    static bool IsTimerSlackSupported(int tid);
+    static bool IsTimerSlackSupported(pid_t tid);
 };
 
 // Set attribute profile element
@@ -126,10 +126,10 @@
 
     const char* Name() const override { return "SetAttribute"; }
     bool ExecuteForProcess(uid_t uid, pid_t pid) const override;
-    bool ExecuteForTask(int tid) const override;
+    bool ExecuteForTask(pid_t tid) const override;
     bool ExecuteForUID(uid_t uid) const override;
     bool IsValidForProcess(uid_t uid, pid_t pid) const override;
-    bool IsValidForTask(int tid) const override;
+    bool IsValidForTask(pid_t tid) const override;
 
   private:
     const IProfileAttribute* attribute_;
@@ -146,11 +146,11 @@
 
     const char* Name() const override { return "SetCgroup"; }
     bool ExecuteForProcess(uid_t uid, pid_t pid) const override;
-    bool ExecuteForTask(int tid) const override;
+    bool ExecuteForTask(pid_t tid) const override;
     void EnableResourceCaching(ResourceCacheType cache_type) override;
     void DropResourceCaching(ResourceCacheType cache_type) override;
     bool IsValidForProcess(uid_t uid, pid_t pid) const override;
-    bool IsValidForTask(int tid) const override;
+    bool IsValidForTask(pid_t tid) const override;
 
     const CgroupController* controller() const { return &controller_; }
 
@@ -160,7 +160,7 @@
     android::base::unique_fd fd_[ProfileAction::RCT_COUNT];
     mutable std::mutex fd_mutex_;
 
-    bool AddTidToCgroup(int tid, int fd, ResourceCacheType cache_type) const;
+    bool AddTidToCgroup(pid_t tid, int fd, ResourceCacheType cache_type) const;
     CacheUseResult UseCachedFd(ResourceCacheType cache_type, int id) const;
 };
 
@@ -172,11 +172,11 @@
 
     const char* Name() const override { return "WriteFile"; }
     bool ExecuteForProcess(uid_t uid, pid_t pid) const override;
-    bool ExecuteForTask(int tid) const override;
+    bool ExecuteForTask(pid_t tid) const override;
     void EnableResourceCaching(ResourceCacheType cache_type) override;
     void DropResourceCaching(ResourceCacheType cache_type) override;
     bool IsValidForProcess(uid_t uid, pid_t pid) const override;
-    bool IsValidForTask(int tid) const override;
+    bool IsValidForTask(pid_t tid) const override;
 
   private:
     std::string task_path_, proc_path_, value_;
@@ -184,8 +184,8 @@
     android::base::unique_fd fd_[ProfileAction::RCT_COUNT];
     mutable std::mutex fd_mutex_;
 
-    bool WriteValueToFile(const std::string& value, ResourceCacheType cache_type, int uid, int pid,
-                          bool logfailures) const;
+    bool WriteValueToFile(const std::string& value, ResourceCacheType cache_type, uid_t uid,
+                          pid_t pid, bool logfailures) const;
     CacheUseResult UseCachedFd(ResourceCacheType cache_type, const std::string& value) const;
 };
 
@@ -198,12 +198,12 @@
     void MoveTo(TaskProfile* profile);
 
     bool ExecuteForProcess(uid_t uid, pid_t pid) const;
-    bool ExecuteForTask(int tid) const;
+    bool ExecuteForTask(pid_t tid) const;
     bool ExecuteForUID(uid_t uid) const;
     void EnableResourceCaching(ProfileAction::ResourceCacheType cache_type);
     void DropResourceCaching(ProfileAction::ResourceCacheType cache_type);
     bool IsValidForProcess(uid_t uid, pid_t pid) const;
-    bool IsValidForTask(int tid) const;
+    bool IsValidForTask(pid_t tid) const;
 
   private:
     const std::string name_;
@@ -219,11 +219,11 @@
 
     const char* Name() const override { return "ApplyProfileAction"; }
     bool ExecuteForProcess(uid_t uid, pid_t pid) const override;
-    bool ExecuteForTask(int tid) const override;
+    bool ExecuteForTask(pid_t tid) const override;
     void EnableResourceCaching(ProfileAction::ResourceCacheType cache_type) override;
     void DropResourceCaching(ProfileAction::ResourceCacheType cache_type) override;
     bool IsValidForProcess(uid_t uid, pid_t pid) const override;
-    bool IsValidForTask(int tid) const override;
+    bool IsValidForTask(pid_t tid) const override;
 
   private:
     std::vector<std::shared_ptr<TaskProfile>> profiles_;
@@ -240,7 +240,7 @@
     template <typename T>
     bool SetProcessProfiles(uid_t uid, pid_t pid, std::span<const T> profiles, bool use_fd_cache);
     template <typename T>
-    bool SetTaskProfiles(int tid, std::span<const T> profiles, bool use_fd_cache);
+    bool SetTaskProfiles(pid_t tid, std::span<const T> profiles, bool use_fd_cache);
     template <typename T>
     bool SetUserProfiles(uid_t uid, std::span<const T> profiles, bool use_fd_cache);
 
diff --git a/libstats/pull_rust/Android.bp b/libstats/pull_rust/Android.bp
index 4609e6b..6902026 100644
--- a/libstats/pull_rust/Android.bp
+++ b/libstats/pull_rust/Android.bp
@@ -60,8 +60,8 @@
     crate_name: "statspull_rust",
     srcs: ["stats_pull.rs"],
     rustlibs: [
-        "liblazy_static",
         "liblog_rust",
+        "libonce_cell",
         "libstatslog_rust_header",
         "libstatspull_bindgen",
     ],
diff --git a/libstats/pull_rust/stats_pull.rs b/libstats/pull_rust/stats_pull.rs
index d188b5f..b2bebcc 100644
--- a/libstats/pull_rust/stats_pull.rs
+++ b/libstats/pull_rust/stats_pull.rs
@@ -14,7 +14,7 @@
 
 //! A Rust interface for the StatsD pull API.
 
-use lazy_static::lazy_static;
+use once_cell::sync::Lazy;
 use statslog_rust_header::{Atoms, Stat, StatsError};
 use statspull_bindgen::*;
 use std::collections::HashMap;
@@ -107,9 +107,8 @@
     }
 }
 
-lazy_static! {
-    static ref COOKIES: Mutex<HashMap<i32, fn() -> StatsPullResult>> = Mutex::new(HashMap::new());
-}
+static COOKIES: Lazy<Mutex<HashMap<i32, fn() -> StatsPullResult>>> =
+    Lazy::new(|| Mutex::new(HashMap::new()));
 
 /// # Safety
 ///
diff --git a/libstats/socket_lazy/libstatssocket_lazy.cpp b/libstats/socket_lazy/libstatssocket_lazy.cpp
index dd93eeb..fe94ef2 100644
--- a/libstats/socket_lazy/libstatssocket_lazy.cpp
+++ b/libstats/socket_lazy/libstatssocket_lazy.cpp
@@ -45,6 +45,7 @@
     k_AStatsEvent_writeBool,
     k_AStatsEvent_writeByteArray,
     k_AStatsEvent_writeString,
+    k_AStatsEvent_writeStringArray,
     k_AStatsEvent_writeAttributionChain,
     k_AStatsEvent_addBoolAnnotation,
     k_AStatsEvent_addInt32Annotation,
@@ -104,6 +105,7 @@
     BIND_SYMBOL(AStatsEvent_writeBool);
     BIND_SYMBOL(AStatsEvent_writeByteArray);
     BIND_SYMBOL(AStatsEvent_writeString);
+    BIND_SYMBOL(AStatsEvent_writeStringArray);
     BIND_SYMBOL(AStatsEvent_writeAttributionChain);
     BIND_SYMBOL(AStatsEvent_addBoolAnnotation);
     BIND_SYMBOL(AStatsEvent_addInt32Annotation);
@@ -179,6 +181,11 @@
     INVOKE_METHOD(AStatsEvent_writeString, event, value);
 }
 
+void AStatsEvent_writeStringArray(AStatsEvent* event, const char* const* elements,
+                                  size_t numElements) {
+    INVOKE_METHOD(AStatsEvent_writeStringArray, event, elements, numElements);
+}
+
 void AStatsEvent_writeAttributionChain(AStatsEvent* event, const uint32_t* uids,
                                        const char* const* tags, uint8_t numNodes) {
     INVOKE_METHOD(AStatsEvent_writeAttributionChain, event, uids, tags, numNodes);
@@ -198,4 +205,4 @@
 
 void AStatsSocket_close() {
     INVOKE_METHOD(AStatsSocket_close);
-}
\ No newline at end of file
+}
diff --git a/libstats/socket_lazy/tests/libstatssocket_lazy_test.cpp b/libstats/socket_lazy/tests/libstatssocket_lazy_test.cpp
index fe13598..3de6cd7 100644
--- a/libstats/socket_lazy/tests/libstatssocket_lazy_test.cpp
+++ b/libstats/socket_lazy/tests/libstatssocket_lazy_test.cpp
@@ -47,6 +47,7 @@
     EXPECT_DEATH(AStatsEvent_writeBool(event, false), kLoadFailed);
     EXPECT_DEATH(AStatsEvent_writeByteArray(event, NULL, 0), kLoadFailed);
     EXPECT_DEATH(AStatsEvent_writeString(event, NULL), kLoadFailed);
+    EXPECT_DEATH(AStatsEvent_writeStringArray(event, NULL, 0), kLoadFailed);
     EXPECT_DEATH(AStatsEvent_writeAttributionChain(event, NULL, NULL, 0), kLoadFailed);
 
     EXPECT_DEATH(AStatsEvent_addBoolAnnotation(event, 0, false), kLoadFailed);
@@ -55,4 +56,4 @@
 
 TEST_F(LibstatssocketLazyTest, NoLibstatssocketForStatsSocket) {
     EXPECT_DEATH(AStatsSocket_close(), kLoadFailed);
-}
\ No newline at end of file
+}
diff --git a/libutils/StopWatch.cpp b/libutils/StopWatch.cpp
index c88d60f..c91cd5c 100644
--- a/libutils/StopWatch.cpp
+++ b/libutils/StopWatch.cpp
@@ -18,10 +18,6 @@
 
 #include <utils/StopWatch.h>
 
-/* for PRId64 */
-#ifndef __STDC_FORMAT_MACROS
-#define __STDC_FORMAT_MACROS 1
-#endif
 #include <inttypes.h>
 
 #include <log/log.h>
diff --git a/libutils/binder/SharedBuffer_test.cpp b/libutils/binder/SharedBuffer_test.cpp
index 1d6317f..26702b0 100644
--- a/libutils/binder/SharedBuffer_test.cpp
+++ b/libutils/binder/SharedBuffer_test.cpp
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#define __STDC_LIMIT_MACROS
-
 #include <gtest/gtest.h>
 
 #include <memory>
diff --git a/libutils/binder/String8.cpp b/libutils/binder/String8.cpp
index 749bfcb..1de9e8b 100644
--- a/libutils/binder/String8.cpp
+++ b/libutils/binder/String8.cpp
@@ -14,15 +14,13 @@
  * limitations under the License.
  */
 
-#define __STDC_LIMIT_MACROS
-#include <stdint.h>
-
 #include <utils/String8.h>
 
 #include <log/log.h>
 #include <utils/String16.h>
 
 #include <ctype.h>
+#include <stdint.h>
 
 #include <limits>
 #include <string>
diff --git a/libutils/binder/Vector_test.cpp b/libutils/binder/Vector_test.cpp
index 6d90eaa..312dcf6 100644
--- a/libutils/binder/Vector_test.cpp
+++ b/libutils/binder/Vector_test.cpp
@@ -16,7 +16,6 @@
 
 #define LOG_TAG "Vector_test"
 
-#define __STDC_LIMIT_MACROS
 #include <stdint.h>
 #include <unistd.h>
 
diff --git a/rootdir/init.rc b/rootdir/init.rc
index 4c07c83..0646d14 100644
--- a/rootdir/init.rc
+++ b/rootdir/init.rc
@@ -247,6 +247,7 @@
     write /dev/blkio/background/blkio.bfq.weight 10
     write /dev/blkio/blkio.group_idle 0
     write /dev/blkio/background/blkio.group_idle 0
+    write /dev/blkio/background/blkio.prio.class restrict-to-be
 
     restorecon_recursive /mnt
 
diff --git a/trusty/keymint/src/keymint_hal_main.rs b/trusty/keymint/src/keymint_hal_main.rs
index cfa859f..eda986a 100644
--- a/trusty/keymint/src/keymint_hal_main.rs
+++ b/trusty/keymint/src/keymint_hal_main.rs
@@ -92,8 +92,8 @@
     android_logger::init_once(
         android_logger::Config::default()
             .with_tag("keymint-hal-trusty")
-            .with_min_level(log::Level::Info)
-            .with_log_id(android_logger::LogId::System),
+            .with_max_level(log::LevelFilter::Info)
+            .with_log_buffer(android_logger::LogId::System),
     );
     // Redirect panic messages to logcat.
     panic::set_hook(Box::new(|panic_info| {
diff --git a/trusty/secretkeeper/src/hal_main.rs b/trusty/secretkeeper/src/hal_main.rs
index 1dc697d..df30493 100644
--- a/trusty/secretkeeper/src/hal_main.rs
+++ b/trusty/secretkeeper/src/hal_main.rs
@@ -101,8 +101,8 @@
     android_logger::init_once(
         android_logger::Config::default()
             .with_tag("secretkeeper-hal-trusty")
-            .with_min_level(log::Level::Info)
-            .with_log_id(android_logger::LogId::System),
+            .with_max_level(log::LevelFilter::Info)
+            .with_log_buffer(android_logger::LogId::System),
     );
     // Redirect panic messages to logcat.
     panic::set_hook(Box::new(|panic_info| {
diff --git a/trusty/trusty-base.mk b/trusty/trusty-base.mk
index d645c3e..5aa4392 100644
--- a/trusty/trusty-base.mk
+++ b/trusty/trusty-base.mk
@@ -36,10 +36,10 @@
 endif
 
 # TODO(b/306364873): move this to be flag-controlled?
-ifeq ($(SECRETKEEPER_ENABLED),)
-    LOCAL_SECRETKEEPER_PRODUCT_PACKAGE :=
-else
+ifeq ($(SECRETKEEPER_ENABLED),true)
     LOCAL_SECRETKEEPER_PRODUCT_PACKAGE := android.hardware.security.secretkeeper.trusty
+else
+    LOCAL_SECRETKEEPER_PRODUCT_PACKAGE :=
 endif
 
 PRODUCT_PACKAGES += \