Merge "Revert "init: adding fallback paths for debug ramdisk""
diff --git a/debuggerd/Android.bp b/debuggerd/Android.bp
index 7b6f6c0..198e4de 100644
--- a/debuggerd/Android.bp
+++ b/debuggerd/Android.bp
@@ -276,6 +276,12 @@
     ],
 }
 
+cc_test_library {
+    name: "libcrash_test",
+    defaults: ["debuggerd_defaults"],
+    srcs: ["crash_test.cpp"],
+}
+
 cc_test {
     name: "debuggerd_test",
     defaults: ["debuggerd_defaults"],
@@ -341,6 +347,10 @@
         },
     },
 
+    data: [
+        ":libcrash_test",
+    ],
+
     test_suites: ["device-tests"],
 }
 
diff --git a/debuggerd/crash_test.cpp b/debuggerd/crash_test.cpp
new file mode 100644
index 0000000..c15145f
--- /dev/null
+++ b/debuggerd/crash_test.cpp
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2021, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+
+extern "C" void crash() {
+  *reinterpret_cast<volatile char*>(0xdead) = '1';
+}
diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp
index 93725b9..eb3738e 100644
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp
@@ -15,6 +15,7 @@
  */
 
 #include <dirent.h>
+#include <dlfcn.h>
 #include <err.h>
 #include <fcntl.h>
 #include <malloc.h>
@@ -30,6 +31,7 @@
 
 #include <chrono>
 #include <regex>
+#include <string>
 #include <thread>
 
 #include <android/fdsan.h>
@@ -1510,6 +1512,60 @@
   ASSERT_MATCH(result, R"(Cause: stack pointer[^\n]*stack overflow.\n)");
 }
 
+static bool CopySharedLibrary(const char* tmp_dir, std::string* tmp_so_name) {
+  std::string test_lib(testing::internal::GetArgvs()[0]);
+  auto const value = test_lib.find_last_of('/');
+  if (value == std::string::npos) {
+    test_lib = "./";
+  } else {
+    test_lib = test_lib.substr(0, value + 1) + "./";
+  }
+  test_lib += "libcrash_test.so";
+
+  *tmp_so_name = std::string(tmp_dir) + "/libcrash_test.so";
+  std::string cp_cmd = android::base::StringPrintf("cp %s %s", test_lib.c_str(), tmp_dir);
+
+  // Copy the shared so to a tempory directory.
+  return system(cp_cmd.c_str()) == 0;
+}
+
+TEST_F(CrasherTest, unreadable_elf) {
+  int intercept_result;
+  unique_fd output_fd;
+  StartProcess([]() {
+    TemporaryDir td;
+    std::string tmp_so_name;
+    if (!CopySharedLibrary(td.path, &tmp_so_name)) {
+      _exit(1);
+    }
+    void* handle = dlopen(tmp_so_name.c_str(), RTLD_NOW);
+    if (handle == nullptr) {
+      _exit(1);
+    }
+    // Delete the original shared library so that we get the warning
+    // about unreadable elf files.
+    if (unlink(tmp_so_name.c_str()) == -1) {
+      _exit(1);
+    }
+    void (*crash_func)() = reinterpret_cast<void (*)()>(dlsym(handle, "crash"));
+    if (crash_func == nullptr) {
+      _exit(1);
+    }
+    crash_func();
+  });
+
+  StartIntercept(&output_fd);
+  FinishCrasher();
+  AssertDeath(SIGSEGV);
+  FinishIntercept(&intercept_result);
+
+  ASSERT_EQ(1, intercept_result) << "tombstoned reported failure";
+
+  std::string result;
+  ConsumeFd(std::move(output_fd), &result);
+  ASSERT_MATCH(result, R"(NOTE: Function names and BuildId information is missing )");
+}
+
 TEST(tombstoned, proto) {
   const pid_t self = getpid();
   unique_fd tombstoned_socket, text_fd, proto_fd;
diff --git a/debuggerd/libdebuggerd/tombstone_proto.cpp b/debuggerd/libdebuggerd/tombstone_proto.cpp
index 7657001..d4a35b3 100644
--- a/debuggerd/libdebuggerd/tombstone_proto.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto.cpp
@@ -395,6 +395,15 @@
                             unwinder->LastErrorAddress());
     }
   } else {
+    if (unwinder->elf_from_memory_not_file()) {
+      auto backtrace_note = thread.mutable_backtrace_note();
+      *backtrace_note->Add() =
+          "Function names and BuildId information is missing for some frames due";
+      *backtrace_note->Add() =
+          "to unreadable libraries. For unwinds of apps, only shared libraries";
+      *backtrace_note->Add() = "found under the lib/ directory are readable.";
+      *backtrace_note->Add() = "On this device, run setenforce 0 to make the libraries readable.";
+    }
     unwinder->SetDisplayBuildID(true);
     for (const auto& frame : unwinder->frames()) {
       BacktraceFrame* f = thread.add_current_backtrace();
diff --git a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
index 020b0a5..b780b22 100644
--- a/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
+++ b/debuggerd/libdebuggerd/tombstone_proto_to_text.cpp
@@ -171,6 +171,10 @@
                                    const Thread& thread, bool should_log) {
   CBS("");
   CB(should_log, "backtrace:");
+  if (!thread.backtrace_note().empty()) {
+    CB(should_log, "  NOTE: %s",
+       android::base::Join(thread.backtrace_note(), "\n  NOTE: ").c_str());
+  }
   print_backtrace(callback, tombstone, thread.current_backtrace(), should_log);
 }
 
diff --git a/debuggerd/proto/tombstone.proto b/debuggerd/proto/tombstone.proto
index 294e4e5..22fc30e 100644
--- a/debuggerd/proto/tombstone.proto
+++ b/debuggerd/proto/tombstone.proto
@@ -119,11 +119,12 @@
   int32 id = 1;
   string name = 2;
   repeated Register registers = 3;
+  repeated string backtrace_note = 7;
   repeated BacktraceFrame current_backtrace = 4;
   repeated MemoryDump memory_dump = 5;
   int64 tagged_addr_ctrl = 6;
 
-  reserved 7 to 999;
+  reserved 8 to 999;
 }
 
 message BacktraceFrame {
diff --git a/fs_mgr/fs_mgr_fstab.cpp b/fs_mgr/fs_mgr_fstab.cpp
index 42bf356..853b24d 100644
--- a/fs_mgr/fs_mgr_fstab.cpp
+++ b/fs_mgr/fs_mgr_fstab.cpp
@@ -298,6 +298,8 @@
             if (!ParseByteCount(arg, &entry->zram_backingdev_size)) {
                 LWARNING << "Warning: zram_backingdev_size= flag malformed: " << arg;
             }
+        } else if (StartsWith(flag, "lowerdir=")) {
+            entry->lowerdir = arg;
         } else {
             LWARNING << "Warning: unknown flag: " << flag;
         }
diff --git a/fs_mgr/fs_mgr_overlayfs.cpp b/fs_mgr/fs_mgr_overlayfs.cpp
index 1134f14..9a94d79 100644
--- a/fs_mgr/fs_mgr_overlayfs.cpp
+++ b/fs_mgr/fs_mgr_overlayfs.cpp
@@ -92,6 +92,10 @@
     return false;
 }
 
+bool fs_mgr_overlayfs_mount_fstab_entry(const std::string&, const std::string&) {
+    return false;
+}
+
 std::vector<std::string> fs_mgr_overlayfs_required_devices(Fstab*) {
     return {};
 }
@@ -116,6 +120,8 @@
 void MapScratchPartitionIfNeeded(Fstab*, const std::function<bool(const std::set<std::string>&)>&) {
 }
 
+void CleanupOldScratchFiles() {}
+
 void TeardownAllOverlayForMountPoint(const std::string&) {}
 
 }  // namespace fs_mgr
@@ -1293,6 +1299,18 @@
     }
 }
 
+bool fs_mgr_overlayfs_mount_fstab_entry(const std::string& lowers,
+                                        const std::string& mount_point) {
+    if (fs_mgr_overlayfs_invalid()) return false;
+
+    std::string aux = "lowerdir=" + lowers + ",override_creds=off";
+    auto rc = mount("overlay", mount_point.c_str(), "overlay", MS_RDONLY | MS_NOATIME, aux.c_str());
+
+    if (rc == 0) return true;
+
+    return false;
+}
+
 bool fs_mgr_overlayfs_mount_all(Fstab* fstab) {
     auto ret = false;
     if (fs_mgr_overlayfs_invalid()) return ret;
diff --git a/fs_mgr/include/fs_mgr_overlayfs.h b/fs_mgr/include/fs_mgr_overlayfs.h
index d45e2de..ac95ef5 100644
--- a/fs_mgr/include/fs_mgr_overlayfs.h
+++ b/fs_mgr/include/fs_mgr_overlayfs.h
@@ -27,6 +27,7 @@
 android::fs_mgr::Fstab fs_mgr_overlayfs_candidate_list(const android::fs_mgr::Fstab& fstab);
 
 bool fs_mgr_overlayfs_mount_all(android::fs_mgr::Fstab* fstab);
+bool fs_mgr_overlayfs_mount_fstab_entry (const std::string& lowers, const std::string& mount_point);
 std::vector<std::string> fs_mgr_overlayfs_required_devices(android::fs_mgr::Fstab* fstab);
 bool fs_mgr_overlayfs_setup(const char* backing = nullptr, const char* mount_point = nullptr,
                             bool* change = nullptr, bool force = true);
diff --git a/fs_mgr/include_fstab/fstab/fstab.h b/fs_mgr/include_fstab/fstab/fstab.h
index 2704e47..f33768b 100644
--- a/fs_mgr/include_fstab/fstab/fstab.h
+++ b/fs_mgr/include_fstab/fstab/fstab.h
@@ -55,6 +55,7 @@
     std::string vbmeta_partition;
     uint64_t zram_backingdev_size = 0;
     std::string avb_keys;
+    std::string lowerdir;
 
     struct FsMgrFlags {
         bool wait : 1;
diff --git a/fs_mgr/libdm/dm_target.cpp b/fs_mgr/libdm/dm_target.cpp
index ef46eb9..b0639e6 100644
--- a/fs_mgr/libdm/dm_target.cpp
+++ b/fs_mgr/libdm/dm_target.cpp
@@ -95,7 +95,9 @@
 }
 
 void DmTargetVerity::SetVerityMode(const std::string& mode) {
-    if (mode != "restart_on_corruption" && mode != "ignore_corruption") {
+    if (mode != "panic_on_corruption" &&
+        mode != "restart_on_corruption" &&
+        mode != "ignore_corruption") {
         LOG(ERROR) << "Unknown verity mode: " << mode;
         valid_ = false;
         return;
diff --git a/fs_mgr/libfs_avb/avb_util.cpp b/fs_mgr/libfs_avb/avb_util.cpp
index 2288674..31494c1 100644
--- a/fs_mgr/libfs_avb/avb_util.cpp
+++ b/fs_mgr/libfs_avb/avb_util.cpp
@@ -61,7 +61,9 @@
 
     // Converts veritymode to the format used in kernel.
     std::string dm_verity_mode;
-    if (verity_mode == "enforcing") {
+    if (verity_mode == "panicking") {
+        dm_verity_mode = "panic_on_corruption";
+    } else if (verity_mode == "enforcing") {
         dm_verity_mode = "restart_on_corruption";
     } else if (verity_mode == "logging") {
         dm_verity_mode = "ignore_corruption";
diff --git a/fs_mgr/libsnapshot/Android.bp b/fs_mgr/libsnapshot/Android.bp
index 3cb4123..adfb56c 100644
--- a/fs_mgr/libsnapshot/Android.bp
+++ b/fs_mgr/libsnapshot/Android.bp
@@ -251,6 +251,7 @@
         "snapshot_metadata_updater_test.cpp",
         "snapshot_reader_test.cpp",
         "snapshot_test.cpp",
+        "snapshot_writer_test.cpp",
     ],
     shared_libs: [
         "libbinder",
@@ -420,7 +421,8 @@
         "snapuserd_server.cpp",
         "snapuserd.cpp",
         "snapuserd_daemon.cpp",
-        "snapuserd_worker.cpp",
+	"snapuserd_worker.cpp",
+	"snapuserd_readahead.cpp",
     ],
 
     cflags: [
diff --git a/fs_mgr/libsnapshot/android/snapshot/snapshot.proto b/fs_mgr/libsnapshot/android/snapshot/snapshot.proto
index 012f2ae..92aa55c 100644
--- a/fs_mgr/libsnapshot/android/snapshot/snapshot.proto
+++ b/fs_mgr/libsnapshot/android/snapshot/snapshot.proto
@@ -186,7 +186,7 @@
     MergeFailureCode merge_failure_code = 7;
 }
 
-// Next: 9
+// Next: 10
 message SnapshotMergeReport {
     // Status of the update after the merge attempts.
     UpdateState state = 1;
@@ -212,4 +212,7 @@
 
     // Time from sys.boot_completed to merge start, in milliseconds.
     uint32 boot_complete_to_merge_start_time_ms = 8;
+
+    // Merge failure code, filled if state == MergeFailed.
+    MergeFailureCode merge_failure_code = 9;
 }
diff --git a/fs_mgr/libsnapshot/cow_reader.cpp b/fs_mgr/libsnapshot/cow_reader.cpp
index 7199b38..35a02e6 100644
--- a/fs_mgr/libsnapshot/cow_reader.cpp
+++ b/fs_mgr/libsnapshot/cow_reader.cpp
@@ -94,11 +94,6 @@
                    << "Expected: " << kCowMagicNumber;
         return false;
     }
-    if (header_.header_size != sizeof(CowHeader)) {
-        LOG(ERROR) << "Header size unknown, read " << header_.header_size << ", expected "
-                   << sizeof(CowHeader);
-        return false;
-    }
     if (header_.footer_size != sizeof(CowFooter)) {
         LOG(ERROR) << "Footer size unknown, read " << header_.footer_size << ", expected "
                    << sizeof(CowFooter);
@@ -123,8 +118,7 @@
         return false;
     }
 
-    if ((header_.major_version != kCowVersionMajor) ||
-        (header_.minor_version != kCowVersionMinor)) {
+    if ((header_.major_version > kCowVersionMajor) || (header_.minor_version != kCowVersionMinor)) {
         LOG(ERROR) << "Header version mismatch";
         LOG(ERROR) << "Major version: " << header_.major_version
                    << "Expected: " << kCowVersionMajor;
@@ -137,10 +131,25 @@
 }
 
 bool CowReader::ParseOps(std::optional<uint64_t> label) {
-    uint64_t pos = lseek(fd_.get(), sizeof(header_), SEEK_SET);
-    if (pos != sizeof(header_)) {
-        PLOG(ERROR) << "lseek ops failed";
-        return false;
+    uint64_t pos;
+
+    // Skip the scratch space
+    if (header_.major_version >= 2 && (header_.buffer_size > 0)) {
+        LOG(DEBUG) << " Scratch space found of size: " << header_.buffer_size;
+        size_t init_offset = header_.header_size + header_.buffer_size;
+        pos = lseek(fd_.get(), init_offset, SEEK_SET);
+        if (pos != init_offset) {
+            PLOG(ERROR) << "lseek ops failed";
+            return false;
+        }
+    } else {
+        pos = lseek(fd_.get(), header_.header_size, SEEK_SET);
+        if (pos != header_.header_size) {
+            PLOG(ERROR) << "lseek ops failed";
+            return false;
+        }
+        // Reading a v1 version of COW which doesn't have buffer_size.
+        header_.buffer_size = 0;
     }
 
     auto ops_buffer = std::make_shared<std::vector<CowOperation>>();
@@ -360,13 +369,7 @@
     //                        Replace-op-4, Zero-op-9, Replace-op-5 }
     //==============================================================
 
-    for (uint64_t i = 0; i < ops_->size(); i++) {
-        auto& current_op = ops_->data()[i];
-        if (current_op.type != kCowCopyOp) {
-            break;
-        }
-        num_copy_ops += 1;
-    }
+    num_copy_ops = FindNumCopyops();
 
     std::sort(ops_.get()->begin() + num_copy_ops, ops_.get()->end(),
               [](CowOperation& op1, CowOperation& op2) -> bool {
@@ -377,6 +380,23 @@
         CHECK(ops_->size() >= header_.num_merge_ops);
         ops_->erase(ops_.get()->begin(), ops_.get()->begin() + header_.num_merge_ops);
     }
+
+    num_copy_ops = FindNumCopyops();
+    set_copy_ops(num_copy_ops);
+}
+
+uint64_t CowReader::FindNumCopyops() {
+    uint64_t num_copy_ops = 0;
+
+    for (uint64_t i = 0; i < ops_->size(); i++) {
+        auto& current_op = ops_->data()[i];
+        if (current_op.type != kCowCopyOp) {
+            break;
+        }
+        num_copy_ops += 1;
+    }
+
+    return num_copy_ops;
 }
 
 bool CowReader::GetHeader(CowHeader* header) {
@@ -470,7 +490,7 @@
 
 bool CowReader::GetRawBytes(uint64_t offset, void* buffer, size_t len, size_t* read) {
     // Validate the offset, taking care to acknowledge possible overflow of offset+len.
-    if (offset < sizeof(header_) || offset >= fd_size_ - sizeof(CowFooter) || len >= fd_size_ ||
+    if (offset < header_.header_size || offset >= fd_size_ - sizeof(CowFooter) || len >= fd_size_ ||
         offset + len > fd_size_ - sizeof(CowFooter)) {
         LOG(ERROR) << "invalid data offset: " << offset << ", " << len << " bytes";
         return false;
diff --git a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
index 045d9db..313eb64 100644
--- a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
+++ b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
@@ -96,6 +96,7 @@
 class CowSnapuserdTest final {
   public:
     bool Setup();
+    bool SetupCopyOverlap();
     bool Merge();
     void ValidateMerge();
     void ReadSnapshotDeviceAndValidate();
@@ -114,6 +115,7 @@
     void StartMerge();
 
     void CreateCowDevice();
+    void CreateCowDeviceWithCopyOverlap();
     void CreateBaseDevice();
     void InitCowDevice();
     void SetDeviceControlName();
@@ -191,6 +193,24 @@
     return setup_ok_;
 }
 
+bool CowSnapuserdTest::SetupCopyOverlap() {
+    CreateBaseDevice();
+    CreateCowDeviceWithCopyOverlap();
+
+    SetDeviceControlName();
+
+    StartSnapuserdDaemon();
+    InitCowDevice();
+
+    CreateDmUserDevice();
+    InitDaemon();
+
+    CreateSnapshotDevice();
+    setup_ok_ = true;
+
+    return setup_ok_;
+}
+
 void CowSnapuserdTest::StartSnapuserdDaemon() {
     pid_t pid = fork();
     ASSERT_GE(pid, 0);
@@ -255,6 +275,49 @@
     ASSERT_EQ(memcmp(snapuserd_buffer.get(), (char*)orig_buffer_.get() + (size_ * 3), size_), 0);
 }
 
+void CowSnapuserdTest::CreateCowDeviceWithCopyOverlap() {
+    std::string path = android::base::GetExecutableDirectory();
+    cow_system_ = std::make_unique<TemporaryFile>(path);
+
+    CowOptions options;
+    options.compression = "gz";
+    CowWriter writer(options);
+
+    ASSERT_TRUE(writer.Initialize(cow_system_->fd));
+
+    size_t num_blocks = size_ / options.block_size;
+    size_t x = num_blocks;
+    size_t blk_src_copy = num_blocks - 1;
+
+    // Create overlapping copy operations
+    while (1) {
+        ASSERT_TRUE(writer.AddCopy(blk_src_copy + 1, blk_src_copy));
+        x -= 1;
+        if (x == 0) {
+            ASSERT_EQ(blk_src_copy, 0);
+            break;
+        }
+        blk_src_copy -= 1;
+    }
+
+    // Flush operations
+    ASSERT_TRUE(writer.Finalize());
+
+    // Construct the buffer required for validation
+    orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
+
+    // Read the entire base device
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
+              true);
+
+    // Merged operations
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), options.block_size, 0),
+              true);
+    ASSERT_EQ(android::base::ReadFullyAtOffset(
+                      base_fd_, (char*)orig_buffer_.get() + options.block_size, size_, 0),
+              true);
+}
+
 void CowSnapuserdTest::CreateCowDevice() {
     unique_fd rnd_fd;
     loff_t offset = 0;
@@ -707,17 +770,17 @@
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 21);
-            ASSERT_EQ(de->new_chunk, 537);
+            ASSERT_EQ(de->new_chunk, 536);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 22);
-            ASSERT_EQ(de->new_chunk, 538);
+            ASSERT_EQ(de->new_chunk, 537);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 23);
-            ASSERT_EQ(de->new_chunk, 539);
+            ASSERT_EQ(de->new_chunk, 538);
             offset += sizeof(struct disk_exception);
 
             // End of metadata
@@ -757,6 +820,23 @@
     harness.ValidateMerge();
     harness.Shutdown();
 }
+
+TEST(Snapuserd_Test, Snapshot_COPY_Overlap_TEST) {
+    CowSnapuserdTest harness;
+    ASSERT_TRUE(harness.SetupCopyOverlap());
+    ASSERT_TRUE(harness.Merge());
+    harness.ValidateMerge();
+    harness.Shutdown();
+}
+
+TEST(Snapuserd_Test, Snapshot_COPY_Overlap_Merge_Resume_TEST) {
+    CowSnapuserdTest harness;
+    ASSERT_TRUE(harness.SetupCopyOverlap());
+    harness.MergeInterrupt();
+    harness.ValidateMerge();
+    harness.Shutdown();
+}
+
 }  // namespace snapshot
 }  // namespace android
 
diff --git a/fs_mgr/libsnapshot/cow_writer.cpp b/fs_mgr/libsnapshot/cow_writer.cpp
index 645ae9d..51c00a9 100644
--- a/fs_mgr/libsnapshot/cow_writer.cpp
+++ b/fs_mgr/libsnapshot/cow_writer.cpp
@@ -94,6 +94,7 @@
     header_.block_size = options_.block_size;
     header_.num_merge_ops = 0;
     header_.cluster_ops = options_.cluster_ops;
+    header_.buffer_size = 0;
     footer_ = {};
     footer_.op.data_length = 64;
     footer_.op.type = kCowFooterOp;
@@ -139,12 +140,6 @@
     return true;
 }
 
-void CowWriter::InitializeMerge(borrowed_fd fd, CowHeader* header) {
-    fd_ = fd;
-    memcpy(&header_, header, sizeof(CowHeader));
-    merge_in_progress_ = true;
-}
-
 bool CowWriter::Initialize(unique_fd&& fd) {
     owned_fd_ = std::move(fd);
     return Initialize(borrowed_fd{owned_fd_});
@@ -172,7 +167,7 @@
 }
 
 void CowWriter::InitPos() {
-    next_op_pos_ = sizeof(header_);
+    next_op_pos_ = sizeof(header_) + header_.buffer_size;
     cluster_size_ = header_.cluster_ops * sizeof(CowOperation);
     if (header_.cluster_ops) {
         next_data_pos_ = next_op_pos_ + cluster_size_;
@@ -196,6 +191,10 @@
         return false;
     }
 
+    if (options_.scratch_space) {
+        header_.buffer_size = BUFFER_REGION_DEFAULT_SIZE;
+    }
+
     // Headers are not complete, but this ensures the file is at the right
     // position.
     if (!android::base::WriteFully(fd_, &header_, sizeof(header_))) {
@@ -203,7 +202,27 @@
         return false;
     }
 
+    if (options_.scratch_space) {
+        // Initialize the scratch space
+        std::string data(header_.buffer_size, 0);
+        if (!android::base::WriteFully(fd_, data.data(), header_.buffer_size)) {
+            PLOG(ERROR) << "writing scratch space failed";
+            return false;
+        }
+    }
+
+    if (!Sync()) {
+        LOG(ERROR) << "Header sync failed";
+        return false;
+    }
+
+    if (lseek(fd_.get(), sizeof(header_) + header_.buffer_size, SEEK_SET) < 0) {
+        PLOG(ERROR) << "lseek failed";
+        return false;
+    }
+
     InitPos();
+
     return true;
 }
 
@@ -517,24 +536,6 @@
     return true;
 }
 
-bool CowWriter::CommitMerge(int merged_ops) {
-    CHECK(merge_in_progress_);
-    header_.num_merge_ops += merged_ops;
-
-    if (lseek(fd_.get(), 0, SEEK_SET) < 0) {
-        PLOG(ERROR) << "lseek failed";
-        return false;
-    }
-
-    if (!android::base::WriteFully(fd_, reinterpret_cast<const uint8_t*>(&header_),
-                                   sizeof(header_))) {
-        PLOG(ERROR) << "WriteFully failed";
-        return false;
-    }
-
-    return Sync();
-}
-
 bool CowWriter::Truncate(off_t length) {
     if (is_dev_null_ || is_block_device_) {
         return true;
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h
index 797b8ef..000e5e1 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_format.h
@@ -21,15 +21,22 @@
 namespace snapshot {
 
 static constexpr uint64_t kCowMagicNumber = 0x436f77634f572121ULL;
-static constexpr uint32_t kCowVersionMajor = 1;
+static constexpr uint32_t kCowVersionMajor = 2;
 static constexpr uint32_t kCowVersionMinor = 0;
 
+static constexpr uint32_t kCowVersionManifest = 2;
+
+static constexpr uint32_t BLOCK_SZ = 4096;
+static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SZ) - 1);
+
 // This header appears as the first sequence of bytes in the COW. All fields
 // in the layout are little-endian encoded. The on-disk layout is:
 //
 //      +-----------------------+
 //      |     Header (fixed)    |
 //      +-----------------------+
+//      |     Scratch space     |
+//      +-----------------------+
 //      | Operation  (variable) |
 //      | Data       (variable) |
 //      +-----------------------+
@@ -68,6 +75,9 @@
 
     // Tracks merge operations completed
     uint64_t num_merge_ops;
+
+    // Scratch space used during merge
+    uint32_t buffer_size;
 } __attribute__((packed));
 
 // This structure is the same size of a normal Operation, but is repurposed for the footer.
@@ -144,11 +154,31 @@
 static constexpr uint8_t kCowCompressGz = 1;
 static constexpr uint8_t kCowCompressBrotli = 2;
 
+static constexpr uint8_t kCowReadAheadNotStarted = 0;
+static constexpr uint8_t kCowReadAheadInProgress = 1;
+static constexpr uint8_t kCowReadAheadDone = 2;
+
 struct CowFooter {
     CowFooterOperation op;
     CowFooterData data;
 } __attribute__((packed));
 
+struct ScratchMetadata {
+    // Block of data in the image that operation modifies
+    // and read-ahead thread stores the modified data
+    // in the scratch space
+    uint64_t new_block;
+    // Offset within the file to read the data
+    uint64_t file_offset;
+} __attribute__((packed));
+
+struct BufferState {
+    uint8_t read_ahead_state;
+} __attribute__((packed));
+
+// 2MB Scratch space used for read-ahead
+static constexpr uint64_t BUFFER_REGION_DEFAULT_SIZE = (1ULL << 21);
+
 std::ostream& operator<<(std::ostream& os, CowOperation const& arg);
 
 int64_t GetNextOpOffset(const CowOperation& op, uint32_t cluster_size);
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
index 552fd96..9ebcfd9 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
@@ -141,18 +141,21 @@
 
     bool GetRawBytes(uint64_t offset, void* buffer, size_t len, size_t* read);
 
-    void UpdateMergeProgress(uint64_t merge_ops) { header_.num_merge_ops += merge_ops; }
-
     void InitializeMerge();
 
     void set_total_data_ops(uint64_t size) { total_data_ops_ = size; }
 
     uint64_t total_data_ops() { return total_data_ops_; }
 
+    void set_copy_ops(uint64_t size) { copy_ops_ = size; }
+
+    uint64_t total_copy_ops() { return copy_ops_; }
+
     void CloseCowFd() { owned_fd_ = {}; }
 
   private:
     bool ParseOps(std::optional<uint64_t> label);
+    uint64_t FindNumCopyops();
 
     android::base::unique_fd owned_fd_;
     android::base::borrowed_fd fd_;
@@ -162,6 +165,7 @@
     std::optional<uint64_t> last_label_;
     std::shared_ptr<std::vector<CowOperation>> ops_;
     uint64_t total_data_ops_;
+    uint64_t copy_ops_;
 };
 
 }  // namespace snapshot
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h
index a9efad8..f43ea68 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_writer.h
@@ -36,6 +36,8 @@
 
     // Number of CowOperations in a cluster. 0 for no clustering. Cannot be 1.
     uint32_t cluster_ops = 200;
+
+    bool scratch_space = true;
 };
 
 // Interface for writing to a snapuserd COW. All operations are ordered; merges
@@ -100,13 +102,12 @@
     bool InitializeAppend(android::base::unique_fd&&, uint64_t label);
     bool InitializeAppend(android::base::borrowed_fd fd, uint64_t label);
 
-    void InitializeMerge(android::base::borrowed_fd fd, CowHeader* header);
-    bool CommitMerge(int merged_ops);
-
     bool Finalize() override;
 
     uint64_t GetCowSize() override;
 
+    uint32_t GetCowVersion() { return header_.major_version; }
+
   protected:
     virtual bool EmitCopy(uint64_t new_block, uint64_t old_block) override;
     virtual bool EmitRawBlocks(uint64_t new_block_start, const void* data, size_t size) override;
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot.h
index 1cb966b..94d5055 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot.h
@@ -27,6 +27,7 @@
     MOCK_METHOD(bool, CancelUpdate, (), (override));
     MOCK_METHOD(bool, FinishedSnapshotWrites, (bool wipe), (override));
     MOCK_METHOD(void, UpdateCowStats, (ISnapshotMergeStats * stats), (override));
+    MOCK_METHOD(MergeFailureCode, ReadMergeFailureCode, (), (override));
     MOCK_METHOD(bool, InitiateMerge, (), (override));
 
     MOCK_METHOD(UpdateState, ProcessUpdateState,
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot_merge_stats.h b/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot_merge_stats.h
index ac2c787..067f99c 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot_merge_stats.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot_merge_stats.h
@@ -34,11 +34,13 @@
     MOCK_METHOD(void, set_estimated_cow_size_bytes, (uint64_t), (override));
     MOCK_METHOD(void, set_boot_complete_time_ms, (uint32_t), (override));
     MOCK_METHOD(void, set_boot_complete_to_merge_start_time_ms, (uint32_t), (override));
+    MOCK_METHOD(void, set_merge_failure_code, (MergeFailureCode), (override));
     MOCK_METHOD(uint64_t, cow_file_size, (), (override));
     MOCK_METHOD(uint64_t, total_cow_size_bytes, (), (override));
     MOCK_METHOD(uint64_t, estimated_cow_size_bytes, (), (override));
     MOCK_METHOD(uint32_t, boot_complete_time_ms, (), (override));
     MOCK_METHOD(uint32_t, boot_complete_to_merge_start_time_ms, (), (override));
+    MOCK_METHOD(MergeFailureCode, merge_failure_code, (), (override));
     MOCK_METHOD(std::unique_ptr<Result>, Finish, (), (override));
 
     using ISnapshotMergeStats::Result;
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
index 81cb640..195b6f2 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
@@ -167,6 +167,10 @@
     virtual UpdateState ProcessUpdateState(const std::function<bool()>& callback = {},
                                            const std::function<bool()>& before_cancel = {}) = 0;
 
+    // If ProcessUpdateState() returned MergeFailed, this returns the appropriate
+    // code. Otherwise, MergeFailureCode::Ok is returned.
+    virtual MergeFailureCode ReadMergeFailureCode() = 0;
+
     // Find the status of the current update, if any.
     //
     // |progress| depends on the returned status:
@@ -332,6 +336,7 @@
     bool CancelUpdate() override;
     bool FinishedSnapshotWrites(bool wipe) override;
     void UpdateCowStats(ISnapshotMergeStats* stats) override;
+    MergeFailureCode ReadMergeFailureCode() override;
     bool InitiateMerge() override;
     UpdateState ProcessUpdateState(const std::function<bool()>& callback = {},
                                    const std::function<bool()>& before_cancel = {}) override;
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stats.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stats.h
index e617d7a..4ce5077 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stats.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stats.h
@@ -34,11 +34,13 @@
     virtual void set_estimated_cow_size_bytes(uint64_t bytes) = 0;
     virtual void set_boot_complete_time_ms(uint32_t ms) = 0;
     virtual void set_boot_complete_to_merge_start_time_ms(uint32_t ms) = 0;
+    virtual void set_merge_failure_code(MergeFailureCode code) = 0;
     virtual uint64_t cow_file_size() = 0;
     virtual uint64_t total_cow_size_bytes() = 0;
     virtual uint64_t estimated_cow_size_bytes() = 0;
     virtual uint32_t boot_complete_time_ms() = 0;
     virtual uint32_t boot_complete_to_merge_start_time_ms() = 0;
+    virtual MergeFailureCode merge_failure_code() = 0;
 
     // Called when merge ends. Properly clean up permanent storage.
     class Result {
@@ -70,6 +72,8 @@
     uint32_t boot_complete_time_ms() override;
     void set_boot_complete_to_merge_start_time_ms(uint32_t ms) override;
     uint32_t boot_complete_to_merge_start_time_ms() override;
+    void set_merge_failure_code(MergeFailureCode code) override;
+    MergeFailureCode merge_failure_code() override;
     std::unique_ptr<Result> Finish() override;
 
   private:
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stub.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stub.h
index cc75db8..a7cd939 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stub.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stub.h
@@ -29,6 +29,7 @@
     bool CancelUpdate() override;
     bool FinishedSnapshotWrites(bool wipe) override;
     void UpdateCowStats(ISnapshotMergeStats* stats) override;
+    MergeFailureCode ReadMergeFailureCode() override;
     bool InitiateMerge() override;
     UpdateState ProcessUpdateState(const std::function<bool()>& callback = {},
                                    const std::function<bool()>& before_cancel = {}) override;
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h
index 2b6c8ef..6bb7a39 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h
@@ -47,9 +47,6 @@
 static constexpr uint32_t CHUNK_SIZE = 8;
 static constexpr uint32_t CHUNK_SHIFT = (__builtin_ffs(CHUNK_SIZE) - 1);
 
-static constexpr uint32_t BLOCK_SZ = 4096;
-static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SZ) - 1);
-
 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
 
 // This structure represents the kernel COW header.
diff --git a/fs_mgr/libsnapshot/inspect_cow.cpp b/fs_mgr/libsnapshot/inspect_cow.cpp
index 453b5c6..4a84fba 100644
--- a/fs_mgr/libsnapshot/inspect_cow.cpp
+++ b/fs_mgr/libsnapshot/inspect_cow.cpp
@@ -38,7 +38,8 @@
 static void usage(void) {
     LOG(ERROR) << "Usage: inspect_cow [-sd] <COW_FILE>";
     LOG(ERROR) << "\t -s Run Silent";
-    LOG(ERROR) << "\t -d Attempt to decompress\n";
+    LOG(ERROR) << "\t -d Attempt to decompress";
+    LOG(ERROR) << "\t -b Show data for failed decompress\n";
 }
 
 // Sink that always appends to the end of a string.
@@ -59,7 +60,25 @@
     std::string stream_;
 };
 
-static bool Inspect(const std::string& path, bool silent, bool decompress) {
+static void ShowBad(CowReader& reader, const struct CowOperation& op) {
+    size_t count;
+    auto buffer = std::make_unique<uint8_t[]>(op.data_length);
+
+    if (!reader.GetRawBytes(op.source, buffer.get(), op.data_length, &count)) {
+        std::cerr << "Failed to read at all!\n";
+    } else {
+        std::cout << "The Block data is:\n";
+        for (int i = 0; i < op.data_length; i++) {
+            std::cout << std::hex << (int)buffer[i];
+        }
+        std::cout << std::dec << "\n\n";
+        if (op.data_length >= sizeof(CowOperation)) {
+            std::cout << "The start, as an op, would be " << *(CowOperation*)buffer.get() << "\n";
+        }
+    }
+}
+
+static bool Inspect(const std::string& path, bool silent, bool decompress, bool show_bad) {
     android::base::unique_fd fd(open(path.c_str(), O_RDONLY));
     if (fd < 0) {
         PLOG(ERROR) << "open failed: " << path;
@@ -107,6 +126,7 @@
             if (!reader.ReadData(op, &sink)) {
                 std::cerr << "Failed to decompress for :" << op << "\n";
                 success = false;
+                if (show_bad) ShowBad(reader, op);
             }
             sink.Reset();
         }
@@ -124,7 +144,8 @@
     int ch;
     bool silent = false;
     bool decompress = false;
-    while ((ch = getopt(argc, argv, "sd")) != -1) {
+    bool show_bad = false;
+    while ((ch = getopt(argc, argv, "sdb")) != -1) {
         switch (ch) {
             case 's':
                 silent = true;
@@ -132,6 +153,9 @@
             case 'd':
                 decompress = true;
                 break;
+            case 'b':
+                show_bad = true;
+                break;
             default:
                 android::snapshot::usage();
         }
@@ -143,7 +167,7 @@
         return 1;
     }
 
-    if (!android::snapshot::Inspect(argv[optind], silent, decompress)) {
+    if (!android::snapshot::Inspect(argv[optind], silent, decompress, show_bad)) {
         return 1;
     }
     return 0;
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index 64a434c..8f3926a 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -2692,8 +2692,18 @@
     AutoDeviceList created_devices;
 
     const auto& dap_metadata = manifest.dynamic_partition_metadata();
-    bool use_compression =
-            IsCompressionEnabled() && dap_metadata.vabc_enabled() && !device_->IsRecovery();
+    CowOptions options;
+    CowWriter writer(options);
+    bool cow_format_support = true;
+    if (dap_metadata.cow_version() < writer.GetCowVersion()) {
+        cow_format_support = false;
+    }
+
+    LOG(INFO) << " dap_metadata.cow_version(): " << dap_metadata.cow_version()
+              << " writer.GetCowVersion(): " << writer.GetCowVersion();
+
+    bool use_compression = IsCompressionEnabled() && dap_metadata.vabc_enabled() &&
+                           !device_->IsRecovery() && cow_format_support;
 
     std::string compression_algorithm;
     if (use_compression) {
@@ -2960,7 +2970,13 @@
                 return Return::Error();
             }
 
-            CowWriter writer(CowOptions{.compression = it->second.compression_algorithm()});
+            CowOptions options;
+            if (device()->IsTestDevice()) {
+                options.scratch_space = false;
+            }
+            options.compression = it->second.compression_algorithm();
+
+            CowWriter writer(options);
             if (!writer.Initialize(fd) || !writer.Finalize()) {
                 LOG(ERROR) << "Could not initialize COW device for " << target_partition->name();
                 return Return::Error();
@@ -3069,6 +3085,10 @@
     CowOptions cow_options;
     cow_options.compression = status.compression_algorithm();
     cow_options.max_blocks = {status.device_size() / cow_options.block_size};
+    // Disable scratch space for vts tests
+    if (device()->IsTestDevice()) {
+        cow_options.scratch_space = false;
+    }
 
     // Currently we don't support partial snapshots, since partition_cow_creator
     // never creates this scenario.
@@ -3690,5 +3710,16 @@
     return false;
 }
 
+MergeFailureCode SnapshotManager::ReadMergeFailureCode() {
+    auto lock = LockExclusive();
+    if (!lock) return MergeFailureCode::AcquireLock;
+
+    SnapshotUpdateStatus status = ReadSnapshotUpdateStatus(lock.get());
+    if (status.state() != UpdateState::MergeFailed) {
+        return MergeFailureCode::Ok;
+    }
+    return status.merge_failure_code();
+}
+
 }  // namespace snapshot
 }  // namespace android
diff --git a/fs_mgr/libsnapshot/snapshot_reader_test.cpp b/fs_mgr/libsnapshot/snapshot_reader_test.cpp
index 4202d22..9373059 100644
--- a/fs_mgr/libsnapshot/snapshot_reader_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_reader_test.cpp
@@ -150,6 +150,7 @@
     CowOptions options;
     options.compression = "gz";
     options.max_blocks = {kBlockCount};
+    options.scratch_space = false;
 
     unique_fd cow_fd(dup(cow_->fd));
     ASSERT_GE(cow_fd, 0);
diff --git a/fs_mgr/libsnapshot/snapshot_stats.cpp b/fs_mgr/libsnapshot/snapshot_stats.cpp
index 7fcfcea..4a93d65 100644
--- a/fs_mgr/libsnapshot/snapshot_stats.cpp
+++ b/fs_mgr/libsnapshot/snapshot_stats.cpp
@@ -130,6 +130,14 @@
     return report_.boot_complete_to_merge_start_time_ms();
 }
 
+void SnapshotMergeStats::set_merge_failure_code(MergeFailureCode code) {
+    report_.set_merge_failure_code(code);
+}
+
+MergeFailureCode SnapshotMergeStats::merge_failure_code() {
+    return report_.merge_failure_code();
+}
+
 class SnapshotMergeStatsResultImpl : public SnapshotMergeStats::Result {
   public:
     SnapshotMergeStatsResultImpl(const SnapshotMergeReport& report,
diff --git a/fs_mgr/libsnapshot/snapshot_stub.cpp b/fs_mgr/libsnapshot/snapshot_stub.cpp
index 43825cc..1a9eda5 100644
--- a/fs_mgr/libsnapshot/snapshot_stub.cpp
+++ b/fs_mgr/libsnapshot/snapshot_stub.cpp
@@ -135,6 +135,8 @@
     uint32_t boot_complete_time_ms() override { return 0; }
     void set_boot_complete_to_merge_start_time_ms(uint32_t) override {}
     uint32_t boot_complete_to_merge_start_time_ms() override { return 0; }
+    void set_merge_failure_code(MergeFailureCode) override {}
+    MergeFailureCode merge_failure_code() { return MergeFailureCode::Ok; }
 };
 
 ISnapshotMergeStats* SnapshotManagerStub::GetSnapshotMergeStatsInstance() {
@@ -163,4 +165,9 @@
     LOG(ERROR) << __FUNCTION__ << " should never be called.";
 }
 
+auto SnapshotManagerStub::ReadMergeFailureCode() -> MergeFailureCode {
+    LOG(ERROR) << __FUNCTION__ << " should never be called.";
+    return MergeFailureCode::Ok;
+}
+
 }  // namespace android::snapshot
diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp
index 3554dce..f18f176 100644
--- a/fs_mgr/libsnapshot/snapshot_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_test.cpp
@@ -12,6 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include <libsnapshot/cow_format.h>
 #include <libsnapshot/snapshot.h>
 
 #include <fcntl.h>
@@ -327,6 +328,7 @@
 
         auto dynamic_partition_metadata = manifest.mutable_dynamic_partition_metadata();
         dynamic_partition_metadata->set_vabc_enabled(IsCompressionEnabled());
+        dynamic_partition_metadata->set_cow_version(android::snapshot::kCowVersionMajor);
 
         auto group = dynamic_partition_metadata->add_groups();
         group->set_name("group");
@@ -853,6 +855,7 @@
 
         auto dynamic_partition_metadata = manifest_.mutable_dynamic_partition_metadata();
         dynamic_partition_metadata->set_vabc_enabled(IsCompressionEnabled());
+        dynamic_partition_metadata->set_cow_version(android::snapshot::kCowVersionMajor);
 
         // Create a fake update package metadata.
         // Not using full name "system", "vendor", "product" because these names collide with the
@@ -2244,9 +2247,27 @@
     void TearDown() override;
 
   private:
+    bool CreateFakeSuper();
+
     std::unique_ptr<IImageManager> super_images_;
 };
 
+bool SnapshotTestEnvironment::CreateFakeSuper() {
+    // Create and map the fake super partition.
+    static constexpr int kImageFlags =
+            IImageManager::CREATE_IMAGE_DEFAULT | IImageManager::CREATE_IMAGE_ZERO_FILL;
+    if (!super_images_->CreateBackingImage("fake-super", kSuperSize, kImageFlags)) {
+        LOG(ERROR) << "Could not create fake super partition";
+        return false;
+    }
+    if (!super_images_->MapImageDevice("fake-super", 10s, &fake_super)) {
+        LOG(ERROR) << "Could not map fake super partition";
+        return false;
+    }
+    test_device->set_fake_super(fake_super);
+    return true;
+}
+
 void SnapshotTestEnvironment::SetUp() {
     // b/163082876: GTEST_SKIP in Environment will make atest report incorrect results. Until
     // that is fixed, don't call GTEST_SKIP here, but instead call GTEST_SKIP in individual test
@@ -2272,27 +2293,36 @@
     sm = SnapshotManager::New(test_device);
     ASSERT_NE(nullptr, sm) << "Could not create snapshot manager";
 
+    // Use a separate image manager for our fake super partition.
+    super_images_ = IImageManager::Open("ota/test/super", 10s);
+    ASSERT_NE(nullptr, super_images_) << "Could not create image manager";
+
+    // Map the old image if one exists so we can safely unmap everything that
+    // depends on it.
+    bool recreate_fake_super;
+    if (super_images_->BackingImageExists("fake-super")) {
+        if (super_images_->IsImageMapped("fake-super")) {
+            ASSERT_TRUE(super_images_->GetMappedImageDevice("fake-super", &fake_super));
+        } else {
+            ASSERT_TRUE(super_images_->MapImageDevice("fake-super", 10s, &fake_super));
+        }
+        test_device->set_fake_super(fake_super);
+        recreate_fake_super = true;
+    } else {
+        ASSERT_TRUE(CreateFakeSuper());
+        recreate_fake_super = false;
+    }
+
     // Clean up previous run.
     MetadataMountedTest().TearDown();
     SnapshotUpdateTest().Cleanup();
     SnapshotTest().Cleanup();
 
-    // Use a separate image manager for our fake super partition.
-    super_images_ = IImageManager::Open("ota/test/super", 10s);
-    ASSERT_NE(nullptr, super_images_) << "Could not create image manager";
-
-    // Clean up any old copy.
-    DeleteBackingImage(super_images_.get(), "fake-super");
-
-    // Create and map the fake super partition.
-    static constexpr int kImageFlags =
-            IImageManager::CREATE_IMAGE_DEFAULT | IImageManager::CREATE_IMAGE_ZERO_FILL;
-    ASSERT_TRUE(super_images_->CreateBackingImage("fake-super", kSuperSize, kImageFlags))
-            << "Could not create fake super partition";
-
-    ASSERT_TRUE(super_images_->MapImageDevice("fake-super", 10s, &fake_super))
-            << "Could not map fake super partition";
-    test_device->set_fake_super(fake_super);
+    if (recreate_fake_super) {
+        // Clean up any old copy.
+        DeleteBackingImage(super_images_.get(), "fake-super");
+        ASSERT_TRUE(CreateFakeSuper());
+    }
 }
 
 void SnapshotTestEnvironment::TearDown() {
diff --git a/fs_mgr/libsnapshot/snapshot_writer.cpp b/fs_mgr/libsnapshot/snapshot_writer.cpp
index 8e379e4..080f3b7 100644
--- a/fs_mgr/libsnapshot/snapshot_writer.cpp
+++ b/fs_mgr/libsnapshot/snapshot_writer.cpp
@@ -90,7 +90,9 @@
     }
 
     const auto& cow_options = options();
-    reader->SetBlockDeviceSize(*cow_options.max_blocks * cow_options.block_size);
+    if (cow_options.max_blocks) {
+        reader->SetBlockDeviceSize(*cow_options.max_blocks * cow_options.block_size);
+    }
 
     return reader;
 }
diff --git a/fs_mgr/libsnapshot/snapshot_writer_test.cpp b/fs_mgr/libsnapshot/snapshot_writer_test.cpp
new file mode 100644
index 0000000..da48eb9
--- /dev/null
+++ b/fs_mgr/libsnapshot/snapshot_writer_test.cpp
@@ -0,0 +1,62 @@
+//
+// Copyright (C) 2021 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+#include <libsnapshot/snapshot.h>
+
+#include <unordered_set>
+
+#include <android-base/file.h>
+#include <gtest/gtest.h>
+#include <libsnapshot/snapshot_writer.h>
+#include <payload_consumer/file_descriptor.h>
+
+namespace android::snapshot {
+class CompressedSnapshotWriterTest : public ::testing::Test {
+  public:
+    static constexpr size_t BLOCK_SIZE = 4096;
+};
+
+TEST_F(CompressedSnapshotWriterTest, ReadAfterWrite) {
+    TemporaryFile cow_device_file{};
+    android::snapshot::CowOptions options{.block_size = BLOCK_SIZE};
+    android::snapshot::CompressedSnapshotWriter snapshot_writer{options};
+    snapshot_writer.SetCowDevice(android::base::unique_fd{cow_device_file.fd});
+    snapshot_writer.Initialize();
+    std::vector<unsigned char> buffer;
+    buffer.resize(BLOCK_SIZE);
+    std::fill(buffer.begin(), buffer.end(), 123);
+
+    ASSERT_TRUE(snapshot_writer.AddRawBlocks(0, buffer.data(), buffer.size()));
+    ASSERT_TRUE(snapshot_writer.Finalize());
+    auto cow_reader = snapshot_writer.OpenReader();
+    ASSERT_NE(cow_reader, nullptr);
+    ASSERT_TRUE(snapshot_writer.AddRawBlocks(1, buffer.data(), buffer.size()));
+    ASSERT_TRUE(snapshot_writer.AddRawBlocks(2, buffer.data(), buffer.size()));
+    ASSERT_TRUE(snapshot_writer.Finalize());
+    // After wrigin some data, if we call OpenReader() again, writes should
+    // be visible to the newly opened reader. update_engine relies on this
+    // behavior for verity writes.
+    cow_reader = snapshot_writer.OpenReader();
+    ASSERT_NE(cow_reader, nullptr);
+    std::vector<unsigned char> read_back;
+    read_back.resize(buffer.size());
+    cow_reader->Seek(BLOCK_SIZE, SEEK_SET);
+    const auto bytes_read = cow_reader->Read(read_back.data(), read_back.size());
+    ASSERT_EQ((size_t)(bytes_read), BLOCK_SIZE);
+    ASSERT_EQ(read_back, buffer);
+}
+
+}  // namespace android::snapshot
diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp
index 5ef9e29..2ccc750 100644
--- a/fs_mgr/libsnapshot/snapuserd.cpp
+++ b/fs_mgr/libsnapshot/snapuserd.cpp
@@ -47,28 +47,202 @@
 
         worker_threads_.push_back(std::move(wt));
     }
+
+    read_ahead_thread_ = std::make_unique<ReadAheadThread>(cow_device_, backing_store_device_,
+                                                           misc_name_, GetSharedPtr());
     return true;
 }
 
 bool Snapuserd::CommitMerge(int num_merge_ops) {
-    {
-        std::lock_guard<std::mutex> lock(lock_);
-        CowHeader header;
+    struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);
+    ch->num_merge_ops += num_merge_ops;
 
-        reader_->GetHeader(&header);
-        header.num_merge_ops += num_merge_ops;
-        reader_->UpdateMergeProgress(num_merge_ops);
-        if (!writer_->CommitMerge(num_merge_ops)) {
-            SNAP_LOG(ERROR) << "CommitMerge failed... merged_ops_cur_iter: " << num_merge_ops
-                            << " Total-merged-ops: " << header.num_merge_ops;
-            return false;
-        }
-        merge_initiated_ = true;
+    if (read_ahead_feature_ && read_ahead_ops_.size() > 0) {
+        struct BufferState* ra_state = GetBufferState();
+        ra_state->read_ahead_state = kCowReadAheadInProgress;
     }
 
+    int ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
+    if (ret < 0) {
+        PLOG(ERROR) << "msync header failed: " << ret;
+        return false;
+    }
+
+    merge_initiated_ = true;
+
     return true;
 }
 
+void Snapuserd::PrepareReadAhead() {
+    if (!read_ahead_feature_) {
+        return;
+    }
+
+    struct BufferState* ra_state = GetBufferState();
+    // Check if the data has to be re-constructed from COW device
+    if (ra_state->read_ahead_state == kCowReadAheadDone) {
+        populate_data_from_cow_ = true;
+    } else {
+        populate_data_from_cow_ = false;
+    }
+
+    StartReadAhead();
+}
+
+bool Snapuserd::GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer) {
+    CHECK(lock->owns_lock());
+    std::unordered_map<uint64_t, void*>::iterator it = read_ahead_buffer_map_.find(block);
+
+    // This will be true only for IO's generated as part of reading a root
+    // filesystem. IO's related to merge should always be in read-ahead cache.
+    if (it == read_ahead_buffer_map_.end()) {
+        return false;
+    }
+
+    // Theoretically, we can send the data back from the read-ahead buffer
+    // all the way to the kernel without memcpy. However, if the IO is
+    // un-aligned, the wrapper function will need to touch the read-ahead
+    // buffers and transitions will be bit more complicated.
+    memcpy(buffer, it->second, BLOCK_SZ);
+    return true;
+}
+
+// ========== State transition functions for read-ahead operations ===========
+
+bool Snapuserd::GetReadAheadPopulatedBuffer(uint64_t block, void* buffer) {
+    if (!read_ahead_feature_) {
+        return false;
+    }
+
+    {
+        std::unique_lock<std::mutex> lock(lock_);
+        if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) {
+            return false;
+        }
+
+        if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS) {
+            return GetRABuffer(&lock, block, buffer);
+        }
+    }
+
+    {
+        // Read-ahead thread IO is in-progress. Wait for it to complete
+        std::unique_lock<std::mutex> lock(lock_);
+        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE ||
+                 io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS)) {
+            cv.wait(lock);
+        }
+
+        return GetRABuffer(&lock, block, buffer);
+    }
+}
+
+// This is invoked by read-ahead thread waiting for merge IO's
+// to complete
+bool Snapuserd::WaitForMergeToComplete() {
+    {
+        std::unique_lock<std::mutex> lock(lock_);
+        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN ||
+                 io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED)) {
+            cv.wait(lock);
+        }
+
+        if (io_state_ == READ_AHEAD_IO_TRANSITION::IO_TERMINATED) {
+            return false;
+        }
+
+        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_IN_PROGRESS;
+        return true;
+    }
+}
+
+// This is invoked during the launch of worker threads. We wait
+// for read-ahead thread to by fully up before worker threads
+// are launched; else we will have a race between worker threads
+// and read-ahead thread specifically during re-construction.
+bool Snapuserd::WaitForReadAheadToStart() {
+    {
+        std::unique_lock<std::mutex> lock(lock_);
+        while (!(io_state_ == READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS ||
+                 io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE)) {
+            cv.wait(lock);
+        }
+
+        if (io_state_ == READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE) {
+            return false;
+        }
+
+        return true;
+    }
+}
+
+// Invoked by worker threads when a sequence of merge operation
+// is complete notifying read-ahead thread to make forward
+// progress.
+void Snapuserd::StartReadAhead() {
+    {
+        std::lock_guard<std::mutex> lock(lock_);
+        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_BEGIN;
+    }
+
+    cv.notify_one();
+}
+
+void Snapuserd::MergeCompleted() {
+    {
+        std::lock_guard<std::mutex> lock(lock_);
+        io_state_ = READ_AHEAD_IO_TRANSITION::IO_TERMINATED;
+    }
+
+    cv.notify_one();
+}
+
+bool Snapuserd::ReadAheadIOCompleted(bool sync) {
+    if (sync) {
+        // Flush the entire buffer region
+        int ret = msync(mapped_addr_, total_mapped_addr_length_, MS_SYNC);
+        if (ret < 0) {
+            PLOG(ERROR) << "msync failed after ReadAheadIOCompleted: " << ret;
+            return false;
+        }
+
+        // Metadata and data are synced. Now, update the state.
+        // We need to update the state after flushing data; if there is a crash
+        // when read-ahead IO is in progress, the state of data in the COW file
+        // is unknown. kCowReadAheadDone acts as a checkpoint wherein the data
+        // in the scratch space is good and during next reboot, read-ahead thread
+        // can safely re-construct the data.
+        struct BufferState* ra_state = GetBufferState();
+        ra_state->read_ahead_state = kCowReadAheadDone;
+
+        ret = msync(mapped_addr_, BLOCK_SZ, MS_SYNC);
+        if (ret < 0) {
+            PLOG(ERROR) << "msync failed to flush Readahead completion state...";
+            return false;
+        }
+    }
+
+    // Notify the worker threads
+    {
+        std::lock_guard<std::mutex> lock(lock_);
+        io_state_ = READ_AHEAD_IO_TRANSITION::IO_IN_PROGRESS;
+    }
+
+    cv.notify_all();
+    return true;
+}
+
+void Snapuserd::ReadAheadIOFailed() {
+    {
+        std::lock_guard<std::mutex> lock(lock_);
+        io_state_ = READ_AHEAD_IO_TRANSITION::READ_AHEAD_FAILURE;
+    }
+
+    cv.notify_all();
+}
+
+//========== End of state transition functions ====================
+
 bool Snapuserd::IsChunkIdMetadata(chunk_t chunk) {
     uint32_t stride = exceptions_per_area_ + 1;
     lldiv_t divresult = lldiv(chunk, stride);
@@ -93,9 +267,9 @@
         return;
     }
 
-    CowHeader header;
-    reader_->GetHeader(&header);
-    SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << header.num_merge_ops
+    struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);
+
+    SNAP_LOG(INFO) << "Merge-status: Total-Merged-ops: " << ch->num_merge_ops
                    << " Total-data-ops: " << reader_->total_data_ops();
 }
 
@@ -175,8 +349,10 @@
     reader_->InitializeMerge();
     SNAP_LOG(DEBUG) << "Merge-ops: " << header.num_merge_ops;
 
-    writer_ = std::make_unique<CowWriter>(options);
-    writer_->InitializeMerge(cow_fd_.get(), &header);
+    if (!MmapMetadata()) {
+        SNAP_LOG(ERROR) << "mmap failed";
+        return false;
+    }
 
     // Initialize the iterator for reading metadata
     cowop_riter_ = reader_->GetRevOpIter();
@@ -258,13 +434,15 @@
         data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
     }
 
+    int num_ra_ops_per_iter = ((GetBufferDataSize()) / BLOCK_SZ);
     std::optional<chunk_t> prev_id = {};
     std::map<uint64_t, const CowOperation*> map;
-    std::set<uint64_t> dest_blocks;
     size_t pending_copy_ops = exceptions_per_area_ - num_ops;
-    SNAP_LOG(INFO) << " Processing copy-ops at Area: " << vec_.size()
-                   << " Number of replace/zero ops completed in this area: " << num_ops
-                   << " Pending copy ops for this area: " << pending_copy_ops;
+    uint64_t total_copy_ops = reader_->total_copy_ops();
+
+    SNAP_LOG(DEBUG) << " Processing copy-ops at Area: " << vec_.size()
+                    << " Number of replace/zero ops completed in this area: " << num_ops
+                    << " Pending copy ops for this area: " << pending_copy_ops;
     while (!cowop_riter_->Done()) {
         do {
             const CowOperation* cow_op = &cowop_riter_->Get();
@@ -300,41 +478,20 @@
             // Op-6: 15 -> 18
             //
             // Note that the blocks numbers are contiguous. Hence, all 6 copy
-            // operations can potentially be batch merged. However, that will be
+            // operations can be batch merged. However, that will be
             // problematic if we have a crash as block 20, 19, 18 would have
             // been overwritten and hence subsequent recovery may end up with
             // a silent data corruption when op-1, op-2 and op-3 are
             // re-executed.
             //
-            // We will split these 6 operations into two batches viz:
-            //
-            // Batch-1:
-            // ===================
-            // Op-1: 20 -> 23
-            // Op-2: 19 -> 22
-            // Op-3: 18 -> 21
-            // ===================
-            //
-            // Batch-2:
-            // ==================
-            // Op-4: 17 -> 20
-            // Op-5: 16 -> 19
-            // Op-6: 15 -> 18
-            // ==================
-            //
-            // Now, merge sequence will look like:
-            //
-            // 1: Merge Batch-1 { op-1, op-2, op-3 }
-            // 2: Update Metadata in COW File that op-1, op-2, op-3 merge is
-            // done.
-            // 3: Merge Batch-2
-            // 4: Update Metadata in COW File that op-4, op-5, op-6 merge is
-            // done.
-            //
-            // Note, that the order of block operations are still the same.
-            // However, we have two batch merge operations. Any crash between
-            // either of this sequence should be safe as each of these
-            // batches are self-contained.
+            // To address the above problem, read-ahead thread will
+            // read all the 6 source blocks, cache them in the scratch
+            // space of the COW file. During merge, read-ahead
+            // thread will serve the blocks from the read-ahead cache.
+            // If there is a crash during merge; on subsequent reboot,
+            // read-ahead thread will recover the data from the
+            // scratch space and re-construct it thereby there
+            // is no loss of data.
             //
             //===========================================================
             //
@@ -398,14 +555,10 @@
                 if (diff != 1) {
                     break;
                 }
-                if (dest_blocks.count(cow_op->new_block) || map.count(cow_op->source) > 0) {
-                    break;
-                }
             }
             metadata_found = true;
             pending_copy_ops -= 1;
             map[cow_op->new_block] = cow_op;
-            dest_blocks.insert(cow_op->source);
             prev_id = cow_op->new_block;
             cowop_riter_->Next();
         } while (!cowop_riter_->Done() && pending_copy_ops);
@@ -426,6 +579,9 @@
             offset += sizeof(struct disk_exception);
             num_ops += 1;
             copy_ops++;
+            if (read_ahead_feature_) {
+                read_ahead_ops_.push_back(it->second);
+            }
 
             SNAP_LOG(DEBUG) << num_ops << ":"
                             << " Copy-op: "
@@ -453,9 +609,17 @@
             }
 
             data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
+            total_copy_ops -= 1;
+            /*
+             * Split the number of ops based on the size of read-ahead buffer
+             * region. We need to ensure that kernel doesn't issue IO on blocks
+             * which are not read by the read-ahead thread.
+             */
+            if (read_ahead_feature_ && (total_copy_ops % num_ra_ops_per_iter == 0)) {
+                data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
+            }
         }
         map.clear();
-        dest_blocks.clear();
         prev_id.reset();
     }
 
@@ -470,6 +634,7 @@
 
     chunk_vec_.shrink_to_fit();
     vec_.shrink_to_fit();
+    read_ahead_ops_.shrink_to_fit();
 
     // Sort the vector based on sectors as we need this during un-aligned access
     std::sort(chunk_vec_.begin(), chunk_vec_.end(), compare);
@@ -484,9 +649,41 @@
     // Total number of sectors required for creating dm-user device
     num_sectors_ = ChunkToSector(data_chunk_id);
     merge_initiated_ = false;
+    PrepareReadAhead();
+
     return true;
 }
 
+bool Snapuserd::MmapMetadata() {
+    CowHeader header;
+    reader_->GetHeader(&header);
+
+    if (header.major_version >= 2 && header.buffer_size > 0) {
+        total_mapped_addr_length_ = header.header_size + BUFFER_REGION_DEFAULT_SIZE;
+        read_ahead_feature_ = true;
+    } else {
+        // mmap the first 4k page - older COW format
+        total_mapped_addr_length_ = BLOCK_SZ;
+        read_ahead_feature_ = false;
+    }
+
+    mapped_addr_ = mmap(NULL, total_mapped_addr_length_, PROT_READ | PROT_WRITE, MAP_SHARED,
+                        cow_fd_.get(), 0);
+    if (mapped_addr_ == MAP_FAILED) {
+        SNAP_LOG(ERROR) << "mmap metadata failed";
+        return false;
+    }
+
+    return true;
+}
+
+void Snapuserd::UnmapBufferRegion() {
+    int ret = munmap(mapped_addr_, total_mapped_addr_length_);
+    if (ret < 0) {
+        SNAP_PLOG(ERROR) << "munmap failed";
+    }
+}
+
 void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*,
               unsigned int, const char* message) {
     if (severity == android::base::ERROR) {
@@ -507,11 +704,28 @@
 }
 
 /*
- * Entry point to launch worker threads
+ * Entry point to launch threads
  */
 bool Snapuserd::Start() {
     std::vector<std::future<bool>> threads;
+    std::future<bool> ra_thread;
+    bool rathread = (read_ahead_feature_ && (read_ahead_ops_.size() > 0));
 
+    // Start the read-ahead thread and wait
+    // for it as the data has to be re-constructed
+    // from COW device.
+    if (rathread) {
+        ra_thread = std::async(std::launch::async, &ReadAheadThread::RunThread,
+                               read_ahead_thread_.get());
+        if (!WaitForReadAheadToStart()) {
+            SNAP_LOG(ERROR) << "Failed to start Read-ahead thread...";
+            return false;
+        }
+
+        SNAP_LOG(INFO) << "Read-ahead thread started...";
+    }
+
+    // Launch worker threads
     for (int i = 0; i < worker_threads_.size(); i++) {
         threads.emplace_back(
                 std::async(std::launch::async, &WorkerThread::RunThread, worker_threads_[i].get()));
@@ -522,8 +736,69 @@
         ret = t.get() && ret;
     }
 
+    if (rathread) {
+        // Notify the read-ahead thread that all worker threads
+        // are done. We need this explicit notification when
+        // there is an IO failure or there was a switch
+        // of dm-user table; thus, forcing the read-ahead
+        // thread to wake up.
+        MergeCompleted();
+        ret = ret && ra_thread.get();
+    }
+
     return ret;
 }
 
+uint64_t Snapuserd::GetBufferMetadataOffset() {
+    CowHeader header;
+    reader_->GetHeader(&header);
+
+    size_t size = header.header_size + sizeof(BufferState);
+    return size;
+}
+
+/*
+ * Metadata for read-ahead is 16 bytes. For a 2 MB region, we will
+ * end up with 8k (2 PAGE) worth of metadata. Thus, a 2MB buffer
+ * region is split into:
+ *
+ * 1: 8k metadata
+ *
+ */
+size_t Snapuserd::GetBufferMetadataSize() {
+    CowHeader header;
+    reader_->GetHeader(&header);
+
+    size_t metadata_bytes = (header.buffer_size * sizeof(struct ScratchMetadata)) / BLOCK_SZ;
+    return metadata_bytes;
+}
+
+size_t Snapuserd::GetBufferDataOffset() {
+    CowHeader header;
+    reader_->GetHeader(&header);
+
+    return (header.header_size + GetBufferMetadataSize());
+}
+
+/*
+ * (2MB - 8K = 2088960 bytes) will be the buffer region to hold the data.
+ */
+size_t Snapuserd::GetBufferDataSize() {
+    CowHeader header;
+    reader_->GetHeader(&header);
+
+    size_t size = header.buffer_size - GetBufferMetadataSize();
+    return size;
+}
+
+struct BufferState* Snapuserd::GetBufferState() {
+    CowHeader header;
+    reader_->GetHeader(&header);
+
+    struct BufferState* ra_state =
+            reinterpret_cast<struct BufferState*>((char*)mapped_addr_ + header.header_size);
+    return ra_state;
+}
+
 }  // namespace snapshot
 }  // namespace android
diff --git a/fs_mgr/libsnapshot/snapuserd.h b/fs_mgr/libsnapshot/snapuserd.h
index 87c5528..0a5ab50 100644
--- a/fs_mgr/libsnapshot/snapuserd.h
+++ b/fs_mgr/libsnapshot/snapuserd.h
@@ -17,8 +17,10 @@
 #include <linux/types.h>
 #include <stdint.h>
 #include <stdlib.h>
+#include <sys/mman.h>
 
 #include <bitset>
+#include <condition_variable>
 #include <csignal>
 #include <cstring>
 #include <future>
@@ -29,6 +31,7 @@
 #include <string>
 #include <thread>
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
 
 #include <android-base/file.h>
@@ -56,6 +59,35 @@
  */
 static constexpr int NUM_THREADS_PER_PARTITION = 4;
 
+/*
+ * State transitions between worker threads and read-ahead
+ * threads.
+ *
+ * READ_AHEAD_BEGIN: Worker threads initiates the read-ahead
+ *                   thread to begin reading the copy operations
+ *                   for each bounded region.
+ *
+ * READ_AHEAD_IN_PROGRESS: When read ahead thread is in-flight
+ *                         and reading the copy operations.
+ *
+ * IO_IN_PROGRESS: Merge operation is in-progress by worker threads.
+ *
+ * IO_TERMINATED: When all the worker threads are done, request the
+ *                read-ahead thread to terminate
+ *
+ * READ_AHEAD_FAILURE: If there are any IO failures when read-ahead
+ *                     thread is reading from COW device.
+ *
+ * The transition of each states is described in snapuserd_readahead.cpp
+ */
+enum class READ_AHEAD_IO_TRANSITION {
+    READ_AHEAD_BEGIN,
+    READ_AHEAD_IN_PROGRESS,
+    IO_IN_PROGRESS,
+    IO_TERMINATED,
+    READ_AHEAD_FAILURE,
+};
+
 class BufferSink : public IByteSink {
   public:
     void Initialize(size_t size);
@@ -76,6 +108,47 @@
 
 class Snapuserd;
 
+class ReadAheadThread {
+  public:
+    ReadAheadThread(const std::string& cow_device, const std::string& backing_device,
+                    const std::string& misc_name, std::shared_ptr<Snapuserd> snapuserd);
+    bool RunThread();
+
+  private:
+    void InitializeIter();
+    bool IterDone();
+    void IterNext();
+    const CowOperation* GetIterOp();
+    void InitializeBuffer();
+
+    bool InitializeFds();
+    void CloseFds() {
+        cow_fd_ = {};
+        backing_store_fd_ = {};
+    }
+
+    bool ReadAheadIOStart();
+    void PrepareReadAhead(uint64_t* source_block, int* pending_ops, std::vector<uint64_t>& blocks);
+    bool ReconstructDataFromCow();
+    void CheckOverlap(const CowOperation* cow_op);
+
+    void* read_ahead_buffer_;
+    void* metadata_buffer_;
+    std::vector<const CowOperation*>::reverse_iterator read_ahead_iter_;
+    std::string cow_device_;
+    std::string backing_store_device_;
+    std::string misc_name_;
+
+    unique_fd cow_fd_;
+    unique_fd backing_store_fd_;
+
+    std::shared_ptr<Snapuserd> snapuserd_;
+
+    std::unordered_set<uint64_t> dest_blocks_;
+    std::unordered_set<uint64_t> source_blocks_;
+    bool overlap_;
+};
+
 class WorkerThread {
   public:
     WorkerThread(const std::string& cow_device, const std::string& backing_device,
@@ -116,12 +189,16 @@
     bool ProcessCopyOp(const CowOperation* cow_op);
     bool ProcessZeroOp();
 
+    bool ReadFromBaseDevice(const CowOperation* cow_op);
+    bool GetReadAheadPopulatedBuffer(const CowOperation* cow_op);
+
     // Merge related functions
     bool ProcessMergeComplete(chunk_t chunk, void* buffer);
     loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer,
                                int* unmerged_exceptions);
+
     int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
-                             int unmerged_exceptions);
+                             int unmerged_exceptions, bool* copy_op, bool* commit);
 
     sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
     chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
@@ -158,7 +235,10 @@
     bool CommitMerge(int num_merge_ops);
 
     void CloseFds() { cow_fd_ = {}; }
-    void FreeResources() { worker_threads_.clear(); }
+    void FreeResources() {
+        worker_threads_.clear();
+        read_ahead_thread_ = nullptr;
+    }
     size_t GetMetadataAreaSize() { return vec_.size(); }
     void* GetExceptionBuffer(size_t i) { return vec_[i].get(); }
 
@@ -173,16 +253,47 @@
         return p1.first < p2.first;
     }
 
-  private:
-    std::vector<std::unique_ptr<WorkerThread>> worker_threads_;
+    void UnmapBufferRegion();
+    bool MmapMetadata();
 
+    // Read-ahead related functions
+    std::vector<const CowOperation*>& GetReadAheadOpsVec() { return read_ahead_ops_; }
+    std::unordered_map<uint64_t, void*>& GetReadAheadMap() { return read_ahead_buffer_map_; }
+    void* GetMappedAddr() { return mapped_addr_; }
+    bool IsReadAheadFeaturePresent() { return read_ahead_feature_; }
+    void PrepareReadAhead();
+    void StartReadAhead();
+    void MergeCompleted();
+    bool ReadAheadIOCompleted(bool sync);
+    void ReadAheadIOFailed();
+    bool WaitForMergeToComplete();
+    bool GetReadAheadPopulatedBuffer(uint64_t block, void* buffer);
+    bool ReconstructDataFromCow() { return populate_data_from_cow_; }
+    void ReconstructDataFromCowFinish() { populate_data_from_cow_ = false; }
+    bool WaitForReadAheadToStart();
+
+    uint64_t GetBufferMetadataOffset();
+    size_t GetBufferMetadataSize();
+    size_t GetBufferDataOffset();
+    size_t GetBufferDataSize();
+
+    // Final block to be merged in a given read-ahead buffer region
+    void SetFinalBlockMerged(uint64_t x) { final_block_merged_ = x; }
+    uint64_t GetFinalBlockMerged() { return final_block_merged_; }
+    // Total number of blocks to be merged in a given read-ahead buffer region
+    void SetTotalRaBlocksMerged(int x) { total_ra_blocks_merged_ = x; }
+    int GetTotalRaBlocksMerged() { return total_ra_blocks_merged_; }
+
+  private:
     bool IsChunkIdMetadata(chunk_t chunk);
     chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);
 
+    bool GetRABuffer(std::unique_lock<std::mutex>* lock, uint64_t block, void* buffer);
     bool ReadMetadata();
     sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
     chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
     bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
+    struct BufferState* GetBufferState();
 
     std::string cow_device_;
     std::string backing_store_device_;
@@ -197,7 +308,6 @@
     std::unique_ptr<ICowOpIter> cowop_iter_;
     std::unique_ptr<ICowOpReverseIter> cowop_riter_;
     std::unique_ptr<CowReader> reader_;
-    std::unique_ptr<CowWriter> writer_;
 
     // Vector of disk exception which is a
     // mapping of old-chunk to new-chunk
@@ -208,6 +318,21 @@
     std::vector<std::pair<sector_t, const CowOperation*>> chunk_vec_;
 
     std::mutex lock_;
+    std::condition_variable cv;
+
+    void* mapped_addr_;
+    size_t total_mapped_addr_length_;
+
+    std::vector<std::unique_ptr<WorkerThread>> worker_threads_;
+    // Read-ahead related
+    std::unordered_map<uint64_t, void*> read_ahead_buffer_map_;
+    std::vector<const CowOperation*> read_ahead_ops_;
+    bool populate_data_from_cow_ = false;
+    bool read_ahead_feature_;
+    uint64_t final_block_merged_;
+    int total_ra_blocks_merged_ = 0;
+    READ_AHEAD_IO_TRANSITION io_state_;
+    std::unique_ptr<ReadAheadThread> read_ahead_thread_;
 
     bool merge_initiated_ = false;
     bool attached_ = false;
diff --git a/fs_mgr/libsnapshot/snapuserd_client.cpp b/fs_mgr/libsnapshot/snapuserd_client.cpp
index 16d02e4..41ab344 100644
--- a/fs_mgr/libsnapshot/snapuserd_client.cpp
+++ b/fs_mgr/libsnapshot/snapuserd_client.cpp
@@ -113,7 +113,7 @@
 
 bool SnapuserdClient::Sendmsg(const std::string& msg) {
     LOG(DEBUG) << "Sendmsg: msg " << msg << " sockfd: " << sockfd_;
-    ssize_t numBytesSent = TEMP_FAILURE_RETRY(send(sockfd_, msg.data(), msg.size(), 0));
+    ssize_t numBytesSent = TEMP_FAILURE_RETRY(send(sockfd_, msg.data(), msg.size(), MSG_NOSIGNAL));
     if (numBytesSent < 0) {
         PLOG(ERROR) << "Send failed";
         return false;
diff --git a/fs_mgr/libsnapshot/snapuserd_readahead.cpp b/fs_mgr/libsnapshot/snapuserd_readahead.cpp
new file mode 100644
index 0000000..09ee2f2
--- /dev/null
+++ b/fs_mgr/libsnapshot/snapuserd_readahead.cpp
@@ -0,0 +1,460 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "snapuserd.h"
+
+#include <csignal>
+#include <optional>
+#include <set>
+
+#include <libsnapshot/snapuserd_client.h>
+
+namespace android {
+namespace snapshot {
+
+using namespace android;
+using namespace android::dm;
+using android::base::unique_fd;
+
+#define SNAP_LOG(level) LOG(level) << misc_name_ << ": "
+#define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": "
+
+/*
+ * Merging a copy operation involves the following flow:
+ *
+ * 1: dm-snapshot layer requests merge for a 4k block. dm-user sends the request
+ *    to the daemon
+ * 2: daemon reads the source block
+ * 3: daemon copies the source data
+ * 4: IO completion sent back to dm-user (a switch from user space to kernel)
+ * 5: dm-snapshot merges the data to base device
+ * 6: dm-snapshot sends the merge-completion IO to dm-user
+ * 7: dm-user re-directs the merge completion IO to daemon (one more switch)
+ * 8: daemon updates the COW file about the completed merge request (a write syscall) and followed
+ * by a fysnc. 9: Send the IO completion back to dm-user
+ *
+ * The above sequence is a significant overhead especially when merging one 4k
+ * block at a time.
+ *
+ * Read-ahead layer will optimize the above path by reading the data from base
+ * device in the background so that merging thread can retrieve the data from
+ * the read-ahead cache. Additionally, syncing of merged data is deferred to
+ * read-ahead thread threadby the IO path is not bottlenecked.
+ *
+ * We create a scratch space of 2MB to store the read-ahead data in the COW
+ * device.
+ *
+ *      +-----------------------+
+ *      |     Header (fixed)    |
+ *      +-----------------------+
+ *      |    Scratch space      |  <-- 2MB
+ *      +-----------------------+
+ *
+ *      Scratch space is as follows:
+ *
+ *      +-----------------------+
+ *      |       Metadata        | <- 4k page
+ *      +-----------------------+
+ *      |       Metadata        | <- 4k page
+ *      +-----------------------+
+ *      |                       |
+ *      |    Read-ahead data    |
+ *      |                       |
+ *      +-----------------------+
+ *
+ * State transitions and communication between read-ahead thread and worker
+ * thread during merge:
+ * =====================================================================
+ *
+ *   Worker Threads                                 Read-Ahead thread
+ *   ------------------------------------------------------------------
+ *
+ *      |
+ *      |
+ *  --> -----------------READ_AHEAD_BEGIN------------->|
+ *  |   |                                              | READ_AHEAD_IN_PROGRESS
+ *  |  WAIT                                            |
+ *  |   |                                              |
+ *  |   |<-----------------IO_IN_PROGRESS---------------
+ *  |   |                                              |
+ *  |   | IO_IN_PRGRESS                               WAIT
+ *  |   |                                              |
+ *  |<--|                                              |
+ *      |                                              |
+ *      ------------------IO_TERMINATED--------------->|
+ *                                                     END
+ *
+ *
+ * ===================================================================
+ *
+ * Example:
+ *
+ * We have 6 copy operations to be executed in OTA and there is a overlap. Update-engine
+ * will write to COW file as follows:
+ *
+ * Op-1: 20 -> 23
+ * Op-2: 19 -> 22
+ * Op-3: 18 -> 21
+ * Op-4: 17 -> 20
+ * Op-5: 16 -> 19
+ * Op-6: 15 -> 18
+ *
+ * Read-ahead thread will read all the 6 source blocks and store the data in the
+ * scratch space. Metadata will contain the destination block numbers. Thus,
+ * scratch space will look something like this:
+ *
+ * +--------------+
+ * | Block   23   |
+ * | offset - 1   |
+ * +--------------+
+ * | Block   22   |
+ * | offset - 2   |
+ * +--------------+
+ * | Block   21   |
+ * | offset - 3   |
+ * +--------------+
+ *    ...
+ *    ...
+ * +--------------+
+ * | Data-Block 20| <-- offset - 1
+ * +--------------+
+ * | Data-Block 19| <-- offset - 2
+ * +--------------+
+ * | Data-Block 18| <-- offset - 3
+ * +--------------+
+ *     ...
+ *     ...
+ *
+ * ====================================================================
+ * IO Path:
+ *
+ * Read-ahead will serve the data to worker threads during merge only
+ * after metadata and data are persisted to the scratch space. Worker
+ * threads during merge will always retrieve the data from cache; if the
+ * cache is not populated, it will wait for the read-ahead thread to finish.
+ * Furthermore, the number of operations merged will by synced to the header
+ * only when all the blocks in the read-ahead cache are merged. In the above
+ * case, when all 6 operations are merged, COW Header is updated with
+ * num_merge_ops = 6.
+ *
+ * Merge resume after crash:
+ *
+ * Let's say we have a crash after 5 operations are merged. i.e. after
+ * Op-5: 16->19 is completed but before the Op-6 is merged. Thus, COW Header
+ * num_merge_ops will be 0 as the all the ops were not merged yet. During next
+ * reboot, read-ahead thread will re-construct the data in-memory from the
+ * scratch space; when merge resumes, Op-1 will be re-exectued. However,
+ * data will be served from read-ahead cache safely even though, block 20
+ * was over-written by Op-4.
+ *
+ */
+
+ReadAheadThread::ReadAheadThread(const std::string& cow_device, const std::string& backing_device,
+                                 const std::string& misc_name,
+                                 std::shared_ptr<Snapuserd> snapuserd) {
+    cow_device_ = cow_device;
+    backing_store_device_ = backing_device;
+    misc_name_ = misc_name;
+    snapuserd_ = snapuserd;
+}
+
+void ReadAheadThread::CheckOverlap(const CowOperation* cow_op) {
+    if (dest_blocks_.count(cow_op->new_block) || source_blocks_.count(cow_op->source)) {
+        overlap_ = true;
+    }
+
+    dest_blocks_.insert(cow_op->source);
+    source_blocks_.insert(cow_op->new_block);
+}
+
+void ReadAheadThread::PrepareReadAhead(uint64_t* source_block, int* pending_ops,
+                                       std::vector<uint64_t>& blocks) {
+    int num_ops = *pending_ops;
+    int nr_consecutive = 0;
+
+    if (!IterDone() && num_ops) {
+        // Get the first block
+        const CowOperation* cow_op = GetIterOp();
+        *source_block = cow_op->source;
+        IterNext();
+        num_ops -= 1;
+        nr_consecutive = 1;
+        blocks.push_back(cow_op->new_block);
+
+        if (!overlap_) {
+            CheckOverlap(cow_op);
+        }
+
+        /*
+         * Find number of consecutive blocks working backwards.
+         */
+        while (!IterDone() && num_ops) {
+            const CowOperation* op = GetIterOp();
+            if (op->source != (*source_block - nr_consecutive)) {
+                break;
+            }
+            nr_consecutive += 1;
+            num_ops -= 1;
+            blocks.push_back(op->new_block);
+            IterNext();
+
+            if (!overlap_) {
+                CheckOverlap(op);
+            }
+        }
+    }
+}
+
+bool ReadAheadThread::ReconstructDataFromCow() {
+    std::unordered_map<uint64_t, void*>& read_ahead_buffer_map = snapuserd_->GetReadAheadMap();
+    read_ahead_buffer_map.clear();
+    loff_t metadata_offset = 0;
+    loff_t start_data_offset = snapuserd_->GetBufferDataOffset();
+    int num_ops = 0;
+    int total_blocks_merged = 0;
+
+    while (true) {
+        struct ScratchMetadata* bm = reinterpret_cast<struct ScratchMetadata*>(
+                (char*)metadata_buffer_ + metadata_offset);
+
+        // Done reading metadata
+        if (bm->new_block == 0 && bm->file_offset == 0) {
+            break;
+        }
+
+        loff_t buffer_offset = bm->file_offset - start_data_offset;
+        void* bufptr = static_cast<void*>((char*)read_ahead_buffer_ + buffer_offset);
+        read_ahead_buffer_map[bm->new_block] = bufptr;
+        num_ops += 1;
+        total_blocks_merged += 1;
+
+        metadata_offset += sizeof(struct ScratchMetadata);
+    }
+
+    // We are done re-constructing the mapping; however, we need to make sure
+    // all the COW operations to-be merged are present in the re-constructed
+    // mapping.
+    while (!IterDone()) {
+        const CowOperation* op = GetIterOp();
+        if (read_ahead_buffer_map.find(op->new_block) != read_ahead_buffer_map.end()) {
+            num_ops -= 1;
+            snapuserd_->SetFinalBlockMerged(op->new_block);
+            IterNext();
+        } else {
+            // Verify that we have covered all the ops which were re-constructed
+            // from COW device - These are the ops which are being
+            // re-constructed after crash.
+            CHECK(num_ops == 0);
+            break;
+        }
+    }
+
+    snapuserd_->SetTotalRaBlocksMerged(total_blocks_merged);
+
+    snapuserd_->ReconstructDataFromCowFinish();
+
+    if (!snapuserd_->ReadAheadIOCompleted(true)) {
+        SNAP_LOG(ERROR) << "ReadAheadIOCompleted failed...";
+        snapuserd_->ReadAheadIOFailed();
+        return false;
+    }
+
+    SNAP_LOG(INFO) << "ReconstructDataFromCow success";
+    return true;
+}
+
+bool ReadAheadThread::ReadAheadIOStart() {
+    // Check if the data has to be constructed from the COW file.
+    // This will be true only once during boot up after a crash
+    // during merge.
+    if (snapuserd_->ReconstructDataFromCow()) {
+        return ReconstructDataFromCow();
+    }
+
+    std::unordered_map<uint64_t, void*>& read_ahead_buffer_map = snapuserd_->GetReadAheadMap();
+    read_ahead_buffer_map.clear();
+
+    int num_ops = (snapuserd_->GetBufferDataSize()) / BLOCK_SZ;
+    loff_t metadata_offset = 0;
+
+    struct ScratchMetadata* bm =
+            reinterpret_cast<struct ScratchMetadata*>((char*)metadata_buffer_ + metadata_offset);
+
+    bm->new_block = 0;
+    bm->file_offset = 0;
+
+    std::vector<uint64_t> blocks;
+
+    loff_t buffer_offset = 0;
+    loff_t offset = 0;
+    loff_t file_offset = snapuserd_->GetBufferDataOffset();
+    int total_blocks_merged = 0;
+    overlap_ = false;
+    dest_blocks_.clear();
+    source_blocks_.clear();
+
+    while (true) {
+        uint64_t source_block;
+        int linear_blocks;
+
+        PrepareReadAhead(&source_block, &num_ops, blocks);
+        linear_blocks = blocks.size();
+        if (linear_blocks == 0) {
+            // No more blocks to read
+            SNAP_LOG(DEBUG) << " Read-ahead completed....";
+            break;
+        }
+
+        // Get the first block in the consecutive set of blocks
+        source_block = source_block + 1 - linear_blocks;
+        size_t io_size = (linear_blocks * BLOCK_SZ);
+        num_ops -= linear_blocks;
+        total_blocks_merged += linear_blocks;
+
+        // Mark the block number as the one which will
+        // be the final block to be merged in this entire region.
+        // Read-ahead thread will get
+        // notified when this block is merged to make
+        // forward progress
+        snapuserd_->SetFinalBlockMerged(blocks.back());
+
+        while (linear_blocks) {
+            uint64_t new_block = blocks.back();
+            blocks.pop_back();
+            // Assign the mapping
+            void* bufptr = static_cast<void*>((char*)read_ahead_buffer_ + offset);
+            read_ahead_buffer_map[new_block] = bufptr;
+            offset += BLOCK_SZ;
+
+            bm = reinterpret_cast<struct ScratchMetadata*>((char*)metadata_buffer_ +
+                                                           metadata_offset);
+            bm->new_block = new_block;
+            bm->file_offset = file_offset;
+
+            metadata_offset += sizeof(struct ScratchMetadata);
+            file_offset += BLOCK_SZ;
+
+            linear_blocks -= 1;
+        }
+
+        // Read from the base device consecutive set of blocks in one shot
+        if (!android::base::ReadFullyAtOffset(backing_store_fd_,
+                                              (char*)read_ahead_buffer_ + buffer_offset, io_size,
+                                              source_block * BLOCK_SZ)) {
+            SNAP_PLOG(ERROR) << "Copy-op failed. Read from backing store: " << backing_store_device_
+                             << "at block :" << source_block << " buffer_offset : " << buffer_offset
+                             << " io_size : " << io_size << " buf-addr : " << read_ahead_buffer_;
+
+            snapuserd_->ReadAheadIOFailed();
+            return false;
+        }
+
+        // This is important - explicitly set the contents to zero. This is used
+        // when re-constructing the data after crash. This indicates end of
+        // reading metadata contents when re-constructing the data
+        bm = reinterpret_cast<struct ScratchMetadata*>((char*)metadata_buffer_ + metadata_offset);
+        bm->new_block = 0;
+        bm->file_offset = 0;
+
+        buffer_offset += io_size;
+        CHECK(offset == buffer_offset);
+        CHECK((file_offset - snapuserd_->GetBufferDataOffset()) == offset);
+    }
+
+    snapuserd_->SetTotalRaBlocksMerged(total_blocks_merged);
+
+    // Flush the data only if we have a overlapping blocks in the region
+    if (!snapuserd_->ReadAheadIOCompleted(overlap_)) {
+        SNAP_LOG(ERROR) << "ReadAheadIOCompleted failed...";
+        snapuserd_->ReadAheadIOFailed();
+        return false;
+    }
+
+    return true;
+}
+
+bool ReadAheadThread::RunThread() {
+    if (!InitializeFds()) {
+        return false;
+    }
+
+    InitializeIter();
+    InitializeBuffer();
+
+    while (!IterDone()) {
+        if (!ReadAheadIOStart()) {
+            return false;
+        }
+
+        bool status = snapuserd_->WaitForMergeToComplete();
+
+        if (status && !snapuserd_->CommitMerge(snapuserd_->GetTotalRaBlocksMerged())) {
+            return false;
+        }
+
+        if (!status) break;
+    }
+
+    CloseFds();
+    SNAP_LOG(INFO) << " ReadAhead thread terminating....";
+    return true;
+}
+
+// Initialization
+bool ReadAheadThread::InitializeFds() {
+    backing_store_fd_.reset(open(backing_store_device_.c_str(), O_RDONLY));
+    if (backing_store_fd_ < 0) {
+        SNAP_PLOG(ERROR) << "Open Failed: " << backing_store_device_;
+        return false;
+    }
+
+    cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
+    if (cow_fd_ < 0) {
+        SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_;
+        return false;
+    }
+
+    return true;
+}
+
+void ReadAheadThread::InitializeIter() {
+    std::vector<const CowOperation*>& read_ahead_ops = snapuserd_->GetReadAheadOpsVec();
+    read_ahead_iter_ = read_ahead_ops.rbegin();
+}
+
+bool ReadAheadThread::IterDone() {
+    std::vector<const CowOperation*>& read_ahead_ops = snapuserd_->GetReadAheadOpsVec();
+    return read_ahead_iter_ == read_ahead_ops.rend();
+}
+
+void ReadAheadThread::IterNext() {
+    read_ahead_iter_++;
+}
+
+const CowOperation* ReadAheadThread::GetIterOp() {
+    return *read_ahead_iter_;
+}
+
+void ReadAheadThread::InitializeBuffer() {
+    void* mapped_addr = snapuserd_->GetMappedAddr();
+    // Map the scratch space region into memory
+    metadata_buffer_ =
+            static_cast<void*>((char*)mapped_addr + snapuserd_->GetBufferMetadataOffset());
+    read_ahead_buffer_ = static_cast<void*>((char*)mapped_addr + snapuserd_->GetBufferDataOffset());
+}
+
+}  // namespace snapshot
+}  // namespace android
diff --git a/fs_mgr/libsnapshot/snapuserd_server.cpp b/fs_mgr/libsnapshot/snapuserd_server.cpp
index 64332d1..ff8a259 100644
--- a/fs_mgr/libsnapshot/snapuserd_server.cpp
+++ b/fs_mgr/libsnapshot/snapuserd_server.cpp
@@ -81,7 +81,7 @@
     : snapuserd_(snapuserd), misc_name_(snapuserd_->GetMiscName()) {}
 
 bool SnapuserdServer::Sendmsg(android::base::borrowed_fd fd, const std::string& msg) {
-    ssize_t ret = TEMP_FAILURE_RETRY(send(fd.get(), msg.data(), msg.size(), 0));
+    ssize_t ret = TEMP_FAILURE_RETRY(send(fd.get(), msg.data(), msg.size(), MSG_NOSIGNAL));
     if (ret < 0) {
         PLOG(ERROR) << "Snapuserd:server: send() failed";
         return false;
@@ -209,10 +209,11 @@
     }
 
     handler->snapuserd()->CloseFds();
+    handler->snapuserd()->CheckMergeCompletionStatus();
+    handler->snapuserd()->UnmapBufferRegion();
 
     auto misc_name = handler->misc_name();
     LOG(INFO) << "Handler thread about to exit: " << misc_name;
-    handler->snapuserd()->CheckMergeCompletionStatus();
 
     {
         std::lock_guard<std::mutex> lock(lock_);
diff --git a/fs_mgr/libsnapshot/snapuserd_worker.cpp b/fs_mgr/libsnapshot/snapuserd_worker.cpp
index 1002569..9f42ab8 100644
--- a/fs_mgr/libsnapshot/snapuserd_worker.cpp
+++ b/fs_mgr/libsnapshot/snapuserd_worker.cpp
@@ -135,14 +135,11 @@
     return true;
 }
 
-// Start the copy operation. This will read the backing
-// block device which is represented by cow_op->source.
-bool WorkerThread::ProcessCopyOp(const CowOperation* cow_op) {
+bool WorkerThread::ReadFromBaseDevice(const CowOperation* cow_op) {
     void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
     CHECK(buffer != nullptr);
-
-    // Issue a single 4K IO. However, this can be optimized
-    // if the successive blocks are contiguous.
+    SNAP_LOG(DEBUG) << " ReadFromBaseDevice...: new-block: " << cow_op->new_block
+                    << " Source: " << cow_op->source;
     if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SZ,
                                           cow_op->source * BLOCK_SZ)) {
         SNAP_PLOG(ERROR) << "Copy-op failed. Read from backing store: " << backing_store_device_
@@ -153,6 +150,31 @@
     return true;
 }
 
+bool WorkerThread::GetReadAheadPopulatedBuffer(const CowOperation* cow_op) {
+    void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
+    CHECK(buffer != nullptr);
+
+    if (!snapuserd_->GetReadAheadPopulatedBuffer(cow_op->new_block, buffer)) {
+        return false;
+    }
+
+    return true;
+}
+
+// Start the copy operation. This will read the backing
+// block device which is represented by cow_op->source.
+bool WorkerThread::ProcessCopyOp(const CowOperation* cow_op) {
+    if (!GetReadAheadPopulatedBuffer(cow_op)) {
+        SNAP_LOG(DEBUG) << " GetReadAheadPopulatedBuffer failed..."
+                        << " new_block: " << cow_op->new_block;
+        if (!ReadFromBaseDevice(cow_op)) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
 bool WorkerThread::ProcessZeroOp() {
     // Zero out the entire block
     void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
@@ -386,8 +408,10 @@
 }
 
 int WorkerThread::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
-                                       int unmerged_exceptions) {
+                                       int unmerged_exceptions, bool* copy_op, bool* commit) {
     int merged_ops_cur_iter = 0;
+    std::unordered_map<uint64_t, void*>& read_ahead_buffer_map = snapuserd_->GetReadAheadMap();
+    *copy_op = false;
     std::vector<std::pair<sector_t, const CowOperation*>>& chunk_vec = snapuserd_->GetChunkVec();
 
     // Find the operations which are merged in this cycle.
@@ -411,6 +435,23 @@
             const CowOperation* cow_op = it->second;
 
             CHECK(cow_op != nullptr);
+            if (snapuserd_->IsReadAheadFeaturePresent() && cow_op->type == kCowCopyOp) {
+                *copy_op = true;
+                // Every single copy operation has to come from read-ahead
+                // cache.
+                if (read_ahead_buffer_map.find(cow_op->new_block) == read_ahead_buffer_map.end()) {
+                    SNAP_LOG(ERROR)
+                            << " Block: " << cow_op->new_block << " not found in read-ahead cache"
+                            << " Source: " << cow_op->source;
+                    return -1;
+                }
+                // If this is a final block merged in the read-ahead buffer
+                // region, notify the read-ahead thread to make forward
+                // progress
+                if (cow_op->new_block == snapuserd_->GetFinalBlockMerged()) {
+                    *commit = true;
+                }
+            }
 
             CHECK(cow_op->new_block == cow_de->old_chunk);
             // zero out to indicate that operation is merged.
@@ -442,6 +483,8 @@
 bool WorkerThread::ProcessMergeComplete(chunk_t chunk, void* buffer) {
     uint32_t stride = exceptions_per_area_ + 1;
     const std::vector<std::unique_ptr<uint8_t[]>>& vec = snapuserd_->GetMetadataVec();
+    bool copy_op = false;
+    bool commit = false;
 
     // ChunkID to vector index
     lldiv_t divresult = lldiv(chunk, stride);
@@ -452,13 +495,24 @@
     int unmerged_exceptions = 0;
     loff_t offset = GetMergeStartOffset(buffer, vec[divresult.quot].get(), &unmerged_exceptions);
 
-    int merged_ops_cur_iter =
-            GetNumberOfMergedOps(buffer, vec[divresult.quot].get(), offset, unmerged_exceptions);
+    int merged_ops_cur_iter = GetNumberOfMergedOps(buffer, vec[divresult.quot].get(), offset,
+                                                   unmerged_exceptions, &copy_op, &commit);
 
     // There should be at least one operation merged in this cycle
     CHECK(merged_ops_cur_iter > 0);
-    if (!snapuserd_->CommitMerge(merged_ops_cur_iter)) {
-        return false;
+
+    if (copy_op) {
+        if (commit) {
+            // Push the flushing logic to read-ahead thread so that merge thread
+            // can make forward progress. Sync will happen in the background
+            snapuserd_->StartReadAhead();
+        }
+    } else {
+        // Non-copy ops and all ops in older COW format
+        if (!snapuserd_->CommitMerge(merged_ops_cur_iter)) {
+            SNAP_LOG(ERROR) << "CommitMerge failed...";
+            return false;
+        }
     }
 
     SNAP_LOG(DEBUG) << "Merge success: " << merged_ops_cur_iter << "chunk: " << chunk;
@@ -613,12 +667,21 @@
             }
         }
 
+        // Just return the header if it is an error
+        if (header->type == DM_USER_RESP_ERROR) {
+            ret = 0;
+        }
+
         // Daemon will not be terminated if there is any error. We will
         // just send the error back to dm-user.
         if (!WriteDmUserPayload(ret)) {
             return false;
         }
 
+        if (header->type == DM_USER_RESP_ERROR) {
+            break;
+        }
+
         remaining_size -= ret;
         offset += ret;
     } while (remaining_size > 0);
diff --git a/fs_mgr/libsnapshot/update_engine/update_metadata.proto b/fs_mgr/libsnapshot/update_engine/update_metadata.proto
index f31ee31..69d72e1 100644
--- a/fs_mgr/libsnapshot/update_engine/update_metadata.proto
+++ b/fs_mgr/libsnapshot/update_engine/update_metadata.proto
@@ -75,6 +75,7 @@
     repeated DynamicPartitionGroup groups = 1;
     optional bool vabc_enabled = 3;
     optional string vabc_compression_param = 4;
+    optional uint32 cow_version = 5;
 }
 
 message DeltaArchiveManifest {
diff --git a/fs_mgr/tests/fs_mgr_test.cpp b/fs_mgr/tests/fs_mgr_test.cpp
index 3ef53ae..83174ef 100644
--- a/fs_mgr/tests/fs_mgr_test.cpp
+++ b/fs_mgr/tests/fs_mgr_test.cpp
@@ -184,6 +184,36 @@
         {"androidboot.space", "sha256 5248 androidboot.nospace = nope"},
 };
 
+bool CompareFlags(FstabEntry::FsMgrFlags& lhs, FstabEntry::FsMgrFlags& rhs) {
+    // clang-format off
+    return lhs.wait == rhs.wait &&
+           lhs.check == rhs.check &&
+           lhs.crypt == rhs.crypt &&
+           lhs.nonremovable == rhs.nonremovable &&
+           lhs.vold_managed == rhs.vold_managed &&
+           lhs.recovery_only == rhs.recovery_only &&
+           lhs.verify == rhs.verify &&
+           lhs.force_crypt == rhs.force_crypt &&
+           lhs.no_emulated_sd == rhs.no_emulated_sd &&
+           lhs.no_trim == rhs.no_trim &&
+           lhs.file_encryption == rhs.file_encryption &&
+           lhs.formattable == rhs.formattable &&
+           lhs.slot_select == rhs.slot_select &&
+           lhs.force_fde_or_fbe == rhs.force_fde_or_fbe &&
+           lhs.late_mount == rhs.late_mount &&
+           lhs.no_fail == rhs.no_fail &&
+           lhs.verify_at_boot == rhs.verify_at_boot &&
+           lhs.quota == rhs.quota &&
+           lhs.avb == rhs.avb &&
+           lhs.logical == rhs.logical &&
+           lhs.checkpoint_blk == rhs.checkpoint_blk &&
+           lhs.checkpoint_fs == rhs.checkpoint_fs &&
+           lhs.first_stage_mount == rhs.first_stage_mount &&
+           lhs.slot_select_other == rhs.slot_select_other &&
+           lhs.fs_verity == rhs.fs_verity;
+    // clang-format on
+}
+
 }  // namespace
 
 TEST(fs_mgr, fs_mgr_parse_cmdline) {
@@ -291,8 +321,7 @@
     EXPECT_EQ(i, fstab.size());
 }
 
-// TODO(124837435): enable it later when it can pass TreeHugger.
-TEST(fs_mgr, DISABLED_ReadFstabFromFile_MountOptions) {
+TEST(fs_mgr, ReadFstabFromFile_MountOptions) {
     TemporaryFile tf;
     ASSERT_TRUE(tf.fd != -1);
     std::string fstab_contents = R"fs(
@@ -316,88 +345,69 @@
 
     Fstab fstab;
     EXPECT_TRUE(ReadFstabFromFile(tf.path, &fstab));
-    ASSERT_EQ(11U, fstab.size());
+    ASSERT_LE(11U, fstab.size());
 
-    EXPECT_EQ("/", fstab[0].mount_point);
-    EXPECT_EQ(static_cast<unsigned long>(MS_RDONLY), fstab[0].flags);
-    EXPECT_EQ("barrier=1", fstab[0].fs_options);
+    FstabEntry* entry = GetEntryForMountPoint(&fstab, "/");
+    ASSERT_NE(nullptr, entry);
+    EXPECT_EQ(static_cast<unsigned long>(MS_RDONLY), entry->flags);
+    EXPECT_EQ("barrier=1", entry->fs_options);
 
-    EXPECT_EQ("/metadata", fstab[1].mount_point);
-    EXPECT_EQ(static_cast<unsigned long>(MS_NOATIME | MS_NOSUID | MS_NODEV), fstab[1].flags);
-    EXPECT_EQ("discard", fstab[1].fs_options);
+    entry = GetEntryForMountPoint(&fstab, "/metadata");
+    ASSERT_NE(nullptr, entry);
+    EXPECT_EQ(static_cast<unsigned long>(MS_NOATIME | MS_NOSUID | MS_NODEV), entry->flags);
+    EXPECT_EQ("discard", entry->fs_options);
 
-    EXPECT_EQ("/data", fstab[2].mount_point);
-    EXPECT_EQ(static_cast<unsigned long>(MS_NOATIME | MS_NOSUID | MS_NODEV), fstab[2].flags);
-    EXPECT_EQ("discard,reserve_root=32768,resgid=1065,fsync_mode=nobarrier", fstab[2].fs_options);
+    entry = GetEntryForMountPoint(&fstab, "/data");
+    ASSERT_NE(nullptr, entry);
+    EXPECT_EQ(static_cast<unsigned long>(MS_NOATIME | MS_NOSUID | MS_NODEV), entry->flags);
+    EXPECT_EQ("discard,reserve_root=32768,resgid=1065,fsync_mode=nobarrier", entry->fs_options);
 
-    EXPECT_EQ("/misc", fstab[3].mount_point);
-    EXPECT_EQ(0U, fstab[3].flags);
-    EXPECT_EQ("", fstab[3].fs_options);
+    entry = GetEntryForMountPoint(&fstab, "/misc");
+    ASSERT_NE(nullptr, entry);
+    EXPECT_EQ(0U, entry->flags);
+    EXPECT_EQ("", entry->fs_options);
 
-    EXPECT_EQ("/vendor/firmware_mnt", fstab[4].mount_point);
-    EXPECT_EQ(static_cast<unsigned long>(MS_RDONLY), fstab[4].flags);
+    entry = GetEntryForMountPoint(&fstab, "/vendor/firmware_mnt");
+    ASSERT_NE(nullptr, entry);
+    EXPECT_EQ(static_cast<unsigned long>(MS_RDONLY), entry->flags);
     EXPECT_EQ(
             "shortname=lower,uid=1000,gid=1000,dmask=227,fmask=337,"
             "context=u:object_r:firmware_file:s0",
-            fstab[4].fs_options);
+            entry->fs_options);
 
-    EXPECT_EQ("auto", fstab[5].mount_point);
-    EXPECT_EQ(0U, fstab[5].flags);
-    EXPECT_EQ("", fstab[5].fs_options);
+    entry = GetEntryForMountPoint(&fstab, "auto");
+    ASSERT_NE(nullptr, entry);
+    EXPECT_EQ(0U, entry->flags);
+    EXPECT_EQ("", entry->fs_options);
 
-    EXPECT_EQ("none", fstab[6].mount_point);
-    EXPECT_EQ(0U, fstab[6].flags);
-    EXPECT_EQ("", fstab[6].fs_options);
+    entry = GetEntryForMountPoint(&fstab, "none");
+    ASSERT_NE(nullptr, entry);
+    EXPECT_EQ(0U, entry->flags);
+    EXPECT_EQ("", entry->fs_options);
 
-    EXPECT_EQ("none2", fstab[7].mount_point);
-    EXPECT_EQ(static_cast<unsigned long>(MS_NODIRATIME | MS_REMOUNT | MS_BIND), fstab[7].flags);
-    EXPECT_EQ("", fstab[7].fs_options);
+    entry = GetEntryForMountPoint(&fstab, "none2");
+    ASSERT_NE(nullptr, entry);
+    EXPECT_EQ(static_cast<unsigned long>(MS_NODIRATIME | MS_REMOUNT | MS_BIND), entry->flags);
+    EXPECT_EQ("", entry->fs_options);
 
-    EXPECT_EQ("none3", fstab[8].mount_point);
-    EXPECT_EQ(static_cast<unsigned long>(MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE), fstab[8].flags);
-    EXPECT_EQ("", fstab[8].fs_options);
+    entry = GetEntryForMountPoint(&fstab, "none3");
+    ASSERT_NE(nullptr, entry);
+    EXPECT_EQ(static_cast<unsigned long>(MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE), entry->flags);
+    EXPECT_EQ("", entry->fs_options);
 
-    EXPECT_EQ("none4", fstab[9].mount_point);
-    EXPECT_EQ(static_cast<unsigned long>(MS_NOEXEC | MS_SHARED | MS_REC), fstab[9].flags);
-    EXPECT_EQ("", fstab[9].fs_options);
+    entry = GetEntryForMountPoint(&fstab, "none4");
+    ASSERT_NE(nullptr, entry);
+    EXPECT_EQ(static_cast<unsigned long>(MS_NOEXEC | MS_SHARED | MS_REC), entry->flags);
+    EXPECT_EQ("", entry->fs_options);
 
-    EXPECT_EQ("none5", fstab[10].mount_point);
-    EXPECT_EQ(0U, fstab[10].flags);  // rw is the same as defaults
-    EXPECT_EQ("", fstab[10].fs_options);
+    entry = GetEntryForMountPoint(&fstab, "none5");
+    ASSERT_NE(nullptr, entry);
+    // rw is the default.
+    EXPECT_EQ(0U, entry->flags);
+    EXPECT_EQ("", entry->fs_options);
 }
 
-static bool CompareFlags(FstabEntry::FsMgrFlags& lhs, FstabEntry::FsMgrFlags& rhs) {
-    // clang-format off
-    return lhs.wait == rhs.wait &&
-           lhs.check == rhs.check &&
-           lhs.crypt == rhs.crypt &&
-           lhs.nonremovable == rhs.nonremovable &&
-           lhs.vold_managed == rhs.vold_managed &&
-           lhs.recovery_only == rhs.recovery_only &&
-           lhs.verify == rhs.verify &&
-           lhs.force_crypt == rhs.force_crypt &&
-           lhs.no_emulated_sd == rhs.no_emulated_sd &&
-           lhs.no_trim == rhs.no_trim &&
-           lhs.file_encryption == rhs.file_encryption &&
-           lhs.formattable == rhs.formattable &&
-           lhs.slot_select == rhs.slot_select &&
-           lhs.force_fde_or_fbe == rhs.force_fde_or_fbe &&
-           lhs.late_mount == rhs.late_mount &&
-           lhs.no_fail == rhs.no_fail &&
-           lhs.verify_at_boot == rhs.verify_at_boot &&
-           lhs.quota == rhs.quota &&
-           lhs.avb == rhs.avb &&
-           lhs.logical == rhs.logical &&
-           lhs.checkpoint_blk == rhs.checkpoint_blk &&
-           lhs.checkpoint_fs == rhs.checkpoint_fs &&
-           lhs.first_stage_mount == rhs.first_stage_mount &&
-           lhs.slot_select_other == rhs.slot_select_other &&
-           lhs.fs_verity == rhs.fs_verity;
-    // clang-format on
-}
-
-// TODO(124837435): enable it later when it can pass TreeHugger.
-TEST(fs_mgr, DISABLED_ReadFstabFromFile_FsMgrFlags) {
+TEST(fs_mgr, ReadFstabFromFile_FsMgrFlags) {
     TemporaryFile tf;
     ASSERT_TRUE(tf.fd != -1);
     std::string fstab_contents = R"fs(
@@ -412,10 +422,10 @@
 
     Fstab fstab;
     EXPECT_TRUE(ReadFstabFromFile(tf.path, &fstab));
-    ASSERT_EQ(6U, fstab.size());
+    ASSERT_LE(6U, fstab.size());
 
-    auto entry = fstab.begin();
-    EXPECT_EQ("none0", entry->mount_point);
+    FstabEntry* entry = GetEntryForMountPoint(&fstab, "none0");
+    ASSERT_NE(nullptr, entry);
     {
         FstabEntry::FsMgrFlags flags = {};
         flags.wait = true;
@@ -426,9 +436,9 @@
         flags.verify = true;
         EXPECT_TRUE(CompareFlags(flags, entry->fs_mgr_flags));
     }
-    entry++;
 
-    EXPECT_EQ("none1", entry->mount_point);
+    entry = GetEntryForMountPoint(&fstab, "none1");
+    ASSERT_NE(nullptr, entry);
     {
         FstabEntry::FsMgrFlags flags = {};
         flags.avb = true;
@@ -439,9 +449,9 @@
         flags.no_fail = true;
         EXPECT_TRUE(CompareFlags(flags, entry->fs_mgr_flags));
     }
-    entry++;
 
-    EXPECT_EQ("none2", entry->mount_point);
+    entry = GetEntryForMountPoint(&fstab, "none2");
+    ASSERT_NE(nullptr, entry);
     {
         FstabEntry::FsMgrFlags flags = {};
         flags.first_stage_mount = true;
@@ -451,25 +461,25 @@
         flags.slot_select_other = true;
         EXPECT_TRUE(CompareFlags(flags, entry->fs_mgr_flags));
     }
-    entry++;
 
-    EXPECT_EQ("none3", entry->mount_point);
+    entry = GetEntryForMountPoint(&fstab, "none3");
+    ASSERT_NE(nullptr, entry);
     {
         FstabEntry::FsMgrFlags flags = {};
         flags.checkpoint_blk = true;
         EXPECT_TRUE(CompareFlags(flags, entry->fs_mgr_flags));
     }
-    entry++;
 
-    EXPECT_EQ("none4", entry->mount_point);
+    entry = GetEntryForMountPoint(&fstab, "none4");
+    ASSERT_NE(nullptr, entry);
     {
         FstabEntry::FsMgrFlags flags = {};
         flags.checkpoint_fs = true;
         EXPECT_TRUE(CompareFlags(flags, entry->fs_mgr_flags));
     }
-    entry++;
 
-    EXPECT_EQ("none5", entry->mount_point);
+    entry = GetEntryForMountPoint(&fstab, "none5");
+    ASSERT_NE(nullptr, entry);
     {
         FstabEntry::FsMgrFlags flags = {};
         EXPECT_TRUE(CompareFlags(flags, entry->fs_mgr_flags));
diff --git a/init/README.md b/init/README.md
index 4a262c9..75dc328 100644
--- a/init/README.md
+++ b/init/README.md
@@ -277,6 +277,8 @@
   CLD_EXITED or an status other than '0', reboot the system with the target specified in
   _target_. _target_ takes the same format as the parameter to sys.powerctl. This is particularly
   intended to be used with the `exec_start` builtin for any must-have checks during boot.
+  A service being stopped by init (e.g. using the `stop` or `class_reset` commands) is not
+  considered a failure for the purpose of this setting.
 
 `restart_period <seconds>`
 > If a non-oneshot service exits, it will be restarted at its start time plus
diff --git a/init/README.ueventd.md b/init/README.ueventd.md
index 3ffca88..d22f68f 100644
--- a/init/README.ueventd.md
+++ b/init/README.ueventd.md
@@ -130,6 +130,10 @@
 Will launch `/vendor/bin/led_coeffs.bin` as the system user instead of serving the default firmware
 for `/devices/leds/red/firmware/coeffs.bin`.
 
+The `devpath` argument may include asterisks (`*`) to match multiple paths. For example, the string
+`/dev/*/red` will match `/dev/leds/red` as well as `/dev/lights/red`. The pattern matching follows
+the rules of the fnmatch() function.
+
 Ueventd will provide the uevent `DEVPATH` and `FIRMWARE` to this external program on the environment
 via environment variables with the same names. Ueventd will use the string written to stdout as the
 new name of the firmware to load. It will still look for the new firmware in the list of firmware
diff --git a/init/firmware_handler_test.cpp b/init/firmware_handler_test.cpp
index 5124a6f..f6e75b0 100644
--- a/init/firmware_handler_test.cpp
+++ b/init/firmware_handler_test.cpp
@@ -103,6 +103,23 @@
     return 0;
 }
 
+TEST(firmware_handler, Matching) {
+    ExternalFirmwareHandler h("/dev/path/a.bin", getuid(), "/test");
+    ASSERT_TRUE(h.match("/dev/path/a.bin"));
+    ASSERT_FALSE(h.match("/dev/path/a.bi"));
+
+    h = ExternalFirmwareHandler("/dev/path/a.*", getuid(), "/test");
+    ASSERT_TRUE(h.match("/dev/path/a.bin"));
+    ASSERT_TRUE(h.match("/dev/path/a.bix"));
+    ASSERT_FALSE(h.match("/dev/path/b.bin"));
+
+    h = ExternalFirmwareHandler("/dev/*/a.bin", getuid(), "/test");
+    ASSERT_TRUE(h.match("/dev/path/a.bin"));
+    ASSERT_TRUE(h.match("/dev/other/a.bin"));
+    ASSERT_FALSE(h.match("/dev/other/c.bin"));
+    ASSERT_FALSE(h.match("/dev/path/b.bin"));
+}
+
 }  // namespace init
 }  // namespace android
 
diff --git a/init/first_stage_mount.cpp b/init/first_stage_mount.cpp
index 3faf430..a733839 100644
--- a/init/first_stage_mount.cpp
+++ b/init/first_stage_mount.cpp
@@ -331,6 +331,12 @@
     if (devices.empty()) {
         return true;
     }
+    // excluding overlays
+    for (auto iter = devices.begin(); iter != devices.end(); ) {
+        if (*iter=="overlay")  iter = devices.erase(iter);
+        else iter++;
+    }
+
     return block_dev_init_.InitDevices(std::move(devices));
 }
 
@@ -542,6 +548,11 @@
             continue;
         }
 
+        if (current->fs_type == "overlay") {
+            ++current;
+            continue;
+        }
+
         // Skip raw partition entries such as boot, dtbo, etc.
         // Having emmc fstab entries allows us to probe current->vbmeta_partition
         // in InitDevices() when they are AVB chained partitions.
@@ -591,6 +602,13 @@
     };
     MapScratchPartitionIfNeeded(&fstab_, init_devices);
 
+    for (auto current = fstab_.begin(); current != fstab_.end(); ) {
+        if (current->fs_type == "overlay") {
+            fs_mgr_overlayfs_mount_fstab_entry(current->lowerdir, current->mount_point);
+        }
+        ++current;
+    }
+
     fs_mgr_overlayfs_mount_all(&fstab_);
 
     return true;
diff --git a/init/property_service.cpp b/init/property_service.cpp
index 17c36bb..fe3490d 100644
--- a/init/property_service.cpp
+++ b/init/property_service.cpp
@@ -1218,13 +1218,20 @@
     });
 }
 
+// bootconfig does not allow to populate `key=value` simultaneously with
+// `key.subkey=value` which does not work with the existing code for
+// `hardware` (e.g. we want both `ro.boot.hardware=value` and
+// `ro.boot.hardware.sku=value`) and for `qemu` (Android Stidio Emulator
+// specific).
+static bool IsAllowedBootconfigKey(const std::string_view key) {
+    return (key == "hardware"sv) || (key == "qemu"sv);
+}
+
 static void ProcessBootconfig() {
     ImportBootconfig([&](const std::string& key, const std::string& value) {
         if (StartsWith(key, ANDROIDBOOT_PREFIX)) {
             InitPropertySet("ro.boot." + key.substr(ANDROIDBOOT_PREFIX.size()), value);
-        } else if (key == "hardware") {
-            // "hardware" in bootconfig replaces "androidboot.hardware" kernel
-            // cmdline parameter
+        } else if (IsAllowedBootconfigKey(key)) {
             InitPropertySet("ro.boot." + key, value);
         }
     });
diff --git a/init/reboot.cpp b/init/reboot.cpp
index d9acee5..ab0e48e 100644
--- a/init/reboot.cpp
+++ b/init/reboot.cpp
@@ -450,10 +450,22 @@
 
 // zram is able to use backing device on top of a loopback device.
 // In order to unmount /data successfully, we have to kill the loopback device first
-#define ZRAM_DEVICE   "/dev/block/zram0"
-#define ZRAM_RESET    "/sys/block/zram0/reset"
-#define ZRAM_BACK_DEV "/sys/block/zram0/backing_dev"
+#define ZRAM_DEVICE       "/dev/block/zram0"
+#define ZRAM_RESET        "/sys/block/zram0/reset"
+#define ZRAM_BACK_DEV     "/sys/block/zram0/backing_dev"
+#define ZRAM_INITSTATE    "/sys/block/zram0/initstate"
 static Result<void> KillZramBackingDevice() {
+    std::string zram_initstate;
+    if (!android::base::ReadFileToString(ZRAM_INITSTATE, &zram_initstate)) {
+        return ErrnoError() << "Failed to read " << ZRAM_INITSTATE;
+    }
+
+    zram_initstate.erase(zram_initstate.length() - 1);
+    if (zram_initstate == "0") {
+        LOG(INFO) << "Zram has not been swapped on";
+        return {};
+    }
+
     if (access(ZRAM_BACK_DEV, F_OK) != 0 && errno == ENOENT) {
         LOG(INFO) << "No zram backing device configured";
         return {};
diff --git a/init/selinux.cpp b/init/selinux.cpp
index 35a96f9..42d3023 100644
--- a/init/selinux.cpp
+++ b/init/selinux.cpp
@@ -240,25 +240,25 @@
     }
 
     // Use precompiled sepolicy only when all corresponding hashes are equal.
-    // plat_sepolicy is always checked, while system_ext and product are checked only when they
-    // exist.
     std::vector<std::pair<std::string, std::string>> sepolicy_hashes{
             {"/system/etc/selinux/plat_sepolicy_and_mapping.sha256",
              precompiled_sepolicy + ".plat_sepolicy_and_mapping.sha256"},
+            {"/system_ext/etc/selinux/system_ext_sepolicy_and_mapping.sha256",
+             precompiled_sepolicy + ".system_ext_sepolicy_and_mapping.sha256"},
+            {"/product/etc/selinux/product_sepolicy_and_mapping.sha256",
+             precompiled_sepolicy + ".product_sepolicy_and_mapping.sha256"},
     };
 
-    if (access("/system_ext/etc/selinux/system_ext_sepolicy.cil", F_OK) == 0) {
-        sepolicy_hashes.emplace_back(
-                "/system_ext/etc/selinux/system_ext_sepolicy_and_mapping.sha256",
-                precompiled_sepolicy + ".system_ext_sepolicy_and_mapping.sha256");
-    }
-
-    if (access("/product/etc/selinux/product_sepolicy.cil", F_OK) == 0) {
-        sepolicy_hashes.emplace_back("/product/etc/selinux/product_sepolicy_and_mapping.sha256",
-                                     precompiled_sepolicy + ".product_sepolicy_and_mapping.sha256");
-    }
-
     for (const auto& [actual_id_path, precompiled_id_path] : sepolicy_hashes) {
+        // Both of them should exist or both of them shouldn't exist.
+        if (access(actual_id_path.c_str(), R_OK) != 0) {
+            if (access(precompiled_id_path.c_str(), R_OK) == 0) {
+                return Error() << precompiled_id_path << " exists but " << actual_id_path
+                               << " doesn't";
+            }
+            continue;
+        }
+
         std::string actual_id;
         if (!ReadFirstLine(actual_id_path.c_str(), &actual_id)) {
             return ErrnoError() << "Failed to read " << actual_id_path;
diff --git a/init/service.cpp b/init/service.cpp
index c3069f5..5af81bf 100644
--- a/init/service.cpp
+++ b/init/service.cpp
@@ -194,6 +194,8 @@
                   << ") process group...";
         int max_processes = 0;
         int r;
+
+        flags_ |= SVC_STOPPING;
         if (signal == SIGTERM) {
             r = killProcessGroupOnce(proc_attr_.uid, pid_, signal, &max_processes);
         } else {
@@ -277,7 +279,8 @@
         f(siginfo);
     }
 
-    if ((siginfo.si_code != CLD_EXITED || siginfo.si_status != 0) && on_failure_reboot_target_) {
+    if ((siginfo.si_code != CLD_EXITED || siginfo.si_status != 0) && on_failure_reboot_target_ &&
+        !(flags_ & SVC_STOPPING)) {
         LOG(ERROR) << "Service with 'reboot_on_failure' option failed, shutting down system.";
         trigger_shutdown(*on_failure_reboot_target_);
     }
@@ -287,7 +290,7 @@
     if (flags_ & SVC_TEMPORARY) return;
 
     pid_ = 0;
-    flags_ &= (~SVC_RUNNING);
+    flags_ &= ~(SVC_RUNNING | SVC_STOPPING);
     start_order_ = 0;
 
     // Oneshot processes go into the disabled state on exit,
@@ -411,7 +414,8 @@
     bool disabled = (flags_ & (SVC_DISABLED | SVC_RESET));
     // Starting a service removes it from the disabled or reset state and
     // immediately takes it out of the restarting state if it was in there.
-    flags_ &= (~(SVC_DISABLED|SVC_RESTARTING|SVC_RESET|SVC_RESTART|SVC_DISABLED_START));
+    flags_ &= (~(SVC_DISABLED | SVC_RESTARTING | SVC_RESET | SVC_RESTART | SVC_DISABLED_START |
+                 SVC_STOPPING));
 
     // Running processes require no additional work --- if they're in the
     // process of exiting, we've ensured that they will immediately restart
diff --git a/init/service.h b/init/service.h
index 043555f..89b1f09 100644
--- a/init/service.h
+++ b/init/service.h
@@ -54,6 +54,7 @@
                                      // should not be killed during shutdown
 #define SVC_TEMPORARY 0x1000  // This service was started by 'exec' and should be removed from the
                               // service list once it is reaped.
+#define SVC_STOPPING 0x2000  // service is being stopped by init
 
 #define NR_SVC_SUPP_GIDS 12    // twelve supplementary groups
 
diff --git a/libcutils/include/private/android_filesystem_config.h b/libcutils/include/private/android_filesystem_config.h
index 7489281..8f22d89 100644
--- a/libcutils/include/private/android_filesystem_config.h
+++ b/libcutils/include/private/android_filesystem_config.h
@@ -128,6 +128,8 @@
 #define AID_EXT_OBB_RW 1079       /* GID for OBB directories on external storage */
 #define AID_CONTEXT_HUB 1080      /* GID for access to the Context Hub */
 #define AID_VIRTMANAGER 1081      /* VirtManager daemon */
+#define AID_ARTD 1082             /* ART Service daemon */
+#define AID_UWB 1083              /* UWB subsystem */
 /* Changes to this file must be made in AOSP, *not* in internal branches. */
 
 #define AID_SHELL 2000 /* adb and debug shell user */
diff --git a/libprocessgroup/cgrouprc/Android.bp b/libprocessgroup/cgrouprc/Android.bp
index 0cbe0cc..7522cfe 100644
--- a/libprocessgroup/cgrouprc/Android.bp
+++ b/libprocessgroup/cgrouprc/Android.bp
@@ -28,7 +28,9 @@
     // defined below. The static library is built for tests.
     vendor_available: false,
     native_bridge_supported: true,
-    llndk_stubs: "libcgrouprc.llndk",
+    llndk: {
+        symbol_file: "libcgrouprc.map.txt",
+    },
     srcs: [
         "cgroup_controller.cpp",
         "cgroup_file.cpp",
@@ -59,12 +61,3 @@
         },
     },
 }
-
-llndk_library {
-    name: "libcgrouprc.llndk",
-    symbol_file: "libcgrouprc.map.txt",
-    native_bridge_supported: true,
-    export_include_dirs: [
-        "include",
-    ],
-}
diff --git a/libprocessgroup/sched_policy.cpp b/libprocessgroup/sched_policy.cpp
index c51ee61..1a4196a 100644
--- a/libprocessgroup/sched_policy.cpp
+++ b/libprocessgroup/sched_policy.cpp
@@ -159,10 +159,9 @@
 
     if (!controller.IsUsable()) return -1;
 
-    if (!controller.GetTaskGroup(tid, &subgroup)) {
-        LOG(ERROR) << "Failed to find cgroup for tid " << tid;
+    if (!controller.GetTaskGroup(tid, &subgroup))
         return -1;
-    }
+
     return 0;
 }
 
@@ -174,11 +173,16 @@
     std::string group;
     if (schedboost_enabled()) {
         if ((getCGroupSubsys(tid, "schedtune", group) < 0) &&
-            (getCGroupSubsys(tid, "cpu", group) < 0))
-		return -1;
+            (getCGroupSubsys(tid, "cpu", group) < 0)) {
+                LOG(ERROR) << "Failed to find cpu cgroup for tid " << tid;
+                return -1;
+        }
     }
     if (group.empty() && cpusets_enabled()) {
-        if (getCGroupSubsys(tid, "cpuset", group) < 0) return -1;
+        if (getCGroupSubsys(tid, "cpuset", group) < 0) {
+            LOG(ERROR) << "Failed to find cpuset cgroup for tid " << tid;
+            return -1;
+        }
     }
 
     // TODO: replace hardcoded directories
diff --git a/libsync/Android.bp b/libsync/Android.bp
index 540a246..99c88cf 100644
--- a/libsync/Android.bp
+++ b/libsync/Android.bp
@@ -42,7 +42,9 @@
     recovery_available: true,
     native_bridge_supported: true,
     defaults: ["libsync_defaults"],
-    llndk_stubs: "libsync.llndk",
+    llndk: {
+        symbol_file: "libsync.map.txt",
+    },
     stubs: {
         symbol_file: "libsync.map.txt",
         versions: [
@@ -51,12 +53,6 @@
     },
 }
 
-llndk_library {
-    name: "libsync.llndk",
-    symbol_file: "libsync.map.txt",
-    export_include_dirs: ["include"],
-}
-
 cc_test {
     name: "sync-unit-tests",
     shared_libs: ["libsync"],
diff --git a/libutils/Android.bp b/libutils/Android.bp
index 6201569..6395567 100644
--- a/libutils/Android.bp
+++ b/libutils/Android.bp
@@ -141,6 +141,7 @@
         "Errors.cpp",
         "FileMap.cpp",
         "JenkinsHash.cpp",
+        "LightRefBase.cpp",
         "NativeHandle.cpp",
         "Printer.cpp",
         "RefBase.cpp",
diff --git a/libutils/LightRefBase.cpp b/libutils/LightRefBase.cpp
new file mode 100644
index 0000000..e08ffec
--- /dev/null
+++ b/libutils/LightRefBase.cpp
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "LightRefBase"
+
+#include <utils/LightRefBase.h>
+
+#include <log/log.h>
+
+namespace android {
+
+void LightRefBase_reportIncStrongRequireStrongFailed(const void* thiz) {
+    LOG_ALWAYS_FATAL("incStrongRequireStrong() called on %p which isn't already owned", thiz);
+}
+
+}  // namespace android
diff --git a/libutils/SharedBuffer_test.cpp b/libutils/SharedBuffer_test.cpp
index 3f960d2..1d6317f 100644
--- a/libutils/SharedBuffer_test.cpp
+++ b/libutils/SharedBuffer_test.cpp
@@ -32,10 +32,25 @@
     EXPECT_DEATH(android::SharedBuffer::alloc(SIZE_MAX - sizeof(android::SharedBuffer)), "");
 }
 
-TEST(SharedBufferTest, alloc_null) {
-    // Big enough to fail, not big enough to abort.
+TEST(SharedBufferTest, alloc_max) {
     SKIP_WITH_HWASAN;  // hwasan has a 2GiB allocation limit.
-    ASSERT_EQ(nullptr, android::SharedBuffer::alloc(SIZE_MAX / 2));
+
+    android::SharedBuffer* buf =
+            android::SharedBuffer::alloc(SIZE_MAX - sizeof(android::SharedBuffer) - 1);
+    if (buf != nullptr) {
+        EXPECT_NE(nullptr, buf->data());
+        buf->release();
+    }
+}
+
+TEST(SharedBufferTest, alloc_big) {
+    SKIP_WITH_HWASAN;  // hwasan has a 2GiB allocation limit.
+
+    android::SharedBuffer* buf = android::SharedBuffer::alloc(SIZE_MAX / 2);
+    if (buf != nullptr) {
+        EXPECT_NE(nullptr, buf->data());
+        buf->release();
+    }
 }
 
 TEST(SharedBufferTest, alloc_zero_size) {
@@ -56,7 +71,13 @@
     // Big enough to fail, not big enough to abort.
     SKIP_WITH_HWASAN;  // hwasan has a 2GiB allocation limit.
     android::SharedBuffer* buf = android::SharedBuffer::alloc(10);
-    ASSERT_EQ(nullptr, buf->editResize(SIZE_MAX / 2));
+    android::SharedBuffer* buf2 = buf->editResize(SIZE_MAX / 2);
+    if (buf2 == nullptr) {
+        buf->release();
+    } else {
+        EXPECT_NE(nullptr, buf2->data());
+        buf2->release();
+    }
 }
 
 TEST(SharedBufferTest, editResize_zero_size) {
diff --git a/libutils/StrongPointer_test.cpp b/libutils/StrongPointer_test.cpp
index 29f6bd4..f27c1f1 100644
--- a/libutils/StrongPointer_test.cpp
+++ b/libutils/StrongPointer_test.cpp
@@ -30,17 +30,34 @@
     ~SPFoo() {
         *mDeleted = true;
     }
-private:
+
+  private:
     bool* mDeleted;
 };
 
-TEST(StrongPointer, move) {
+class SPLightFoo : virtual public VirtualLightRefBase {
+  public:
+    explicit SPLightFoo(bool* deleted_check) : mDeleted(deleted_check) { *mDeleted = false; }
+
+    ~SPLightFoo() { *mDeleted = true; }
+
+  private:
+    bool* mDeleted;
+};
+
+template <typename T>
+class StrongPointer : public ::testing::Test {};
+
+using RefBaseTypes = ::testing::Types<SPFoo, SPLightFoo>;
+TYPED_TEST_CASE(StrongPointer, RefBaseTypes);
+
+TYPED_TEST(StrongPointer, move) {
     bool isDeleted;
-    sp<SPFoo> sp1 = sp<SPFoo>::make(&isDeleted);
-    SPFoo* foo = sp1.get();
+    sp<TypeParam> sp1 = sp<TypeParam>::make(&isDeleted);
+    TypeParam* foo = sp1.get();
     ASSERT_EQ(1, foo->getStrongCount());
     {
-        sp<SPFoo> sp2 = std::move(sp1);
+        sp<TypeParam> sp2 = std::move(sp1);
         ASSERT_EQ(1, foo->getStrongCount()) << "std::move failed, incremented refcnt";
         ASSERT_EQ(nullptr, sp1.get()) << "std::move failed, sp1 is still valid";
         // The strong count isn't increasing, let's double check the old object
@@ -50,33 +67,42 @@
     ASSERT_FALSE(isDeleted) << "deleted too early! still has a reference!";
     {
         // Now let's double check it deletes on time
-        sp<SPFoo> sp2 = std::move(sp1);
+        sp<TypeParam> sp2 = std::move(sp1);
     }
     ASSERT_TRUE(isDeleted) << "foo was leaked!";
 }
 
-TEST(StrongPointer, NullptrComparison) {
-    sp<SPFoo> foo;
+TYPED_TEST(StrongPointer, NullptrComparison) {
+    sp<TypeParam> foo;
     ASSERT_EQ(foo, nullptr);
     ASSERT_EQ(nullptr, foo);
 }
 
-TEST(StrongPointer, PointerComparison) {
+TYPED_TEST(StrongPointer, PointerComparison) {
     bool isDeleted;
-    sp<SPFoo> foo = sp<SPFoo>::make(&isDeleted);
+    sp<TypeParam> foo = sp<TypeParam>::make(&isDeleted);
     ASSERT_EQ(foo.get(), foo);
     ASSERT_EQ(foo, foo.get());
     ASSERT_NE(nullptr, foo);
     ASSERT_NE(foo, nullptr);
 }
 
-TEST(StrongPointer, AssertStrongRefExists) {
-    // uses some other refcounting method, or non at all
+TYPED_TEST(StrongPointer, Deleted) {
     bool isDeleted;
-    SPFoo* foo = new SPFoo(&isDeleted);
+    sp<TypeParam> foo = sp<TypeParam>::make(&isDeleted);
 
-    // can only get a valid sp<> object when you construct it as an sp<> object
-    EXPECT_DEATH(sp<SPFoo>::fromExisting(foo), "");
+    auto foo2 = sp<TypeParam>::fromExisting(foo.get());
 
+    EXPECT_FALSE(isDeleted);
+    foo = nullptr;
+    EXPECT_FALSE(isDeleted);
+    foo2 = nullptr;
+    EXPECT_TRUE(isDeleted);
+}
+
+TYPED_TEST(StrongPointer, AssertStrongRefExists) {
+    bool isDeleted;
+    TypeParam* foo = new TypeParam(&isDeleted);
+    EXPECT_DEATH(sp<TypeParam>::fromExisting(foo), "");
     delete foo;
 }
diff --git a/libutils/include/utils/LightRefBase.h b/libutils/include/utils/LightRefBase.h
index b04e5c1..40edf67 100644
--- a/libutils/include/utils/LightRefBase.h
+++ b/libutils/include/utils/LightRefBase.h
@@ -28,6 +28,8 @@
 
 class ReferenceRenamer;
 
+void LightRefBase_reportIncStrongRequireStrongFailed(const void* thiz);
+
 template <class T>
 class LightRefBase
 {
@@ -36,6 +38,11 @@
     inline void incStrong(__attribute__((unused)) const void* id) const {
         mCount.fetch_add(1, std::memory_order_relaxed);
     }
+    inline void incStrongRequireStrong(__attribute__((unused)) const void* id) const {
+        if (0 == mCount.fetch_add(1, std::memory_order_relaxed)) {
+            LightRefBase_reportIncStrongRequireStrongFailed(this);
+        }
+    }
     inline void decStrong(__attribute__((unused)) const void* id) const {
         if (mCount.fetch_sub(1, std::memory_order_release) == 1) {
             std::atomic_thread_fence(std::memory_order_acquire);
@@ -59,7 +66,6 @@
     mutable std::atomic<int32_t> mCount;
 };
 
-
 // This is a wrapper around LightRefBase that simply enforces a virtual
 // destructor to eliminate the template requirement of LightRefBase
 class VirtualLightRefBase : public LightRefBase<VirtualLightRefBase> {
diff --git a/libvndksupport/Android.bp b/libvndksupport/Android.bp
index 11c75f7..f800bf7 100644
--- a/libvndksupport/Android.bp
+++ b/libvndksupport/Android.bp
@@ -5,7 +5,9 @@
 cc_library {
     name: "libvndksupport",
     native_bridge_supported: true,
-    llndk_stubs: "libvndksupport.llndk",
+    llndk: {
+        symbol_file: "libvndksupport.map.txt",
+    },
     srcs: ["linker.cpp"],
     cflags: [
         "-Wall",
@@ -23,10 +25,3 @@
         versions: ["29"],
     },
 }
-
-llndk_library {
-    name: "libvndksupport.llndk",
-    native_bridge_supported: true,
-    symbol_file: "libvndksupport.map.txt",
-    export_include_dirs: ["include"],
-}
diff --git a/cpio/Android.bp b/mkbootfs/Android.bp
similarity index 75%
rename from cpio/Android.bp
rename to mkbootfs/Android.bp
index 16af079..cd2a624 100644
--- a/cpio/Android.bp
+++ b/mkbootfs/Android.bp
@@ -8,7 +8,11 @@
     name: "mkbootfs",
     srcs: ["mkbootfs.c"],
     cflags: ["-Werror"],
-    shared_libs: ["libcutils"],
+    static_libs: [
+        "libbase",
+        "libcutils",
+        "liblog",
+    ],
     dist: {
         targets: ["dist_files"],
     },
diff --git a/cpio/mkbootfs.c b/mkbootfs/mkbootfs.c
similarity index 100%
rename from cpio/mkbootfs.c
rename to mkbootfs/mkbootfs.c
diff --git a/rootdir/etc/linker.config.json b/rootdir/etc/linker.config.json
index a22ef6f..6b03a1d 100644
--- a/rootdir/etc/linker.config.json
+++ b/rootdir/etc/linker.config.json
@@ -1,13 +1,14 @@
 {
   "requireLibs": [
+    "libandroidicu.so",
     "libdexfile.so",
     "libdexfiled.so",
+    "libicu.so",
+    "libjdwp.so",
     "libnativebridge.so",
     "libnativehelper.so",
     "libnativeloader.so",
     "libsigchain.so",
-    "libandroidicu.so",
-    "libicu.so",
     // TODO(b/122876336): Remove libpac.so once it's migrated to Webview
     "libpac.so",
     // TODO(b/120786417 or b/134659294): libicuuc.so
diff --git a/rootdir/init-debug.rc b/rootdir/init-debug.rc
index deb2411..77a80cd 100644
--- a/rootdir/init-debug.rc
+++ b/rootdir/init-debug.rc
@@ -11,5 +11,6 @@
     mount debugfs debugfs /sys/kernel/debug
     chmod 0755 /sys/kernel/debug
 
-on property:sys.boot_completed=1 && property:ro.product.debugfs_restrictions.enabled=true
+on property:sys.boot_completed=1 && property:ro.product.debugfs_restrictions.enabled=true && \
+   property:persist.dbg.keep_debugfs_mounted=""
    umount /sys/kernel/debug
diff --git a/rootdir/init.rc b/rootdir/init.rc
index d834f73..1013d41 100644
--- a/rootdir/init.rc
+++ b/rootdir/init.rc
@@ -685,15 +685,20 @@
     exec - system system -- /system/bin/vdc keymaster earlyBootEnded
 
     # /data/apex is now available. Start apexd to scan and activate APEXes.
+    #
+    # To handle userspace reboots as well as devices that use FDE, make sure
+    # that apexd is started cleanly here (set apexd.status="") and that it is
+    # restarted if it's already running.
     mkdir /data/apex 0755 root system encryption=None
     mkdir /data/apex/active 0755 root system
     mkdir /data/apex/backup 0700 root system
-    mkdir /data/apex/decompressed 0700 root system encryption=Require
+    mkdir /data/apex/decompressed 0755 root system encryption=Require
     mkdir /data/apex/hashtree 0700 root system
     mkdir /data/apex/sessions 0700 root system
     mkdir /data/app-staging 0751 system system encryption=DeleteIfNecessary
     mkdir /data/apex/ota_reserved 0700 root system encryption=Require
-    start apexd
+    setprop apexd.status ""
+    restart apexd
 
     # create rest of basic filesystem structure
     mkdir /data/misc/recovery 0770 system log
@@ -1240,7 +1245,6 @@
   setprop dev.bootcomplete ""
   setprop sys.init.updatable_crashing ""
   setprop sys.init.updatable_crashing_process_name ""
-  setprop apexd.status ""
   setprop sys.user.0.ce_available ""
   setprop sys.shutdown.requested ""
   setprop service.bootanim.exit ""
diff --git a/trusty/metrics/Android.bp b/trusty/metrics/Android.bp
new file mode 100644
index 0000000..e0533bc
--- /dev/null
+++ b/trusty/metrics/Android.bp
@@ -0,0 +1,51 @@
+// Copyright (C) 2021 The Android Open-Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package {
+    default_applicable_licenses: ["Android-Apache-2.0"],
+}
+
+cc_library {
+    name: "libtrusty_metrics",
+    vendor: true,
+    srcs: [
+        "metrics.cpp",
+    ],
+    export_include_dirs: [
+        "include",
+    ],
+    shared_libs: [
+        "libbase",
+        "liblog",
+        "libtrusty",
+    ],
+}
+
+cc_test {
+    name: "libtrusty_metrics_test",
+    vendor: true,
+    srcs: [
+        "metrics_test.cpp",
+    ],
+    static_libs: [
+        "libtrusty_metrics",
+    ],
+    shared_libs: [
+        "libbase",
+        "libbinder",
+        "liblog",
+        "libtrusty",
+    ],
+    require_root: true,
+}
diff --git a/trusty/metrics/include/trusty/metrics/metrics.h b/trusty/metrics/include/trusty/metrics/metrics.h
new file mode 100644
index 0000000..6949e9b
--- /dev/null
+++ b/trusty/metrics/include/trusty/metrics/metrics.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <functional>
+#include <memory>
+#include <string>
+
+#include <android-base/result.h>
+#include <android-base/unique_fd.h>
+
+namespace android {
+namespace trusty {
+namespace metrics {
+
+using android::base::Result;
+using android::base::unique_fd;
+
+class TrustyMetrics {
+  public:
+    /* Wait for next event with a given timeout. Negative timeout means infinite timeout. */
+    Result<void> WaitForEvent(int timeout_ms = -1);
+    /* Attempt to handle an event from Metrics TA in a non-blocking manner. */
+    Result<void> HandleEvent();
+    /* Expose TIPC channel so that client can integrate it into an event loop with other fds. */
+    int GetRawFd() { return metrics_fd_; };
+
+  protected:
+    TrustyMetrics(std::string tipc_dev) : tipc_dev_(std::move(tipc_dev)), metrics_fd_(-1) {}
+    virtual ~TrustyMetrics(){};
+
+    Result<void> Open();
+    virtual void HandleCrash(const std::string& app_id) = 0;
+    virtual void HandleEventDrop() = 0;
+
+  private:
+    std::string tipc_dev_;
+    unique_fd metrics_fd_;
+};
+
+}  // namespace metrics
+}  // namespace trusty
+}  // namespace android
diff --git a/trusty/metrics/include/trusty/metrics/tipc.h b/trusty/metrics/include/trusty/metrics/tipc.h
new file mode 100644
index 0000000..66d0876
--- /dev/null
+++ b/trusty/metrics/include/trusty/metrics/tipc.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2021, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <stdint.h>
+
+/**
+ * DOC: Metrics
+ *
+ * Metrics interface provides a way for Android to get Trusty metrics data.
+ *
+ * Currently, only "push" model is supported. Clients are expected to connect to
+ * metrics service, listen for events, e.g. app crash events, and respond to
+ * every event with a &struct metrics_req.
+ *
+ * Communication is driven by metrics service, i.e. requests/responses are all
+ * sent from/to metrics service.
+ *
+ * Note that the type of the event is not known to the client ahead of time.
+ *
+ * In the future, if we need to have Android "pull" metrics data from Trusty,
+ * that can be done by introducing a separate port.
+ *
+ * This interface is shared between Android and Trusty. There is a copy in each
+ * repository. They must be kept in sync.
+ */
+
+#define METRICS_PORT "com.android.trusty.metrics"
+
+/**
+ * enum metrics_cmd - command identifiers for metrics interface
+ * @METRICS_CMD_RESP_BIT:          message is a response
+ * @METRICS_CMD_REQ_SHIFT:         number of bits used by @METRICS_CMD_RESP_BIT
+ * @METRICS_CMD_REPORT_EVENT_DROP: report gaps in the event stream
+ * @METRICS_CMD_REPORT_CRASH:      report an app crash event
+ */
+enum metrics_cmd {
+    METRICS_CMD_RESP_BIT = 1,
+    METRICS_CMD_REQ_SHIFT = 1,
+
+    METRICS_CMD_REPORT_EVENT_DROP = (1 << METRICS_CMD_REQ_SHIFT),
+    METRICS_CMD_REPORT_CRASH = (2 << METRICS_CMD_REQ_SHIFT),
+};
+
+/**
+ * enum metrics_error - metrics error codes
+ * @METRICS_NO_ERROR:        no error
+ * @METRICS_ERR_UNKNOWN_CMD: unknown or not implemented command
+ */
+enum metrics_error {
+    METRICS_NO_ERROR = 0,
+    METRICS_ERR_UNKNOWN_CMD = 1,
+};
+
+/**
+ * struct metrics_req - common structure for metrics requests
+ * @cmd:      command identifier - one of &enum metrics_cmd
+ * @reserved: must be 0
+ */
+struct metrics_req {
+    uint32_t cmd;
+    uint32_t reserved;
+} __attribute__((__packed__));
+
+/**
+ * struct metrics_resp - common structure for metrics responses
+ * @cmd: command identifier - %METRICS_CMD_RESP_BIT or'ed with a cmd in
+ *                            one of &enum metrics_cmd
+ * @status: response status, one of &enum metrics_error
+ */
+struct metrics_resp {
+    uint32_t cmd;
+    uint32_t status;
+} __attribute__((__packed__));
+
+/**
+ * struct metrics_report_crash_req - arguments of %METRICS_CMD_REPORT_CRASH
+ *                                   requests
+ * @app_id_len: length of app ID that follows this structure
+ */
+struct metrics_report_crash_req {
+    uint32_t app_id_len;
+} __attribute__((__packed__));
+
+#define METRICS_MAX_APP_ID_LEN 256
+
+#define METRICS_MAX_MSG_SIZE                                                \
+    (sizeof(struct metrics_req) + sizeof(struct metrics_report_crash_req) + \
+     METRICS_MAX_APP_ID_LEN)
diff --git a/trusty/metrics/metrics.cpp b/trusty/metrics/metrics.cpp
new file mode 100644
index 0000000..3ac128a
--- /dev/null
+++ b/trusty/metrics/metrics.cpp
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2021 The Android Open Sourete Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "metrics"
+
+#include <android-base/logging.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <trusty/metrics/metrics.h>
+#include <trusty/metrics/tipc.h>
+#include <trusty/tipc.h>
+#include <unistd.h>
+
+namespace android {
+namespace trusty {
+namespace metrics {
+
+using android::base::ErrnoError;
+using android::base::Error;
+
+Result<void> TrustyMetrics::Open() {
+    int fd = tipc_connect(tipc_dev_.c_str(), METRICS_PORT);
+    if (fd < 0) {
+        return ErrnoError() << "failed to connect to Trusty metrics TA";
+    }
+
+    int flags = fcntl(fd, F_GETFL, 0);
+    if (flags < 0) {
+        return ErrnoError() << "failed F_GETFL";
+    }
+
+    int rc = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+    if (rc < 0) {
+        return ErrnoError() << "failed F_SETFL";
+    }
+
+    metrics_fd_.reset(fd);
+    return {};
+}
+
+Result<void> TrustyMetrics::WaitForEvent(int timeout_ms) {
+    if (!metrics_fd_.ok()) {
+        return Error() << "connection to Metrics TA has not been initialized yet";
+    }
+
+    struct pollfd pfd = {
+            .fd = metrics_fd_,
+            .events = POLLIN,
+    };
+
+    int rc = poll(&pfd, 1, timeout_ms);
+    if (rc != 1) {
+        return ErrnoError() << "failed poll()";
+    }
+
+    if (!(pfd.revents & POLLIN)) {
+        return ErrnoError() << "channel not ready";
+    }
+
+    return {};
+}
+
+Result<void> TrustyMetrics::HandleEvent() {
+    if (!metrics_fd_.ok()) {
+        return Error() << "connection to Metrics TA has not been initialized yet";
+    }
+
+    uint8_t msg[METRICS_MAX_MSG_SIZE];
+
+    auto rc = read(metrics_fd_, msg, sizeof(msg));
+    if (rc < 0) {
+        return ErrnoError() << "failed to read metrics message";
+    }
+    size_t msg_len = rc;
+
+    if (msg_len < sizeof(metrics_req)) {
+        return Error() << "message too small: " << rc;
+    }
+    auto req = reinterpret_cast<metrics_req*>(msg);
+    size_t offset = sizeof(metrics_req);
+    uint32_t status = METRICS_NO_ERROR;
+
+    switch (req->cmd) {
+        case METRICS_CMD_REPORT_CRASH: {
+            if (msg_len < offset + sizeof(metrics_report_crash_req)) {
+                return Error() << "message too small: " << rc;
+            }
+            auto crash_args = reinterpret_cast<metrics_report_crash_req*>(msg + offset);
+            offset += sizeof(metrics_report_crash_req);
+
+            if (msg_len < offset + crash_args->app_id_len) {
+                return Error() << "message too small: " << rc;
+            }
+            auto app_id_ptr = reinterpret_cast<char*>(msg + offset);
+            std::string app_id(app_id_ptr, crash_args->app_id_len);
+
+            HandleCrash(app_id);
+            break;
+        }
+
+        case METRICS_CMD_REPORT_EVENT_DROP:
+            HandleEventDrop();
+            break;
+
+        default:
+            status = METRICS_ERR_UNKNOWN_CMD;
+            break;
+    }
+
+    metrics_resp resp = {
+            .cmd = req->cmd | METRICS_CMD_RESP_BIT,
+            .status = status,
+    };
+
+    rc = write(metrics_fd_, &resp, sizeof(resp));
+    if (rc < 0) {
+        return ErrnoError() << "failed to request next metrics event";
+    }
+
+    if (rc != (int)sizeof(resp)) {
+        return Error() << "unexpected number of bytes sent event: " << rc;
+    }
+
+    return {};
+}
+
+}  // namespace metrics
+}  // namespace trusty
+}  // namespace android
diff --git a/trusty/metrics/metrics_test.cpp b/trusty/metrics/metrics_test.cpp
new file mode 100644
index 0000000..407ddf2
--- /dev/null
+++ b/trusty/metrics/metrics_test.cpp
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <android-base/unique_fd.h>
+#include <binder/IPCThreadState.h>
+#include <gtest/gtest.h>
+#include <poll.h>
+#include <trusty/metrics/metrics.h>
+#include <trusty/tipc.h>
+
+#define TIPC_DEV "/dev/trusty-ipc-dev0"
+#define CRASHER_PORT "com.android.trusty.metrics.test.crasher"
+
+namespace android {
+namespace trusty {
+namespace metrics {
+
+using android::base::unique_fd;
+
+static void TriggerCrash() {
+    size_t num_retries = 3;
+    int fd = -1;
+
+    for (size_t i = 0; i < num_retries; i++) {
+        /* It's possible to time out waiting for crasher TA to restart. */
+        fd = tipc_connect(TIPC_DEV, CRASHER_PORT);
+        if (fd >= 0) {
+            break;
+        }
+    }
+
+    unique_fd crasher(fd);
+    ASSERT_GE(crasher, 0);
+
+    int msg = 0;
+    int rc = write(crasher, &msg, sizeof(msg));
+    ASSERT_EQ(rc, sizeof(msg));
+}
+
+class TrustyMetricsTest : public TrustyMetrics, public ::testing::Test {
+  public:
+    TrustyMetricsTest() : TrustyMetrics(TIPC_DEV) {}
+
+    virtual void HandleCrash(const std::string& app_id) override { crashed_app_ = app_id; }
+
+    virtual void HandleEventDrop() override { event_drop_count_++; }
+
+    virtual void SetUp() override {
+        auto ret = Open();
+        ASSERT_TRUE(ret.ok()) << ret.error();
+    }
+
+    void WaitForAndHandleEvent() {
+        auto ret = WaitForEvent(30000 /* 30 second timeout */);
+        ASSERT_TRUE(ret.ok()) << ret.error();
+
+        ret = HandleEvent();
+        ASSERT_TRUE(ret.ok()) << ret.error();
+    }
+
+    std::string crashed_app_;
+    size_t event_drop_count_;
+};
+
+TEST_F(TrustyMetricsTest, Crash) {
+    TriggerCrash();
+    WaitForAndHandleEvent();
+
+    /* Check that correct TA crashed. */
+    ASSERT_EQ(crashed_app_, "36f5b435-5bd3-4526-8b76-200e3a7e79f3:crasher");
+}
+
+TEST_F(TrustyMetricsTest, PollSet) {
+    int binder_fd;
+    int rc = IPCThreadState::self()->setupPolling(&binder_fd);
+    ASSERT_EQ(rc, 0);
+    ASSERT_GE(binder_fd, 0);
+
+    TriggerCrash();
+
+    struct pollfd pfds[] = {
+            {
+                    .fd = binder_fd,
+                    .events = POLLIN,
+            },
+            {
+                    .fd = GetRawFd(),
+                    .events = POLLIN,
+            },
+    };
+
+    rc = poll(pfds, 2, 30000 /* 30 second timeout */);
+    /* We expect one event on the metrics fd. */
+    ASSERT_EQ(rc, 1);
+    ASSERT_TRUE(pfds[1].revents & POLLIN);
+
+    auto ret = HandleEvent();
+    ASSERT_TRUE(ret.ok()) << ret.error();
+
+    /* Check that correct TA crashed. */
+    ASSERT_EQ(crashed_app_, "36f5b435-5bd3-4526-8b76-200e3a7e79f3:crasher");
+}
+
+TEST_F(TrustyMetricsTest, EventDrop) {
+    /* We know the size of the internal event queue is less than this. */
+    size_t num_events = 3;
+
+    ASSERT_EQ(event_drop_count_, 0);
+
+    for (auto i = 0; i < num_events; i++) {
+        TriggerCrash();
+    }
+
+    for (auto i = 0; i < num_events; i++) {
+        WaitForAndHandleEvent();
+        if (event_drop_count_ > 0) {
+            break;
+        }
+    }
+
+    ASSERT_EQ(event_drop_count_, 1);
+}
+
+}  // namespace metrics
+}  // namespace trusty
+}  // namespace android