Merge "fs_mgr: Look for fstab file in /system/etc"
diff --git a/fs_mgr/file_wait.cpp b/fs_mgr/file_wait.cpp
index cbf6845..af0699b 100644
--- a/fs_mgr/file_wait.cpp
+++ b/fs_mgr/file_wait.cpp
@@ -206,6 +206,9 @@
 }
 
 int64_t OneShotInotify::RemainingMs() const {
+    if (relative_timeout_ == std::chrono::milliseconds::max()) {
+        return std::chrono::milliseconds::max().count();
+    }
     auto remaining = (std::chrono::steady_clock::now() - start_time_);
     auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(remaining);
     return (relative_timeout_ - elapsed).count();
diff --git a/fs_mgr/fs_mgr.cpp b/fs_mgr/fs_mgr.cpp
index 01c8ad3..4826ccf 100644
--- a/fs_mgr/fs_mgr.cpp
+++ b/fs_mgr/fs_mgr.cpp
@@ -790,20 +790,26 @@
     int save_errno = 0;
     int gc_allowance = 0;
     std::string opts;
+    std::string checkpoint_opts;
     bool try_f2fs_gc_allowance = is_f2fs(entry.fs_type) && entry.fs_checkpoint_opts.length() > 0;
+    bool try_f2fs_fallback = false;
     Timer t;
 
     do {
-        if (save_errno == EINVAL && try_f2fs_gc_allowance) {
-            PINFO << "Kernel does not support checkpoint=disable:[n]%, trying without.";
+        if (save_errno == EINVAL && (try_f2fs_gc_allowance || try_f2fs_fallback)) {
+            PINFO << "Kernel does not support " << checkpoint_opts << ", trying without.";
             try_f2fs_gc_allowance = false;
+            // Attempt without gc allowance before dropping.
+            try_f2fs_fallback = !try_f2fs_fallback;
         }
         if (try_f2fs_gc_allowance) {
-            opts = entry.fs_options + entry.fs_checkpoint_opts + ":" +
-                   std::to_string(gc_allowance) + "%";
+            checkpoint_opts = entry.fs_checkpoint_opts + ":" + std::to_string(gc_allowance) + "%";
+        } else if (try_f2fs_fallback) {
+            checkpoint_opts = entry.fs_checkpoint_opts;
         } else {
-            opts = entry.fs_options;
+            checkpoint_opts = "";
         }
+        opts = entry.fs_options + checkpoint_opts;
         if (save_errno == EAGAIN) {
             PINFO << "Retrying mount (source=" << source << ",target=" << target
                   << ",type=" << entry.fs_type << ", gc_allowance=" << gc_allowance << "%)=" << ret
@@ -814,7 +820,7 @@
         save_errno = errno;
         if (try_f2fs_gc_allowance) gc_allowance += 10;
     } while ((ret && save_errno == EAGAIN && gc_allowance <= 100) ||
-             (ret && save_errno == EINVAL && try_f2fs_gc_allowance));
+             (ret && save_errno == EINVAL && (try_f2fs_gc_allowance || try_f2fs_fallback)));
     const char* target_missing = "";
     const char* source_missing = "";
     if (save_errno == ENOENT) {
diff --git a/fs_mgr/include/fs_mgr/file_wait.h b/fs_mgr/include/fs_mgr/file_wait.h
index 74d160e..294e727 100644
--- a/fs_mgr/include/fs_mgr/file_wait.h
+++ b/fs_mgr/include/fs_mgr/file_wait.h
@@ -23,6 +23,9 @@
 // Wait at most |relative_timeout| milliseconds for |path| to exist. dirname(path)
 // must already exist. For example, to wait on /dev/block/dm-6, /dev/block must
 // be a valid directory.
+//
+// If |relative_timeout| is std::chrono::milliseconds::max(), then the wait will
+// block indefinitely.
 bool WaitForFile(const std::string& path, const std::chrono::milliseconds relative_timeout);
 
 // Wait at most |relative_timeout| milliseconds for |path| to stop existing.
diff --git a/fs_mgr/libsnapshot/Android.bp b/fs_mgr/libsnapshot/Android.bp
index acfaa84..1c44e53 100644
--- a/fs_mgr/libsnapshot/Android.bp
+++ b/fs_mgr/libsnapshot/Android.bp
@@ -182,38 +182,6 @@
     vendor_ramdisk_available: true,
 }
 
-cc_defaults {
-    name: "libsnapshot_snapuserd_defaults",
-    defaults: [
-        "fs_mgr_defaults",
-    ],
-    cflags: [
-        "-D_FILE_OFFSET_BITS=64",
-        "-Wall",
-        "-Werror",
-    ],
-    export_include_dirs: ["include"],
-    srcs: [
-        "snapuserd_client.cpp",
-    ],
-}
-
-cc_library_static {
-    name: "libsnapshot_snapuserd",
-    defaults: [
-        "libsnapshot_snapuserd_defaults",
-    ],
-    recovery_available: true,
-    static_libs: [
-        "libcutils_sockets",
-    ],
-    shared_libs: [
-        "libbase",
-        "liblog",
-    ],
-    ramdisk_available: true,
-}
-
 cc_library_static {
     name: "libsnapshot_test_helpers",
     defaults: ["libsnapshot_defaults"],
@@ -412,49 +380,6 @@
     require_root: true,
 }
 
-cc_defaults {
-    name: "snapuserd_defaults",
-    defaults: [
-        "fs_mgr_defaults",
-    ],
-    srcs: [
-        "snapuserd_server.cpp",
-        "snapuserd.cpp",
-        "snapuserd_daemon.cpp",
-        "snapuserd_worker.cpp",
-        "snapuserd_readahead.cpp",
-    ],
-
-    cflags: [
-        "-Wall",
-        "-Werror"
-    ],
-
-    static_libs: [
-        "libbase",
-        "libbrotli",
-        "libcutils_sockets",
-        "libdm",
-        "libgflags",
-        "liblog",
-        "libsnapshot_cow",
-        "libz",
-    ],
-}
-
-cc_binary {
-    name: "snapuserd",
-    defaults: ["snapuserd_defaults"],
-    init_rc: [
-        "snapuserd.rc",
-    ],
-    static_executable: true,
-    system_shared_libs: [],
-    ramdisk_available: true,
-    vendor_ramdisk_available: true,
-    recovery_available: true,
-}
-
 cc_test {
     name: "cow_api_test",
     defaults: [
@@ -556,43 +481,6 @@
     },
 }
 
-cc_test {
-    name: "cow_snapuserd_test",
-    defaults: [
-        "fs_mgr_defaults",
-    ],
-    srcs: [
-        "cow_snapuserd_test.cpp",
-        "snapuserd.cpp",
-        "snapuserd_worker.cpp",
-    ],
-    cflags: [
-        "-Wall",
-        "-Werror",
-    ],
-    shared_libs: [
-        "libbase",
-        "liblog",
-    ],
-    static_libs: [
-        "libbrotli",
-        "libgtest",
-        "libsnapshot_cow",
-        "libsnapshot_snapuserd",
-        "libcutils_sockets",
-        "libz",
-        "libfs_mgr",
-        "libdm",
-    ],
-    header_libs: [
-        "libstorage_literals_headers",
-        "libfiemap_headers",
-    ],
-    test_min_api_level: 30,
-    auto_gen_config: true,
-    require_root: false,
-}
-
 cc_binary {
     name: "inspect_cow",
     host_supported: true,
diff --git a/fs_mgr/libsnapshot/cow_api_test.cpp b/fs_mgr/libsnapshot/cow_api_test.cpp
index 7f7e40a..ecfdefe 100644
--- a/fs_mgr/libsnapshot/cow_api_test.cpp
+++ b/fs_mgr/libsnapshot/cow_api_test.cpp
@@ -1013,6 +1013,27 @@
     ASSERT_TRUE(iter->Done());
 }
 
+TEST_F(CowTest, MissingSeqOp) {
+    // The sequence data lists block ids 1..kSeqLen.
+    constexpr int kSeqLen = 10;
+    uint32_t sequence[kSeqLen];
+    uint32_t next_block = 1;
+    for (uint32_t& entry : sequence) {
+        entry = next_block++;
+    }
+
+    CowOptions options;
+    CowWriter writer(options);
+    ASSERT_TRUE(writer.Initialize(cow_->fd));
+    ASSERT_TRUE(writer.AddSequenceData(kSeqLen, sequence));
+    // One block fewer than the sequence names (presumably blocks 1..kSeqLen - 1).
+    ASSERT_TRUE(writer.AddZeroBlocks(1, kSeqLen - 1));
+    ASSERT_TRUE(writer.Finalize());
+    // Rewind the file; parsing the result is expected to fail.
+    ASSERT_EQ(lseek(cow_->fd, 0, SEEK_SET), 0);
+    CowReader reader;
+    ASSERT_FALSE(reader.Parse(cow_->fd));
+}
+
 TEST_F(CowTest, RevMergeOpItrTest) {
     CowOptions options;
     options.cluster_ops = 5;
diff --git a/fs_mgr/libsnapshot/cow_reader.cpp b/fs_mgr/libsnapshot/cow_reader.cpp
index af49c7d..ace6f59 100644
--- a/fs_mgr/libsnapshot/cow_reader.cpp
+++ b/fs_mgr/libsnapshot/cow_reader.cpp
@@ -413,6 +413,13 @@
         }
         block_map->insert({current_op.new_block, i});
     }
+    for (auto block : *merge_op_blocks) {
+        if (block_map->count(block) == 0) {
+            LOG(ERROR) << "Invalid Sequence Ops. Could not find Cow Op for new block " << block;
+            return false;
+        }
+    }
+
     if (merge_op_blocks->size() > header_.num_merge_ops) {
         num_ordered_ops_to_merge_ = merge_op_blocks->size() - header_.num_merge_ops;
     } else {
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
index 15882b3..e60da31 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
@@ -38,7 +38,7 @@
 #include <libsnapshot/auto_device.h>
 #include <libsnapshot/return.h>
 #include <libsnapshot/snapshot_writer.h>
-#include <libsnapshot/snapuserd_client.h>
+#include <snapuserd/snapuserd_client.h>
 
 #ifndef FRIEND_TEST
 #define FRIEND_TEST(test_set_name, individual_test) \
diff --git a/fs_mgr/libsnapshot/snapuserd.rc b/fs_mgr/libsnapshot/snapuserd.rc
deleted file mode 100644
index 4bf34a2..0000000
--- a/fs_mgr/libsnapshot/snapuserd.rc
+++ /dev/null
@@ -1,7 +0,0 @@
-service snapuserd /system/bin/snapuserd
-    socket snapuserd stream 0660 system system
-    oneshot
-    disabled
-    user root
-    group root system
-    seclabel u:r:snapuserd:s0
diff --git a/fs_mgr/libsnapshot/snapuserd/Android.bp b/fs_mgr/libsnapshot/snapuserd/Android.bp
new file mode 100644
index 0000000..bc97afc
--- /dev/null
+++ b/fs_mgr/libsnapshot/snapuserd/Android.bp
@@ -0,0 +1,132 @@
+//
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+package {
+    default_applicable_licenses: ["Android-Apache-2.0"],
+}
+
+cc_defaults {
+    name: "libsnapshot_snapuserd_defaults",
+    defaults: [
+        "fs_mgr_defaults",
+    ],
+    cflags: [
+        "-D_FILE_OFFSET_BITS=64",
+        "-Wall",
+        "-Werror",
+    ],
+    export_include_dirs: ["include"],
+    srcs: [
+        "snapuserd_client.cpp",
+    ],
+}
+
+cc_library_static {
+    name: "libsnapshot_snapuserd",
+    defaults: [
+        "libsnapshot_snapuserd_defaults",
+    ],
+    recovery_available: true,
+    static_libs: [
+        "libcutils_sockets",
+    ],
+    shared_libs: [
+        "libbase",
+        "liblog",
+    ],
+    ramdisk_available: true,
+}
+
+cc_defaults {
+    name: "snapuserd_defaults",
+    defaults: [
+        "fs_mgr_defaults",
+    ],
+    srcs: [
+        "snapuserd_server.cpp",
+        "snapuserd.cpp",
+        "snapuserd_daemon.cpp",
+        "snapuserd_worker.cpp",
+        "snapuserd_readahead.cpp",
+    ],
+
+    cflags: [
+        "-Wall",
+        "-Werror"
+    ],
+
+    static_libs: [
+        "libbase",
+        "libbrotli",
+        "libcutils_sockets",
+        "libdm",
+        "libfs_mgr",
+        "libgflags",
+        "liblog",
+        "libsnapshot_cow",
+        "libz",
+    ],
+}
+
+cc_binary {
+    name: "snapuserd",
+    defaults: ["snapuserd_defaults"],
+    init_rc: [
+        "snapuserd.rc",
+    ],
+    static_executable: true,
+    system_shared_libs: [],
+    ramdisk_available: true,
+    vendor_ramdisk_available: true,
+    recovery_available: true,
+}
+
+cc_test {
+    name: "cow_snapuserd_test",
+    defaults: [
+        "fs_mgr_defaults",
+    ],
+    srcs: [
+        "cow_snapuserd_test.cpp",
+        "snapuserd.cpp",
+        "snapuserd_worker.cpp",
+    ],
+    cflags: [
+        "-Wall",
+        "-Werror",
+    ],
+    shared_libs: [
+        "libbase",
+        "liblog",
+    ],
+    static_libs: [
+        "libbrotli",
+        "libgtest",
+        "libsnapshot_cow",
+        "libsnapshot_snapuserd",
+        "libcutils_sockets",
+        "libz",
+        "libfs_mgr",
+        "libdm",
+    ],
+    header_libs: [
+        "libstorage_literals_headers",
+        "libfiemap_headers",
+    ],
+    test_min_api_level: 30,
+    auto_gen_config: true,
+    require_root: false,
+}
diff --git a/fs_mgr/libsnapshot/snapuserd/OWNERS b/fs_mgr/libsnapshot/snapuserd/OWNERS
new file mode 100644
index 0000000..2df0a2d
--- /dev/null
+++ b/fs_mgr/libsnapshot/snapuserd/OWNERS
@@ -0,0 +1,3 @@
+akailash@google.com
+dvander@google.com
+drosen@google.com
diff --git a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp b/fs_mgr/libsnapshot/snapuserd/cow_snapuserd_test.cpp
similarity index 83%
rename from fs_mgr/libsnapshot/cow_snapuserd_test.cpp
rename to fs_mgr/libsnapshot/snapuserd/cow_snapuserd_test.cpp
index 767cd04..a718328 100644
--- a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/cow_snapuserd_test.cpp
@@ -33,7 +33,7 @@
 #include <libdm/dm.h>
 #include <libdm/loop_control.h>
 #include <libsnapshot/cow_writer.h>
-#include <libsnapshot/snapuserd_client.h>
+#include <snapuserd/snapuserd_client.h>
 #include <storage_literals/storage_literals.h>
 
 #include "snapuserd.h"
@@ -96,6 +96,8 @@
 class CowSnapuserdTest final {
   public:
     bool Setup();
+    bool SetupOrderedOps();
+    bool SetupOrderedOpsInverted();
     bool SetupCopyOverlap_1();
     bool SetupCopyOverlap_2();
     bool Merge();
@@ -103,6 +105,8 @@
     void ReadSnapshotDeviceAndValidate();
     void Shutdown();
     void MergeInterrupt();
+    void MergeInterruptFixed(int duration);
+    void MergeInterruptRandomly(int max_duration);
     void ReadDmUserBlockWithoutDaemon();
 
     std::string snapshot_dev() const { return snapshot_dev_->path(); }
@@ -117,6 +121,8 @@
     void StartMerge();
 
     void CreateCowDevice();
+    void CreateCowDeviceOrderedOps();
+    void CreateCowDeviceOrderedOpsInverted();
     void CreateCowDeviceWithCopyOverlap_1();
     void CreateCowDeviceWithCopyOverlap_2();
     bool SetupDaemon();
@@ -197,6 +203,18 @@
     return setup_ok_;
 }
 
+bool CowSnapuserdTest::SetupOrderedOps() {
+    CreateBaseDevice();
+    CreateCowDeviceOrderedOps();  // COW built from copy ops in ascending block order
+    return SetupDaemon();         // the daemon setup result is the overall result
+}
+
+bool CowSnapuserdTest::SetupOrderedOpsInverted() {
+    CreateBaseDevice();
+    CreateCowDeviceOrderedOpsInverted();  // COW built from copy ops in descending block order
+    return SetupDaemon();                 // the daemon setup result is the overall result
+}
+
 bool CowSnapuserdTest::SetupCopyOverlap_1() {
     CreateBaseDevice();
     CreateCowDeviceWithCopyOverlap_1();
@@ -383,6 +401,79 @@
               true);
 }
 
+// Builds a COW file that contains only copy operations, emitted in descending
+// block order. Op k (k = 0 .. num_blocks - 1) is
+// AddCopy(num_blocks - 1 - k, 2 * num_blocks - 1 - k), where num_blocks is
+// size_ / options.block_size.
+//
+// Also fills orig_buffer_ with the expected post-merge image: the base device
+// contents with bytes [size_, 2 * size_) moved over bytes [0, size_).
+void CowSnapuserdTest::CreateCowDeviceOrderedOpsInverted() {
+    std::string path = android::base::GetExecutableDirectory();
+    cow_system_ = std::make_unique<TemporaryFile>(path);
+
+    CowOptions options;
+    options.compression = "gz";
+    CowWriter writer(options);
+
+    ASSERT_TRUE(writer.Initialize(cow_system_->fd));
+
+    const size_t num_blocks = size_ / options.block_size;
+
+    // A counted loop (instead of decrementing until zero) emits exactly
+    // num_blocks ops and cannot wrap around when num_blocks is 0.
+    for (size_t k = 0; k < num_blocks; k++) {
+        const size_t lower_blk = num_blocks - 1 - k;
+        const size_t upper_blk = (num_blocks * 2) - 1 - k;
+        ASSERT_TRUE(writer.AddCopy(lower_blk, upper_blk));
+    }
+
+    // Flush operations
+    ASSERT_TRUE(writer.Finalize());
+    // Construct the buffer required for validation
+    orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
+    // Read the entire base device
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
+              true);
+    // Merged Buffer
+    memmove(orig_buffer_.get(), (char*)orig_buffer_.get() + size_, size_);
+}
+
+// Builds a COW file that contains only copy operations, emitted in ascending
+// block order. Op k (k = 0 .. num_blocks - 1) is
+// AddCopy(k, num_blocks + k), where num_blocks is size_ / options.block_size.
+//
+// Also fills orig_buffer_ with the expected post-merge image: the base device
+// contents with bytes [size_, 2 * size_) moved over bytes [0, size_).
+void CowSnapuserdTest::CreateCowDeviceOrderedOps() {
+    std::string path = android::base::GetExecutableDirectory();
+    cow_system_ = std::make_unique<TemporaryFile>(path);
+
+    CowOptions options;
+    options.compression = "gz";
+    CowWriter writer(options);
+
+    ASSERT_TRUE(writer.Initialize(cow_system_->fd));
+
+    const size_t num_blocks = size_ / options.block_size;
+
+    // A counted loop (instead of decrementing until zero) emits exactly
+    // num_blocks ops and cannot wrap around when num_blocks is 0.
+    for (size_t k = 0; k < num_blocks; k++) {
+        ASSERT_TRUE(writer.AddCopy(k, num_blocks + k));
+    }
+
+    // Flush operations
+    ASSERT_TRUE(writer.Finalize());
+    // Construct the buffer required for validation
+    orig_buffer_ = std::make_unique<uint8_t[]>(total_base_size_);
+    // Read the entire base device
+    ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), total_base_size_, 0),
+              true);
+    // Merged Buffer
+    memmove(orig_buffer_.get(), (char*)orig_buffer_.get() + size_, size_);
+}
+
 void CowSnapuserdTest::CreateCowDevice() {
     unique_fd rnd_fd;
     loff_t offset = 0;
@@ -597,6 +721,7 @@
 
 void CowSnapuserdTest::SimulateDaemonRestart() {
     Shutdown();
+    std::this_thread::sleep_for(500ms);
     SetDeviceControlName();
     StartSnapuserdDaemon();
     InitCowDevice();
@@ -605,6 +730,42 @@
     CreateSnapshotDevice();
 }
 
+// Starts a merge, then 20 times in a row: sleeps rand() % |max_duration| ms,
+// runs SimulateDaemonRestart() and starts the merge again. Finishes with one
+// more restart and asserts that Merge() succeeds.
+// A |max_duration| of 0 means "no delay between restarts".
+void CowSnapuserdTest::MergeInterruptRandomly(int max_duration) {
+    std::srand(std::time(nullptr));
+    StartMerge();
+
+    for (int i = 0; i < 20; i++) {
+        // rand() % 0 is undefined behaviour, so only draw when the bound is non-zero.
+        int duration = (max_duration != 0) ? (std::rand() % max_duration) : 0;
+        std::this_thread::sleep_for(std::chrono::milliseconds(duration));
+        SimulateDaemonRestart();
+        StartMerge();
+    }
+
+    SimulateDaemonRestart();
+    ASSERT_TRUE(Merge());
+}
+
+// Starts a merge, then 25 times in a row: sleeps |duration| ms, runs
+// SimulateDaemonRestart() and starts the merge again. Finishes with one more
+// restart and asserts that Merge() succeeds.
+void CowSnapuserdTest::MergeInterruptFixed(int duration) {
+    StartMerge();
+
+    for (int i = 0; i < 25; i++) {
+        std::this_thread::sleep_for(std::chrono::milliseconds(duration));
+        SimulateDaemonRestart();
+        StartMerge();
+    }
+
+    SimulateDaemonRestart();
+    ASSERT_TRUE(Merge());
+}
+
 void CowSnapuserdTest::MergeInterrupt() {
     // Interrupt merge at various intervals
     StartMerge();
@@ -669,10 +822,9 @@
     void* buffer = snapuserd_->GetExceptionBuffer(1);
     loff_t offset = 0;
     struct disk_exception* de;
-    for (int i = 0; i < 12; i++) {
+    for (int i = 11; i >= 0; i--) {
         de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
         ASSERT_EQ(de->old_chunk, i);
-        ASSERT_EQ(de->new_chunk, new_chunk);
         offset += sizeof(struct disk_exception);
         new_chunk += 1;
     }
@@ -811,71 +963,71 @@
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 100);
-            ASSERT_EQ(de->new_chunk, 522);
+            ASSERT_EQ(de->new_chunk, 521);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 105);
-            ASSERT_EQ(de->new_chunk, 524);
+            ASSERT_EQ(de->new_chunk, 522);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 110);
-            ASSERT_EQ(de->new_chunk, 526);
+            ASSERT_EQ(de->new_chunk, 523);
             offset += sizeof(struct disk_exception);
 
             // The next 4 operations are batch merged as
             // both old and new chunk are contiguous
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
-            ASSERT_EQ(de->old_chunk, 50);
-            ASSERT_EQ(de->new_chunk, 528);
-            offset += sizeof(struct disk_exception);
-
-            de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
-            ASSERT_EQ(de->old_chunk, 51);
-            ASSERT_EQ(de->new_chunk, 529);
+            ASSERT_EQ(de->old_chunk, 53);
+            ASSERT_EQ(de->new_chunk, 524);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 52);
-            ASSERT_EQ(de->new_chunk, 530);
+            ASSERT_EQ(de->new_chunk, 525);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
-            ASSERT_EQ(de->old_chunk, 53);
-            ASSERT_EQ(de->new_chunk, 531);
+            ASSERT_EQ(de->old_chunk, 51);
+            ASSERT_EQ(de->new_chunk, 526);
+            offset += sizeof(struct disk_exception);
+
+            de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
+            ASSERT_EQ(de->old_chunk, 50);
+            ASSERT_EQ(de->new_chunk, 527);
             offset += sizeof(struct disk_exception);
 
             // This is handling overlap operation with
             // two batch merge operations.
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 18);
-            ASSERT_EQ(de->new_chunk, 533);
+            ASSERT_EQ(de->new_chunk, 528);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 19);
-            ASSERT_EQ(de->new_chunk, 534);
+            ASSERT_EQ(de->new_chunk, 529);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 20);
-            ASSERT_EQ(de->new_chunk, 535);
+            ASSERT_EQ(de->new_chunk, 530);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 21);
-            ASSERT_EQ(de->new_chunk, 537);
+            ASSERT_EQ(de->new_chunk, 532);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 22);
-            ASSERT_EQ(de->new_chunk, 538);
+            ASSERT_EQ(de->new_chunk, 533);
             offset += sizeof(struct disk_exception);
 
             de = reinterpret_cast<struct disk_exception*>((char*)buffer + offset);
             ASSERT_EQ(de->old_chunk, 23);
-            ASSERT_EQ(de->new_chunk, 539);
+            ASSERT_EQ(de->new_chunk, 534);
             offset += sizeof(struct disk_exception);
 
             // End of metadata
@@ -945,6 +1097,38 @@
     harness.ReadDmUserBlockWithoutDaemon();
 }
 
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Fixed_Ordered) {
+    CowSnapuserdTest harness;
+    ASSERT_TRUE(harness.SetupOrderedOps());  // COW with copy ops in ascending block order
+    harness.MergeInterruptFixed(300);        // sleep 300 ms before each daemon restart
+    harness.ValidateMerge();
+    harness.Shutdown();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Random_Ordered) {
+    CowSnapuserdTest harness;
+    ASSERT_TRUE(harness.SetupOrderedOps());  // COW with copy ops in ascending block order
+    harness.MergeInterruptRandomly(500);     // sleep rand() % 500 ms before each daemon restart
+    harness.ValidateMerge();
+    harness.Shutdown();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Fixed_Inverted) {
+    CowSnapuserdTest harness;
+    ASSERT_TRUE(harness.SetupOrderedOpsInverted());  // COW with copy ops in descending block order
+    harness.MergeInterruptFixed(50);                 // sleep 50 ms before each daemon restart
+    harness.ValidateMerge();
+    harness.Shutdown();
+}
+
+TEST(Snapuserd_Test, Snapshot_Merge_Crash_Random_Inverted) {
+    CowSnapuserdTest harness;
+    ASSERT_TRUE(harness.SetupOrderedOpsInverted());  // COW with copy ops in descending block order
+    harness.MergeInterruptRandomly(50);              // sleep rand() % 50 ms before each restart
+    harness.ValidateMerge();
+    harness.Shutdown();
+}
+
 }  // namespace snapshot
 }  // namespace android
 
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h b/fs_mgr/libsnapshot/snapuserd/include/snapuserd/snapuserd_client.h
similarity index 92%
rename from fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h
rename to fs_mgr/libsnapshot/snapuserd/include/snapuserd/snapuserd_client.h
index 280e857..aeecf41 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h
+++ b/fs_mgr/libsnapshot/snapuserd/include/snapuserd/snapuserd_client.h
@@ -31,6 +31,7 @@
 static constexpr uint32_t PACKET_SIZE = 512;
 
 static constexpr char kSnapuserdSocket[] = "snapuserd";
+static constexpr char kSnapuserdSocketProxy[] = "snapuserd_proxy";
 
 // Ensure that the second-stage daemon for snapuserd is running.
 bool EnsureSnapuserdStarted();
@@ -75,6 +76,9 @@
     // snapuserd to gracefully exit once all handler threads have terminated.
     // This should only be used on first-stage instances of snapuserd.
     bool DetachSnapuserd();
+
+    // Returns true if the snapuserd instance supports bridging a socket to second-stage init.
+    bool SupportsSecondStageSocketHandoff();
 };
 
 }  // namespace snapshot
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h b/fs_mgr/libsnapshot/snapuserd/include/snapuserd/snapuserd_kernel.h
similarity index 100%
rename from fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h
rename to fs_mgr/libsnapshot/snapuserd/include/snapuserd/snapuserd_kernel.h
diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd/snapuserd.cpp
similarity index 88%
rename from fs_mgr/libsnapshot/snapuserd.cpp
rename to fs_mgr/libsnapshot/snapuserd/snapuserd.cpp
index a09b111..31d0221 100644
--- a/fs_mgr/libsnapshot/snapuserd.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd.cpp
@@ -20,7 +20,7 @@
 #include <optional>
 #include <set>
 
-#include <libsnapshot/snapuserd_client.h>
+#include <snapuserd/snapuserd_client.h>
 
 namespace android {
 namespace snapshot {
@@ -405,7 +405,6 @@
         de->old_chunk = cow_op->new_block;
         de->new_chunk = data_chunk_id;
 
-
         // Store operation pointer.
         chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op));
         num_ops += 1;
@@ -437,8 +436,9 @@
 
     int num_ra_ops_per_iter = ((GetBufferDataSize()) / BLOCK_SZ);
     std::optional<chunk_t> prev_id = {};
-    std::map<uint64_t, const CowOperation*> map;
+    std::vector<const CowOperation*> vec;
     std::set<uint64_t> dest_blocks;
+    std::set<uint64_t> source_blocks;
     size_t pending_copy_ops = exceptions_per_area_ - num_ops;
     uint64_t total_copy_ops = reader_->get_num_ordered_ops_to_merge();
 
@@ -492,99 +492,45 @@
             // scratch space and re-construct it thereby there
             // is no loss of data.
             //
+            // Note that we will follow the same order of COW operations
+            // as present in the COW file. This will make sure that
+            // the merge of operations is done in the order in which the
+            // ops are present in the file.
             //===========================================================
-            //
-            // Case 2:
-            //
-            // Let's say we have three copy operations written to COW file
-            // in the following order:
-            //
-            // op-1: 15 -> 18
-            // op-2: 16 -> 19
-            // op-3: 17 -> 20
-            //
-            // As aforementioned, kernel will initiate merge in reverse order.
-            // Hence, we will read these ops in reverse order so that all these
-            // ops are exectued in the same order as requested. Thus, we will
-            // read the metadata in reverse order and for the kernel it will
-            // look like:
-            //
-            // op-3: 17 -> 20
-            // op-2: 16 -> 19
-            // op-1: 15 -> 18   <-- Merge starts here in the kernel
-            //
-            // Now, this is problematic as kernel cannot batch merge them.
-            //
-            // Merge sequence will look like:
-            //
-            // Merge-1: op-1: 15 -> 18
-            // Merge-2: op-2: 16 -> 19
-            // Merge-3: op-3: 17 -> 20
-            //
-            // We have three merge operations.
-            //
-            // Even though the blocks are contiguous, kernel can batch merge
-            // them if the blocks are in descending order. Update engine
-            // addresses this issue partially for overlapping operations as
-            // we see that op-1 to op-3 and op-4 to op-6 operatiosn are in
-            // descending order. However, if the copy operations are not
-            // overlapping, update engine cannot write these blocks
-            // in descending order. Hence, we will try to address it.
-            // Thus, we will send these blocks to the kernel and it will
-            // look like:
-            //
-            // op-3: 15 -> 18
-            // op-2: 16 -> 19
-            // op-1: 17 -> 20  <-- Merge starts here in the kernel
-            //
-            // Now with this change, we can batch merge all these three
-            // operations. Merge sequence will look like:
-            //
-            // Merge-1: {op-1: 17 -> 20, op-2: 16 -> 19, op-3: 15 -> 18}
-            //
-            // Note that we have changed the ordering of merge; However, this
-            // is ok as each of these copy operations are independent and there
-            // is no overlap.
-            //
-            //===================================================================
             if (prev_id.has_value()) {
-                chunk_t diff = (cow_op->new_block > prev_id.value())
-                                       ? (cow_op->new_block - prev_id.value())
-                                       : (prev_id.value() - cow_op->new_block);
-                if (diff != 1) {
-                    break;
-                }
-
-                if (dest_blocks.count(cow_op->new_block) || map.count(cow_op->source) > 0) {
+                if (dest_blocks.count(cow_op->new_block) || source_blocks.count(cow_op->source)) {
                     break;
                 }
             }
             metadata_found = true;
             pending_copy_ops -= 1;
-            map[cow_op->new_block] = cow_op;
+            vec.push_back(cow_op);
             dest_blocks.insert(cow_op->source);
+            source_blocks.insert(cow_op->new_block);
             prev_id = cow_op->new_block;
             cowop_rm_iter->Next();
         } while (!cowop_rm_iter->Done() && pending_copy_ops);
 
         data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
-        SNAP_LOG(DEBUG) << "Batch Merge copy-ops of size: " << map.size()
+        SNAP_LOG(DEBUG) << "Batch Merge copy-ops of size: " << vec.size()
                         << " Area: " << vec_.size() << " Area offset: " << offset
                         << " Pending-copy-ops in this area: " << pending_copy_ops;
 
-        for (auto it = map.begin(); it != map.end(); it++) {
+        for (size_t i = 0; i < vec.size(); i++) {
             struct disk_exception* de =
                     reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);
-            de->old_chunk = it->first;
+            const CowOperation* cow_op = vec[i];
+
+            de->old_chunk = cow_op->new_block;
             de->new_chunk = data_chunk_id;
 
             // Store operation pointer.
-            chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), it->second));
+            chunk_vec_.push_back(std::make_pair(ChunkToSector(data_chunk_id), cow_op));
             offset += sizeof(struct disk_exception);
             num_ops += 1;
             copy_ops++;
             if (read_ahead_feature_) {
-                read_ahead_ops_.push_back(it->second);
+                read_ahead_ops_.push_back(cow_op);
             }
 
             SNAP_LOG(DEBUG) << num_ops << ":"
@@ -627,8 +573,9 @@
                 data_chunk_id = GetNextAllocatableChunkId(data_chunk_id);
             }
         }
-        map.clear();
+        vec.clear();
         dest_blocks.clear();
+        source_blocks.clear();
         prev_id.reset();
     }
 
diff --git a/fs_mgr/libsnapshot/snapuserd.h b/fs_mgr/libsnapshot/snapuserd/snapuserd.h
similarity index 99%
rename from fs_mgr/libsnapshot/snapuserd.h
rename to fs_mgr/libsnapshot/snapuserd/snapuserd.h
index 5d86e4f..95d2f77 100644
--- a/fs_mgr/libsnapshot/snapuserd.h
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd.h
@@ -41,7 +41,7 @@
 #include <libdm/dm.h>
 #include <libsnapshot/cow_reader.h>
 #include <libsnapshot/cow_writer.h>
-#include <libsnapshot/snapuserd_kernel.h>
+#include <snapuserd/snapuserd_kernel.h>
 
 namespace android {
 namespace snapshot {
diff --git a/fs_mgr/libsnapshot/snapuserd/snapuserd.rc b/fs_mgr/libsnapshot/snapuserd/snapuserd.rc
new file mode 100644
index 0000000..02fda8d
--- /dev/null
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd.rc
@@ -0,0 +1,16 @@
+service snapuserd /system/bin/snapuserd
+    socket snapuserd stream 0660 system system
+    oneshot
+    disabled
+    user root
+    group root system
+    seclabel u:r:snapuserd:s0
+
+service snapuserd_proxy /system/bin/snapuserd -socket-handoff
+    socket snapuserd stream 0660 system system
+    socket snapuserd_proxy seqpacket 0660 system root
+    oneshot
+    disabled
+    user root
+    group root system
+    seclabel u:r:snapuserd:s0
diff --git a/fs_mgr/libsnapshot/snapuserd_client.cpp b/fs_mgr/libsnapshot/snapuserd/snapuserd_client.cpp
similarity index 94%
rename from fs_mgr/libsnapshot/snapuserd_client.cpp
rename to fs_mgr/libsnapshot/snapuserd/snapuserd_client.cpp
index 41ab344..81e9228 100644
--- a/fs_mgr/libsnapshot/snapuserd_client.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd_client.cpp
@@ -33,7 +33,7 @@
 #include <android-base/parseint.h>
 #include <android-base/properties.h>
 #include <android-base/strings.h>
-#include <libsnapshot/snapuserd_client.h>
+#include <snapuserd/snapuserd_client.h>
 
 namespace android {
 namespace snapshot {
@@ -141,6 +141,16 @@
     return true;
 }
 
+bool SnapuserdClient::SupportsSecondStageSocketHandoff() {
+    // Ask the daemon whether it implements the hand-off; any reply but "success" means no.
+    std::string query = "supports,second_stage_socket_handoff";
+    if (!Sendmsg(query)) {
+        LOG(ERROR) << "Failed to send message " << query << " to snapuserd";
+        return false;
+    }
+    return Receivemsg() == "success";
+}
+
 std::string SnapuserdClient::Receivemsg() {
     char msg[PACKET_SIZE];
     ssize_t ret = TEMP_FAILURE_RETRY(recv(sockfd_, msg, sizeof(msg), 0));
diff --git a/fs_mgr/libsnapshot/snapuserd_daemon.cpp b/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp
similarity index 82%
rename from fs_mgr/libsnapshot/snapuserd_daemon.cpp
rename to fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp
index 7fa01b7..e05822e 100644
--- a/fs_mgr/libsnapshot/snapuserd_daemon.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.cpp
@@ -19,13 +19,15 @@
 #include <android-base/logging.h>
 #include <android-base/strings.h>
 #include <gflags/gflags.h>
-#include <libsnapshot/snapuserd_client.h>
+#include <snapuserd/snapuserd_client.h>
 
 #include "snapuserd_server.h"
 
 DEFINE_string(socket, android::snapshot::kSnapuserdSocket, "Named socket or socket path.");
 DEFINE_bool(no_socket, false,
             "If true, no socket is used. Each additional argument is an INIT message.");
+DEFINE_bool(socket_handoff, false,
+            "If true, perform a socket hand-off with an existing snapuserd instance, then exit.");
 
 namespace android {
 namespace snapshot {
@@ -33,8 +35,28 @@
 bool Daemon::StartServer(int argc, char** argv) {
     int arg_start = gflags::ParseCommandLineFlags(&argc, &argv, true);
 
+    sigfillset(&signal_mask_);
+    sigdelset(&signal_mask_, SIGINT);
+    sigdelset(&signal_mask_, SIGTERM);
+    sigdelset(&signal_mask_, SIGUSR1);
+
+    // Masking signals here ensures that after this point, we won't handle INT/TERM
+    // until after we call into ppoll()
+    signal(SIGINT, Daemon::SignalHandler);
+    signal(SIGTERM, Daemon::SignalHandler);
+    signal(SIGPIPE, Daemon::SignalHandler);
+    signal(SIGUSR1, Daemon::SignalHandler);
+
+    MaskAllSignalsExceptIntAndTerm();
+
+    if (FLAGS_socket_handoff) {
+        return server_.RunForSocketHandoff();
+    }
     if (!FLAGS_no_socket) {
-        return server_.Start(FLAGS_socket);
+        if (!server_.Start(FLAGS_socket)) {
+            return false;
+        }
+        return server_.Run();
     }
 
     for (int i = arg_start; i < argc; i++) {
@@ -51,8 +73,7 @@
 
     // Skip the accept() call to avoid spurious log spam. The server will still
     // run until all handlers have completed.
-    server_.SetTerminating();
-    return true;
+    return server_.WaitForSocket();
 }
 
 void Daemon::MaskAllSignalsExceptIntAndTerm() {
@@ -61,6 +82,7 @@
     sigdelset(&signal_mask, SIGINT);
     sigdelset(&signal_mask, SIGTERM);
     sigdelset(&signal_mask, SIGPIPE);
+    sigdelset(&signal_mask, SIGUSR1);
     if (sigprocmask(SIG_SETMASK, &signal_mask, NULL) != 0) {
         PLOG(ERROR) << "Failed to set sigprocmask";
     }
@@ -74,28 +96,14 @@
     }
 }
 
-void Daemon::Run() {
-    sigfillset(&signal_mask_);
-    sigdelset(&signal_mask_, SIGINT);
-    sigdelset(&signal_mask_, SIGTERM);
-
-    // Masking signals here ensure that after this point, we won't handle INT/TERM
-    // until after we call into ppoll()
-    signal(SIGINT, Daemon::SignalHandler);
-    signal(SIGTERM, Daemon::SignalHandler);
-    signal(SIGPIPE, Daemon::SignalHandler);
-
-    LOG(DEBUG) << "Snapuserd-server: ready to accept connections";
-
-    MaskAllSignalsExceptIntAndTerm();
-
-    server_.Run();
-}
-
 void Daemon::Interrupt() {
     server_.Interrupt();
 }
 
+void Daemon::ReceivedSocketSignal() {
+    server_.ReceivedSocketSignal();  // reached from SignalHandler() on SIGUSR1
+}
+
 void Daemon::SignalHandler(int signal) {
     LOG(DEBUG) << "Snapuserd received signal: " << signal;
     switch (signal) {
@@ -108,6 +116,11 @@
             LOG(ERROR) << "Received SIGPIPE signal";
             break;
         }
+        case SIGUSR1: {
+            LOG(INFO) << "Received SIGUSR1, attaching to proxy socket";
+            Daemon::Instance().ReceivedSocketSignal();
+            break;
+        }
         default:
             LOG(ERROR) << "Received unknown signal " << signal;
             break;
@@ -126,7 +139,5 @@
         LOG(ERROR) << "Snapuserd daemon failed to start.";
         exit(EXIT_FAILURE);
     }
-    daemon.Run();
-
     return 0;
 }
diff --git a/fs_mgr/libsnapshot/snapuserd_daemon.h b/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.h
similarity index 97%
rename from fs_mgr/libsnapshot/snapuserd_daemon.h
rename to fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.h
index f8afac5..b660ba2 100644
--- a/fs_mgr/libsnapshot/snapuserd_daemon.h
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd_daemon.h
@@ -36,8 +36,8 @@
     }
 
     bool StartServer(int argc, char** argv);
-    void Run();
     void Interrupt();
+    void ReceivedSocketSignal();
 
   private:
     // Signal mask used with ppoll()
diff --git a/fs_mgr/libsnapshot/snapuserd_readahead.cpp b/fs_mgr/libsnapshot/snapuserd/snapuserd_readahead.cpp
similarity index 99%
rename from fs_mgr/libsnapshot/snapuserd_readahead.cpp
rename to fs_mgr/libsnapshot/snapuserd/snapuserd_readahead.cpp
index 16d5919..6fc26a6 100644
--- a/fs_mgr/libsnapshot/snapuserd_readahead.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd_readahead.cpp
@@ -20,7 +20,7 @@
 #include <optional>
 #include <set>
 
-#include <libsnapshot/snapuserd_client.h>
+#include <snapuserd/snapuserd_client.h>
 
 namespace android {
 namespace snapshot {
diff --git a/fs_mgr/libsnapshot/snapuserd_server.cpp b/fs_mgr/libsnapshot/snapuserd/snapuserd_server.cpp
similarity index 74%
rename from fs_mgr/libsnapshot/snapuserd_server.cpp
rename to fs_mgr/libsnapshot/snapuserd/snapuserd_server.cpp
index 8339690..a29b19b 100644
--- a/fs_mgr/libsnapshot/snapuserd_server.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd_server.cpp
@@ -25,14 +25,26 @@
 #include <sys/types.h>
 #include <unistd.h>
 
+#include <android-base/cmsg.h>
 #include <android-base/logging.h>
-
+#include <android-base/properties.h>
+#include <android-base/scopeguard.h>
+#include <fs_mgr/file_wait.h>
+#include <snapuserd/snapuserd_client.h>
 #include "snapuserd.h"
 #include "snapuserd_server.h"
 
+#define _REALLY_INCLUDE_SYS__SYSTEM_PROPERTIES_H_
+#include <sys/_system_properties.h>
+
 namespace android {
 namespace snapshot {
 
+using namespace std::string_literals;
+
+using android::base::borrowed_fd;
+using android::base::unique_fd;
+
 DaemonOperations SnapuserdServer::Resolveop(std::string& input) {
     if (input == "init") return DaemonOperations::INIT;
     if (input == "start") return DaemonOperations::START;
@@ -40,6 +52,7 @@
     if (input == "query") return DaemonOperations::QUERY;
     if (input == "delete") return DaemonOperations::DELETE;
     if (input == "detach") return DaemonOperations::DETACH;
+    if (input == "supports") return DaemonOperations::SUPPORTS;
 
     return DaemonOperations::INVALID;
 }
@@ -193,6 +206,16 @@
             terminating_ = true;
             return true;
         }
+        case DaemonOperations::SUPPORTS: {
+            if (out.size() != 2) {
+                LOG(ERROR) << "Malformed supports message, " << out.size() << " parts";
+                return Sendmsg(fd, "fail");
+            }
+            if (out[1] == "second_stage_socket_handoff") {
+                return Sendmsg(fd, "success");
+            }
+            return Sendmsg(fd, "fail");
+        }
         default: {
             LOG(ERROR) << "Received unknown message type from client";
             Sendmsg(fd, "fail");
@@ -245,28 +268,36 @@
 }
 
 bool SnapuserdServer::Start(const std::string& socketname) {
+    bool start_listening = true;  // cleared below if we create the socket ourselves
+
     sockfd_.reset(android_get_control_socket(socketname.c_str()));
-    if (sockfd_ >= 0) {
-        if (listen(sockfd_.get(), 4) < 0) {
-            PLOG(ERROR) << "listen socket failed: " << socketname;
-            return false;
-        }
-    } else {
+    if (sockfd_ < 0) {
         sockfd_.reset(socket_local_server(socketname.c_str(), ANDROID_SOCKET_NAMESPACE_RESERVED,
                                           SOCK_STREAM));
         if (sockfd_ < 0) {
             PLOG(ERROR) << "Failed to create server socket " << socketname;
             return false;
         }
+        start_listening = false;  // presumably already listening - TODO confirm
+    }
+    return StartWithSocket(start_listening);
+}
+
+bool SnapuserdServer::StartWithSocket(bool start_listening) {
+    if (start_listening && listen(sockfd_.get(), 4) < 0) {
+        PLOG(ERROR) << "listen socket failed";
+        return false;
     }
 
-    AddWatchedFd(sockfd_);
+    AddWatchedFd(sockfd_, POLLIN);  // Run() polls watched_fds_
 
-    LOG(DEBUG) << "Snapuserd server successfully started with socket name " << socketname;
+    LOG(DEBUG) << "Snapuserd server now accepting connections";
     return true;
 }
 
 bool SnapuserdServer::Run() {
+    LOG(INFO) << "Now listening on snapuserd socket";
+
     while (!IsTerminating()) {
         int rv = TEMP_FAILURE_RETRY(poll(watched_fds_.data(), watched_fds_.size(), -1));
         if (rv < 0) {
@@ -311,10 +342,10 @@
     }
 }
 
-void SnapuserdServer::AddWatchedFd(android::base::borrowed_fd fd) {
+void SnapuserdServer::AddWatchedFd(android::base::borrowed_fd fd, int events) {
     struct pollfd p = {};
     p.fd = fd.get();
-    p.events = POLLIN;
+    p.events = events;
     watched_fds_.emplace_back(std::move(p));
 }
 
@@ -325,7 +356,7 @@
         return;
     }
 
-    AddWatchedFd(fd);
+    AddWatchedFd(fd, POLLIN);
 }
 
 bool SnapuserdServer::HandleClient(android::base::borrowed_fd fd, int revents) {
@@ -422,5 +453,97 @@
     return true;
 }
 
+bool SnapuserdServer::WaitForSocket() {
+    // Get the server socket from the proxy and serve on it; JoinAllThreads() runs on return.
+    auto scope_guard = android::base::make_scope_guard([this]() -> void { JoinAllThreads(); });
+    auto socket_path = ANDROID_SOCKET_DIR "/"s + kSnapuserdSocketProxy;
+    // No timeout: wait for as long as it takes for the proxy socket node to appear.
+    if (!android::fs_mgr::WaitForFile(socket_path, std::chrono::milliseconds::max())) {
+        LOG(ERROR)
+                << "Failed to wait for proxy socket, second-stage snapuserd will fail to connect";
+        return false;
+    }
+
+    // We must re-initialize property service access, since we launched before
+    // second-stage init.
+    __system_properties_init();
+
+    if (!android::base::WaitForProperty("snapuserd.proxy_ready", "true")) {
+        LOG(ERROR)
+                << "Failed to wait for proxy property, second-stage snapuserd will fail to connect";
+        return false;
+    }
+
+    unique_fd fd(socket_local_client(kSnapuserdSocketProxy, ANDROID_SOCKET_NAMESPACE_RESERVED,
+                                     SOCK_SEQPACKET));
+    if (fd < 0) {
+        PLOG(ERROR) << "Failed to connect to socket proxy";
+        return false;
+    }
+
+    char code[1];
+    std::vector<unique_fd> fds;
+    ssize_t rv = android::base::ReceiveFileDescriptorVector(fd, code, sizeof(code), 1, &fds);
+    if (rv < 0) {
+        PLOG(ERROR) << "Failed to receive server socket over proxy";
+        return false;
+    }
+    if (fds.empty()) {
+        LOG(ERROR) << "Expected at least one file descriptor from proxy";
+        return false;
+    }
+
+    // ACK the transfer; the proxy keeps its socket open until it sees this.
+    code[0] = 'a';
+    if (TEMP_FAILURE_RETRY(send(fd, code, sizeof(code), MSG_NOSIGNAL)) < 0) {
+        PLOG(ERROR) << "Failed to send ACK to proxy";
+        return false;
+    }
+
+    sockfd_ = std::move(fds[0]);
+    if (!StartWithSocket(true)) {
+        return false;
+    }
+    return Run();
+}
+
+bool SnapuserdServer::RunForSocketHandoff() {
+    // Proxy side of the hand-off: send the snapuserd control socket to the first client.
+    unique_fd proxy_fd(android_get_control_socket(kSnapuserdSocketProxy));
+    if (proxy_fd < 0) {
+        PLOG(FATAL) << "Proxy could not get android control socket " << kSnapuserdSocketProxy;
+    }
+    borrowed_fd server_fd(android_get_control_socket(kSnapuserdSocket));
+    if (server_fd < 0) {
+        PLOG(FATAL) << "Proxy could not get android control socket " << kSnapuserdSocket;
+    }
+    if (listen(proxy_fd.get(), 4) < 0) {
+        PLOG(FATAL) << "Proxy listen socket failed";
+    }
+    // WaitForSocket() connects once this property is set, so set it only after listen().
+    if (!android::base::SetProperty("snapuserd.proxy_ready", "true")) {
+        LOG(FATAL) << "Proxy failed to set ready property";
+    }
+
+    unique_fd client_fd(
+            TEMP_FAILURE_RETRY(accept4(proxy_fd.get(), nullptr, nullptr, SOCK_CLOEXEC)));
+    if (client_fd < 0) {
+        PLOG(FATAL) << "Proxy accept failed";
+    }
+
+    char code[1] = {'a'};
+    std::vector<int> fds = {server_fd.get()};
+    ssize_t rv = android::base::SendFileDescriptorVector(client_fd, code, sizeof(code), fds);
+    if (rv < 0) {
+        PLOG(FATAL) << "Proxy could not send file descriptor to snapuserd";
+    }
+    // Wait for an ACK - results don't matter, we just don't want to risk closing
+    // the proxy socket too early. Retry on EINTR so a stray signal is not fatal.
+    if (TEMP_FAILURE_RETRY(recv(client_fd, code, sizeof(code), 0)) < 0) {
+        PLOG(FATAL) << "Proxy could not receive terminating code from snapuserd";
+    }
+    return true;
+}
+
 }  // namespace snapshot
 }  // namespace android
diff --git a/fs_mgr/libsnapshot/snapuserd_server.h b/fs_mgr/libsnapshot/snapuserd/snapuserd_server.h
similarity index 92%
rename from fs_mgr/libsnapshot/snapuserd_server.h
rename to fs_mgr/libsnapshot/snapuserd/snapuserd_server.h
index 6699189..846f848 100644
--- a/fs_mgr/libsnapshot/snapuserd_server.h
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd_server.h
@@ -42,6 +42,7 @@
     STOP,
     DELETE,
     DETACH,
+    SUPPORTS,
     INVALID,
 };
 
@@ -93,6 +94,7 @@
   private:
     android::base::unique_fd sockfd_;
     bool terminating_;
+    volatile bool received_socket_signal_ = false;
     std::vector<struct pollfd> watched_fds_;
 
     std::mutex lock_;
@@ -100,7 +102,7 @@
     using HandlerList = std::vector<std::shared_ptr<DmUserHandler>>;
     HandlerList dm_users_;
 
-    void AddWatchedFd(android::base::borrowed_fd fd);
+    void AddWatchedFd(android::base::borrowed_fd fd, int events);
     void AcceptClient();
     bool HandleClient(android::base::borrowed_fd fd, int revents);
     bool Recv(android::base::borrowed_fd fd, std::string* data);
@@ -117,6 +119,7 @@
 
     void RunThread(std::shared_ptr<DmUserHandler> handler);
     void JoinAllThreads();
+    bool StartWithSocket(bool start_listening);
 
     // Find a DmUserHandler within a lock.
     HandlerList::iterator FindHandler(std::lock_guard<std::mutex>* proof_of_lock,
@@ -129,6 +132,8 @@
     bool Start(const std::string& socketname);
     bool Run();
     void Interrupt();
+    bool RunForSocketHandoff();
+    bool WaitForSocket();
 
     std::shared_ptr<DmUserHandler> AddHandler(const std::string& misc_name,
                                               const std::string& cow_device_path,
@@ -136,6 +141,7 @@
     bool StartHandler(const std::shared_ptr<DmUserHandler>& handler);
 
     void SetTerminating() { terminating_ = true; }
+    void ReceivedSocketSignal() { received_socket_signal_ = true; }
 };
 
 }  // namespace snapshot
diff --git a/fs_mgr/libsnapshot/snapuserd_worker.cpp b/fs_mgr/libsnapshot/snapuserd/snapuserd_worker.cpp
similarity index 99%
rename from fs_mgr/libsnapshot/snapuserd_worker.cpp
rename to fs_mgr/libsnapshot/snapuserd/snapuserd_worker.cpp
index 682f9da..13d45fe 100644
--- a/fs_mgr/libsnapshot/snapuserd_worker.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/snapuserd_worker.cpp
@@ -20,7 +20,7 @@
 #include <optional>
 #include <set>
 
-#include <libsnapshot/snapuserd_client.h>
+#include <snapuserd/snapuserd_client.h>
 
 namespace android {
 namespace snapshot {
diff --git a/init/Android.bp b/init/Android.bp
index 3e8d4e3..a04d2db 100644
--- a/init/Android.bp
+++ b/init/Android.bp
@@ -228,17 +228,19 @@
     stem: "init",
     defaults: ["init_defaults"],
     static_libs: ["libinit"],
-    required: [
-        "e2fsdroid",
-        "init.rc",
-        "mke2fs",
-        "sload_f2fs",
-        "make_f2fs",
-        "ueventd.rc",
-    ],
     srcs: ["main.cpp"],
     symlinks: ["ueventd"],
     target: {
+        platform: {
+            required: [
+                "init.rc",
+                "ueventd.rc",
+                "e2fsdroid",
+                "make_f2fs",
+                "mke2fs",
+                "sload_f2fs",
+            ],
+        },
         recovery: {
             cflags: ["-DRECOVERY"],
             exclude_static_libs: [
@@ -248,6 +250,14 @@
                 "libbinder",
                 "libutils",
             ],
+            required: [
+                "init_recovery.rc",
+                "ueventd.rc.recovery",
+                "e2fsdroid.recovery",
+                "make_f2fs.recovery",
+                "mke2fs.recovery",
+                "sload_f2fs.recovery",
+            ],
         },
     },
     visibility: ["//packages/modules/Virtualization/microdroid"],
diff --git a/init/init.cpp b/init/init.cpp
index a7325ca..bde8e04 100644
--- a/init/init.cpp
+++ b/init/init.cpp
@@ -725,6 +725,40 @@
     }
 }
 
+static Result<void> ConnectEarlyStageSnapuserdAction(const BuiltinArguments& args) {
+    // No-op unless first-stage init left a snapuserd running that can take a socket hand-off.
+    const auto first_stage_pid = GetSnapuserdFirstStagePid();
+    if (!first_stage_pid.has_value()) {
+        return {};
+    }
+    const auto features = GetSnapuserdFirstStageInfo();
+    if (std::find(features.begin(), features.end(), "socket"s) == features.end()) {
+        // snapuserd does not support socket handoff, so exit early.
+        return {};
+    }
+
+    // Socket handoff is supported: attach the running daemon to its service entry.
+    auto daemon = ServiceList::GetInstance().FindService("snapuserd");
+    if (!daemon) {
+        LOG(FATAL) << "Failed to find snapuserd service entry";
+    }
+    daemon->SetShutdownCritical();
+    daemon->SetStartedInFirstStage(*first_stage_pid);
+
+    // Keep the snapuserd socket from being unlinked when the proxy service is cleaned up.
+    auto proxy = ServiceList::GetInstance().FindService("snapuserd_proxy");
+    if (!proxy) {
+        LOG(FATAL) << "Failed find snapuserd_proxy service entry, merge will never initiate";
+    }
+    if (!proxy->MarkSocketPersistent("snapuserd")) {
+        LOG(FATAL) << "Could not find snapuserd socket in snapuserd_proxy service entry";
+    }
+    if (auto result = proxy->Start(); !result.ok()) {
+        LOG(FATAL) << "Could not start snapuserd_proxy: " << result.error();
+    }
+    return {};
+}
+
 int SecondStageMain(int argc, char** argv) {
     if (REBOOT_BOOTLOADER_ON_PANIC) {
         InstallRebootSignalHandlers();
@@ -852,6 +886,7 @@
     am.QueueBuiltinAction(SetupCgroupsAction, "SetupCgroups");
     am.QueueBuiltinAction(SetKptrRestrictAction, "SetKptrRestrict");
     am.QueueBuiltinAction(TestPerfEventSelinuxAction, "TestPerfEventSelinux");
+    am.QueueBuiltinAction(ConnectEarlyStageSnapuserdAction, "ConnectEarlyStageSnapuserd");
     am.QueueEventTrigger("early-init");
 
     // Queue an action that waits for coldboot done so we know ueventd has set up all of /dev...
diff --git a/init/mount_namespace.cpp b/init/mount_namespace.cpp
index 2a57808..575cae9 100644
--- a/init/mount_namespace.cpp
+++ b/init/mount_namespace.cpp
@@ -82,6 +82,21 @@
     return updatable;
 }
 
+static bool IsMicrodroid() {
+    static const bool microdroid = android::base::GetProperty("ro.hardware", "") == "microdroid";
+    return microdroid;  // the property is read only once, on the first call
+}
+
+// In case we have two sets of APEXes (non-updatable, updatable), we need two separate mount
+// namespaces.
+static bool NeedsTwoMountNamespaces() {
+    if (!IsApexUpdatable() || IsRecoveryMode()) {
+        return false;
+    }
+    // Microdroid has a single set of APEXes, in built-in directories (including block devices).
+    return !IsMicrodroid();
+}
+
 #ifdef ACTIVATE_FLATTENED_APEX
 
 static Result<void> MountDir(const std::string& path, const std::string& mount_path) {
@@ -260,7 +275,7 @@
     // number of essential APEXes (e.g. com.android.runtime) are activated.
     // In the namespace for post-apexd processes, all APEXes are activated.
     bool success = true;
-    if (IsApexUpdatable() && !IsRecoveryMode()) {
+    if (NeedsTwoMountNamespaces()) {
         // Creating a new namespace by cloning, saving, and switching back to
         // the original namespace.
         if (unshare(CLONE_NEWNS) == -1) {
diff --git a/init/service.cpp b/init/service.cpp
index c3069f5..489dd67 100644
--- a/init/service.cpp
+++ b/init/service.cpp
@@ -269,6 +269,9 @@
 
     // Remove any socket resources we may have created.
     for (const auto& socket : sockets_) {
+        if (socket.persist) {
+            continue;
+        }
         auto path = ANDROID_SOCKET_DIR "/" + socket.name;
         unlink(path.c_str());
     }
@@ -409,9 +412,7 @@
     }
 
     bool disabled = (flags_ & (SVC_DISABLED | SVC_RESET));
-    // Starting a service removes it from the disabled or reset state and
-    // immediately takes it out of the restarting state if it was in there.
-    flags_ &= (~(SVC_DISABLED|SVC_RESTARTING|SVC_RESET|SVC_RESTART|SVC_DISABLED_START));
+    ResetFlagsForStart();
 
     // Running processes require no additional work --- if they're in the
     // process of exiting, we've ensured that they will immediately restart
@@ -622,6 +623,23 @@
     return {};
 }
 
+void Service::SetStartedInFirstStage(pid_t pid) {
+    // Adopt an already-running process (by pid) as this service's running instance.
+    LOG(INFO) << "adding first-stage service '" << name_ << "'...";
+
+    pid_ = pid;
+    start_order_ = next_start_order_++;
+    flags_ |= SVC_RUNNING;
+    time_started_ = boot_clock::now();  // not accurate, but doesn't matter here
+    NotifyStateChange("running");
+}
+
+void Service::ResetFlagsForStart() {
+    // A start clears the disabled/reset state and cancels any restart that was in progress.
+    const auto mask = SVC_DISABLED | SVC_RESTARTING | SVC_RESET | SVC_RESTART | SVC_DISABLED_START;
+    flags_ &= ~mask;
+}
+
 Result<void> Service::StartIfNotDisabled() {
     if (!(flags_ & SVC_DISABLED)) {
         return Start();
@@ -792,5 +810,18 @@
                                      nullptr, str_args, false);
 }
 
+// This is used for snapuserd_proxy, which hands off a socket to snapuserd. It's
+// a special case to support the daemon launched in first-stage init. The persist
+// feature is not part of the init language and is only used here.
+bool Service::MarkSocketPersistent(const std::string& socket_name) {
+    // Only the first socket with a matching name is flagged.
+    for (auto& candidate : sockets_) {
+        if (candidate.name != socket_name) continue;
+        candidate.persist = true;
+        return true;
+    }
+    return false;
+}
+
 }  // namespace init
 }  // namespace android
diff --git a/init/service.h b/init/service.h
index 043555f..ccf6899 100644
--- a/init/service.h
+++ b/init/service.h
@@ -99,6 +99,8 @@
     void AddReapCallback(std::function<void(const siginfo_t& siginfo)> callback) {
         reap_callbacks_.emplace_back(std::move(callback));
     }
+    void SetStartedInFirstStage(pid_t pid);
+    bool MarkSocketPersistent(const std::string& socket_name);
     size_t CheckAllCommands() const { return onrestart_.CheckAllCommands(); }
 
     static bool is_exec_service_running() { return is_exec_service_running_; }
@@ -144,6 +146,7 @@
     void StopOrReset(int how);
     void KillProcessGroup(int signal, bool report_oneshot = false);
     void SetProcessAttributesAndCaps();
+    void ResetFlagsForStart();
 
     static unsigned long next_start_order_;
     static bool is_exec_service_running_;
diff --git a/init/service_utils.h b/init/service_utils.h
index 1e0b4bd..9b65dca 100644
--- a/init/service_utils.h
+++ b/init/service_utils.h
@@ -54,6 +54,7 @@
     int perm = 0;
     std::string context;
     bool passcred = false;
+    bool persist = false;
 
     // Create() creates the named unix domain socket in /dev/socket and returns a Descriptor object.
     // It should be called when starting a service, before calling fork(), such that the socket is
diff --git a/init/snapuserd_transition.cpp b/init/snapuserd_transition.cpp
index 40467b7..b8c2fd2 100644
--- a/init/snapuserd_transition.cpp
+++ b/init/snapuserd_transition.cpp
@@ -33,10 +33,10 @@
 #include <android-base/unique_fd.h>
 #include <cutils/sockets.h>
 #include <libsnapshot/snapshot.h>
-#include <libsnapshot/snapuserd_client.h>
 #include <private/android_filesystem_config.h>
 #include <procinfo/process_map.h>
 #include <selinux/android.h>
+#include <snapuserd/snapuserd_client.h>
 
 #include "block_dev_initializer.h"
 #include "service_utils.h"
@@ -54,6 +54,7 @@
 static constexpr char kSnapuserdPath[] = "/system/bin/snapuserd";
 static constexpr char kSnapuserdFirstStagePidVar[] = "FIRST_STAGE_SNAPUSERD_PID";
 static constexpr char kSnapuserdFirstStageFdVar[] = "FIRST_STAGE_SNAPUSERD_FD";
+static constexpr char kSnapuserdFirstStageInfoVar[] = "FIRST_STAGE_SNAPUSERD_INFO";
 static constexpr char kSnapuserdLabel[] = "u:object_r:snapuserd_exec:s0";
 static constexpr char kSnapuserdSocketLabel[] = "u:object_r:snapuserd_socket:s0";
 
@@ -87,6 +88,14 @@
         _exit(127);
     }
 
+    auto client = SnapuserdClient::Connect(android::snapshot::kSnapuserdSocket, 10s);
+    if (!client) {
+        LOG(FATAL) << "Could not connect to first-stage snapuserd";
+    }
+    if (client->SupportsSecondStageSocketHandoff()) {
+        setenv(kSnapuserdFirstStageInfoVar, "socket", 1);
+    }
+
     setenv(kSnapuserdFirstStagePidVar, std::to_string(pid).c_str(), 1);
 
     LOG(INFO) << "Relaunched snapuserd with pid: " << pid;
@@ -328,5 +337,13 @@
     return GetSnapuserdFirstStagePid().has_value();
 }
 
+std::vector<std::string> GetSnapuserdFirstStageInfo() {
+    // Comma-separated info strings exported by first-stage init; unset means none.
+    if (const char* info = getenv(kSnapuserdFirstStageInfoVar); info != nullptr) {
+        return android::base::Split(info, ",");
+    }
+    return {};
+}
+
 }  // namespace init
 }  // namespace android
diff --git a/init/snapuserd_transition.h b/init/snapuserd_transition.h
index a5ab652..62aee83 100644
--- a/init/snapuserd_transition.h
+++ b/init/snapuserd_transition.h
@@ -76,6 +76,9 @@
 // Return the pid of the first-stage instances of snapuserd, if it was started.
 std::optional<pid_t> GetSnapuserdFirstStagePid();
 
+// Return snapuserd info strings that were set during first-stage init.
+std::vector<std::string> GetSnapuserdFirstStageInfo();
+
 // Save an open fd to /system/bin (in the ramdisk) into an environment. This is
 // used to later execveat() snapuserd.
 void SaveRamdiskPathToSnapuserd();