Ensure CancelUpdate() always works in recovery.

This adds a new helper function, TryCancelUpdate(), and refactors the
update cancellation logic. It now takes a "force" parameter, which if is
set and the device is in recovery, overrides all conditions that would
cause cancellation to fail.

In addition the cancellation logic has been consolidated to avoid
duplication of code and error checks. Removal of invalid snapshots is
now automatic for all success cases.

Test: vts_libsnapshot_test
Bug: 352538505
Change-Id: I612d4eb3c4f6436838c9b2f1e38bf644e99c6ad0
diff --git a/fs_mgr/libfiemap/binder.cpp b/fs_mgr/libfiemap/binder.cpp
index 439aac9..8c5fb09 100644
--- a/fs_mgr/libfiemap/binder.cpp
+++ b/fs_mgr/libfiemap/binder.cpp
@@ -62,6 +62,7 @@
                                   std::string* dev) override;
     FiemapStatus ZeroFillNewImage(const std::string& name, uint64_t bytes) override;
     bool RemoveAllImages() override;
+    bool DisableAllImages() override;
     bool DisableImage(const std::string& name) override;
     bool RemoveDisabledImages() override;
     bool GetMappedImageDevice(const std::string& name, std::string* device) override;
@@ -194,6 +195,9 @@
     }
     return true;
 }
+bool ImageManagerBinder::DisableAllImages() {
+    return true;
+}
 
 bool ImageManagerBinder::DisableImage(const std::string& name) {
     auto status = manager_->disableImage(name);
diff --git a/fs_mgr/libfiemap/image_manager.cpp b/fs_mgr/libfiemap/image_manager.cpp
index a5da6e3..bc61d15 100644
--- a/fs_mgr/libfiemap/image_manager.cpp
+++ b/fs_mgr/libfiemap/image_manager.cpp
@@ -655,6 +655,23 @@
     return ok && RemoveAllMetadata(metadata_dir_);
 }
 
+bool ImageManager::DisableAllImages() {
+    if (!MetadataExists(metadata_dir_)) {
+        return true;
+    }
+    auto metadata = OpenMetadata(metadata_dir_);
+    if (!metadata) {
+        return false;
+    }
+
+    bool ok = true;
+    for (const auto& partition : metadata->partitions) {
+        auto partition_name = GetPartitionName(partition);
+        ok &= DisableImage(partition_name);
+    }
+    return ok;
+}
+
 bool ImageManager::Validate() {
     auto metadata = OpenMetadata(metadata_dir_);
     if (!metadata) {
diff --git a/fs_mgr/libfiemap/include/libfiemap/image_manager.h b/fs_mgr/libfiemap/include/libfiemap/image_manager.h
index 0619c96..78e3080 100644
--- a/fs_mgr/libfiemap/include/libfiemap/image_manager.h
+++ b/fs_mgr/libfiemap/include/libfiemap/image_manager.h
@@ -127,6 +127,10 @@
     // Find and remove all images and metadata for this manager.
     virtual bool RemoveAllImages() = 0;
 
+    // Finds and marks all images for deletion upon next reboot. This is used during recovery since
+    // we cannot mount /data
+    virtual bool DisableAllImages() = 0;
+
     virtual bool UnmapImageIfExists(const std::string& name);
 
     // Returns whether DisableImage() was called.
@@ -158,6 +162,7 @@
     bool MapImageWithDeviceMapper(const IPartitionOpener& opener, const std::string& name,
                                   std::string* dev) override;
     bool RemoveAllImages() override;
+    bool DisableAllImages() override;
     bool DisableImage(const std::string& name) override;
     bool RemoveDisabledImages() override;
     bool GetMappedImageDevice(const std::string& name, std::string* device) override;
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot.h
index ca45d2f..5ad9885 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/mock_snapshot.h
@@ -63,6 +63,7 @@
     MOCK_METHOD(ISnapshotMergeStats*, GetSnapshotMergeStatsInstance, (), (override));
     MOCK_METHOD(std::string, ReadSourceBuildFingerprint, (), (override));
     MOCK_METHOD(void, SetMergeStatsFeatures, (ISnapshotMergeStats*), (override));
+    MOCK_METHOD(bool, IsCancelUpdateSafe, (), (override));
 };
 
 }  // namespace android::snapshot
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
index de20526..165ecef 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
@@ -88,6 +88,13 @@
     NOT_CREATED,
 };
 
+enum class CancelResult : unsigned int {
+    OK,
+    ERROR,
+    LIVE_SNAPSHOTS,
+    NEEDS_MERGE,
+};
+
 class ISnapshotManager {
   public:
     // Dependency injection for testing.
@@ -125,6 +132,10 @@
     // Cancel an update; any snapshots will be deleted. This is allowed if the
     // state == Initiated, None, or Unverified (before rebooting to the new
     // slot).
+    //
+    // In recovery, it will cancel an update even if a merge is in progress.
+    // Thus, it should only be called if a new OTA will be sideloaded. The
+    // safety can be checked via IsCancelUpdateSafe().
     virtual bool CancelUpdate() = 0;
 
     // Mark snapshot writes as having completed. After this, new snapshots cannot
@@ -301,6 +312,9 @@
 
     // Return the associated ISnapshotMergeStats instance. Never null.
     virtual ISnapshotMergeStats* GetSnapshotMergeStatsInstance() = 0;
+
+    // Return whether cancelling an update is safe. This is for use in recovery.
+    virtual bool IsCancelUpdateSafe() = 0;
 };
 
 class SnapshotManager final : public ISnapshotManager {
@@ -390,6 +404,7 @@
     bool UnmapAllSnapshots() override;
     std::string ReadSourceBuildFingerprint() override;
     void SetMergeStatsFeatures(ISnapshotMergeStats* stats) override;
+    bool IsCancelUpdateSafe() override;
 
     // We can't use WaitForFile during first-stage init, because ueventd is not
     // running and therefore will not automatically create symlinks. Instead,
@@ -444,6 +459,7 @@
     FRIEND_TEST(SnapshotUpdateTest, SpaceSwapUpdate);
     FRIEND_TEST(SnapshotUpdateTest, InterruptMergeDuringPhaseUpdate);
     FRIEND_TEST(SnapshotUpdateTest, MapAllSnapshotsWithoutSlotSwitch);
+    FRIEND_TEST(SnapshotUpdateTest, CancelInRecovery);
     friend class SnapshotTest;
     friend class SnapshotUpdateTest;
     friend class FlashAfterUpdateTest;
@@ -743,12 +759,8 @@
     // Unmap a dm-user device for user space snapshots
     bool UnmapUserspaceSnapshotDevice(LockedFile* lock, const std::string& snapshot_name);
 
-    // If there isn't a previous update, return true. |needs_merge| is set to false.
-    // If there is a previous update but the device has not boot into it, tries to cancel the
-    //   update and delete any snapshots. Return true if successful. |needs_merge| is set to false.
-    // If there is a previous update and the device has boot into it, do nothing and return true.
-    //   |needs_merge| is set to true.
-    bool TryCancelUpdate(bool* needs_merge);
+    CancelResult TryCancelUpdate();
+    CancelResult IsCancelUpdateSafe(UpdateState state);
 
     // Helper for CreateUpdateSnapshots.
     // Creates all underlying images, COW partitions and snapshot files. Does not initialize them.
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stub.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stub.h
index 1c9b403..e586bbd 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stub.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stub.h
@@ -60,6 +60,7 @@
     bool UnmapAllSnapshots() override;
     std::string ReadSourceBuildFingerprint() override;
     void SetMergeStatsFeatures(ISnapshotMergeStats* stats) override;
+    bool IsCancelUpdateSafe() override;
 };
 
 }  // namespace android::snapshot
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index ecf567e..83787df 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -191,14 +191,18 @@
 }
 
 bool SnapshotManager::BeginUpdate() {
-    bool needs_merge = false;
-    if (!TryCancelUpdate(&needs_merge)) {
-        return false;
-    }
-    if (needs_merge) {
-        LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
-        auto state = ProcessUpdateState();
-        LOG(INFO) << "Merged with state = " << state;
+    switch (TryCancelUpdate()) {
+        case CancelResult::OK:
+            break;
+        case CancelResult::NEEDS_MERGE: {
+            LOG(INFO) << "Wait for merge (if any) before beginning a new update.";
+            auto state = ProcessUpdateState();
+            LOG(INFO) << "Merged with end state: " << state;
+            break;
+        }
+        default:
+            LOG(ERROR) << "Cannot begin update, existing update cannot be cancelled.";
+            return false;
     }
 
     auto file = LockExclusive();
@@ -223,49 +227,82 @@
 }
 
 bool SnapshotManager::CancelUpdate() {
-    bool needs_merge = false;
-    if (!TryCancelUpdate(&needs_merge)) {
-        return false;
-    }
-    if (needs_merge) {
-        LOG(ERROR) << "Cannot cancel update after it has completed or started merging";
-    }
-    return !needs_merge;
+    return TryCancelUpdate() == CancelResult::OK;
 }
 
-bool SnapshotManager::TryCancelUpdate(bool* needs_merge) {
-    *needs_merge = false;
+CancelResult SnapshotManager::TryCancelUpdate() {
+    auto lock = LockExclusive();
+    if (!lock) return CancelResult::ERROR;
 
-    auto file = LockExclusive();
-    if (!file) return false;
+    UpdateState state = ReadUpdateState(lock.get());
+    CancelResult result = IsCancelUpdateSafe(state);
 
-    if (IsSnapshotWithoutSlotSwitch()) {
-        LOG(ERROR) << "Cannot cancel the snapshots as partitions are mounted off the snapshots on "
-                      "current slot.";
-        return false;
+    if (result != CancelResult::OK && device_->IsRecovery()) {
+        LOG(ERROR) << "Cancel result " << result << " will be overridden in recovery.";
+        result = CancelResult::OK;
     }
 
-    UpdateState state = ReadUpdateState(file.get());
-    if (state == UpdateState::None) {
-        RemoveInvalidSnapshots(file.get());
+    switch (result) {
+        case CancelResult::OK:
+            LOG(INFO) << "Cancelling update from state: " << state;
+            RemoveAllUpdateState(lock.get());
+            RemoveInvalidSnapshots(lock.get());
+            break;
+        case CancelResult::NEEDS_MERGE:
+            LOG(ERROR) << "Cannot cancel an update while a merge is in progress.";
+            break;
+        case CancelResult::LIVE_SNAPSHOTS:
+            LOG(ERROR) << "Cannot cancel an update while snapshots are live.";
+            break;
+        case CancelResult::ERROR:
+            // Error was already reported.
+            break;
+    }
+    return result;
+}
+
+bool SnapshotManager::IsCancelUpdateSafe() {
+    // This may be called in recovery, so ensure we have /metadata.
+    auto mount = EnsureMetadataMounted();
+    if (!mount || !mount->HasDevice()) {
         return true;
     }
 
-    if (state == UpdateState::Initiated) {
-        LOG(INFO) << "Update has been initiated, now canceling";
-        return RemoveAllUpdateState(file.get());
+    auto lock = LockExclusive();
+    if (!lock) {
+        return false;
     }
 
-    if (state == UpdateState::Unverified) {
-        // We completed an update, but it can still be canceled if we haven't booted into it.
-        auto slot = GetCurrentSlot();
-        if (slot != Slot::Target) {
-            LOG(INFO) << "Canceling previously completed updates (if any)";
-            return RemoveAllUpdateState(file.get());
-        }
+    UpdateState state = ReadUpdateState(lock.get());
+    return IsCancelUpdateSafe(state) == CancelResult::OK;
+}
+
+CancelResult SnapshotManager::IsCancelUpdateSafe(UpdateState state) {
+    if (IsSnapshotWithoutSlotSwitch()) {
+        return CancelResult::LIVE_SNAPSHOTS;
     }
-    *needs_merge = true;
-    return true;
+
+    switch (state) {
+        case UpdateState::Merging:
+        case UpdateState::MergeNeedsReboot:
+        case UpdateState::MergeFailed:
+            return CancelResult::NEEDS_MERGE;
+        case UpdateState::Unverified: {
+            // We completed an update, but it can still be canceled if we haven't booted into it.
+            auto slot = GetCurrentSlot();
+            if (slot == Slot::Target) {
+                return CancelResult::LIVE_SNAPSHOTS;
+            }
+            return CancelResult::OK;
+        }
+        case UpdateState::None:
+        case UpdateState::Initiated:
+        case UpdateState::Cancelled:
+            return CancelResult::OK;
+        default:
+            LOG(ERROR) << "Unknown state: " << state;
+            return CancelResult::ERROR;
+    }
 }
 
 std::string SnapshotManager::ReadUpdateSourceSlotSuffix() {
@@ -2054,11 +2091,22 @@
     }
 
     if (ok || !has_mapped_cow_images) {
-        // Delete any image artifacts as a precaution, in case an update is
-        // being cancelled due to some corrupted state in an lp_metadata file.
-        // Note that we do not do this if some cow images are still mapped,
-        // since we must not remove backing storage if it's in use.
-        if (!EnsureImageManager() || !images_->RemoveAllImages()) {
+        if (!EnsureImageManager()) {
+            return false;
+        }
+
+        if (device_->IsRecovery()) {
+            // If a device is in recovery, we need to mark the snapshots for cleanup
+            // upon next reboot, since we cannot delete them here.
+            if (!images_->DisableAllImages()) {
+                LOG(ERROR) << "Could not remove all snapshot artifacts in recovery";
+                return false;
+            }
+        } else if (!images_->RemoveAllImages()) {
+            // Delete any image artifacts as a precaution, in case an update is
+            // being cancelled due to some corrupted state in an lp_metadata file.
+            // Note that we do not do this if some cow images are still mapped,
+            // since we must not remove backing storage if it's in use.
             LOG(ERROR) << "Could not remove all snapshot artifacts";
             return false;
         }
diff --git a/fs_mgr/libsnapshot/snapshot_stub.cpp b/fs_mgr/libsnapshot/snapshot_stub.cpp
index 9354102..8edd44f 100644
--- a/fs_mgr/libsnapshot/snapshot_stub.cpp
+++ b/fs_mgr/libsnapshot/snapshot_stub.cpp
@@ -188,4 +188,9 @@
     LOG(ERROR) << __FUNCTION__ << " should never be called.";
 }
 
+bool SnapshotManagerStub::IsCancelUpdateSafe() {
+    LOG(ERROR) << __FUNCTION__ << " should never be called.";
+    return false;
+}
+
 }  // namespace android::snapshot
diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp
index 931de89..e04e429 100644
--- a/fs_mgr/libsnapshot/snapshot_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_test.cpp
@@ -701,6 +701,7 @@
     }
 
     // We should not be able to cancel an update now.
+    ASSERT_EQ(sm->TryCancelUpdate(), CancelResult::NEEDS_MERGE);
     ASSERT_FALSE(sm->CancelUpdate());
 
     ASSERT_EQ(sm->ProcessUpdateState(), UpdateState::MergeCompleted);
@@ -2325,6 +2326,38 @@
     }
 }
 
+// Cancel an OTA in recovery.
+TEST_F(SnapshotUpdateTest, CancelInRecovery) {
+    AddOperationForPartitions();
+    // Execute the update.
+    ASSERT_TRUE(sm->BeginUpdate());
+    ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));
+
+    // Write some data to target partitions.
+    ASSERT_TRUE(WriteSnapshots());
+
+    ASSERT_TRUE(sm->FinishedSnapshotWrites(true /* wipe */));
+
+    // Simulate shutting down the device.
+    ASSERT_TRUE(UnmapAll());
+
+    // Simulate a reboot into recovery.
+    auto test_device = new TestDeviceInfo(fake_super, "_b");
+    test_device->set_recovery(true);
+    auto new_sm = NewManagerForFirstStageMount(test_device);
+
+    EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified);
+    ASSERT_FALSE(new_sm->IsCancelUpdateSafe());
+    ASSERT_TRUE(new_sm->CancelUpdate());
+
+    ASSERT_TRUE(new_sm->EnsureImageManager());
+    auto im = new_sm->image_manager();
+    ASSERT_NE(im, nullptr);
+    ASSERT_TRUE(im->IsImageDisabled("sys_b"));
+    ASSERT_TRUE(im->IsImageDisabled("vnd_b"));
+    ASSERT_TRUE(im->IsImageDisabled("prd_b"));
+}
+
 // Test update package that requests data wipe.
 TEST_F(SnapshotUpdateTest, DataWipeWithStaleSnapshots) {
     AddOperationForPartitions();
diff --git a/fs_mgr/libsnapshot/utility.cpp b/fs_mgr/libsnapshot/utility.cpp
index 7eaaca9..30510d0 100644
--- a/fs_mgr/libsnapshot/utility.cpp
+++ b/fs_mgr/libsnapshot/utility.cpp
@@ -205,6 +205,22 @@
     return os << std::put_time(&now, "%Y%m%d-%H%M%S");
 }
 
+std::ostream& operator<<(std::ostream& os, CancelResult result) {
+    switch (result) {
+        case CancelResult::OK:
+            return os << "ok";
+        case CancelResult::ERROR:
+            return os << "error";
+        case CancelResult::LIVE_SNAPSHOTS:
+            return os << "live_snapshots";
+        case CancelResult::NEEDS_MERGE:
+            return os << "needs_merge";
+        default:
+            LOG(ERROR) << "Unknown cancel result: " << static_cast<uint32_t>(result);
+            return os;
+    }
+}
+
 void AppendExtent(RepeatedPtrField<chromeos_update_engine::Extent>* extents, uint64_t start_block,
                   uint64_t num_blocks) {
     if (extents->size() > 0) {
diff --git a/fs_mgr/libsnapshot/utility.h b/fs_mgr/libsnapshot/utility.h
index 7dae942..30c75c0 100644
--- a/fs_mgr/libsnapshot/utility.h
+++ b/fs_mgr/libsnapshot/utility.h
@@ -123,6 +123,8 @@
 struct Now {};
 std::ostream& operator<<(std::ostream& os, const Now&);
 
+std::ostream& operator<<(std::ostream& os, CancelResult);
+
 // Append to |extents|. Merged into the last element if possible.
 void AppendExtent(google::protobuf::RepeatedPtrField<chromeos_update_engine::Extent>* extents,
                   uint64_t start_block, uint64_t num_blocks);