libsnapshot: Check for valid snapshots based on current slot

We may have snapshot files in /metadata/ota/snapshot/ which ends with
.tmp such as system_a.tmp - This happens if the device
reboots just before `rename` in `WriteStringToFileAtomic`. This
can lead to spurious merge failures.

Log the error and skip these snapshot files. It is ok to skip
as we will still have original snapshot status files since
we are already in the merge path. Additionally, try to remove
these files when snapshot is deleted.

Bug: 292198189
Test: OTA
Change-Id: I5db3dbd5a919b263ae577185de3e7f79a5e9b89a
Signed-off-by: Akilesh Kailash <akailash@google.com>
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index 86ff5f7..51389a0 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -729,6 +729,14 @@
         LOG(ERROR) << "Failed to remove status file " << file_path << ": " << error;
         return false;
     }
+
+    // This path may never exist. If it is present, then it's a stale
+    // snapshot status file. Just remove the file and log the message.
+    const std::string tmp_path = file_path + ".tmp";
+    if (!android::base::RemoveFileIfExists(tmp_path, &error)) {
+        LOG(ERROR) << "Failed to remove stale snapshot file " << tmp_path;
+    }
+
     return true;
 }
 
@@ -754,10 +762,10 @@
         return false;
     }
 
-    auto other_suffix = device_->GetOtherSlotSuffix();
+    auto current_slot_suffix = device_->GetSlotSuffix();
 
     for (const auto& snapshot : snapshots) {
-        if (android::base::EndsWith(snapshot, other_suffix)) {
+        if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
             // Allow the merge to continue, but log this unexpected case.
             LOG(ERROR) << "Unexpected snapshot found during merge: " << snapshot;
             continue;
@@ -1123,7 +1131,7 @@
         return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
     }
 
-    auto other_suffix = device_->GetOtherSlotSuffix();
+    auto current_slot_suffix = device_->GetSlotSuffix();
 
     bool cancelled = false;
     bool merging = false;
@@ -1131,9 +1139,9 @@
     bool wrong_phase = false;
     MergeFailureCode failure_code = MergeFailureCode::Ok;
     for (const auto& snapshot : snapshots) {
-        if (android::base::EndsWith(snapshot, other_suffix)) {
+        if (!android::base::EndsWith(snapshot, current_slot_suffix)) {
             // This will have triggered an error message in InitiateMerge already.
-            LOG(INFO) << "Skipping merge validation of unexpected snapshot: " << snapshot;
+            LOG(ERROR) << "Skipping merge validation of unexpected snapshot: " << snapshot;
             continue;
         }
 
diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp
index 0a85489..3b6d26a 100644
--- a/fs_mgr/libsnapshot/snapshot_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_test.cpp
@@ -685,6 +685,17 @@
     }
     ASSERT_TRUE(sm->InitiateMerge());
 
+    // Create stale files in snapshot directory. Merge should skip these files
+    // as the suffix doesn't match the current slot.
+    auto tmp_path = test_device->GetMetadataDir() + "/snapshots/test_partition_b.tmp";
+    auto other_slot = test_device->GetMetadataDir() + "/snapshots/test_partition_a";
+
+    unique_fd fd(open(tmp_path.c_str(), O_RDWR | O_CLOEXEC | O_CREAT, 0644));
+    ASSERT_GE(fd, 0);
+
+    fd.reset(open(other_slot.c_str(), O_RDWR | O_CLOEXEC | O_CREAT, 0644));
+    ASSERT_GE(fd, 0);
+
     // The device should have been switched to a snapshot-merge target.
     DeviceMapper::TargetInfo target;
     ASSERT_TRUE(sm->IsSnapshotDevice("test_partition_b", &target));
@@ -700,13 +711,23 @@
     ASSERT_EQ(sm->ProcessUpdateState(), UpdateState::MergeCompleted);
     ASSERT_EQ(sm->GetUpdateState(), UpdateState::None);
 
+    // Make sure that snapshot states are cleared and all stale files
+    // are deleted
+    {
+        ASSERT_TRUE(AcquireLock());
+        auto local_lock = std::move(lock_);
+        std::vector<std::string> snapshots;
+        ASSERT_TRUE(sm->ListSnapshots(local_lock.get(), &snapshots));
+        ASSERT_TRUE(snapshots.empty());
+    }
+
     // The device should no longer be a snapshot or snapshot-merge.
     ASSERT_FALSE(sm->IsSnapshotDevice("test_partition_b"));
 
     // Test that we can read back the string we wrote to the snapshot. Note
     // that the base device is gone now. |snap_device| contains the correct
     // partition.
-    unique_fd fd(open("/dev/block/mapper/test_partition_b", O_RDONLY | O_CLOEXEC));
+    fd.reset(open("/dev/block/mapper/test_partition_b", O_RDONLY | O_CLOEXEC));
     ASSERT_GE(fd, 0);
 
     std::string buffer(test_string.size(), '\0');