libsnapshot: Harden merge-in-recovery for factory data resets.
This addresses bugs where unexpected edge cases in the snapshot state
could prevent a merge or data wipe from completing in recovery.
Invalid snapshots (eg on the wrong slot) are now ignored in
CheckMergeState(). This prevents those snapshots from being detected as
"cancelled" and thus falling into RemoveAllUpdateState.
ProcessUpdateState will no longer call RemoveAllUpdateState in recovery.
Furthermore, when RemoveAllUpdateState fails, we will no longer return
the "old" state. If this state is Merging, ProcessUpdateState can
infinite loop.
Finally, HandleImminentDataWipe now guarantees the final state will be
either MergeFailed or None. For testing purposes, the old mechanism was
too susceptible to state machinery changes. And for practical purposes,
either we're going to wipe data (which removes the OTA), or a merge
failed and we can't. So the effective outcome is always no update or a
failed update.
Bug: 179006671
Test: vts_libsnapshot_test
Change-Id: Idcb30151e4d35cbeccf14369f09707ae94a57c66
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
index 0d90f6c..a79a86d 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
@@ -694,8 +694,8 @@
// Call ProcessUpdateState and handle states with special rules before data wipe. Specifically,
// if |allow_forward_merge| and allow-forward-merge indicator exists, initiate merge if
// necessary.
- bool ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
- const std::function<bool()>& callback);
+ UpdateState ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
+ const std::function<bool()>& callback);
// Return device string of a mapped image, or if it is not available, the mapped image path.
bool GetMappedImageDeviceStringOrPath(const std::string& device_name,
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index 395f91a..cc2599d 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -894,6 +894,8 @@
const std::function<bool()>& before_cancel) {
while (true) {
UpdateState state = CheckMergeState(before_cancel);
+ LOG(INFO) << "ProcessUpdateState handling state: " << state;
+
if (state == UpdateState::MergeFailed) {
AcknowledgeMergeFailure();
}
@@ -920,13 +922,15 @@
}
UpdateState state = CheckMergeState(lock.get(), before_cancel);
+ LOG(INFO) << "CheckMergeState for snapshots returned: " << state;
+
if (state == UpdateState::MergeCompleted) {
// Do this inside the same lock. Failures get acknowledged without the
// lock, because flock() might have failed.
AcknowledgeMergeSuccess(lock.get());
} else if (state == UpdateState::Cancelled) {
- if (!RemoveAllUpdateState(lock.get(), before_cancel)) {
- return ReadSnapshotUpdateStatus(lock.get()).state();
+ if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
+ LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
}
}
return state;
@@ -968,13 +972,23 @@
return UpdateState::MergeFailed;
}
+ auto other_suffix = device_->GetOtherSlotSuffix();
+
bool cancelled = false;
bool failed = false;
bool merging = false;
bool needs_reboot = false;
bool wrong_phase = false;
for (const auto& snapshot : snapshots) {
+ if (android::base::EndsWith(snapshot, other_suffix)) {
+ // This will have triggered an error message in InitiateMerge already.
+ LOG(INFO) << "Skipping merge validation of unexpected snapshot: " << snapshot;
+ continue;
+ }
+
UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot, update_status);
+ LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << snapshot_state;
+
switch (snapshot_state) {
case UpdateState::MergeFailed:
failed = true;
@@ -1173,7 +1187,7 @@
// indicator that cleanup is needed on reboot. If a factory data reset
// was requested, it doesn't matter, everything will get wiped anyway.
// To make testing easier we consider a /data wipe as cleaned up.
- if (device_->IsRecovery() && !in_factory_data_reset_) {
+ if (device_->IsRecovery()) {
WriteUpdateState(lock, UpdateState::MergeCompleted);
return;
}
@@ -3213,10 +3227,11 @@
};
in_factory_data_reset_ = true;
- bool ok = ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
+ UpdateState state =
+ ProcessUpdateStateOnDataWipe(true /* allow_forward_merge */, process_callback);
in_factory_data_reset_ = false;
- if (!ok) {
+ if (state == UpdateState::MergeFailed) {
return false;
}
@@ -3224,6 +3239,16 @@
if (!UnmapAllPartitionsInRecovery()) {
LOG(ERROR) << "Unable to unmap all partitions; fastboot may fail to flash.";
}
+
+ if (state != UpdateState::None) {
+ auto lock = LockExclusive();
+ if (!lock) return false;
+
+ // Zap the update state so the bootloader doesn't think we're still
+ // merging. It's okay if this fails, it's informative only at this
+ // point.
+ WriteUpdateState(lock.get(), UpdateState::None);
+ }
return true;
}
@@ -3258,15 +3283,15 @@
return true;
}
-bool SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
- const std::function<bool()>& callback) {
+UpdateState SnapshotManager::ProcessUpdateStateOnDataWipe(bool allow_forward_merge,
+ const std::function<bool()>& callback) {
auto slot_number = SlotNumberForSlotSuffix(device_->GetSlotSuffix());
UpdateState state = ProcessUpdateState(callback);
LOG(INFO) << "Update state in recovery: " << state;
switch (state) {
case UpdateState::MergeFailed:
LOG(ERROR) << "Unrecoverable merge failure detected.";
- return false;
+ return state;
case UpdateState::Unverified: {
// If an OTA was just applied but has not yet started merging:
//
@@ -3286,8 +3311,12 @@
if (allow_forward_merge &&
access(GetForwardMergeIndicatorPath().c_str(), F_OK) == 0) {
LOG(INFO) << "Forward merge allowed, initiating merge now.";
- return InitiateMerge() &&
- ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
+
+ if (!InitiateMerge()) {
+ LOG(ERROR) << "Failed to initiate merge on data wipe.";
+ return UpdateState::MergeFailed;
+ }
+ return ProcessUpdateStateOnDataWipe(false /* allow_forward_merge */, callback);
}
LOG(ERROR) << "Reverting to old slot since update will be deleted.";
@@ -3305,7 +3334,7 @@
default:
break;
}
- return true;
+ return state;
}
bool SnapshotManager::EnsureNoOverflowSnapshot(LockedFile* lock) {
diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp
index d57aa6c..bde4cca 100644
--- a/fs_mgr/libsnapshot/snapshot_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_test.cpp
@@ -636,8 +636,8 @@
// Because the status is Merging, we must call ProcessUpdateState, which should
// detect a cancelled update.
- ASSERT_EQ(sm->ProcessUpdateState(), UpdateState::Cancelled);
- ASSERT_EQ(sm->GetUpdateState(), UpdateState::None);
+ ASSERT_EQ(init->ProcessUpdateState(), UpdateState::Cancelled);
+ ASSERT_EQ(init->GetUpdateState(), UpdateState::None);
}
TEST_F(SnapshotTest, UpdateBootControlHal) {
@@ -1767,7 +1767,7 @@
ASSERT_TRUE(new_sm->HandleImminentDataWipe());
// Manually mount metadata so that we can call GetUpdateState() below.
MountMetadata();
- EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::Unverified);
+ EXPECT_EQ(new_sm->GetUpdateState(), UpdateState::None);
EXPECT_TRUE(test_device->IsSlotUnbootable(1));
EXPECT_FALSE(test_device->IsSlotUnbootable(0));
}
@@ -2105,8 +2105,12 @@
// There should be no snapshot to merge.
auto new_sm = SnapshotManager::New(new TestDeviceInfo(fake_super, flashed_slot_suffix));
- // update_enigne calls ProcessUpdateState first -- should see Cancelled.
- ASSERT_EQ(UpdateState::Cancelled, new_sm->ProcessUpdateState());
+ if (flashed_slot == 0 && after_merge) {
+ ASSERT_EQ(UpdateState::MergeCompleted, new_sm->ProcessUpdateState());
+ } else {
+ // update_engine calls ProcessUpdateState first -- should see Cancelled.
+ ASSERT_EQ(UpdateState::Cancelled, new_sm->ProcessUpdateState());
+ }
// Next OTA calls CancelUpdate no matter what.
ASSERT_TRUE(new_sm->CancelUpdate());