libsnapshot: Add a merge failure code to the update state.
Bug: 185290850
Test: vts_libsnapshot_test
Change-Id: I56c62748dd9a24126afbc3e26d1994d80f519333
diff --git a/fs_mgr/libsnapshot/android/snapshot/snapshot.proto b/fs_mgr/libsnapshot/android/snapshot/snapshot.proto
index 1ebc29f..012f2ae 100644
--- a/fs_mgr/libsnapshot/android/snapshot/snapshot.proto
+++ b/fs_mgr/libsnapshot/android/snapshot/snapshot.proto
@@ -139,7 +139,28 @@
Cancelled = 7;
};
-// Next: 7
+// Next 14:
+//
+// To understand the source of each failure, read snapshot.cpp. To handle new
+// sources of failure, avoid reusing an existing code; add a new code instead.
+enum MergeFailureCode {
+ Ok = 0;
+ ReadStatus = 1;
+ GetTableInfo = 2;
+ UnknownTable = 3;
+ GetTableParams = 4;
+ ActivateNewTable = 5;
+ AcquireLock = 6;
+ ListSnapshots = 7;
+ WriteStatus = 8;
+ UnknownTargetType = 9;
+ QuerySnapshotStatus = 10;
+ ExpectedMergeTarget = 11;
+ UnmergedSectorsAfterCompletion = 12;
+ UnexpectedMergeState = 13;
+};
+
+// Next: 8
message SnapshotUpdateStatus {
UpdateState state = 1;
@@ -160,6 +181,9 @@
// Merge phase (if state == MERGING).
MergePhase merge_phase = 6;
+
+ // Merge failure code, filled if state == MergeFailed.
+ MergeFailureCode merge_failure_code = 7;
}
// Next: 9
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
index 126e1a0..81cb640 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
@@ -381,6 +381,7 @@
FRIEND_TEST(SnapshotTest, MapPartialSnapshot);
FRIEND_TEST(SnapshotTest, MapSnapshot);
FRIEND_TEST(SnapshotTest, Merge);
+ FRIEND_TEST(SnapshotTest, MergeFailureCode);
FRIEND_TEST(SnapshotTest, NoMergeBeforeReboot);
FRIEND_TEST(SnapshotTest, UpdateBootControlHal);
FRIEND_TEST(SnapshotUpdateTest, DaemonTransition);
@@ -532,7 +533,8 @@
// Interact with /metadata/ota/state.
UpdateState ReadUpdateState(LockedFile* file);
SnapshotUpdateStatus ReadSnapshotUpdateStatus(LockedFile* file);
- bool WriteUpdateState(LockedFile* file, UpdateState state);
+ bool WriteUpdateState(LockedFile* file, UpdateState state,
+ MergeFailureCode failure_code = MergeFailureCode::Ok);
bool WriteSnapshotUpdateStatus(LockedFile* file, const SnapshotUpdateStatus& status);
std::string GetStateFilePath() const;
@@ -541,12 +543,12 @@
std::string GetMergeStateFilePath() const;
// Helpers for merging.
- bool MergeSecondPhaseSnapshots(LockedFile* lock);
- bool SwitchSnapshotToMerge(LockedFile* lock, const std::string& name);
- bool RewriteSnapshotDeviceTable(const std::string& dm_name);
+ MergeFailureCode MergeSecondPhaseSnapshots(LockedFile* lock);
+ MergeFailureCode SwitchSnapshotToMerge(LockedFile* lock, const std::string& name);
+ MergeFailureCode RewriteSnapshotDeviceTable(const std::string& dm_name);
bool MarkSnapshotMergeCompleted(LockedFile* snapshot_lock, const std::string& snapshot_name);
void AcknowledgeMergeSuccess(LockedFile* lock);
- void AcknowledgeMergeFailure();
+ void AcknowledgeMergeFailure(MergeFailureCode failure_code);
MergePhase DecideMergePhase(const SnapshotStatus& status);
std::unique_ptr<LpMetadata> ReadCurrentMetadata();
@@ -573,14 +575,22 @@
const SnapshotStatus& status);
bool CollapseSnapshotDevice(const std::string& name, const SnapshotStatus& status);
+ struct MergeResult {
+ explicit MergeResult(UpdateState state,
+ MergeFailureCode failure_code = MergeFailureCode::Ok)
+ : state(state), failure_code(failure_code) {}
+ UpdateState state;
+ MergeFailureCode failure_code;
+ };
+
// Only the following UpdateStates are used here:
// UpdateState::Merging
// UpdateState::MergeCompleted
// UpdateState::MergeFailed
// UpdateState::MergeNeedsReboot
- UpdateState CheckMergeState(const std::function<bool()>& before_cancel);
- UpdateState CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel);
- UpdateState CheckTargetMergeState(LockedFile* lock, const std::string& name,
+ MergeResult CheckMergeState(const std::function<bool()>& before_cancel);
+ MergeResult CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel);
+ MergeResult CheckTargetMergeState(LockedFile* lock, const std::string& name,
const SnapshotUpdateStatus& update_status);
// Interact with status files under /metadata/ota/snapshots.
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index c504355..64a434c 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -746,32 +746,35 @@
return false;
}
- bool rewrote_all = true;
+ auto reported_code = MergeFailureCode::Ok;
for (const auto& snapshot : *merge_group) {
// If this fails, we have no choice but to continue. Everything must
// be merged. This is not an ideal state to be in, but it is safe,
// because we the next boot will try again.
- if (!SwitchSnapshotToMerge(lock.get(), snapshot)) {
+ auto code = SwitchSnapshotToMerge(lock.get(), snapshot);
+ if (code != MergeFailureCode::Ok) {
LOG(ERROR) << "Failed to switch snapshot to a merge target: " << snapshot;
- rewrote_all = false;
+ if (reported_code == MergeFailureCode::Ok) {
+ reported_code = code;
+ }
}
}
// If we couldn't switch everything to a merge target, pre-emptively mark
// this merge as failed. It will get acknowledged when WaitForMerge() is
// called.
- if (!rewrote_all) {
- WriteUpdateState(lock.get(), UpdateState::MergeFailed);
+ if (reported_code != MergeFailureCode::Ok) {
+ WriteUpdateState(lock.get(), UpdateState::MergeFailed, reported_code);
}
// Return true no matter what, because a merge was initiated.
return true;
}
-bool SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
+MergeFailureCode SnapshotManager::SwitchSnapshotToMerge(LockedFile* lock, const std::string& name) {
SnapshotStatus status;
if (!ReadSnapshotStatus(lock, name, &status)) {
- return false;
+ return MergeFailureCode::ReadStatus;
}
if (status.state() != SnapshotState::CREATED) {
LOG(WARNING) << "Snapshot " << name
@@ -780,8 +783,8 @@
// After this, we return true because we technically did switch to a merge
// target. Everything else we do here is just informational.
- if (!RewriteSnapshotDeviceTable(name)) {
- return false;
+ if (auto code = RewriteSnapshotDeviceTable(name); code != MergeFailureCode::Ok) {
+ return code;
}
status.set_state(SnapshotState::MERGING);
@@ -795,26 +798,26 @@
if (!WriteSnapshotStatus(lock, status)) {
LOG(ERROR) << "Could not update status file for snapshot: " << name;
}
- return true;
+ return MergeFailureCode::Ok;
}
-bool SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
+MergeFailureCode SnapshotManager::RewriteSnapshotDeviceTable(const std::string& name) {
auto& dm = DeviceMapper::Instance();
std::vector<DeviceMapper::TargetInfo> old_targets;
if (!dm.GetTableInfo(name, &old_targets)) {
LOG(ERROR) << "Could not read snapshot device table: " << name;
- return false;
+ return MergeFailureCode::GetTableInfo;
}
if (old_targets.size() != 1 || DeviceMapper::GetTargetType(old_targets[0].spec) != "snapshot") {
LOG(ERROR) << "Unexpected device-mapper table for snapshot: " << name;
- return false;
+ return MergeFailureCode::UnknownTable;
}
std::string base_device, cow_device;
if (!DmTargetSnapshot::GetDevicesFromParams(old_targets[0].data, &base_device, &cow_device)) {
LOG(ERROR) << "Could not derive underlying devices for snapshot: " << name;
- return false;
+ return MergeFailureCode::GetTableParams;
}
DmTable table;
@@ -822,10 +825,10 @@
SnapshotStorageMode::Merge, kSnapshotChunkSize);
if (!dm.LoadTableAndActivate(name, table)) {
LOG(ERROR) << "Could not swap device-mapper tables on snapshot device " << name;
- return false;
+ return MergeFailureCode::ActivateNewTable;
}
LOG(INFO) << "Successfully switched snapshot device to a merge target: " << name;
- return true;
+ return MergeFailureCode::Ok;
}
enum class TableQuery {
@@ -897,20 +900,20 @@
UpdateState SnapshotManager::ProcessUpdateState(const std::function<bool()>& callback,
const std::function<bool()>& before_cancel) {
while (true) {
- UpdateState state = CheckMergeState(before_cancel);
- LOG(INFO) << "ProcessUpdateState handling state: " << state;
+ auto result = CheckMergeState(before_cancel);
+ LOG(INFO) << "ProcessUpdateState handling state: " << result.state;
- if (state == UpdateState::MergeFailed) {
- AcknowledgeMergeFailure();
+ if (result.state == UpdateState::MergeFailed) {
+ AcknowledgeMergeFailure(result.failure_code);
}
- if (state != UpdateState::Merging) {
+ if (result.state != UpdateState::Merging) {
// Either there is no merge, or the merge was finished, so no need
// to keep waiting.
- return state;
+ return result.state;
}
if (callback && !callback()) {
- return state;
+ return result.state;
}
// This wait is not super time sensitive, so we have a relatively
@@ -919,36 +922,36 @@
}
}
-UpdateState SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) {
+auto SnapshotManager::CheckMergeState(const std::function<bool()>& before_cancel) -> MergeResult {
auto lock = LockExclusive();
if (!lock) {
- return UpdateState::MergeFailed;
+ return MergeResult(UpdateState::MergeFailed, MergeFailureCode::AcquireLock);
}
- UpdateState state = CheckMergeState(lock.get(), before_cancel);
- LOG(INFO) << "CheckMergeState for snapshots returned: " << state;
+ auto result = CheckMergeState(lock.get(), before_cancel);
+ LOG(INFO) << "CheckMergeState for snapshots returned: " << result.state;
- if (state == UpdateState::MergeCompleted) {
+ if (result.state == UpdateState::MergeCompleted) {
// Do this inside the same lock. Failures get acknowledged without the
// lock, because flock() might have failed.
AcknowledgeMergeSuccess(lock.get());
- } else if (state == UpdateState::Cancelled) {
+ } else if (result.state == UpdateState::Cancelled) {
if (!device_->IsRecovery() && !RemoveAllUpdateState(lock.get(), before_cancel)) {
LOG(ERROR) << "Failed to remove all update state after acknowleding cancelled update.";
}
}
- return state;
+ return result;
}
-UpdateState SnapshotManager::CheckMergeState(LockedFile* lock,
- const std::function<bool()>& before_cancel) {
+auto SnapshotManager::CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel)
+ -> MergeResult {
SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
switch (update_status.state()) {
case UpdateState::None:
case UpdateState::MergeCompleted:
// Harmless races are allowed between two callers of WaitForMerge,
// so in both of these cases we just propagate the state.
- return update_status.state();
+ return MergeResult(update_status.state());
case UpdateState::Merging:
case UpdateState::MergeNeedsReboot:
@@ -963,26 +966,26 @@
// via the merge poll below, but if we never started a merge, we
// need to also check here.
if (HandleCancelledUpdate(lock, before_cancel)) {
- return UpdateState::Cancelled;
+ return MergeResult(UpdateState::Cancelled);
}
- return update_status.state();
+ return MergeResult(update_status.state());
default:
- return update_status.state();
+ return MergeResult(update_status.state());
}
std::vector<std::string> snapshots;
if (!ListSnapshots(lock, &snapshots)) {
- return UpdateState::MergeFailed;
+ return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ListSnapshots);
}
auto other_suffix = device_->GetOtherSlotSuffix();
bool cancelled = false;
- bool failed = false;
bool merging = false;
bool needs_reboot = false;
bool wrong_phase = false;
+ MergeFailureCode failure_code = MergeFailureCode::Ok;
for (const auto& snapshot : snapshots) {
if (android::base::EndsWith(snapshot, other_suffix)) {
// This will have triggered an error message in InitiateMerge already.
@@ -990,12 +993,15 @@
continue;
}
- UpdateState snapshot_state = CheckTargetMergeState(lock, snapshot, update_status);
- LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << snapshot_state;
+ auto result = CheckTargetMergeState(lock, snapshot, update_status);
+ LOG(INFO) << "CheckTargetMergeState for " << snapshot << " returned: " << result.state;
- switch (snapshot_state) {
+ switch (result.state) {
case UpdateState::MergeFailed:
- failed = true;
+ // Take the first failure code in case other failures compound.
+ if (failure_code == MergeFailureCode::Ok) {
+ failure_code = result.failure_code;
+ }
break;
case UpdateState::Merging:
merging = true;
@@ -1013,8 +1019,10 @@
break;
default:
LOG(ERROR) << "Unknown merge status for \"" << snapshot << "\": "
- << "\"" << snapshot_state << "\"";
- failed = true;
+ << "\"" << result.state << "\"";
+ if (failure_code == MergeFailureCode::Ok) {
+ failure_code = MergeFailureCode::UnexpectedMergeState;
+ }
break;
}
}
@@ -1023,24 +1031,25 @@
// Note that we handle "Merging" before we handle anything else. We
// want to poll until *nothing* is merging if we can, so everything has
// a chance to get marked as completed or failed.
- return UpdateState::Merging;
+ return MergeResult(UpdateState::Merging);
}
- if (failed) {
+ if (failure_code != MergeFailureCode::Ok) {
// Note: since there are many drop-out cases for failure, we acknowledge
// it in WaitForMerge rather than here and elsewhere.
- return UpdateState::MergeFailed;
+ return MergeResult(UpdateState::MergeFailed, failure_code);
}
if (wrong_phase) {
// If we got here, no other partitions are being merged, and nothing
// failed to merge. It's safe to move to the next merge phase.
- if (!MergeSecondPhaseSnapshots(lock)) {
- return UpdateState::MergeFailed;
+ auto code = MergeSecondPhaseSnapshots(lock);
+ if (code != MergeFailureCode::Ok) {
+ return MergeResult(UpdateState::MergeFailed, code);
}
- return UpdateState::Merging;
+ return MergeResult(UpdateState::Merging);
}
if (needs_reboot) {
WriteUpdateState(lock, UpdateState::MergeNeedsReboot);
- return UpdateState::MergeNeedsReboot;
+ return MergeResult(UpdateState::MergeNeedsReboot);
}
if (cancelled) {
// This is an edge case, that we handle as correctly as we sensibly can.
@@ -1048,16 +1057,17 @@
// removed the snapshot as a result. The exact state of the update is
// undefined now, but this can only happen on an unlocked device where
// partitions can be flashed without wiping userdata.
- return UpdateState::Cancelled;
+ return MergeResult(UpdateState::Cancelled);
}
- return UpdateState::MergeCompleted;
+ return MergeResult(UpdateState::MergeCompleted);
}
-UpdateState SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
- const SnapshotUpdateStatus& update_status) {
+auto SnapshotManager::CheckTargetMergeState(LockedFile* lock, const std::string& name,
+ const SnapshotUpdateStatus& update_status)
+ -> MergeResult {
SnapshotStatus snapshot_status;
if (!ReadSnapshotStatus(lock, name, &snapshot_status)) {
- return UpdateState::MergeFailed;
+ return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ReadStatus);
}
std::unique_ptr<LpMetadata> current_metadata;
@@ -1070,7 +1080,7 @@
if (!current_metadata ||
GetMetadataPartitionState(*current_metadata, name) != MetadataPartitionState::Updated) {
DeleteSnapshot(lock, name);
- return UpdateState::Cancelled;
+ return MergeResult(UpdateState::Cancelled);
}
// During a check, we decided the merge was complete, but we were unable to
@@ -1081,11 +1091,11 @@
if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
// NB: It's okay if this fails now, we gave cleanup our best effort.
OnSnapshotMergeComplete(lock, name, snapshot_status);
- return UpdateState::MergeCompleted;
+ return MergeResult(UpdateState::MergeCompleted);
}
LOG(ERROR) << "Expected snapshot or snapshot-merge for device: " << name;
- return UpdateState::MergeFailed;
+ return MergeResult(UpdateState::MergeFailed, MergeFailureCode::UnknownTargetType);
}
// This check is expensive so it is only enabled for debugging.
@@ -1095,29 +1105,30 @@
std::string target_type;
DmTargetSnapshot::Status status;
if (!QuerySnapshotStatus(name, &target_type, &status)) {
- return UpdateState::MergeFailed;
+ return MergeResult(UpdateState::MergeFailed, MergeFailureCode::QuerySnapshotStatus);
}
if (target_type == "snapshot" &&
DecideMergePhase(snapshot_status) == MergePhase::SECOND_PHASE &&
update_status.merge_phase() == MergePhase::FIRST_PHASE) {
// The snapshot is not being merged because it's in the wrong phase.
- return UpdateState::None;
+ return MergeResult(UpdateState::None);
}
if (target_type != "snapshot-merge") {
// We can get here if we failed to rewrite the target type in
// InitiateMerge(). If we failed to create the target in first-stage
// init, boot would not succeed.
LOG(ERROR) << "Snapshot " << name << " has incorrect target type: " << target_type;
- return UpdateState::MergeFailed;
+ return MergeResult(UpdateState::MergeFailed, MergeFailureCode::ExpectedMergeTarget);
}
// These two values are equal when merging is complete.
if (status.sectors_allocated != status.metadata_sectors) {
if (snapshot_status.state() == SnapshotState::MERGE_COMPLETED) {
LOG(ERROR) << "Snapshot " << name << " is merging after being marked merge-complete.";
- return UpdateState::MergeFailed;
+ return MergeResult(UpdateState::MergeFailed,
+ MergeFailureCode::UnmergedSectorsAfterCompletion);
}
- return UpdateState::Merging;
+ return MergeResult(UpdateState::Merging);
}
// Merging is done. First, update the status file to indicate the merge
@@ -1130,18 +1141,18 @@
// snapshot device for this partition.
snapshot_status.set_state(SnapshotState::MERGE_COMPLETED);
if (!WriteSnapshotStatus(lock, snapshot_status)) {
- return UpdateState::MergeFailed;
+ return MergeResult(UpdateState::MergeFailed, MergeFailureCode::WriteStatus);
}
if (!OnSnapshotMergeComplete(lock, name, snapshot_status)) {
- return UpdateState::MergeNeedsReboot;
+ return MergeResult(UpdateState::MergeNeedsReboot);
}
- return UpdateState::MergeCompleted;
+ return MergeResult(UpdateState::MergeCompleted, MergeFailureCode::Ok);
}
-bool SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
+MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
std::vector<std::string> snapshots;
if (!ListSnapshots(lock, &snapshots)) {
- return UpdateState::MergeFailed;
+ return MergeFailureCode::ListSnapshots;
}
SnapshotUpdateStatus update_status = ReadSnapshotUpdateStatus(lock);
@@ -1150,24 +1161,27 @@
update_status.set_merge_phase(MergePhase::SECOND_PHASE);
if (!WriteSnapshotUpdateStatus(lock, update_status)) {
- return false;
+ return MergeFailureCode::WriteStatus;
}
- bool rewrote_all = true;
+ MergeFailureCode result = MergeFailureCode::Ok;
for (const auto& snapshot : snapshots) {
SnapshotStatus snapshot_status;
if (!ReadSnapshotStatus(lock, snapshot, &snapshot_status)) {
- return UpdateState::MergeFailed;
+ return MergeFailureCode::ReadStatus;
}
if (DecideMergePhase(snapshot_status) != MergePhase::SECOND_PHASE) {
continue;
}
- if (!SwitchSnapshotToMerge(lock, snapshot)) {
+ auto code = SwitchSnapshotToMerge(lock, snapshot);
+ if (code != MergeFailureCode::Ok) {
LOG(ERROR) << "Failed to switch snapshot to a second-phase merge target: " << snapshot;
- rewrote_all = false;
+ if (result == MergeFailureCode::Ok) {
+ result = code;
+ }
}
}
- return rewrote_all;
+ return result;
}
std::string SnapshotManager::GetSnapshotBootIndicatorPath() {
@@ -1199,7 +1213,7 @@
RemoveAllUpdateState(lock);
}
-void SnapshotManager::AcknowledgeMergeFailure() {
+void SnapshotManager::AcknowledgeMergeFailure(MergeFailureCode failure_code) {
// Log first, so worst case, we always have a record of why the calls below
// were being made.
LOG(ERROR) << "Merge could not be completed and will be marked as failed.";
@@ -1216,7 +1230,7 @@
return;
}
- WriteUpdateState(lock.get(), UpdateState::MergeFailed);
+ WriteUpdateState(lock.get(), UpdateState::MergeFailed, failure_code);
}
bool SnapshotManager::OnSnapshotMergeComplete(LockedFile* lock, const std::string& name,
@@ -2414,10 +2428,15 @@
return status;
}
-bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state) {
+bool SnapshotManager::WriteUpdateState(LockedFile* lock, UpdateState state,
+ MergeFailureCode failure_code) {
SnapshotUpdateStatus status;
status.set_state(state);
+ if (state == UpdateState::MergeFailed) {
+ status.set_merge_failure_code(failure_code);
+ }
+
// If we're transitioning between two valid states (eg, we're not beginning
// or ending an OTA), then make sure to propagate the compression bit.
if (!(state == UpdateState::Initiated || state == UpdateState::None)) {
diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp
index 8fae00b..3554dce 100644
--- a/fs_mgr/libsnapshot/snapshot_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_test.cpp
@@ -676,6 +676,18 @@
ASSERT_EQ(test_device->merge_status(), MergeStatus::MERGING);
}
+TEST_F(SnapshotTest, MergeFailureCode) {
+ ASSERT_TRUE(AcquireLock());
+
+ ASSERT_TRUE(sm->WriteUpdateState(lock_.get(), UpdateState::MergeFailed,
+ MergeFailureCode::ListSnapshots));
+ ASSERT_EQ(test_device->merge_status(), MergeStatus::MERGING);
+
+ SnapshotUpdateStatus status = sm->ReadSnapshotUpdateStatus(lock_.get());
+ ASSERT_EQ(status.state(), UpdateState::MergeFailed);
+ ASSERT_EQ(status.merge_failure_code(), MergeFailureCode::ListSnapshots);
+}
+
enum class Request { UNKNOWN, LOCK_SHARED, LOCK_EXCLUSIVE, UNLOCK, EXIT };
std::ostream& operator<<(std::ostream& os, Request request) {
switch (request) {