libsnapshot: Remove consistency check after merge
Merge thread will mark merge-completion after msync on header is
complete. This should be definitive enough to track the completion
status.
Bug: 305187301
Test: OTA on Pixel
Change-Id: I366dc5052fa91a6eacf394a1970200cdebc0e135
Signed-off-by: Akilesh Kailash <akailash@google.com>
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
index e7b0020..08a79ba 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
@@ -405,15 +405,6 @@
// Add new public entries above this line.
- // Helpers for failure injection.
- using MergeConsistencyChecker =
- std::function<MergeFailureCode(const std::string& name, const SnapshotStatus& status)>;
-
- void set_merge_consistency_checker(MergeConsistencyChecker checker) {
- merge_consistency_checker_ = checker;
- }
- MergeConsistencyChecker merge_consistency_checker() const { return merge_consistency_checker_; }
-
private:
FRIEND_TEST(SnapshotTest, CleanFirstStageMount);
FRIEND_TEST(SnapshotTest, CreateSnapshot);
@@ -657,8 +648,6 @@
MergeResult CheckMergeState(LockedFile* lock, const std::function<bool()>& before_cancel);
MergeResult CheckTargetMergeState(LockedFile* lock, const std::string& name,
const SnapshotUpdateStatus& update_status);
- MergeFailureCode CheckMergeConsistency(LockedFile* lock, const std::string& name,
- const SnapshotStatus& update_status);
auto UpdateStateToStr(enum UpdateState state);
// Get status or table information about a device-mapper node with a single target.
@@ -847,7 +836,6 @@
std::unique_ptr<SnapuserdClient> snapuserd_client_;
std::unique_ptr<LpMetadata> old_partition_metadata_;
std::optional<bool> is_snapshot_userspace_;
- MergeConsistencyChecker merge_consistency_checker_;
};
} // namespace snapshot
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index 4743a42..f5732fc 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -89,8 +89,6 @@
static constexpr char kRollbackIndicatorPath[] = "/metadata/ota/rollback-indicator";
static constexpr auto kUpdateStateCheckInterval = 2s;
-MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status);
-
// Note: IImageManager is an incomplete type in the header, so the default
// destructor doesn't work.
SnapshotManager::~SnapshotManager() {}
@@ -120,9 +118,7 @@
}
SnapshotManager::SnapshotManager(IDeviceInfo* device)
- : dm_(device->GetDeviceMapper()), device_(device), metadata_dir_(device_->GetMetadataDir()) {
- merge_consistency_checker_ = android::snapshot::CheckMergeConsistency;
-}
+ : dm_(device->GetDeviceMapper()), device_(device), metadata_dir_(device_->GetMetadataDir()) {}
static std::string GetCowName(const std::string& snapshot_name) {
return snapshot_name + "-cow";
@@ -1365,13 +1361,6 @@
}
}
- // Merge is complete at this point
-
- auto code = CheckMergeConsistency(lock, name, snapshot_status);
- if (code != MergeFailureCode::Ok) {
- return MergeResult(UpdateState::MergeFailed, code);
- }
-
// Merging is done. First, update the status file to indicate the merge
// is complete. We do this before calling OnSnapshotMergeComplete, even
// though this means the write is potentially wasted work (since in the
@@ -1401,80 +1390,6 @@
return GetCowName(snapshot);
}
-MergeFailureCode SnapshotManager::CheckMergeConsistency(LockedFile* lock, const std::string& name,
- const SnapshotStatus& status) {
- CHECK(lock);
-
- return merge_consistency_checker_(name, status);
-}
-
-MergeFailureCode CheckMergeConsistency(const std::string& name, const SnapshotStatus& status) {
- if (!status.using_snapuserd()) {
- // Do not try to verify old-style COWs yet.
- return MergeFailureCode::Ok;
- }
-
- auto& dm = DeviceMapper::Instance();
-
- std::string cow_image_name = GetMappedCowDeviceName(name, status);
- std::string cow_image_path;
- if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_path)) {
- LOG(ERROR) << "Failed to get path for cow device: " << cow_image_name;
- return MergeFailureCode::GetCowPathConsistencyCheck;
- }
-
- // First pass, count # of ops.
- size_t num_ops = 0;
- {
- unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_CLOEXEC));
- if (fd < 0) {
- PLOG(ERROR) << "Failed to open " << cow_image_name;
- return MergeFailureCode::OpenCowConsistencyCheck;
- }
-
- CowReader reader;
- if (!reader.Parse(std::move(fd))) {
- LOG(ERROR) << "Failed to parse cow " << cow_image_path;
- return MergeFailureCode::ParseCowConsistencyCheck;
- }
-
- num_ops = reader.get_num_total_data_ops();
- }
-
- // Second pass, try as hard as we can to get the actual number of blocks
- // the system thinks is merged.
- unique_fd fd(open(cow_image_path.c_str(), O_RDONLY | O_DIRECT | O_SYNC | O_CLOEXEC));
- if (fd < 0) {
- PLOG(ERROR) << "Failed to open direct " << cow_image_name;
- return MergeFailureCode::OpenCowDirectConsistencyCheck;
- }
-
- void* addr;
- size_t page_size = getpagesize();
- if (posix_memalign(&addr, page_size, page_size) < 0) {
- PLOG(ERROR) << "posix_memalign with page size " << page_size;
- return MergeFailureCode::MemAlignConsistencyCheck;
- }
-
- // COWs are always at least 2MB, this is guaranteed in snapshot creation.
- std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
- if (!android::base::ReadFully(fd, buffer.get(), page_size)) {
- PLOG(ERROR) << "Direct read failed " << cow_image_name;
- return MergeFailureCode::DirectReadConsistencyCheck;
- }
-
- auto header = reinterpret_cast<CowHeader*>(buffer.get());
- if (header->num_merge_ops != num_ops) {
- LOG(ERROR) << "COW consistency check failed, expected " << num_ops << " to be merged, "
- << "but " << header->num_merge_ops << " were actually recorded.";
- LOG(ERROR) << "Aborting merge progress for snapshot " << name
- << ", will try again next boot";
- return MergeFailureCode::WrongMergeCountConsistencyCheck;
- }
-
- return MergeFailureCode::Ok;
-}
-
MergeFailureCode SnapshotManager::MergeSecondPhaseSnapshots(LockedFile* lock) {
std::vector<std::string> snapshots;
if (!ListSnapshots(lock, &snapshots)) {
diff --git a/fs_mgr/libsnapshot/snapshot_test.cpp b/fs_mgr/libsnapshot/snapshot_test.cpp
index e506110..4e6b5e1 100644
--- a/fs_mgr/libsnapshot/snapshot_test.cpp
+++ b/fs_mgr/libsnapshot/snapshot_test.cpp
@@ -1601,97 +1601,6 @@
}
}
-// Test that a transient merge consistency check failure can resume properly.
-TEST_F(SnapshotUpdateTest, ConsistencyCheckResume) {
- if (!snapuserd_required_) {
- // b/179111359
- GTEST_SKIP() << "Skipping snapuserd test";
- }
-
- auto old_sys_size = GetSize(sys_);
- auto old_prd_size = GetSize(prd_);
-
- // Grow |sys| but shrink |prd|.
- SetSize(sys_, old_sys_size * 2);
- sys_->set_estimate_cow_size(8_MiB);
- SetSize(prd_, old_prd_size / 2);
- prd_->set_estimate_cow_size(1_MiB);
-
- AddOperationForPartitions();
-
- ASSERT_TRUE(sm->BeginUpdate());
- ASSERT_TRUE(sm->CreateUpdateSnapshots(manifest_));
- ASSERT_TRUE(WriteSnapshotAndHash(sys_));
- ASSERT_TRUE(WriteSnapshotAndHash(vnd_));
- ASSERT_TRUE(ShiftAllSnapshotBlocks("prd_b", old_prd_size));
-
- sync();
-
- // Assert that source partitions aren't affected.
- for (const auto& name : {"sys_a", "vnd_a", "prd_a"}) {
- ASSERT_TRUE(IsPartitionUnchanged(name));
- }
-
- ASSERT_TRUE(sm->FinishedSnapshotWrites(false));
-
- // Simulate shutting down the device.
- ASSERT_TRUE(UnmapAll());
-
- // After reboot, init does first stage mount.
- auto init = NewManagerForFirstStageMount("_b");
- ASSERT_NE(init, nullptr);
- ASSERT_TRUE(init->NeedSnapshotsInFirstStageMount());
- ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_));
-
- // Check that the target partitions have the same content.
- for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) {
- ASSERT_TRUE(IsPartitionUnchanged(name));
- }
-
- auto old_checker = init->merge_consistency_checker();
-
- init->set_merge_consistency_checker(
- [](const std::string&, const SnapshotStatus&) -> MergeFailureCode {
- return MergeFailureCode::WrongMergeCountConsistencyCheck;
- });
-
- // Initiate the merge and wait for it to be completed.
- if (ShouldSkipLegacyMerging()) {
- LOG(INFO) << "Skipping legacy merge in test";
- return;
- }
- ASSERT_TRUE(init->InitiateMerge());
- ASSERT_EQ(init->IsSnapuserdRequired(), snapuserd_required_);
- {
- // Check that the merge phase is FIRST_PHASE until at least one call
- // to ProcessUpdateState() occurs.
- ASSERT_TRUE(AcquireLock());
- auto local_lock = std::move(lock_);
- auto status = init->ReadSnapshotUpdateStatus(local_lock.get());
- ASSERT_EQ(status.merge_phase(), MergePhase::FIRST_PHASE);
- }
-
- // Merge should have failed.
- ASSERT_EQ(UpdateState::MergeFailed, init->ProcessUpdateState());
-
- // Simulate shutting down the device and creating partitions again.
- ASSERT_TRUE(UnmapAll());
-
- // Restore the checker.
- init->set_merge_consistency_checker(std::move(old_checker));
-
- ASSERT_TRUE(init->CreateLogicalAndSnapshotPartitions("super", snapshot_timeout_));
-
- // Complete the merge.
- ASSERT_EQ(UpdateState::MergeCompleted, init->ProcessUpdateState());
-
- // Check that the target partitions have the same content after the merge.
- for (const auto& name : {"sys_b", "vnd_b", "prd_b"}) {
- ASSERT_TRUE(IsPartitionUnchanged(name))
- << "Content of " << name << " changes after the merge";
- }
-}
-
// Test that if new system partitions uses empty space in super, that region is not snapshotted.
TEST_F(SnapshotUpdateTest, DirectWriteEmptySpace) {
GTEST_SKIP() << "b/141889746";
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
index e886ec3..97848fb 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
@@ -83,6 +83,10 @@
SNAP_LOG(DEBUG) << "Merge-complete %: " << merge_completion_percentage_
<< " num_merge_ops: " << ch->num_merge_ops
<< " total-ops: " << reader_->get_num_total_data_ops();
+
+ if (ch->num_merge_ops == reader_->get_num_total_data_ops()) {
+ MarkMergeComplete();
+ }
}
bool SnapshotHandler::CommitMerge(int num_merge_ops) {
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
index 6a1dab8..fa1e7a0 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
@@ -159,6 +159,7 @@
bool ShouldReconstructDataFromCow() { return populate_data_from_cow_; }
void FinishReconstructDataFromCow() { populate_data_from_cow_ = false; }
+ void MarkMergeComplete();
// Return the snapshot status
std::string GetMergeStatus();
@@ -245,6 +246,7 @@
int num_worker_threads_ = kNumWorkerThreads;
bool perform_verification_ = true;
bool resume_merge_ = false;
+ bool merge_complete_ = false;
std::unique_ptr<UpdateVerify> update_verify_;
std::shared_ptr<IBlockServerOpener> block_server_opener_;
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
index 8d090bf..2ad4ea1 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
@@ -386,10 +386,16 @@
}
}
+void SnapshotHandler::MarkMergeComplete() {
+ std::lock_guard<std::mutex> lock(lock_);
+ merge_complete_ = true;
+}
+
std::string SnapshotHandler::GetMergeStatus() {
bool merge_not_initiated = false;
bool merge_monitored = false;
bool merge_failed = false;
+ bool merge_complete = false;
{
std::lock_guard<std::mutex> lock(lock_);
@@ -405,10 +411,9 @@
if (io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED) {
merge_failed = true;
}
- }
- struct CowHeader* ch = reinterpret_cast<struct CowHeader*>(mapped_addr_);
- bool merge_complete = (ch->num_merge_ops == reader_->get_num_total_data_ops());
+ merge_complete = merge_complete_;
+ }
if (merge_not_initiated) {
// Merge was not initiated yet; however, we have merge completion
@@ -444,7 +449,6 @@
return "snapshot-merge-failed";
}
- // Merge complete
if (merge_complete) {
return "snapshot-merge-complete";
}