snapuserd: Fix race condition in MergeWorker::WaitForMergeBegin.
The pre-conditions and post-conditions to cv.wait() in this function are
different, which leads to a race. If the merge is never initiated, but
I/O is terminated, the wait() will hang.
This is easy to hit in tests where I/O terminates quickly, but should
not affect actual OTA flow in practice.
Bug: 269361087
Test: snapuserd_test
Change-Id: I85ed9c357031ec6fd74d6a3f49d85e6cbb037eee
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
index 52e4f89..f3e0019 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
@@ -189,35 +189,32 @@
cv.notify_all();
}
+static inline bool IsMergeBeginError(MERGE_IO_TRANSITION io_state) {
+ return io_state == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
+ io_state == MERGE_IO_TRANSITION::IO_TERMINATED;
+}
+
// Invoked by Merge thread - Waits on RA thread to resume merging. Will
// be waken up RA thread.
bool SnapshotHandler::WaitForMergeBegin() {
- {
- std::unique_lock<std::mutex> lock(lock_);
- while (!MergeInitiated()) {
- cv.wait(lock);
+ std::unique_lock<std::mutex> lock(lock_);
- if (io_state_ == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
- io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED) {
- SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
- return false;
- }
- }
+ cv.wait(lock, [this]() -> bool { return MergeInitiated() || IsMergeBeginError(io_state_); });
- while (!(io_state_ == MERGE_IO_TRANSITION::MERGE_BEGIN ||
- io_state_ == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
- io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED)) {
- cv.wait(lock);
- }
-
- if (io_state_ == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
- io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED) {
- SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
- return false;
- }
-
- return true;
+ if (IsMergeBeginError(io_state_)) {
+ SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
+ return false;
}
+
+ cv.wait(lock, [this]() -> bool {
+ return io_state_ == MERGE_IO_TRANSITION::MERGE_BEGIN || IsMergeBeginError(io_state_);
+ });
+
+ if (IsMergeBeginError(io_state_)) {
+ SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
+ return false;
+ }
+ return true;
}
// Invoked by RA thread - Flushes the RA block to scratch space if necessary