snapuserd: Add diagnostics for debugging races.
Add calls to pthread_setname_np for each thread.
Clarify error messages from io_uring calls that return -errno.
Add log messages for some failure paths that didn't have any.
Add an ostream overload for MERGE_IO_TRANSITION, and add an INVALID
state for initialization.
Bug: 288273605
Test: builds
Change-Id: Ic0681cbf0017af67bcf52b98db184a9b48752faf
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/handler_manager.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/handler_manager.cpp
index 50b9d43..d979e20 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/handler_manager.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/handler_manager.cpp
@@ -14,6 +14,7 @@
#include "handler_manager.h"
+#include <pthread.h>
#include <sys/eventfd.h>
#include <android-base/logging.h>
@@ -132,6 +133,8 @@
void SnapshotHandlerManager::RunThread(std::shared_ptr<HandlerThread> handler) {
LOG(INFO) << "Entering thread for handler: " << handler->misc_name();
+ pthread_setname_np(pthread_self(), "Handler");
+
if (!handler->snapuserd()->Start()) {
LOG(ERROR) << " Failed to launch all worker threads";
}
@@ -219,6 +222,7 @@
}
void SnapshotHandlerManager::MonitorMerge() {
+ pthread_setname_np(pthread_self(), "Merge Monitor");
while (!stop_monitor_merge_thread_) {
uint64_t testVal;
ssize_t ret =
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/merge_worker.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/merge_worker.cpp
index 2d69721..11b8d7c 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/merge_worker.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/merge_worker.cpp
@@ -15,6 +15,8 @@
*/
#include "merge_worker.h"
+#include <pthread.h>
+
#include "snapuserd_core.h"
#include "utility.h"
@@ -198,6 +200,7 @@
// Wait for RA thread to notify that the merge window
// is ready for merging.
if (!snapuserd_->WaitForMergeBegin()) {
+ SNAP_LOG(ERROR) << "Failed waiting for merge to begin";
return false;
}
@@ -303,7 +306,7 @@
// will fallback to synchronous I/O.
ret = io_uring_wait_cqe(ring_.get(), &cqe);
if (ret) {
- SNAP_LOG(ERROR) << "Merge: io_uring_wait_cqe failed: " << ret;
+ SNAP_LOG(ERROR) << "Merge: io_uring_wait_cqe failed: " << strerror(-ret);
status = false;
break;
}
@@ -546,6 +549,9 @@
bool MergeWorker::Run() {
SNAP_LOG(DEBUG) << "Waiting for merge begin...";
+
+ pthread_setname_np(pthread_self(), "MergeWorker");
+
if (!snapuserd_->WaitForMergeBegin()) {
SNAP_LOG(ERROR) << "Merge terminated early...";
return true;
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp
index e2c292b..b9ecfa5 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/read_worker.cpp
@@ -16,6 +16,8 @@
#include "read_worker.h"
+#include <pthread.h>
+
#include "snapuserd_core.h"
#include "utility.h"
@@ -209,6 +211,8 @@
bool ReadWorker::Run() {
SNAP_LOG(INFO) << "Processing snapshot I/O requests....";
+ pthread_setname_np(pthread_self(), "ReadWorker");
+
if (!SetThreadPriority(kNiceValueForMergeThreads)) {
SNAP_PLOG(ERROR) << "Failed to set thread priority";
}
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
index 5a82ca1..c295851 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
@@ -295,8 +295,6 @@
if (ra_thread_) {
ra_thread_status =
std::async(std::launch::async, &ReadAhead::RunThread, read_ahead_thread_.get());
-
- SNAP_LOG(INFO) << "Read-ahead thread started";
}
// Launch worker threads
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
index a45e0bc..622fc50 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
@@ -28,6 +28,7 @@
#include <iostream>
#include <limits>
#include <mutex>
+#include <ostream>
#include <string>
#include <thread>
#include <unordered_map>
@@ -68,12 +69,13 @@
#define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": "
enum class MERGE_IO_TRANSITION {
+ INVALID,  // Default for io_state_ before any state is assigned.
MERGE_READY,
MERGE_BEGIN,
MERGE_FAILED,
MERGE_COMPLETE,
IO_TERMINATED,
- READ_AHEAD_FAILURE,
+ READ_AHEAD_FAILURE
};
class MergeWorker;
@@ -220,7 +222,7 @@
bool populate_data_from_cow_ = false;
bool ra_thread_ = false;
int total_ra_blocks_merged_ = 0;
- MERGE_IO_TRANSITION io_state_;
+ MERGE_IO_TRANSITION io_state_ = MERGE_IO_TRANSITION::INVALID;
std::unique_ptr<ReadAhead> read_ahead_thread_;
std::unordered_map<uint64_t, void*> read_ahead_buffer_map_;
@@ -246,5 +248,7 @@
std::shared_ptr<IBlockServerOpener> block_server_opener_;
};
+std::ostream& operator<<(std::ostream& os, MERGE_IO_TRANSITION value);
+
} // namespace snapshot
} // namespace android
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_readahead.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_readahead.cpp
index 8b799ea..d2128c5 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_readahead.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_readahead.cpp
@@ -16,6 +16,8 @@
#include "snapuserd_readahead.h"
+#include <pthread.h>
+
#include "snapuserd_core.h"
#include "utility.h"
@@ -428,7 +430,7 @@
// will fallback to synchronous I/O.
int ret = io_uring_wait_cqe(ring_.get(), &cqe);
if (ret) {
- SNAP_LOG(ERROR) << "Read-ahead - io_uring_wait_cqe failed: " << ret;
+ SNAP_LOG(ERROR) << "Read-ahead - io_uring_wait_cqe failed: " << strerror(-ret);
status = false;
break;
}
@@ -691,6 +693,7 @@
// window. If there is a crash during this time frame, merge should resume
// based on the contents of the scratch space.
if (!snapuserd_->WaitForMergeReady()) {
+ SNAP_LOG(ERROR) << "ReadAhead failed to wait for merge ready";
return false;
}
@@ -752,6 +755,10 @@
}
bool ReadAhead::RunThread() {
+ SNAP_LOG(INFO) << "ReadAhead thread started.";
+
+ pthread_setname_np(pthread_self(), "ReadAhead");
+
if (!InitializeFds()) {
return false;
}
@@ -770,6 +777,7 @@
SNAP_PLOG(ERROR) << "Failed to set thread priority";
}
+ SNAP_LOG(INFO) << "ReadAhead processing.";
while (!RAIterDone()) {
if (!ReadAheadIOStart()) {
break;
@@ -780,7 +788,7 @@
CloseFds();
reader_->CloseCowFd();
- SNAP_LOG(INFO) << " ReadAhead thread terminating....";
+ SNAP_LOG(INFO) << " ReadAhead thread terminating.";
return true;
}
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
index 28c9f68..52e4f89 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_transitions.cpp
@@ -199,6 +199,7 @@
if (io_state_ == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED) {
+ SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
return false;
}
}
@@ -211,6 +212,7 @@
if (io_state_ == MERGE_IO_TRANSITION::READ_AHEAD_FAILURE ||
io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED) {
+ SNAP_LOG(ERROR) << "WaitForMergeBegin failed with state: " << io_state_;
return false;
}
@@ -277,6 +279,7 @@
if (io_state_ == MERGE_IO_TRANSITION::MERGE_FAILED ||
io_state_ == MERGE_IO_TRANSITION::MERGE_COMPLETE ||
io_state_ == MERGE_IO_TRANSITION::IO_TERMINATED) {
+ SNAP_LOG(ERROR) << "Wait for merge ready failed: " << io_state_;
return false;
}
return true;
@@ -668,5 +671,26 @@
}
}
+// Writes the state's name; no default label so -Wswitch can flag enumerators added later.
+std::ostream& operator<<(std::ostream& os, MERGE_IO_TRANSITION value) {
+    switch (value) {
+        case MERGE_IO_TRANSITION::INVALID:
+            return os << "INVALID";
+        case MERGE_IO_TRANSITION::MERGE_READY:
+            return os << "MERGE_READY";
+        case MERGE_IO_TRANSITION::MERGE_BEGIN:
+            return os << "MERGE_BEGIN";
+        case MERGE_IO_TRANSITION::MERGE_FAILED:
+            return os << "MERGE_FAILED";
+        case MERGE_IO_TRANSITION::MERGE_COMPLETE:
+            return os << "MERGE_COMPLETE";
+        case MERGE_IO_TRANSITION::IO_TERMINATED:
+            return os << "IO_TERMINATED";
+        case MERGE_IO_TRANSITION::READ_AHEAD_FAILURE:
+            return os << "READ_AHEAD_FAILURE";
+    }
+    return os << "unknown";
+}
+
} // namespace snapshot
} // namespace android