libsnapshot: Get DaemonTransition test passing again.
This fixes a number of small bugs in libsnapshot. It also refactors the
handler list a bit. Previously, it was a list of unique_ptrs. Now it is
a list of shared_ptrs to simplify ownership.
Additionally, Snapuserd is now keyed solely on the misc device name.
This allows two identical snapshots to run in the same daemon, with
different control names (a scenario that comes up in the
DaemonTransition test). As part of this change, the two-stage
initialization process has been refactored slightly. The "init" message
sets all the device paths, and the "start" message needs only the misc
name.
Both the init and start messages now validate that no duplicate handlers
exist, and that we're not overwriting any previous thread.
This cleanup also fixes a bug in DmUserHandler cleanup - if a control
device shut down raced with WaitForDelete(), the std::thread object
would delete without a call to detach() or join(). In the new
RemoveHandler(), we now correctly detach() in this scenario.
This also fixes a bug where, if a COW had no partition component (it
only resided on /data), the second-stage transition would fail because
it used the wrong device-mapper name.
Bug: N/A
Test: vts_libsnapshot_test
Change-Id: Ib4a281a3b5fe665c727c7077672e3c6b0b3abdba
diff --git a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
index 7eddf8c..5483fd0 100644
--- a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
+++ b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
@@ -288,20 +288,20 @@
}
void SnapuserdTest::InitCowDevices() {
- system_blksize_ = client_->InitDmUserCow(cow_system_->path);
+ system_blksize_ = client_->InitDmUserCow(system_device_ctrl_name_, cow_system_->path,
+ system_a_loop_->device());
ASSERT_NE(system_blksize_, 0);
- product_blksize_ = client_->InitDmUserCow(cow_product_->path);
+ product_blksize_ = client_->InitDmUserCow(product_device_ctrl_name_, cow_product_->path,
+ product_a_loop_->device());
ASSERT_NE(product_blksize_, 0);
}
void SnapuserdTest::InitDaemon() {
- bool ok = client_->InitializeSnapuserd(cow_system_->path, system_a_loop_->device(),
- GetSystemControlPath());
+ bool ok = client_->AttachDmUser(system_device_ctrl_name_);
ASSERT_TRUE(ok);
- ok = client_->InitializeSnapuserd(cow_product_->path, product_a_loop_->device(),
- GetProductControlPath());
+ ok = client_->AttachDmUser(product_device_ctrl_name_);
ASSERT_TRUE(ok);
}
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd.h b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd.h
index cd8b080..aa7dc40 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd.h
@@ -61,12 +61,15 @@
class Snapuserd final {
public:
- bool InitBackingAndControlDevice(std::string& backing_device, std::string& control_device);
- bool InitCowDevice(std::string& cow_device);
+ Snapuserd(const std::string& misc_name, const std::string& cow_device,
+ const std::string& backing_device);
+ bool InitBackingAndControlDevice();
+ bool InitCowDevice();
int Run();
const std::string& GetControlDevicePath() { return control_device_; }
- const std::string& GetCowDevice() { return cow_device_; }
+ const std::string& GetMiscName() { return misc_name_; }
uint64_t GetNumSectors() { return num_sectors_; }
+ bool IsAttached() const { return ctrl_fd_ >= 0; }
private:
int ReadDmUserHeader();
@@ -96,6 +99,7 @@
std::string cow_device_;
std::string backing_store_device_;
std::string control_device_;
+ std::string misc_name_;
unique_fd cow_fd_;
unique_fd backing_store_fd_;
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h
index b5e5a96..0e9ba9e 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_client.h
@@ -58,9 +58,19 @@
std::chrono::milliseconds timeout_ms);
bool StopSnapuserd();
- uint64_t InitDmUserCow(const std::string& cow_device);
- bool InitializeSnapuserd(const std::string& cow_device, const std::string& backing_device,
- const std::string& control_device);
+
+ // Initializing a snapuserd handler is a three-step process:
+ //
+ // 1. Client invokes InitDmUserCow. This creates the snapuserd handler and validates the
+ // COW. The number of sectors required for the dm-user target is returned.
+ // 2. Client creates the device-mapper device with the dm-user target.
+ // 3. Client calls AttachControlDevice.
+ //
+ // The misc_name must be the "misc_name" given to dm-user in step 2.
+ //
+ uint64_t InitDmUserCow(const std::string& misc_name, const std::string& cow_device,
+ const std::string& backing_device);
+ bool AttachDmUser(const std::string& misc_name);
// Wait for snapuserd to disassociate with a dm-user control device. This
// must ONLY be called if the control device has already been deleted.
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_server.h b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_server.h
index be48400..cadfd71 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_server.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_server.h
@@ -56,7 +56,7 @@
const std::unique_ptr<Snapuserd>& snapuserd() const { return snapuserd_; }
std::thread& thread() { return thread_; }
- const std::string& GetControlDevice() const;
+ const std::string& GetMiscName() const;
};
class Stoppable {
@@ -85,7 +85,9 @@
std::vector<struct pollfd> watched_fds_;
std::mutex lock_;
- std::vector<std::unique_ptr<DmUserHandler>> dm_users_;
+
+ using HandlerList = std::vector<std::shared_ptr<DmUserHandler>>;
+ HandlerList dm_users_;
void AddWatchedFd(android::base::borrowed_fd fd);
void AcceptClient();
@@ -95,7 +97,7 @@
bool Receivemsg(android::base::borrowed_fd fd, const std::string& str);
void ShutdownThreads();
- bool WaitForDelete(const std::string& control_device);
+ bool RemoveHandler(const std::string& control_device, bool wait);
DaemonOperations Resolveop(std::string& input);
std::string GetDaemonStatus();
void Parsemsg(std::string const& msg, const char delim, std::vector<std::string>& out);
@@ -103,11 +105,11 @@
void SetTerminating() { terminating_ = true; }
bool IsTerminating() { return terminating_; }
- void RunThread(DmUserHandler* handler);
+ void RunThread(std::shared_ptr<DmUserHandler> handler);
- // Remove a DmUserHandler from dm_users_, searching by its control device.
- // If none is found, return nullptr.
- std::unique_ptr<DmUserHandler> RemoveHandler(const std::string& control_device);
+ // Find a DmUserHandler within a lock.
+ HandlerList::iterator FindHandler(std::lock_guard<std::mutex>* proof_of_lock,
+ const std::string& misc_name);
public:
SnapuserdServer() { terminating_ = false; }
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index b79dbf1..6595707 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -397,7 +397,7 @@
return false;
}
- uint64_t base_sectors = snapuserd_client_->InitDmUserCow(cow_file);
+ uint64_t base_sectors = snapuserd_client_->InitDmUserCow(misc_name, cow_file, base_device);
if (base_sectors == 0) {
LOG(ERROR) << "Failed to retrieve base_sectors from Snapuserd";
return false;
@@ -410,7 +410,12 @@
}
auto control_device = "/dev/dm-user/" + misc_name;
- return snapuserd_client_->InitializeSnapuserd(cow_file, base_device, control_device);
+ if (!android::fs_mgr::WaitForFile(control_device, timeout_ms)) {
+ LOG(ERROR) << "Timed out waiting for dm-user misc device: " << control_device;
+ return false;
+ }
+
+ return snapuserd_client_->AttachDmUser(misc_name);
}
bool SnapshotManager::MapSnapshot(LockedFile* lock, const std::string& name,
@@ -1310,28 +1315,34 @@
size_t num_cows = 0;
size_t ok_cows = 0;
for (const auto& snapshot : snapshots) {
- std::string cow_name = GetDmUserCowName(snapshot);
- if (dm.GetState(cow_name) == DmDeviceState::INVALID) {
+ std::string user_cow_name = GetDmUserCowName(snapshot);
+ if (dm.GetState(user_cow_name) == DmDeviceState::INVALID) {
continue;
}
DeviceMapper::TargetInfo target;
- if (!GetSingleTarget(cow_name, TableQuery::Table, &target)) {
+ if (!GetSingleTarget(user_cow_name, TableQuery::Table, &target)) {
continue;
}
auto target_type = DeviceMapper::GetTargetType(target.spec);
if (target_type != "user") {
- LOG(ERROR) << "Unexpected target type for " << cow_name << ": " << target_type;
+ LOG(ERROR) << "Unexpected target type for " << user_cow_name << ": " << target_type;
continue;
}
num_cows++;
+ SnapshotStatus snapshot_status;
+ if (!ReadSnapshotStatus(lock.get(), snapshot, &snapshot_status)) {
+ LOG(ERROR) << "Unable to read snapshot status: " << snapshot;
+ continue;
+ }
+
DmTable table;
- table.Emplace<DmTargetUser>(0, target.spec.length, cow_name);
- if (!dm.LoadTableAndActivate(cow_name, table)) {
- LOG(ERROR) << "Unable to swap tables for " << cow_name;
+ table.Emplace<DmTargetUser>(0, target.spec.length, user_cow_name);
+ if (!dm.LoadTableAndActivate(user_cow_name, table)) {
+ LOG(ERROR) << "Unable to swap tables for " << user_cow_name;
continue;
}
@@ -1341,20 +1352,30 @@
continue;
}
- std::string cow_device;
- if (!dm.GetDmDevicePathByName(GetCowName(snapshot), &cow_device)) {
- LOG(ERROR) << "Could not get device path for " << GetCowName(snapshot);
+ // If no partition was created (the COW exists entirely on /data), the
+ // device-mapper layering is different than if we had a partition.
+ std::string cow_image_name;
+ if (snapshot_status.cow_partition_size() == 0) {
+ cow_image_name = GetCowImageDeviceName(snapshot);
+ } else {
+ cow_image_name = GetCowName(snapshot);
+ }
+
+ std::string cow_image_device;
+ if (!dm.GetDmDevicePathByName(cow_image_name, &cow_image_device)) {
+ LOG(ERROR) << "Could not get device path for " << cow_image_name;
continue;
}
// Wait for ueventd to acknowledge and create the control device node.
- std::string control_device = "/dev/dm-user/" + cow_name;
+ std::string control_device = "/dev/dm-user/" + user_cow_name;
if (!android::fs_mgr::WaitForFile(control_device, 10s)) {
LOG(ERROR) << "Could not find control device: " << control_device;
continue;
}
- uint64_t base_sectors = snapuserd_client_->InitDmUserCow(cow_device);
+ uint64_t base_sectors =
+ snapuserd_client_->InitDmUserCow(user_cow_name, cow_image_device, backing_device);
if (base_sectors == 0) {
// Unrecoverable as metadata reads from cow device failed
LOG(FATAL) << "Failed to retrieve base_sectors from Snapuserd";
@@ -1363,10 +1384,10 @@
CHECK(base_sectors == target.spec.length);
- if (!snapuserd_client_->InitializeSnapuserd(cow_device, backing_device, control_device)) {
+ if (!snapuserd_client_->AttachDmUser(user_cow_name)) {
// This error is unrecoverable. We cannot proceed because reads to
// the underlying device will fail.
- LOG(FATAL) << "Could not initialize snapuserd for " << cow_name;
+ LOG(FATAL) << "Could not initialize snapuserd for " << user_cow_name;
return false;
}
@@ -2053,13 +2074,13 @@
return false;
}
- auto control_device = "/dev/dm-user/" + dm_user_name;
- if (!snapuserd_client_->WaitForDeviceDelete(control_device)) {
+ if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
return false;
}
// Ensure the control device is gone so we don't run into ABA problems.
+ auto control_device = "/dev/dm-user/" + dm_user_name;
if (!android::fs_mgr::WaitForFileDeleted(control_device, 10s)) {
LOG(ERROR) << "Timed out waiting for " << control_device << " to unlink";
return false;
diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp
index 7c393fc..80b5734 100644
--- a/fs_mgr/libsnapshot/snapuserd.cpp
+++ b/fs_mgr/libsnapshot/snapuserd.cpp
@@ -66,6 +66,14 @@
return header;
}
+Snapuserd::Snapuserd(const std::string& misc_name, const std::string& cow_device,
+ const std::string& backing_device) {
+ misc_name_ = misc_name;
+ cow_device_ = cow_device;
+ backing_store_device_ = backing_device;
+ control_device_ = "/dev/dm-user/" + misc_name;
+}
+
// Construct kernel COW header in memory
// This header will be in sector 0. The IO
// request will always be 4k. After constructing
@@ -672,9 +680,7 @@
return true;
}
-bool Snapuserd::InitCowDevice(std::string& cow_device) {
- cow_device_ = cow_device;
-
+bool Snapuserd::InitCowDevice() {
cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
if (cow_fd_ < 0) {
PLOG(ERROR) << "Open Failed: " << cow_device_;
@@ -691,11 +697,7 @@
return ReadMetadata();
}
-bool Snapuserd::InitBackingAndControlDevice(std::string& backing_device,
- std::string& control_device) {
- backing_store_device_ = backing_device;
- control_device_ = control_device;
-
+bool Snapuserd::InitBackingAndControlDevice() {
backing_store_fd_.reset(open(backing_store_device_.c_str(), O_RDONLY));
if (backing_store_fd_ < 0) {
PLOG(ERROR) << "Open Failed: " << backing_store_device_;
diff --git a/fs_mgr/libsnapshot/snapuserd_client.cpp b/fs_mgr/libsnapshot/snapuserd_client.cpp
index d7fdb43..a5d2061 100644
--- a/fs_mgr/libsnapshot/snapuserd_client.cpp
+++ b/fs_mgr/libsnapshot/snapuserd_client.cpp
@@ -183,10 +183,8 @@
return true;
}
-bool SnapuserdClient::InitializeSnapuserd(const std::string& cow_device,
- const std::string& backing_device,
- const std::string& control_device) {
- std::string msg = "start," + cow_device + "," + backing_device + "," + control_device;
+bool SnapuserdClient::AttachDmUser(const std::string& misc_name) {
+ std::string msg = "start," + misc_name;
if (!Sendmsg(msg)) {
LOG(ERROR) << "Failed to send message " << msg << " to snapuserd daemon";
return false;
@@ -202,8 +200,10 @@
return true;
}
-uint64_t SnapuserdClient::InitDmUserCow(const std::string& cow_device) {
- std::string msg = "init," + cow_device;
+uint64_t SnapuserdClient::InitDmUserCow(const std::string& misc_name, const std::string& cow_device,
+ const std::string& backing_device) {
+ std::vector<std::string> parts = {"init", misc_name, cow_device, backing_device};
+ std::string msg = android::base::Join(parts, ",");
if (!Sendmsg(msg)) {
LOG(ERROR) << "Failed to send message " << msg << " to snapuserd daemon";
return 0;
@@ -213,7 +213,7 @@
std::vector<std::string> input = android::base::Split(str, ",");
- if (input[0] != "success") {
+ if (input.empty() || input[0] != "success") {
LOG(ERROR) << "Failed to receive number of sectors for " << msg << " from snapuserd daemon";
return 0;
}
diff --git a/fs_mgr/libsnapshot/snapuserd_server.cpp b/fs_mgr/libsnapshot/snapuserd_server.cpp
index 3aa6136..9d065b6 100644
--- a/fs_mgr/libsnapshot/snapuserd_server.cpp
+++ b/fs_mgr/libsnapshot/snapuserd_server.cpp
@@ -74,7 +74,7 @@
StopThreads();
// Acquire the thread list within the lock.
- std::vector<std::unique_ptr<DmUserHandler>> dm_users;
+ std::vector<std::shared_ptr<DmUserHandler>> dm_users;
{
std::lock_guard<std::mutex> guard(lock_);
dm_users = std::move(dm_users_);
@@ -87,8 +87,8 @@
}
}
-const std::string& DmUserHandler::GetControlDevice() const {
- return snapuserd_->GetControlDevicePath();
+const std::string& DmUserHandler::GetMiscName() const {
+ return snapuserd_->GetMiscName();
}
bool SnapuserdServer::Sendmsg(android::base::borrowed_fd fd, const std::string& msg) {
@@ -126,16 +126,16 @@
switch (op) {
case DaemonOperations::INIT: {
// Message format:
- // init,<cow_device_path>
+ // init,<misc_name>,<cow_device_path>,<control_device>
//
// Reads the metadata and send the number of sectors
- if (out.size() != 2) {
+ if (out.size() != 4) {
LOG(ERROR) << "Malformed init message, " << out.size() << " parts";
return Sendmsg(fd, "fail");
}
- auto snapuserd = std::make_unique<Snapuserd>();
- if (!snapuserd->InitCowDevice(out[1])) {
+ auto snapuserd = std::make_unique<Snapuserd>(out[1], out[2], out[3]);
+ if (!snapuserd->InitCowDevice()) {
LOG(ERROR) << "Failed to initialize Snapuserd";
return Sendmsg(fd, "fail");
}
@@ -145,6 +145,10 @@
auto handler = std::make_unique<DmUserHandler>(std::move(snapuserd));
{
std::lock_guard<std::mutex> lock(lock_);
+ if (FindHandler(&lock, out[1]) != dm_users_.end()) {
+ LOG(ERROR) << "Handler already exists: " << out[1];
+ return Sendmsg(fd, "fail");
+ }
dm_users_.push_back(std::move(handler));
}
@@ -152,37 +156,30 @@
}
case DaemonOperations::START: {
// Message format:
- // start,<cow_device_path>,<source_device_path>,<control_device>
+ // start,<misc_name>
//
// Start the new thread which binds to dm-user misc device
- if (out.size() != 4) {
+ if (out.size() != 2) {
LOG(ERROR) << "Malformed start message, " << out.size() << " parts";
return Sendmsg(fd, "fail");
}
- bool found = false;
- {
- std::lock_guard<std::mutex> lock(lock_);
- auto iter = dm_users_.begin();
- while (iter != dm_users_.end()) {
- if ((*iter)->snapuserd()->GetCowDevice() == out[1]) {
- if (!((*iter)->snapuserd()->InitBackingAndControlDevice(out[2], out[3]))) {
- LOG(ERROR) << "Failed to initialize control device: " << out[3];
- break;
- }
- (*iter)->thread() = std::thread(
- std::bind(&SnapuserdServer::RunThread, this, (*iter).get()));
- found = true;
- break;
- }
- iter++;
- }
- }
- if (found) {
- return Sendmsg(fd, "success");
- } else {
+ std::lock_guard<std::mutex> lock(lock_);
+ auto iter = FindHandler(&lock, out[1]);
+ if (iter == dm_users_.end()) {
+ LOG(ERROR) << "Could not find handler: " << out[1];
return Sendmsg(fd, "fail");
}
+ if ((*iter)->snapuserd()->IsAttached()) {
+ LOG(ERROR) << "Tried to re-attach control device: " << out[1];
+ return Sendmsg(fd, "fail");
+ }
+ if (!((*iter)->snapuserd()->InitBackingAndControlDevice())) {
+ LOG(ERROR) << "Failed to initialize control device: " << out[1];
+ return Sendmsg(fd, "fail");
+ }
+ (*iter)->thread() = std::thread(std::bind(&SnapuserdServer::RunThread, this, *iter));
+ return Sendmsg(fd, "success");
}
case DaemonOperations::STOP: {
// Message format: stop
@@ -207,12 +204,12 @@
}
case DaemonOperations::DELETE: {
// Message format:
- // delete,<control_device_path>
+ // delete,<misc_name>
if (out.size() != 2) {
LOG(ERROR) << "Malformed delete message, " << out.size() << " parts";
return Sendmsg(fd, "fail");
}
- if (!WaitForDelete(out[1])) {
+ if (!RemoveHandler(out[1], true)) {
return Sendmsg(fd, "fail");
}
return Sendmsg(fd, "success");
@@ -225,8 +222,8 @@
}
}
-void SnapuserdServer::RunThread(DmUserHandler* handler) {
- LOG(INFO) << "Entering thread for handler: " << handler->GetControlDevice();
+void SnapuserdServer::RunThread(std::shared_ptr<DmUserHandler> handler) {
+ LOG(INFO) << "Entering thread for handler: " << handler->GetMiscName();
while (!StopRequested()) {
if (handler->snapuserd()->Run() < 0) {
@@ -235,15 +232,11 @@
}
}
- LOG(INFO) << "Exiting thread for handler: " << handler->GetControlDevice();
+ LOG(INFO) << "Exiting thread for handler: " << handler->GetMiscName();
- if (auto client = RemoveHandler(handler->GetControlDevice())) {
- // The main thread did not receive a WaitForDelete request for this
- // control device. Since we transferred ownership within the lock,
- // we know join() was never called, and will never be called. We can
- // safely detach here.
- client->thread().detach();
- }
+ // If the main thread called /emoveHandler, the handler was already removed
+ // from within the lock, and calling RemoveHandler again has no effect.
+ RemoveHandler(handler->GetMiscName(), false);
}
bool SnapuserdServer::Start(const std::string& socketname) {
@@ -336,34 +329,37 @@
SetTerminating();
}
-std::unique_ptr<DmUserHandler> SnapuserdServer::RemoveHandler(const std::string& control_device) {
- std::unique_ptr<DmUserHandler> client;
- {
- std::lock_guard<std::mutex> lock(lock_);
- auto iter = dm_users_.begin();
- while (iter != dm_users_.end()) {
- if ((*iter)->GetControlDevice() == control_device) {
- client = std::move(*iter);
- iter = dm_users_.erase(iter);
- break;
- }
- iter++;
+auto SnapuserdServer::FindHandler(std::lock_guard<std::mutex>* proof_of_lock,
+ const std::string& misc_name) -> HandlerList::iterator {
+ CHECK(proof_of_lock);
+
+ for (auto iter = dm_users_.begin(); iter != dm_users_.end(); iter++) {
+ if ((*iter)->GetMiscName() == misc_name) {
+ return iter;
}
}
- return client;
+ return dm_users_.end();
}
-bool SnapuserdServer::WaitForDelete(const std::string& control_device) {
- auto client = RemoveHandler(control_device);
+bool SnapuserdServer::RemoveHandler(const std::string& misc_name, bool wait) {
+ std::shared_ptr<DmUserHandler> handler;
+ {
+ std::lock_guard<std::mutex> lock(lock_);
- // Client already deleted.
- if (!client) {
- return true;
+ auto iter = FindHandler(&lock, misc_name);
+ if (iter == dm_users_.end()) {
+ // Client already deleted.
+ return true;
+ }
+ handler = std::move(*iter);
+ dm_users_.erase(iter);
}
- auto& th = client->thread();
- if (th.joinable()) {
+ auto& th = handler->thread();
+ if (th.joinable() && wait) {
th.join();
+ } else if (handler->snapuserd()->IsAttached()) {
+ th.detach();
}
return true;
}