libsnapshot: No transition of snapuserd during second stage init
When there is a transition of daemon from selinux stage, we observe
intermittent hangs during OTA. This is a workaround wherein
we don't do the transition and allow the daemon to continue which
was spawned during selinux stage.
Bug: 179331261
Test: Incremental OTA, full OTA on pixel
Signed-off-by: Akilesh Kailash <akailash@google.com>
Change-Id: I622a0ed8afcd404bac4919b1de00728de2c12eaf
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index 90225f8..3485474 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -1234,6 +1234,25 @@
return true;
}
+static bool DeleteDmDevice(const std::string& name, const std::chrono::milliseconds& timeout_ms) {
+ auto start = std::chrono::steady_clock::now();
+ auto& dm = DeviceMapper::Instance();
+ while (true) {
+ if (dm.DeleteDeviceIfExists(name)) {
+ break;
+ }
+ auto now = std::chrono::steady_clock::now();
+ auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
+ if (elapsed >= timeout_ms) {
+ LOG(ERROR) << "DeleteDevice timeout: " << name;
+ return false;
+ }
+ std::this_thread::sleep_for(250ms);
+ }
+
+ return true;
+}
+
bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
const SnapshotStatus& status) {
auto& dm = DeviceMapper::Instance();
@@ -1292,10 +1311,11 @@
if (!dm.DeleteDeviceIfExists(base_name)) {
LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
}
- auto source_name = GetSourceDeviceName(name);
- if (!dm.DeleteDeviceIfExists(source_name)) {
- LOG(ERROR) << "Unable to delete source device for snapshot: " << source_name;
+
+ if (!DeleteDmDevice(GetSourceDeviceName(name), 4000ms)) {
+ LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
}
+
return true;
}
@@ -1387,9 +1407,6 @@
}
auto misc_name = user_cow_name;
- if (transition == InitTransition::SELINUX_DETACH) {
- misc_name += "-selinux";
- }
DmTable table;
table.Emplace<DmTargetUser>(0, target.spec.length, misc_name);
@@ -2122,15 +2139,12 @@
CHECK(lock);
if (!EnsureImageManager()) return false;
- auto& dm = DeviceMapper::Instance();
-
if (UpdateUsesCompression(lock) && !UnmapDmUserDevice(name)) {
return false;
}
- auto cow_name = GetCowName(name);
- if (!dm.DeleteDeviceIfExists(cow_name)) {
- LOG(ERROR) << "Cannot unmap " << cow_name;
+ if (!DeleteDmDevice(GetCowName(name), 4000ms)) {
+ LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
return false;
}
@@ -2155,12 +2169,11 @@
return false;
}
- if (!EnsureSnapuserdConnected()) {
- return false;
- }
- if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
- LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
- return false;
+ if (EnsureSnapuserdConnected()) {
+ if (!snapuserd_client_->WaitForDeviceDelete(dm_user_name)) {
+ LOG(ERROR) << "Failed to wait for " << dm_user_name << " control device to delete";
+ return false;
+ }
}
// Ensure the control device is gone so we don't run into ABA problems.
diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp
index 82db0d3..057acad 100644
--- a/fs_mgr/libsnapshot/snapuserd.cpp
+++ b/fs_mgr/libsnapshot/snapuserd.cpp
@@ -534,7 +534,7 @@
bool prev_copy_op = false;
bool metadata_found = false;
- SNAP_LOG(DEBUG) << "ReadMetadata Start...";
+ SNAP_LOG(DEBUG) << "ReadMetadata: Parsing cow file";
if (!reader_->Parse(cow_fd_)) {
SNAP_LOG(ERROR) << "Failed to parse";
diff --git a/fs_mgr/libsnapshot/snapuserd.h b/fs_mgr/libsnapshot/snapuserd.h
index c01fee3..4587881 100644
--- a/fs_mgr/libsnapshot/snapuserd.h
+++ b/fs_mgr/libsnapshot/snapuserd.h
@@ -70,6 +70,11 @@
const std::string& GetMiscName() { return misc_name_; }
uint64_t GetNumSectors() { return num_sectors_; }
bool IsAttached() const { return ctrl_fd_ >= 0; }
+ void CloseFds() {
+ ctrl_fd_ = {};
+ cow_fd_ = {};
+ backing_store_fd_ = {};
+ }
private:
bool DmuserReadRequest();
diff --git a/fs_mgr/libsnapshot/snapuserd_server.cpp b/fs_mgr/libsnapshot/snapuserd_server.cpp
index 38abaec..68a00a0 100644
--- a/fs_mgr/libsnapshot/snapuserd_server.cpp
+++ b/fs_mgr/libsnapshot/snapuserd_server.cpp
@@ -210,6 +210,8 @@
}
}
+ handler->snapuserd()->CloseFds();
+
auto misc_name = handler->misc_name();
LOG(INFO) << "Handler thread about to exit: " << misc_name;
diff --git a/init/init.cpp b/init/init.cpp
index 0c752a9..b08037a 100644
--- a/init/init.cpp
+++ b/init/init.cpp
@@ -723,37 +723,6 @@
}
}
-static Result<void> TransitionSnapuserdAction(const BuiltinArguments&) {
- if (!SnapshotManager::IsSnapshotManagerNeeded() ||
- !android::base::GetBoolProperty(android::snapshot::kVirtualAbCompressionProp, false)) {
- return {};
- }
-
- auto sm = SnapshotManager::New();
- if (!sm) {
- LOG(FATAL) << "Failed to create SnapshotManager, will not transition snapuserd";
- return {};
- }
-
- ServiceList& service_list = ServiceList::GetInstance();
- auto svc = service_list.FindService("snapuserd");
- if (!svc) {
- LOG(FATAL) << "Failed to find snapuserd service, aborting transition";
- return {};
- }
- svc->Start();
- svc->SetShutdownCritical();
-
- if (!sm->PerformSecondStageInitTransition()) {
- LOG(FATAL) << "Failed to transition snapuserd to second-stage";
- }
-
- if (auto pid = GetSnapuserdFirstStagePid()) {
- KillFirstStageSnapuserd(pid.value());
- }
- return {};
-}
-
int SecondStageMain(int argc, char** argv) {
if (REBOOT_BOOTLOADER_ON_PANIC) {
InstallRebootSignalHandlers();
@@ -900,7 +869,6 @@
// Queue an action that waits for coldboot done so we know ueventd has set up all of /dev...
am.QueueBuiltinAction(wait_for_coldboot_done_action, "wait_for_coldboot_done");
- am.QueueBuiltinAction(TransitionSnapuserdAction, "TransitionSnapuserd");
// ... so that we can start queuing up actions that require stuff from /dev.
am.QueueBuiltinAction(SetMmapRndBitsAction, "SetMmapRndBits");
Keychords keychords;