libsnapshot: Add diagnostics for DM_DEV_REMOVE failures.
Example log line:
update_engine: Block device was lazily unmounted and is still in-use:
/dev/block/dm-28; possibly open file descriptor or attached loop device.
This will help diagnose bugs such as b/184715543 in the future.
Bug: N/A
Test: manual test
Change-Id: Ia6b17fe9bd1796d59be7fc0b355218509acfd4af
diff --git a/fastboot/Android.bp b/fastboot/Android.bp
index 43b2ddd..ce702a0 100644
--- a/fastboot/Android.bp
+++ b/fastboot/Android.bp
@@ -183,6 +183,7 @@
],
static_libs: [
+ "libc++fs",
"libgtest_prod",
"libhealthhalutils",
"libsnapshot_cow",
diff --git a/fs_mgr/libsnapshot/Android.bp b/fs_mgr/libsnapshot/Android.bp
index ea92d25..3cb4123 100644
--- a/fs_mgr/libsnapshot/Android.bp
+++ b/fs_mgr/libsnapshot/Android.bp
@@ -264,6 +264,7 @@
"android.hardware.boot@1.0",
"android.hardware.boot@1.1",
"libbrotli",
+ "libc++fs",
"libfs_mgr",
"libgsi",
"libgmock",
@@ -297,6 +298,7 @@
],
static_libs: [
"libbrotli",
+ "libc++fs",
"libfstab",
"libsnapshot",
"libsnapshot_cow",
@@ -326,6 +328,7 @@
"power_test.cpp",
],
static_libs: [
+ "libc++fs",
"libsnapshot",
"update_metadata-protos",
],
@@ -355,6 +358,7 @@
static_libs: [
"libbase",
"libbrotli",
+ "libc++fs",
"libchrome",
"libcrypto_static",
"libcutils",
@@ -416,7 +420,7 @@
"snapuserd_server.cpp",
"snapuserd.cpp",
"snapuserd_daemon.cpp",
- "snapuserd_worker.cpp",
+ "snapuserd_worker.cpp",
],
cflags: [
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
index 7e74fac..126e1a0 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot.h
@@ -493,6 +493,9 @@
// Unmap a COW image device previously mapped with MapCowImage().
bool UnmapCowImage(const std::string& name);
+ // Unmap a COW and remove it from a MetadataBuilder.
+ void UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata);
+
// Unmap and remove all known snapshots.
bool RemoveAllSnapshots(LockedFile* lock);
@@ -738,6 +741,10 @@
// Helper of UpdateUsesCompression
bool UpdateUsesCompression(LockedFile* lock);
+ // Wrapper around libdm, with diagnostics.
+ bool DeleteDeviceIfExists(const std::string& name,
+ const std::chrono::milliseconds& timeout_ms = {});
+
std::string gsid_dir_;
std::string metadata_dir_;
std::unique_ptr<IDeviceInfo> device_;
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index a0a1e4f..c504355 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -21,6 +21,7 @@
#include <sys/types.h>
#include <sys/unistd.h>
+#include <filesystem>
#include <optional>
#include <thread>
#include <unordered_set>
@@ -587,8 +588,7 @@
bool SnapshotManager::UnmapSnapshot(LockedFile* lock, const std::string& name) {
CHECK(lock);
- auto& dm = DeviceMapper::Instance();
- if (!dm.DeleteDeviceIfExists(name)) {
+ if (!DeleteDeviceIfExists(name)) {
LOG(ERROR) << "Could not delete snapshot device: " << name;
return false;
}
@@ -1252,25 +1252,6 @@
return true;
}
-static bool DeleteDmDevice(const std::string& name, const std::chrono::milliseconds& timeout_ms) {
- auto start = std::chrono::steady_clock::now();
- auto& dm = DeviceMapper::Instance();
- while (true) {
- if (dm.DeleteDeviceIfExists(name)) {
- break;
- }
- auto now = std::chrono::steady_clock::now();
- auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
- if (elapsed >= timeout_ms) {
- LOG(ERROR) << "DeleteDevice timeout: " << name;
- return false;
- }
- std::this_thread::sleep_for(400ms);
- }
-
- return true;
-}
-
bool SnapshotManager::CollapseSnapshotDevice(const std::string& name,
const SnapshotStatus& status) {
auto& dm = DeviceMapper::Instance();
@@ -1326,11 +1307,11 @@
UnmapDmUserDevice(name);
}
auto base_name = GetBaseDeviceName(name);
- if (!dm.DeleteDeviceIfExists(base_name)) {
+ if (!DeleteDeviceIfExists(base_name)) {
LOG(ERROR) << "Unable to delete base device for snapshot: " << base_name;
}
- if (!DeleteDmDevice(GetSourceDeviceName(name), 4000ms)) {
+ if (!DeleteDeviceIfExists(GetSourceDeviceName(name), 4000ms)) {
LOG(ERROR) << "Unable to delete source device for snapshot: " << GetSourceDeviceName(name);
}
@@ -2083,15 +2064,14 @@
return false;
}
- auto& dm = DeviceMapper::Instance();
auto base_name = GetBaseDeviceName(target_partition_name);
- if (!dm.DeleteDeviceIfExists(base_name)) {
+ if (!DeleteDeviceIfExists(base_name)) {
LOG(ERROR) << "Cannot delete base device: " << base_name;
return false;
}
auto source_name = GetSourceDeviceName(target_partition_name);
- if (!dm.DeleteDeviceIfExists(source_name)) {
+ if (!DeleteDeviceIfExists(source_name)) {
LOG(ERROR) << "Cannot delete source device: " << source_name;
return false;
}
@@ -2181,7 +2161,7 @@
return false;
}
- if (!DeleteDmDevice(GetCowName(name), 4000ms)) {
+ if (!DeleteDeviceIfExists(GetCowName(name), 4000ms)) {
LOG(ERROR) << "Cannot unmap: " << GetCowName(name);
return false;
}
@@ -2202,7 +2182,7 @@
return true;
}
- if (!dm.DeleteDeviceIfExists(dm_user_name)) {
+ if (!DeleteDeviceIfExists(dm_user_name)) {
LOG(ERROR) << "Cannot unmap " << dm_user_name;
return false;
}
@@ -2593,11 +2573,10 @@
return true;
}
-static void UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
- auto& dm = DeviceMapper::Instance();
+void SnapshotManager::UnmapAndDeleteCowPartition(MetadataBuilder* current_metadata) {
std::vector<std::string> to_delete;
for (auto* existing_cow_partition : current_metadata->ListPartitionsInGroup(kCowGroupName)) {
- if (!dm.DeleteDeviceIfExists(existing_cow_partition->name())) {
+ if (!DeleteDeviceIfExists(existing_cow_partition->name())) {
LOG(WARNING) << existing_cow_partition->name()
<< " cannot be unmapped and its space cannot be reclaimed";
continue;
@@ -3626,5 +3605,71 @@
stats->set_estimated_cow_size_bytes(estimated_cow_size);
}
+bool SnapshotManager::DeleteDeviceIfExists(const std::string& name,
+ const std::chrono::milliseconds& timeout_ms) {
+ auto& dm = DeviceMapper::Instance();
+ auto start = std::chrono::steady_clock::now();
+ while (true) {
+ if (dm.DeleteDeviceIfExists(name)) {
+ return true;
+ }
+ auto now = std::chrono::steady_clock::now();
+ auto elapsed = std::chrono::duration_cast<std::chrono::milliseconds>(now - start);
+ if (elapsed >= timeout_ms) {
+ break;
+ }
+ std::this_thread::sleep_for(400ms);
+ }
+
+ // Try to diagnose why this failed. First get the actual device path.
+ std::string full_path;
+ if (!dm.GetDmDevicePathByName(name, &full_path)) {
+ LOG(ERROR) << "Unable to diagnose DM_DEV_REMOVE failure.";
+ return false;
+ }
+
+ // Check for child dm-devices.
+ std::string block_name = android::base::Basename(full_path);
+ std::string sysfs_holders = "/sys/class/block/" + block_name + "/holders";
+
+ std::error_code ec;
+ std::filesystem::directory_iterator dir_iter(sysfs_holders, ec);
+ if (auto begin = std::filesystem::begin(dir_iter); begin != std::filesystem::end(dir_iter)) {
+ LOG(ERROR) << "Child device-mapper device still mapped: " << begin->path();
+ return false;
+ }
+
+ // Check for mounted partitions.
+ android::fs_mgr::Fstab fstab;
+ android::fs_mgr::ReadFstabFromFile("/proc/mounts", &fstab);
+ for (const auto& entry : fstab) {
+ if (android::base::Basename(entry.blk_device) == block_name) {
+ LOG(ERROR) << "Partition still mounted: " << entry.mount_point;
+ return false;
+ }
+ }
+
+ // Check for detached mounted partitions.
+ for (const auto& fs : std::filesystem::directory_iterator("/sys/fs", ec)) {
+ std::string fs_type = android::base::Basename(fs.path().c_str());
+ if (!(fs_type == "ext4" || fs_type == "f2fs")) {
+ continue;
+ }
+
+ std::string path = fs.path().c_str() + "/"s + block_name;
+ if (access(path.c_str(), F_OK) == 0) {
+ LOG(ERROR) << "Block device was lazily unmounted and is still in-use: " << full_path
+ << "; possibly open file descriptor or attached loop device.";
+ return false;
+ }
+ }
+
+ LOG(ERROR) << "Device-mapper device " << name << "(" << full_path << ")"
+ << " still in use."
+ << " Probably a file descriptor was leaked or held open, or a loop device is"
+ << " attached.";
+ return false;
+}
+
} // namespace snapshot
} // namespace android