Merge "Verify partitions using registered buffers" into main am: 95b890708d
Original change: https://android-review.googlesource.com/c/platform/system/core/+/3527289
Change-Id: Ie6b9b01c11615492f154417bee0105a93cddbab6
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/fs_mgr/libsnapshot/snapuserd/Android.bp b/fs_mgr/libsnapshot/snapuserd/Android.bp
index 639116e..9972bc7 100644
--- a/fs_mgr/libsnapshot/snapuserd/Android.bp
+++ b/fs_mgr/libsnapshot/snapuserd/Android.bp
@@ -88,6 +88,7 @@
"libprocessgroup",
"libprocessgroup_util",
"libjsoncpp",
+ "liburing_cpp",
],
export_include_dirs: ["include"],
header_libs: [
@@ -136,6 +137,7 @@
"libext4_utils",
"liburing",
"libzstd",
+ "liburing_cpp",
],
header_libs: [
@@ -222,6 +224,7 @@
"libjsoncpp",
"liburing",
"libz",
+ "liburing_cpp",
],
include_dirs: [
".",
@@ -319,6 +322,7 @@
"libjsoncpp",
"liburing",
"libz",
+ "liburing_cpp",
],
include_dirs: [
".",
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.cpp
index 957c6a8..97f8df4 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.cpp
@@ -22,6 +22,7 @@
#include "android-base/properties.h"
#include "snapuserd_core.h"
+#include "utility.h"
namespace android {
namespace snapshot {
@@ -104,43 +105,108 @@
return false;
}
- loff_t file_offset = offset;
- auto verify_block_size = android::base::GetUintProperty<uint>("ro.virtual_ab.verify_block_size",
- kBlockSizeVerify);
- const uint64_t read_sz = verify_block_size;
+ int queue_depth = std::max(queue_depth_, 1);
+ int verify_block_size = verify_block_size_;
- void* addr;
- ssize_t page_size = getpagesize();
- if (posix_memalign(&addr, page_size, read_sz) < 0) {
- SNAP_PLOG(ERROR) << "posix_memalign failed "
- << " page_size: " << page_size << " read_sz: " << read_sz;
+ // Smaller partitions don't need a large queue depth, so halve the queue depth and
+ // quarter the block size; this keeps memory usage down on low-memory devices.
+ if (dev_sz < threshold_size_) {
+ queue_depth = std::max(queue_depth / 2, 1);
+ verify_block_size >>= 2;
+ }
+
+ if (!IsBlockAligned(verify_block_size)) {
+ verify_block_size = EXT4_ALIGN(verify_block_size, BLOCK_SZ);
+ }
+
+ std::unique_ptr<io_uring_cpp::IoUringInterface> ring =
+ io_uring_cpp::IoUringInterface::CreateLinuxIoUring(queue_depth, 0);
+ if (ring.get() == nullptr) {
+ PLOG(ERROR) << "Verify: io_uring_queue_init failed for queue_depth: " << queue_depth;
return false;
}
- std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
-
- uint64_t bytes_read = 0;
-
- while (true) {
- size_t to_read = std::min((dev_sz - file_offset), read_sz);
-
- if (!android::base::ReadFullyAtOffset(fd.get(), buffer.get(), to_read, file_offset)) {
- SNAP_PLOG(ERROR) << "Failed to read block from block device: " << dm_block_device
- << " partition-name: " << partition_name
- << " at offset: " << file_offset << " read-size: " << to_read
- << " block-size: " << dev_sz;
+ std::unique_ptr<struct iovec[]> vecs = std::make_unique<struct iovec[]>(queue_depth);
+ std::vector<std::unique_ptr<void, decltype(&::free)>> buffers;
+ for (int i = 0; i < queue_depth; i++) {
+ void* addr;
+ ssize_t page_size = getpagesize();
+ if (posix_memalign(&addr, page_size, verify_block_size) < 0) {
+ LOG(ERROR) << "posix_memalign failed";
return false;
}
- bytes_read += to_read;
- file_offset += (skip_blocks * verify_block_size);
- if (file_offset >= dev_sz) {
+ buffers.emplace_back(addr, ::free);
+ vecs[i].iov_base = addr;
+ vecs[i].iov_len = verify_block_size;
+ }
+
+ auto ret = ring->RegisterBuffers(vecs.get(), queue_depth);
+ if (!ret.IsOk()) {
+ SNAP_LOG(ERROR) << "io_uring_register_buffers failed: " << ret.ErrCode();
+ return false;
+ }
+
+ loff_t file_offset = offset;
+ const uint64_t read_sz = verify_block_size;
+ uint64_t total_read = 0;
+ int num_submitted = 0;
+
+ SNAP_LOG(DEBUG) << "VerifyBlocks: queue_depth: " << queue_depth
+ << " verify_block_size: " << verify_block_size << " dev_sz: " << dev_sz
+ << " file_offset: " << file_offset << " skip_blocks: " << skip_blocks;
+
+ while (file_offset < dev_sz) {
+ for (size_t i = 0; i < queue_depth; i++) {
+ uint64_t to_read = std::min((dev_sz - file_offset), read_sz);
+ if (to_read <= 0) break;
+
+ const auto sqe =
+ ring->PrepReadFixed(fd.get(), vecs[i].iov_base, to_read, file_offset, i);
+ if (!sqe.IsOk()) {
+ SNAP_PLOG(ERROR) << "PrepReadFixed failed";
+ return false;
+ }
+ file_offset += (skip_blocks * to_read);
+ total_read += to_read;
+ num_submitted += 1;
+ if (file_offset >= dev_sz) {
+ break;
+ }
+ }
+
+ if (num_submitted == 0) {
break;
}
+
+ const auto io_submit = ring->SubmitAndWait(num_submitted);
+ if (!io_submit.IsOk()) {
+ SNAP_LOG(ERROR) << "SubmitAndWait failed: " << io_submit.ErrMsg()
+ << " for: " << num_submitted << " entries.";
+ return false;
+ }
+
+ SNAP_LOG(DEBUG) << "io_uring_submit: " << total_read << "num_submitted: " << num_submitted
+ << "ret: " << ret;
+
+ const auto cqes = ring->PopCQE(num_submitted);
+ if (cqes.IsErr()) {
+ SNAP_LOG(ERROR) << "PopCqe failed for: " << num_submitted
+ << " error: " << cqes.GetError().ErrMsg();
+ return false;
+ }
+ for (const auto& cqe : cqes.GetResult()) {
+ if (cqe.res < 0) {
+ SNAP_LOG(ERROR) << "I/O failed: cqe->res: " << cqe.res;
+ return false;
+ }
+ num_submitted -= 1;
+ }
}
- SNAP_LOG(DEBUG) << "Verification success with bytes-read: " << bytes_read
- << " dev_sz: " << dev_sz << " partition_name: " << partition_name;
+ SNAP_LOG(DEBUG) << "Verification success with io_uring: "
+ << " dev_sz: " << dev_sz << " partition_name: " << partition_name
+ << " total_read: " << total_read;
return true;
}
@@ -175,21 +241,14 @@
return false;
}
- /*
- * Not all partitions are of same size. Some partitions are as small as
- * 100Mb. We can just finish them in a single thread. For bigger partitions
- * such as product, 4 threads are sufficient enough.
- *
- * TODO: With io_uring SQ_POLL support, we can completely cut this
- * down to just single thread for all partitions and potentially verify all
- * the partitions with zero syscalls. Additionally, since block layer
- * supports polling, IO_POLL could be used which will further cut down
- * latency.
- */
+ if (!KernelSupportsIoUring()) {
+ SNAP_LOG(INFO) << "Kernel does not support io_uring. Skipping verification.\n";
+ // This will fall back to update_verifier to do the verification.
+ return false;
+ }
+
int num_threads = kMinThreadsToVerify;
- auto verify_threshold_size = android::base::GetUintProperty<uint>(
- "ro.virtual_ab.verify_threshold_size", kThresholdSize);
- if (dev_sz > verify_threshold_size) {
+ if (dev_sz > threshold_size_) {
num_threads = kMaxThreadsToVerify;
}
@@ -197,13 +256,11 @@
off_t start_offset = 0;
const int skip_blocks = num_threads;
- auto verify_block_size =
- android::base::GetUintProperty("ro.virtual_ab.verify_block_size", kBlockSizeVerify);
while (num_threads) {
threads.emplace_back(std::async(std::launch::async, &UpdateVerify::VerifyBlocks, this,
partition_name, dm_block_device, start_offset, skip_blocks,
dev_sz));
- start_offset += verify_block_size;
+ start_offset += verify_block_size_;
num_threads -= 1;
if (start_offset >= dev_sz) {
break;
@@ -218,9 +275,9 @@
if (ret) {
succeeded = true;
UpdatePartitionVerificationState(UpdateVerifyState::VERIFY_SUCCESS);
- SNAP_LOG(INFO) << "Partition: " << partition_name << " Block-device: " << dm_block_device
- << " Size: " << dev_sz
- << " verification success. Duration : " << timer.duration().count() << " ms";
+ SNAP_LOG(INFO) << "Partition verification success: " << partition_name
+ << " Block-device: " << dm_block_device << " Size: " << dev_sz
+ << " Duration : " << timer.duration().count() << " ms";
return true;
}
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.h b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.h
index b300a70..69a334b 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.h
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.h
@@ -15,6 +15,7 @@
#pragma once
+#include <liburing.h>
#include <stdint.h>
#include <sys/types.h>
@@ -22,6 +23,7 @@
#include <mutex>
#include <string>
+#include <liburing_cpp/IoUring.h>
#include <snapuserd/snapuserd_kernel.h>
#include <storage_literals/storage_literals.h>
@@ -48,27 +50,23 @@
std::mutex m_lock_;
std::condition_variable m_cv_;
+ int kMinThreadsToVerify = 1;
+ int kMaxThreadsToVerify = 3;
+
/*
- * Scanning of partitions is an expensive operation both in terms of memory
- * and CPU usage. The goal here is to scan the partitions fast enough without
- * significant increase in the boot time.
- *
- * Partitions such as system, product which may be huge and may need multiple
- * threads to speed up the verification process. Using multiple threads for
- * all partitions may increase CPU usage significantly. Hence, limit that to
- * 1 thread per partition.
+ * To optimize partition scanning speed without significantly impacting boot time,
+ * we employ O_DIRECT, bypassing the page-cache. However, O_DIRECT's memory
+ * allocation from CMA can be problematic on devices with restricted CMA space.
+ * To address this, io_uring_register_buffers() pre-registers I/O buffers,
+ * preventing CMA usage. See b/401952955 for more details.
*
* These numbers were derived by monitoring the memory and CPU pressure
* (/proc/pressure/{cpu,memory}; and monitoring the Inactive(file) and
* Active(file) pages from /proc/meminfo.
- *
- * Additionally, for low memory devices, it is advisable to use O_DIRECT
- * functionality for source block device.
*/
- int kMinThreadsToVerify = 1;
- int kMaxThreadsToVerify = 3;
- uint64_t kThresholdSize = 750_MiB;
- uint64_t kBlockSizeVerify = 2_MiB;
+ uint64_t verify_block_size_ = 1_MiB;
+ uint64_t threshold_size_ = 2_GiB;
+ int queue_depth_ = 4;
bool IsBlockAligned(uint64_t read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
void UpdatePartitionVerificationState(UpdateVerifyState state);