snapuserd: Async I/O for block verification
Boot time improvements:
==================================
Incremental OTA of 300M between two git_master branches on Pixel 6:
Android S (with dm-snapshot):
BootComplete : 39.379 seconds
Android T (with io_uring):
BootComplete : 32.001 seconds
Time taken for each partition:
snapuserd: ReadBlockAsync complete: 2055 ms Block-device: /dev/block/dm-21 Partition-name: system_ext Size: 399302656
snapuserd: ReadBlockAsync complete: 2956 ms Block-device: /dev/block/dm-23 Partition-name: vendor Size: 650084352
snapuserd: ReadBlockAsync complete: 3534 ms Block-device: /dev/block/dm-20 Partition-name: system Size: 859746304
snapuserd: ReadBlockAsync complete: 7808 ms Block-device: /dev/block/dm-22 Partition-name: product Size: 3030687744
====================================
Bug: 202784286
Test: Full/Incremental OTA
Signed-off-by: Akilesh Kailash <akailash@google.com>
Change-Id: I615f9f4fde4e565aa1d611a2d6bbf6a6f62fa3f1
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
index 2c84ff9..e48a1be 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.cpp
@@ -19,6 +19,7 @@
#include <sys/utsname.h>
#include <android-base/properties.h>
+#include <android-base/scopeguard.h>
#include <android-base/strings.h>
namespace android {
@@ -291,6 +292,136 @@
return ReadMetadata();
}
+// Tears down the io_uring instance held in ring_ by handing it to
+// io_uring_queue_exit().
+void SnapshotHandler::FinalizeIouring() {
+    struct io_uring* const ring = ring_.get();
+    io_uring_queue_exit(ring);
+}
+
+// Allocates a fresh io_uring object into ring_ and initializes it with a
+// submission queue of |io_depth| entries. Returns true on success; on failure
+// the return value of io_uring_queue_init() is logged and false is returned.
+bool SnapshotHandler::InitializeIouring(int io_depth) {
+    ring_ = std::make_unique<struct io_uring>();
+
+    const int ret = io_uring_queue_init(io_depth, ring_.get(), 0);
+    if (ret == 0) {
+        LOG(INFO) << "io_uring_queue_init success with io_depth: " << io_depth;
+        return true;
+    }
+
+    LOG(ERROR) << "io_uring_queue_init failed with ret: " << ret;
+    return false;
+}
+
+// Sequentially reads |size| bytes from |dm_block_device| using io_uring. The
+// data lands in scratch buffers and is discarded; only the act of reading
+// matters here. |partition_name| is used for logging only.
+//
+// Returns true once every byte in [0, size) has been submitted and completed
+// without error, false on any setup, submission or I/O failure.
+bool SnapshotHandler::ReadBlocksAsync(const std::string& dm_block_device,
+                                      const std::string& partition_name, size_t size) {
+    // 64k block size with io_depth of 64 is optimal
+    // for a single thread. We just need a single thread
+    // to read all the blocks from all dynamic partitions.
+    constexpr size_t io_depth = 64;
+    constexpr size_t bs = (64 * 1024);
+
+    using AlignedBuf = std::unique_ptr<void, decltype(free)*>;
+    std::vector<AlignedBuf> aligned_bufs;
+    aligned_bufs.reserve(io_depth);
+
+    /*
+     * TODO: We need aligned memory for DIRECT-IO. However, if we do
+     * a DIRECT-IO and verify the blocks then we need to inform
+     * update-verifier that block verification has been done and
+     * there is no need to repeat the same. We are not there yet
+     * as we need to see if there are any boot time improvements doing
+     * a DIRECT-IO.
+     *
+     * Also, we could use the same function post merge for block verification;
+     * again, we can do a DIRECT-IO instead of thrashing page-cache and
+     * hurting other applications.
+     *
+     * For now, we will just create aligned buffers but rely on buffered
+     * I/O until we have perf numbers to justify DIRECT-IO.
+     */
+    for (size_t i = 0; i < io_depth; i++) {
+        void* buf = nullptr;
+        if (posix_memalign(&buf, BLOCK_SZ, bs)) {
+            LOG(ERROR) << "posix_memalign failed";
+            return false;
+        }
+        aligned_bufs.push_back(AlignedBuf(buf, free));
+    }
+
+    if (!InitializeIouring(io_depth)) {
+        return false;
+    }
+
+    LOG(INFO) << "ReadBlockAsync start "
+              << " Block-device: " << dm_block_device << " Partition-name: " << partition_name
+              << " Size: " << size;
+
+    // Declared after the buffers on purpose: locals are destroyed in reverse
+    // order, so on every return path the ring is torn down before the buffers
+    // handed to it are freed.
+    auto scope_guard = android::base::make_scope_guard([this]() -> void { FinalizeIouring(); });
+
+    android::base::unique_fd fd(TEMP_FAILURE_RETRY(open(dm_block_device.c_str(), O_RDONLY)));
+    if (fd.get() == -1) {
+        SNAP_PLOG(ERROR) << "File open failed - block-device " << dm_block_device
+                         << " partition-name: " << partition_name;
+        return false;
+    }
+
+    loff_t offset = 0;
+    size_t remain = size;
+    const size_t read_sz = io_depth * bs;
+
+    while (remain > 0) {
+        const size_t to_read = std::min(remain, read_sz);
+        // Round up so that a trailing chunk smaller than |bs| still gets its
+        // own request instead of being silently dropped.
+        const size_t queue_size = (to_read + bs - 1) / bs;
+        size_t batch_left = to_read;
+
+        for (size_t i = 0; i < queue_size; i++) {
+            struct io_uring_sqe* sqe = io_uring_get_sqe(ring_.get());
+            if (!sqe) {
+                SNAP_LOG(ERROR) << "io_uring_get_sqe() failed";
+                return false;
+            }
+
+            // Only the last request of the last batch can be shorter than |bs|.
+            const size_t len = std::min(batch_left, bs);
+
+            io_uring_prep_read(sqe, fd.get(), aligned_bufs[i].get(), len, offset);
+            sqe->flags |= IOSQE_ASYNC;
+            offset += len;
+            batch_left -= len;
+        }
+
+        const int submitted = io_uring_submit(ring_.get());
+        if (submitted < 0 || static_cast<size_t>(submitted) != queue_size) {
+            SNAP_LOG(ERROR) << "submit got: " << submitted << " wanted: " << queue_size;
+            return false;
+        }
+
+        // NOTE(review): a short read (0 <= res < requested length) is accepted
+        // as-is and not retried.
+        for (size_t i = 0; i < queue_size; i++) {
+            struct io_uring_cqe* cqe = nullptr;
+
+            const int wait_ret = io_uring_wait_cqe(ring_.get(), &cqe);
+            if (wait_ret) {
+                // The failure code is carried in the return value, not in
+                // errno, so log it directly rather than via SNAP_PLOG.
+                SNAP_LOG(ERROR) << "wait_cqe failed with ret: " << wait_ret;
+                return false;
+            }
+
+            if (cqe->res < 0) {
+                SNAP_LOG(ERROR) << "io failed with res: " << cqe->res;
+                return false;
+            }
+            io_uring_cqe_seen(ring_.get(), cqe);
+        }
+
+        remain -= to_read;
+    }
+
+    LOG(INFO) << "ReadBlockAsync complete: "
+              << " Block-device: " << dm_block_device << " Partition-name: " << partition_name
+              << " Size: " << size;
+    return true;
+}
+
void SnapshotHandler::ReadBlocksToCache(const std::string& dm_block_device,
const std::string& partition_name, off_t offset,
size_t size) {
@@ -347,17 +478,22 @@
return;
}
- int num_threads = 2;
- size_t num_blocks = dev_sz >> BLOCK_SHIFT;
- size_t num_blocks_per_thread = num_blocks / num_threads;
- size_t read_sz_per_thread = num_blocks_per_thread << BLOCK_SHIFT;
- off_t offset = 0;
+ if (IsIouringSupported()) {
+ std::async(std::launch::async, &SnapshotHandler::ReadBlocksAsync, this, dm_block_device,
+ partition_name, dev_sz);
+ } else {
+ int num_threads = 2;
+ size_t num_blocks = dev_sz >> BLOCK_SHIFT;
+ size_t num_blocks_per_thread = num_blocks / num_threads;
+ size_t read_sz_per_thread = num_blocks_per_thread << BLOCK_SHIFT;
+ off_t offset = 0;
- for (int i = 0; i < num_threads; i++) {
- std::async(std::launch::async, &SnapshotHandler::ReadBlocksToCache, this, dm_block_device,
- partition_name, offset, read_sz_per_thread);
+ for (int i = 0; i < num_threads; i++) {
+ std::async(std::launch::async, &SnapshotHandler::ReadBlocksToCache, this,
+ dm_block_device, partition_name, offset, read_sz_per_thread);
- offset += read_sz_per_thread;
+ offset += read_sz_per_thread;
+ }
}
}
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
index f36866a..b0f2d65 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_core.h
@@ -344,6 +344,11 @@
void ReadBlocksToCache(const std::string& dm_block_device, const std::string& partition_name,
off_t offset, size_t size);
+ bool InitializeIouring(int io_depth);
+ void FinalizeIouring();
+ bool ReadBlocksAsync(const std::string& dm_block_device, const std::string& partition_name,
+ size_t size);
+
// COW device
std::string cow_device_;
// Source device
@@ -392,6 +397,8 @@
bool attached_ = false;
bool is_socket_present_;
bool scratch_space_ = false;
+
+ std::unique_ptr<struct io_uring> ring_;
};
} // namespace snapshot