libsnapshot:snapuserd: Multithreading support
Add worker threads per partition to serve IO requests.
Remove the memset of the buffer in the IO path, which was
impacting 4k IO performance.
update_verifier performance:
1: ~10-12 seconds with this change (on both full and incremental
OTA); ~70 seconds observed without this changeset.
2: ~8 seconds without the daemon, once the merge is completed
and the snapshot devices are removed.
Bug: 181293939
Test: update_verifier, full OTA, incremental OTA
Signed-off-by: Akilesh Kailash <akailash@google.com>
Change-Id: Id90887f3f4a664ee5d39433715d1c166acbd6c60
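
For context, the threading model added here is a plain fan-out/join: each
handler owns NUM_THREADS_PER_PARTITION WorkerThread objects, and
Snapuserd::Start() launches one async task per worker, then waits on all of
the futures so the handler thread only returns once every worker has exited.
A condensed sketch of that pattern (names are taken from the diff below):

    bool Snapuserd::Start() {
        std::vector<std::future<bool>> threads;
        // One task per worker; each WorkerThread opens its own dm-user control
        // fd and serves IO requests independently in RunThread().
        for (auto& worker : worker_threads_) {
            threads.emplace_back(std::async(std::launch::async, &WorkerThread::RunThread,
                                            worker.get()));
        }
        bool ret = true;
        for (auto& t : threads) {
            ret = t.get() && ret;  // join; any failed worker fails Start()
        }
        return ret;
    }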
diff --git a/fs_mgr/libsnapshot/Android.bp b/fs_mgr/libsnapshot/Android.bp
index b808609..ea92d25 100644
--- a/fs_mgr/libsnapshot/Android.bp
+++ b/fs_mgr/libsnapshot/Android.bp
@@ -416,6 +416,7 @@
"snapuserd_server.cpp",
"snapuserd.cpp",
"snapuserd_daemon.cpp",
+ "snapuserd_worker.cpp",
],
cflags: [
@@ -554,6 +555,7 @@
srcs: [
"cow_snapuserd_test.cpp",
"snapuserd.cpp",
+ "snapuserd_worker.cpp",
],
cflags: [
"-Wall",
diff --git a/fs_mgr/libsnapshot/cow_reader.cpp b/fs_mgr/libsnapshot/cow_reader.cpp
index cf9f6ea..163e457 100644
--- a/fs_mgr/libsnapshot/cow_reader.cpp
+++ b/fs_mgr/libsnapshot/cow_reader.cpp
@@ -42,6 +42,29 @@
#endif
}
+bool CowReader::InitForMerge(android::base::unique_fd&& fd) {
+ owned_fd_ = std::move(fd);
+ fd_ = owned_fd_.get();
+
+ auto pos = lseek(fd_.get(), 0, SEEK_END);
+ if (pos < 0) {
+ PLOG(ERROR) << "lseek end failed";
+ return false;
+ }
+ fd_size_ = pos;
+
+ if (lseek(fd_.get(), 0, SEEK_SET) < 0) {
+ PLOG(ERROR) << "lseek header failed";
+ return false;
+ }
+ if (!android::base::ReadFully(fd_, &header_, sizeof(header_))) {
+ PLOG(ERROR) << "read header failed";
+ return false;
+ }
+
+ return true;
+}
+
bool CowReader::Parse(android::base::unique_fd&& fd, std::optional<uint64_t> label) {
owned_fd_ = std::move(fd);
return Parse(android::base::borrowed_fd{owned_fd_}, label);
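
InitForMerge() is a lightweight alternative to Parse(): it records the COW file
size and reads only the CowHeader, without parsing the ops, since the chunk map
is built once by Snapuserd::ReadMetadata() and shared with the workers. A
hedged usage sketch, mirroring WorkerThread::InitReader() below (cow_path is a
placeholder):

    android::base::unique_fd fd(open(cow_path.c_str(), O_RDWR));
    auto reader = std::make_unique<CowReader>();
    if (!reader->InitForMerge(std::move(fd))) {
        return false;  // header read or lseek failed
    }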
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
index 1de7473..552fd96 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
@@ -116,12 +116,15 @@
class CowReader : public ICowReader {
public:
CowReader();
+ ~CowReader() { owned_fd_ = {}; }
// Parse the COW, optionally, up to the given label. If no label is
// specified, the COW must have an intact footer.
bool Parse(android::base::unique_fd&& fd, std::optional<uint64_t> label = {});
bool Parse(android::base::borrowed_fd fd, std::optional<uint64_t> label = {});
+ bool InitForMerge(android::base::unique_fd&& fd);
+
bool GetHeader(CowHeader* header) override;
bool GetFooter(CowFooter* footer) override;
@@ -146,6 +149,8 @@
uint64_t total_data_ops() { return total_data_ops_; }
+ void CloseCowFd() { owned_fd_ = {}; }
+
private:
bool ParseOps(std::optional<uint64_t> label);
diff --git a/fs_mgr/libsnapshot/snapshot.cpp b/fs_mgr/libsnapshot/snapshot.cpp
index ca4c265..a8a1446 100644
--- a/fs_mgr/libsnapshot/snapshot.cpp
+++ b/fs_mgr/libsnapshot/snapshot.cpp
@@ -1262,7 +1262,7 @@
LOG(ERROR) << "DeleteDevice timeout: " << name;
return false;
}
- std::this_thread::sleep_for(250ms);
+ std::this_thread::sleep_for(400ms);
}
return true;
diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp
index 4c4a342..f1fcb70 100644
--- a/fs_mgr/libsnapshot/snapuserd.cpp
+++ b/fs_mgr/libsnapshot/snapuserd.cpp
@@ -32,41 +32,6 @@
#define SNAP_LOG(level) LOG(level) << misc_name_ << ": "
#define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": "
-static constexpr size_t PAYLOAD_SIZE = (1UL << 20);
-
-static_assert(PAYLOAD_SIZE >= BLOCK_SZ);
-
-void BufferSink::Initialize(size_t size) {
- buffer_size_ = size;
- buffer_offset_ = 0;
- buffer_ = std::make_unique<uint8_t[]>(size);
-}
-
-void* BufferSink::GetPayloadBuffer(size_t size) {
- if ((buffer_size_ - buffer_offset_) < size) return nullptr;
-
- char* buffer = reinterpret_cast<char*>(GetBufPtr());
- struct dm_user_message* msg = (struct dm_user_message*)(&(buffer[0]));
- return (char*)msg->payload.buf + buffer_offset_;
-}
-
-void* BufferSink::GetBuffer(size_t requested, size_t* actual) {
- void* buf = GetPayloadBuffer(requested);
- if (!buf) {
- *actual = 0;
- return nullptr;
- }
- *actual = requested;
- return buf;
-}
-
-struct dm_user_header* BufferSink::GetHeaderPtr() {
- CHECK(sizeof(struct dm_user_header) <= buffer_size_);
- char* buf = reinterpret_cast<char*>(GetBufPtr());
- struct dm_user_header* header = (struct dm_user_header*)(&(buf[0]));
- return header;
-}
-
Snapuserd::Snapuserd(const std::string& misc_name, const std::string& cow_device,
const std::string& backing_device) {
misc_name_ = misc_name;
@@ -75,356 +40,32 @@
control_device_ = "/dev/dm-user/" + misc_name;
}
-// Construct kernel COW header in memory
-// This header will be in sector 0. The IO
-// request will always be 4k. After constructing
-// the header, zero out the remaining block.
-void Snapuserd::ConstructKernelCowHeader() {
- void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
- CHECK(buffer != nullptr);
+bool Snapuserd::InitializeWorkers() {
+ for (int i = 0; i < NUM_THREADS_PER_PARTITION; i++) {
+ std::unique_ptr<WorkerThread> wt = std::make_unique<WorkerThread>(
+ cow_device_, backing_store_device_, control_device_, misc_name_, GetSharedPtr());
- memset(buffer, 0, BLOCK_SZ);
-
- struct disk_header* dh = reinterpret_cast<struct disk_header*>(buffer);
-
- dh->magic = SNAP_MAGIC;
- dh->valid = SNAPSHOT_VALID;
- dh->version = SNAPSHOT_DISK_VERSION;
- dh->chunk_size = CHUNK_SIZE;
-}
-
-// Start the replace operation. This will read the
-// internal COW format and if the block is compressed,
-// it will be de-compressed.
-bool Snapuserd::ProcessReplaceOp(const CowOperation* cow_op) {
- if (!reader_->ReadData(*cow_op, &bufsink_)) {
- SNAP_LOG(ERROR) << "ProcessReplaceOp failed for block " << cow_op->new_block;
- return false;
+ worker_threads_.push_back(std::move(wt));
}
-
return true;
}
-// Start the copy operation. This will read the backing
-// block device which is represented by cow_op->source.
-bool Snapuserd::ProcessCopyOp(const CowOperation* cow_op) {
- void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
- CHECK(buffer != nullptr);
+bool Snapuserd::CommitMerge(int num_merge_ops) {
+ {
+ std::lock_guard<std::mutex> lock(lock_);
+ CowHeader header;
- // Issue a single 4K IO. However, this can be optimized
- // if the successive blocks are contiguous.
- if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SZ,
- cow_op->source * BLOCK_SZ)) {
- SNAP_PLOG(ERROR) << "Copy-op failed. Read from backing store: " << backing_store_device_
- << "at block :" << cow_op->source;
- return false;
- }
-
- return true;
-}
-
-bool Snapuserd::ProcessZeroOp() {
- // Zero out the entire block
- void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
- CHECK(buffer != nullptr);
-
- memset(buffer, 0, BLOCK_SZ);
- return true;
-}
-
-bool Snapuserd::ProcessCowOp(const CowOperation* cow_op) {
- CHECK(cow_op != nullptr);
-
- switch (cow_op->type) {
- case kCowReplaceOp: {
- return ProcessReplaceOp(cow_op);
+ reader_->GetHeader(&header);
+ header.num_merge_ops += num_merge_ops;
+ reader_->UpdateMergeProgress(num_merge_ops);
+ if (!writer_->CommitMerge(num_merge_ops)) {
+ SNAP_LOG(ERROR) << "CommitMerge failed... merged_ops_cur_iter: " << num_merge_ops
+ << " Total-merged-ops: " << header.num_merge_ops;
+ return false;
}
-
- case kCowZeroOp: {
- return ProcessZeroOp();
- }
-
- case kCowCopyOp: {
- return ProcessCopyOp(cow_op);
- }
-
- default: {
- SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
- }
- }
- return false;
-}
-
-int Snapuserd::ReadUnalignedSector(sector_t sector, size_t size,
- std::map<sector_t, const CowOperation*>::iterator& it) {
- size_t skip_sector_size = 0;
-
- SNAP_LOG(DEBUG) << "ReadUnalignedSector: sector " << sector << " size: " << size
- << " Aligned sector: " << it->second;
-
- if (!ProcessCowOp(it->second)) {
- SNAP_LOG(ERROR) << "ReadUnalignedSector: " << sector << " failed of size: " << size;
- return -1;
+ merge_initiated_ = true;
}
- int num_sectors_skip = sector - it->first;
-
- if (num_sectors_skip > 0) {
- skip_sector_size = num_sectors_skip << SECTOR_SHIFT;
- char* buffer = reinterpret_cast<char*>(bufsink_.GetBufPtr());
- struct dm_user_message* msg = (struct dm_user_message*)(&(buffer[0]));
-
- memmove(msg->payload.buf, (char*)msg->payload.buf + skip_sector_size,
- (BLOCK_SZ - skip_sector_size));
- }
-
- bufsink_.ResetBufferOffset();
- return std::min(size, (BLOCK_SZ - skip_sector_size));
-}
-
-/*
- * Read the data for a given COW Operation.
- *
- * Kernel can issue IO at a sector granularity.
- * Hence, an IO may end up with reading partial
- * data from a COW operation or we may also
- * end up with interspersed request between
- * two COW operations.
- *
- */
-int Snapuserd::ReadData(sector_t sector, size_t size) {
- /*
- * chunk_map stores COW operation at 4k granularity.
- * If the requested IO with the sector falls on the 4k
- * boundary, then we can read the COW op directly without
- * any issue.
- *
- * However, if the requested sector is not 4K aligned,
- * then we will have the find the nearest COW operation
- * and chop the 4K block to fetch the requested sector.
- */
- std::map<sector_t, const CowOperation*>::iterator it = chunk_map_.find(sector);
- if (it == chunk_map_.end()) {
- it = chunk_map_.lower_bound(sector);
- if (it != chunk_map_.begin()) {
- --it;
- }
-
- /*
- * If the IO is spanned between two COW operations,
- * split the IO into two parts:
- *
- * 1: Read the first part from the single COW op
- * 2: Read the second part from the next COW op.
- *
- * Ex: Let's say we have a 1024 Bytes IO request.
- *
- * 0 COW OP-1 4096 COW OP-2 8192
- * |******************|*******************|
- * |*****|*****|
- * 3584 4608
- * <- 1024B - >
- *
- * We have two COW operations which are 4k blocks.
- * The IO is requested for 1024 Bytes which are spanned
- * between two COW operations. We will split this IO
- * into two parts:
- *
- * 1: IO of size 512B from offset 3584 bytes (COW OP-1)
- * 2: IO of size 512B from offset 4096 bytes (COW OP-2)
- */
- return ReadUnalignedSector(sector, size, it);
- }
-
- int num_ops = DIV_ROUND_UP(size, BLOCK_SZ);
- while (num_ops) {
- if (!ProcessCowOp(it->second)) {
- return -1;
- }
- num_ops -= 1;
- it++;
- // Update the buffer offset
- bufsink_.UpdateBufferOffset(BLOCK_SZ);
-
- SNAP_LOG(DEBUG) << "ReadData at sector: " << sector << " size: " << size;
- }
-
- // Reset the buffer offset
- bufsink_.ResetBufferOffset();
- return size;
-}
-
-/*
- * dm-snap does prefetch reads while reading disk-exceptions.
- * By default, prefetch value is set to 12; this means that
- * dm-snap will issue 12 areas wherein each area is a 4k page
- * of disk-exceptions.
- *
- * If during prefetch, if the chunk-id seen is beyond the
- * actual number of metadata page, fill the buffer with zero.
- * When dm-snap starts parsing the buffer, it will stop
- * reading metadata page once the buffer content is zero.
- */
-bool Snapuserd::ZerofillDiskExceptions(size_t read_size) {
- size_t size = exceptions_per_area_ * sizeof(struct disk_exception);
-
- if (read_size > size) {
- return false;
- }
-
- void* buffer = bufsink_.GetPayloadBuffer(size);
- CHECK(buffer != nullptr);
-
- memset(buffer, 0, size);
- return true;
-}
-
-/*
- * A disk exception is a simple mapping of old_chunk to new_chunk.
- * When dm-snapshot device is created, kernel requests these mapping.
- *
- * Each disk exception is of size 16 bytes. Thus a single 4k page can
- * have:
- *
- * exceptions_per_area_ = 4096/16 = 256. This entire 4k page
- * is considered a metadata page and it is represented by chunk ID.
- *
- * Convert the chunk ID to index into the vector which gives us
- * the metadata page.
- */
-bool Snapuserd::ReadDiskExceptions(chunk_t chunk, size_t read_size) {
- uint32_t stride = exceptions_per_area_ + 1;
- size_t size;
-
- // ChunkID to vector index
- lldiv_t divresult = lldiv(chunk, stride);
-
- if (divresult.quot < vec_.size()) {
- size = exceptions_per_area_ * sizeof(struct disk_exception);
-
- CHECK(read_size == size);
-
- void* buffer = bufsink_.GetPayloadBuffer(size);
- CHECK(buffer != nullptr);
-
- memcpy(buffer, vec_[divresult.quot].get(), size);
- } else {
- return ZerofillDiskExceptions(read_size);
- }
-
- return true;
-}
-
-loff_t Snapuserd::GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer,
- int* unmerged_exceptions) {
- loff_t offset = 0;
- *unmerged_exceptions = 0;
-
- while (*unmerged_exceptions <= exceptions_per_area_) {
- struct disk_exception* merged_de =
- reinterpret_cast<struct disk_exception*>((char*)merged_buffer + offset);
- struct disk_exception* cow_de =
- reinterpret_cast<struct disk_exception*>((char*)unmerged_buffer + offset);
-
- // Unmerged op by the kernel
- if (merged_de->old_chunk != 0 || merged_de->new_chunk != 0) {
- CHECK(merged_de->old_chunk == cow_de->old_chunk);
- CHECK(merged_de->new_chunk == cow_de->new_chunk);
-
- offset += sizeof(struct disk_exception);
- *unmerged_exceptions += 1;
- continue;
- }
-
- break;
- }
-
- CHECK(!(*unmerged_exceptions == exceptions_per_area_));
-
- SNAP_LOG(DEBUG) << "Unmerged_Exceptions: " << *unmerged_exceptions << " Offset: " << offset;
- return offset;
-}
-
-int Snapuserd::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
- int unmerged_exceptions) {
- int merged_ops_cur_iter = 0;
-
- // Find the operations which are merged in this cycle.
- while ((unmerged_exceptions + merged_ops_cur_iter) < exceptions_per_area_) {
- struct disk_exception* merged_de =
- reinterpret_cast<struct disk_exception*>((char*)merged_buffer + offset);
- struct disk_exception* cow_de =
- reinterpret_cast<struct disk_exception*>((char*)unmerged_buffer + offset);
-
- CHECK(merged_de->new_chunk == 0);
- CHECK(merged_de->old_chunk == 0);
-
- if (cow_de->new_chunk != 0) {
- merged_ops_cur_iter += 1;
- offset += sizeof(struct disk_exception);
- const CowOperation* cow_op = chunk_map_[ChunkToSector(cow_de->new_chunk)];
- CHECK(cow_op != nullptr);
-
- CHECK(cow_op->new_block == cow_de->old_chunk);
- // zero out to indicate that operation is merged.
- cow_de->old_chunk = 0;
- cow_de->new_chunk = 0;
- } else if (cow_de->old_chunk == 0) {
- // Already merged op in previous iteration or
- // This could also represent a partially filled area.
- //
- // If the op was merged in previous cycle, we don't have
- // to count them.
- CHECK(cow_de->new_chunk == 0);
- break;
- } else {
- SNAP_LOG(ERROR) << "Error in merge operation. Found invalid metadata: "
- << " merged_de-old-chunk: " << merged_de->old_chunk
- << " merged_de-new-chunk: " << merged_de->new_chunk
- << " cow_de-old-chunk: " << cow_de->old_chunk
- << " cow_de-new-chunk: " << cow_de->new_chunk
- << " unmerged_exceptions: " << unmerged_exceptions
- << " merged_ops_cur_iter: " << merged_ops_cur_iter
- << " offset: " << offset;
- return -1;
- }
- }
- return merged_ops_cur_iter;
-}
-
-bool Snapuserd::ProcessMergeComplete(chunk_t chunk, void* buffer) {
- uint32_t stride = exceptions_per_area_ + 1;
- CowHeader header;
-
- if (!reader_->GetHeader(&header)) {
- SNAP_LOG(ERROR) << "Failed to get header";
- return false;
- }
-
- // ChunkID to vector index
- lldiv_t divresult = lldiv(chunk, stride);
- CHECK(divresult.quot < vec_.size());
- SNAP_LOG(DEBUG) << "ProcessMergeComplete: chunk: " << chunk
- << " Metadata-Index: " << divresult.quot;
-
- int unmerged_exceptions = 0;
- loff_t offset = GetMergeStartOffset(buffer, vec_[divresult.quot].get(), &unmerged_exceptions);
-
- int merged_ops_cur_iter =
- GetNumberOfMergedOps(buffer, vec_[divresult.quot].get(), offset, unmerged_exceptions);
-
- // There should be at least one operation merged in this cycle
- CHECK(merged_ops_cur_iter > 0);
-
- header.num_merge_ops += merged_ops_cur_iter;
- reader_->UpdateMergeProgress(merged_ops_cur_iter);
- if (!writer_->CommitMerge(merged_ops_cur_iter)) {
- SNAP_LOG(ERROR) << "CommitMerge failed... merged_ops_cur_iter: " << merged_ops_cur_iter;
- return false;
- }
-
- SNAP_LOG(DEBUG) << "Merge success: " << merged_ops_cur_iter << "chunk: " << chunk;
- merge_initiated_ = true;
return true;
}
@@ -836,7 +477,6 @@
// Total number of sectors required for creating dm-user device
num_sectors_ = ChunkToSector(data_chunk_id);
- metadata_read_done_ = true;
merge_initiated_ = false;
return true;
}
@@ -850,37 +490,6 @@
}
}
-// Read Header from dm-user misc device. This gives
-// us the sector number for which IO is issued by dm-snapshot device
-bool Snapuserd::ReadDmUserHeader() {
- if (!android::base::ReadFully(ctrl_fd_, bufsink_.GetBufPtr(), sizeof(struct dm_user_header))) {
- SNAP_PLOG(ERROR) << "Control-read failed";
- return false;
- }
-
- return true;
-}
-
-// Send the payload/data back to dm-user misc device.
-bool Snapuserd::WriteDmUserPayload(size_t size) {
- if (!android::base::WriteFully(ctrl_fd_, bufsink_.GetBufPtr(),
- sizeof(struct dm_user_header) + size)) {
- SNAP_PLOG(ERROR) << "Write to dm-user failed size: " << size;
- return false;
- }
-
- return true;
-}
-
-bool Snapuserd::ReadDmUserPayload(void* buffer, size_t size) {
- if (!android::base::ReadFully(ctrl_fd_, buffer, size)) {
- SNAP_PLOG(ERROR) << "ReadDmUserPayload failed size: " << size;
- return false;
- }
-
- return true;
-}
-
bool Snapuserd::InitCowDevice() {
cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
if (cow_fd_ < 0) {
@@ -888,186 +497,26 @@
return false;
}
- // Allocate the buffer which is used to communicate between
- // daemon and dm-user. The buffer comprises of header and a fixed payload.
- // If the dm-user requests a big IO, the IO will be broken into chunks
- // of PAYLOAD_SIZE.
- size_t buf_size = sizeof(struct dm_user_header) + PAYLOAD_SIZE;
- bufsink_.Initialize(buf_size);
-
return ReadMetadata();
}
-bool Snapuserd::InitBackingAndControlDevice() {
- backing_store_fd_.reset(open(backing_store_device_.c_str(), O_RDONLY));
- if (backing_store_fd_ < 0) {
- SNAP_PLOG(ERROR) << "Open Failed: " << backing_store_device_;
- return false;
+/*
+ * Entry point to launch worker threads
+ */
+bool Snapuserd::Start() {
+ std::vector<std::future<bool>> threads;
+
+ for (int i = 0; i < worker_threads_.size(); i++) {
+ threads.emplace_back(
+ std::async(std::launch::async, &WorkerThread::RunThread, worker_threads_[i].get()));
}
- ctrl_fd_.reset(open(control_device_.c_str(), O_RDWR));
- if (ctrl_fd_ < 0) {
- SNAP_PLOG(ERROR) << "Unable to open " << control_device_;
- return false;
+ bool ret = true;
+ for (auto& t : threads) {
+ ret = t.get() && ret;
}
- return true;
-}
-
-bool Snapuserd::DmuserWriteRequest() {
- struct dm_user_header* header = bufsink_.GetHeaderPtr();
-
- // device mapper has the capability to allow
- // targets to flush the cache when writes are completed. This
- // is controlled by each target by a flag "flush_supported".
- // This flag is set by dm-user. When flush is supported,
- // a number of zero-length bio's will be submitted to
- // the target for the purpose of flushing cache. It is the
- // responsibility of the target driver - which is dm-user in this
- // case, to remap these bio's to the underlying device. Since,
- // there is no underlying device for dm-user, this zero length
- // bio's gets routed to daemon.
- //
- // Flush operations are generated post merge by dm-snap by having
- // REQ_PREFLUSH flag set. Snapuser daemon doesn't have anything
- // to flush per se; hence, just respond back with a success message.
- if (header->sector == 0) {
- CHECK(header->len == 0);
- header->type = DM_USER_RESP_SUCCESS;
- if (!WriteDmUserPayload(0)) {
- return false;
- }
- return true;
- }
-
- size_t remaining_size = header->len;
- size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
- CHECK(read_size == BLOCK_SZ);
-
- CHECK(header->sector > 0);
- chunk_t chunk = SectorToChunk(header->sector);
- CHECK(chunk_map_.find(header->sector) == chunk_map_.end());
-
- void* buffer = bufsink_.GetPayloadBuffer(read_size);
- CHECK(buffer != nullptr);
- header->type = DM_USER_RESP_SUCCESS;
-
- if (!ReadDmUserPayload(buffer, read_size)) {
- SNAP_LOG(ERROR) << "ReadDmUserPayload failed for chunk id: " << chunk
- << "Sector: " << header->sector;
- header->type = DM_USER_RESP_ERROR;
- }
-
- if (header->type == DM_USER_RESP_SUCCESS && !ProcessMergeComplete(chunk, buffer)) {
- SNAP_LOG(ERROR) << "ProcessMergeComplete failed for chunk id: " << chunk
- << "Sector: " << header->sector;
- header->type = DM_USER_RESP_ERROR;
- } else {
- SNAP_LOG(DEBUG) << "ProcessMergeComplete success for chunk id: " << chunk
- << "Sector: " << header->sector;
- }
-
- if (!WriteDmUserPayload(0)) {
- return false;
- }
-
- return true;
-}
-
-bool Snapuserd::DmuserReadRequest() {
- struct dm_user_header* header = bufsink_.GetHeaderPtr();
- size_t remaining_size = header->len;
- loff_t offset = 0;
- sector_t sector = header->sector;
- do {
- size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
-
- int ret = read_size;
- header->type = DM_USER_RESP_SUCCESS;
- chunk_t chunk = SectorToChunk(header->sector);
-
- // Request to sector 0 is always for kernel
- // representation of COW header. This IO should be only
- // once during dm-snapshot device creation. We should
- // never see multiple IO requests. Additionally this IO
- // will always be a single 4k.
- if (header->sector == 0) {
- CHECK(metadata_read_done_ == true);
- CHECK(read_size == BLOCK_SZ);
- ConstructKernelCowHeader();
- SNAP_LOG(DEBUG) << "Kernel header constructed";
- } else {
- if (!offset && (read_size == BLOCK_SZ) &&
- chunk_map_.find(header->sector) == chunk_map_.end()) {
- if (!ReadDiskExceptions(chunk, read_size)) {
- SNAP_LOG(ERROR) << "ReadDiskExceptions failed for chunk id: " << chunk
- << "Sector: " << header->sector;
- header->type = DM_USER_RESP_ERROR;
- } else {
- SNAP_LOG(DEBUG) << "ReadDiskExceptions success for chunk id: " << chunk
- << "Sector: " << header->sector;
- }
- } else {
- chunk_t num_sectors_read = (offset >> SECTOR_SHIFT);
- ret = ReadData(sector + num_sectors_read, read_size);
- if (ret < 0) {
- SNAP_LOG(ERROR) << "ReadData failed for chunk id: " << chunk
- << " Sector: " << (sector + num_sectors_read)
- << " size: " << read_size << " header-len: " << header->len;
- header->type = DM_USER_RESP_ERROR;
- } else {
- SNAP_LOG(DEBUG) << "ReadData success for chunk id: " << chunk
- << "Sector: " << header->sector;
- }
- }
- }
-
- // Daemon will not be terminated if there is any error. We will
- // just send the error back to dm-user.
- if (!WriteDmUserPayload(ret)) {
- return false;
- }
-
- remaining_size -= ret;
- offset += ret;
- } while (remaining_size > 0);
-
- return true;
-}
-
-bool Snapuserd::Run() {
- struct dm_user_header* header = bufsink_.GetHeaderPtr();
-
- bufsink_.Clear();
-
- if (!ReadDmUserHeader()) {
- SNAP_LOG(ERROR) << "ReadDmUserHeader failed";
- return false;
- }
-
- SNAP_LOG(DEBUG) << "msg->seq: " << std::hex << header->seq;
- SNAP_LOG(DEBUG) << "msg->type: " << std::hex << header->type;
- SNAP_LOG(DEBUG) << "msg->flags: " << std::hex << header->flags;
- SNAP_LOG(DEBUG) << "msg->sector: " << std::hex << header->sector;
- SNAP_LOG(DEBUG) << "msg->len: " << std::hex << header->len;
-
- switch (header->type) {
- case DM_USER_REQ_MAP_READ: {
- if (!DmuserReadRequest()) {
- return false;
- }
- break;
- }
-
- case DM_USER_REQ_MAP_WRITE: {
- if (!DmuserWriteRequest()) {
- return false;
- }
- break;
- }
- }
-
- return true;
+ return ret;
}
} // namespace snapshot
diff --git a/fs_mgr/libsnapshot/snapuserd.h b/fs_mgr/libsnapshot/snapuserd.h
index 518d08b..6ce64d8 100644
--- a/fs_mgr/libsnapshot/snapuserd.h
+++ b/fs_mgr/libsnapshot/snapuserd.h
@@ -18,13 +18,17 @@
#include <stdint.h>
#include <stdlib.h>
+#include <bitset>
#include <csignal>
#include <cstring>
+#include <future>
#include <iostream>
#include <limits>
#include <map>
+#include <mutex>
#include <string>
#include <thread>
+#include <unordered_map>
#include <vector>
#include <android-base/file.h>
@@ -40,6 +44,17 @@
namespace snapshot {
using android::base::unique_fd;
+using namespace std::chrono_literals;
+
+static constexpr size_t PAYLOAD_SIZE = (1UL << 20);
+static_assert(PAYLOAD_SIZE >= BLOCK_SZ);
+
+/*
+ * With 4 threads, we get optimal performance
+ * when update_verifier reads the partition during
+ * boot.
+ */
+static constexpr int NUM_THREADS_PER_PARTITION = 4;
class BufferSink : public IByteSink {
public:
@@ -59,53 +74,106 @@
size_t buffer_size_;
};
-class Snapuserd final {
+class Snapuserd;
+
+class WorkerThread {
public:
- Snapuserd(const std::string& misc_name, const std::string& cow_device,
- const std::string& backing_device);
- bool InitBackingAndControlDevice();
- bool InitCowDevice();
- bool Run();
- const std::string& GetControlDevicePath() { return control_device_; }
- const std::string& GetMiscName() { return misc_name_; }
- uint64_t GetNumSectors() { return num_sectors_; }
- bool IsAttached() const { return ctrl_fd_ >= 0; }
- void CheckMergeCompletionStatus();
- void CloseFds() {
- ctrl_fd_ = {};
- cow_fd_ = {};
- backing_store_fd_ = {};
- }
- size_t GetMetadataAreaSize() { return vec_.size(); }
- void* GetExceptionBuffer(size_t i) { return vec_[i].get(); }
+ WorkerThread(const std::string& cow_device, const std::string& backing_device,
+ const std::string& control_device, const std::string& misc_name,
+ std::shared_ptr<Snapuserd> snapuserd);
+ bool RunThread();
private:
+ // Initialization
+ void InitializeBufsink();
+ bool InitializeFds();
+ bool InitReader();
+ void CloseFds() {
+ ctrl_fd_ = {};
+ backing_store_fd_ = {};
+ }
+
+ // Functions interacting with dm-user
+ bool ReadDmUserHeader();
bool DmuserReadRequest();
bool DmuserWriteRequest();
-
- bool ReadDmUserHeader();
bool ReadDmUserPayload(void* buffer, size_t size);
bool WriteDmUserPayload(size_t size);
- void ConstructKernelCowHeader();
- bool ReadMetadata();
- bool ZerofillDiskExceptions(size_t read_size);
+
bool ReadDiskExceptions(chunk_t chunk, size_t size);
+ bool ZerofillDiskExceptions(size_t read_size);
+ void ConstructKernelCowHeader();
+
+ // IO Path
+ bool ProcessIORequest();
+ int ReadData(sector_t sector, size_t size);
int ReadUnalignedSector(sector_t sector, size_t size,
std::map<sector_t, const CowOperation*>::iterator& it);
- int ReadData(sector_t sector, size_t size);
- bool IsChunkIdMetadata(chunk_t chunk);
- chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);
+ // Processing COW operations
bool ProcessCowOp(const CowOperation* cow_op);
bool ProcessReplaceOp(const CowOperation* cow_op);
bool ProcessCopyOp(const CowOperation* cow_op);
bool ProcessZeroOp();
+ // Merge related functions
+ bool ProcessMergeComplete(chunk_t chunk, void* buffer);
loff_t GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer,
int* unmerged_exceptions);
int GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
int unmerged_exceptions);
- bool ProcessMergeComplete(chunk_t chunk, void* buffer);
+
+ sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
+ chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
+
+ std::unique_ptr<CowReader> reader_;
+ BufferSink bufsink_;
+
+ std::string cow_device_;
+ std::string backing_store_device_;
+ std::string control_device_;
+ std::string misc_name_;
+
+ unique_fd cow_fd_;
+ unique_fd backing_store_fd_;
+ unique_fd ctrl_fd_;
+
+ std::shared_ptr<Snapuserd> snapuserd_;
+ uint32_t exceptions_per_area_;
+};
+
+class Snapuserd : public std::enable_shared_from_this<Snapuserd> {
+ public:
+ Snapuserd(const std::string& misc_name, const std::string& cow_device,
+ const std::string& backing_device);
+ bool InitCowDevice();
+ bool Start();
+ const std::string& GetControlDevicePath() { return control_device_; }
+ const std::string& GetMiscName() { return misc_name_; }
+ uint64_t GetNumSectors() { return num_sectors_; }
+ bool IsAttached() const { return attached_; }
+ void AttachControlDevice() { attached_ = true; }
+
+ void CheckMergeCompletionStatus();
+ bool CommitMerge(int num_merge_ops);
+
+ void CloseFds() { cow_fd_ = {}; }
+ size_t GetMetadataAreaSize() { return vec_.size(); }
+ void* GetExceptionBuffer(size_t i) { return vec_[i].get(); }
+
+ bool InitializeWorkers();
+ std::shared_ptr<Snapuserd> GetSharedPtr() { return shared_from_this(); }
+
+ std::map<sector_t, const CowOperation*>& GetChunkMap() { return chunk_map_; }
+ const std::vector<std::unique_ptr<uint8_t[]>>& GetMetadataVec() const { return vec_; }
+
+ private:
+ std::vector<std::unique_ptr<WorkerThread>> worker_threads_;
+
+ bool ReadMetadata();
+ bool IsChunkIdMetadata(chunk_t chunk);
+ chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);
+
sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
@@ -116,8 +184,6 @@
std::string misc_name_;
unique_fd cow_fd_;
- unique_fd backing_store_fd_;
- unique_fd ctrl_fd_;
uint32_t exceptions_per_area_;
uint64_t num_sectors_;
@@ -141,9 +207,10 @@
// in the chunk_map to find the nearest COW op.
std::map<sector_t, const CowOperation*> chunk_map_;
- bool metadata_read_done_ = false;
+ std::mutex lock_;
+
bool merge_initiated_ = false;
- BufferSink bufsink_;
+ bool attached_ = false;
};
} // namespace snapshot
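
With this split, several WorkerThreads share one Snapuserd object (hence
enable_shared_from_this). Merge-progress bookkeeping is the shared mutable
state on the write path, and it is funneled through Snapuserd::CommitMerge(),
which the patch guards with lock_. Condensed from the snapuserd.cpp hunk above:

    bool Snapuserd::CommitMerge(int num_merge_ops) {
        std::lock_guard<std::mutex> lock(lock_);  // serialize workers' merge commits
        CowHeader header;
        reader_->GetHeader(&header);
        header.num_merge_ops += num_merge_ops;
        reader_->UpdateMergeProgress(num_merge_ops);
        if (!writer_->CommitMerge(num_merge_ops)) {
            return false;  // failed to persist the merged-op count
        }
        merge_initiated_ = true;
        return true;
    }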
diff --git a/fs_mgr/libsnapshot/snapuserd_server.cpp b/fs_mgr/libsnapshot/snapuserd_server.cpp
index 017de3b..167895e 100644
--- a/fs_mgr/libsnapshot/snapuserd_server.cpp
+++ b/fs_mgr/libsnapshot/snapuserd_server.cpp
@@ -77,8 +77,8 @@
JoinAllThreads();
}
-DmUserHandler::DmUserHandler(std::unique_ptr<Snapuserd>&& snapuserd)
- : snapuserd_(std::move(snapuserd)), misc_name_(snapuserd_->GetMiscName()) {}
+DmUserHandler::DmUserHandler(std::shared_ptr<Snapuserd> snapuserd)
+ : snapuserd_(snapuserd), misc_name_(snapuserd_->GetMiscName()) {}
bool SnapuserdServer::Sendmsg(android::base::borrowed_fd fd, const std::string& msg) {
ssize_t ret = TEMP_FAILURE_RETRY(send(fd.get(), msg.data(), msg.size(), 0));
@@ -204,10 +204,8 @@
void SnapuserdServer::RunThread(std::shared_ptr<DmUserHandler> handler) {
LOG(INFO) << "Entering thread for handler: " << handler->misc_name();
- while (!StopRequested()) {
- if (!handler->snapuserd()->Run()) {
- break;
- }
+ if (!handler->snapuserd()->Start()) {
+ LOG(ERROR) << " Failed to launch all worker threads";
}
handler->snapuserd()->CloseFds();
@@ -349,13 +347,18 @@
std::shared_ptr<DmUserHandler> SnapuserdServer::AddHandler(const std::string& misc_name,
const std::string& cow_device_path,
const std::string& backing_device) {
- auto snapuserd = std::make_unique<Snapuserd>(misc_name, cow_device_path, backing_device);
+ auto snapuserd = std::make_shared<Snapuserd>(misc_name, cow_device_path, backing_device);
if (!snapuserd->InitCowDevice()) {
LOG(ERROR) << "Failed to initialize Snapuserd";
return nullptr;
}
- auto handler = std::make_shared<DmUserHandler>(std::move(snapuserd));
+ if (!snapuserd->InitializeWorkers()) {
+ LOG(ERROR) << "Failed to initialize workers";
+ return nullptr;
+ }
+
+ auto handler = std::make_shared<DmUserHandler>(snapuserd);
{
std::lock_guard<std::mutex> lock(lock_);
if (FindHandler(&lock, misc_name) != dm_users_.end()) {
@@ -370,10 +373,7 @@
bool SnapuserdServer::StartHandler(const std::shared_ptr<DmUserHandler>& handler) {
CHECK(!handler->snapuserd()->IsAttached());
- if (!handler->snapuserd()->InitBackingAndControlDevice()) {
- LOG(ERROR) << "Failed to initialize control device: " << handler->misc_name();
- return false;
- }
+ handler->snapuserd()->AttachControlDevice();
handler->thread() = std::thread(std::bind(&SnapuserdServer::RunThread, this, handler));
return true;
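
Handler setup moves worker creation into AddHandler(); StartHandler() now only
marks the handler attached, and each worker opens its own control device in
InitializeFds(). A condensed sketch of the AddHandler() flow above:

    auto snapuserd = std::make_shared<Snapuserd>(misc_name, cow_device_path, backing_device);
    if (!snapuserd->InitCowDevice()) return nullptr;      // reads COW metadata once
    if (!snapuserd->InitializeWorkers()) return nullptr;  // NUM_THREADS_PER_PARTITION workers
    auto handler = std::make_shared<DmUserHandler>(snapuserd);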
diff --git a/fs_mgr/libsnapshot/snapuserd_server.h b/fs_mgr/libsnapshot/snapuserd_server.h
index 7cbc2de..e9d575d 100644
--- a/fs_mgr/libsnapshot/snapuserd_server.h
+++ b/fs_mgr/libsnapshot/snapuserd_server.h
@@ -47,17 +47,17 @@
class DmUserHandler {
public:
- explicit DmUserHandler(std::unique_ptr<Snapuserd>&& snapuserd);
+ explicit DmUserHandler(std::shared_ptr<Snapuserd> snapuserd);
void FreeResources() { snapuserd_ = nullptr; }
- const std::unique_ptr<Snapuserd>& snapuserd() const { return snapuserd_; }
+ const std::shared_ptr<Snapuserd>& snapuserd() const { return snapuserd_; }
std::thread& thread() { return thread_; }
const std::string& misc_name() const { return misc_name_; }
private:
std::thread thread_;
- std::unique_ptr<Snapuserd> snapuserd_;
+ std::shared_ptr<Snapuserd> snapuserd_;
std::string misc_name_;
};
diff --git a/fs_mgr/libsnapshot/snapuserd_worker.cpp b/fs_mgr/libsnapshot/snapuserd_worker.cpp
new file mode 100644
index 0000000..16f47fe
--- /dev/null
+++ b/fs_mgr/libsnapshot/snapuserd_worker.cpp
@@ -0,0 +1,675 @@
+/*
+ * Copyright (C) 2020 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "snapuserd.h"
+
+#include <csignal>
+#include <optional>
+#include <set>
+
+#include <libsnapshot/snapuserd_client.h>
+
+namespace android {
+namespace snapshot {
+
+using namespace android;
+using namespace android::dm;
+using android::base::unique_fd;
+
+#define SNAP_LOG(level) LOG(level) << misc_name_ << ": "
+#define SNAP_PLOG(level) PLOG(level) << misc_name_ << ": "
+
+void BufferSink::Initialize(size_t size) {
+ buffer_size_ = size;
+ buffer_offset_ = 0;
+ buffer_ = std::make_unique<uint8_t[]>(size);
+}
+
+void* BufferSink::GetPayloadBuffer(size_t size) {
+ if ((buffer_size_ - buffer_offset_) < size) return nullptr;
+
+ char* buffer = reinterpret_cast<char*>(GetBufPtr());
+ struct dm_user_message* msg = (struct dm_user_message*)(&(buffer[0]));
+ return (char*)msg->payload.buf + buffer_offset_;
+}
+
+void* BufferSink::GetBuffer(size_t requested, size_t* actual) {
+ void* buf = GetPayloadBuffer(requested);
+ if (!buf) {
+ *actual = 0;
+ return nullptr;
+ }
+ *actual = requested;
+ return buf;
+}
+
+struct dm_user_header* BufferSink::GetHeaderPtr() {
+ CHECK(sizeof(struct dm_user_header) <= buffer_size_);
+ char* buf = reinterpret_cast<char*>(GetBufPtr());
+ struct dm_user_header* header = (struct dm_user_header*)(&(buf[0]));
+ return header;
+}
+
+WorkerThread::WorkerThread(const std::string& cow_device, const std::string& backing_device,
+ const std::string& control_device, const std::string& misc_name,
+ std::shared_ptr<Snapuserd> snapuserd) {
+ cow_device_ = cow_device;
+ backing_store_device_ = backing_device;
+ control_device_ = control_device;
+ misc_name_ = misc_name;
+ snapuserd_ = snapuserd;
+ exceptions_per_area_ = (CHUNK_SIZE << SECTOR_SHIFT) / sizeof(struct disk_exception);
+}
+
+bool WorkerThread::InitializeFds() {
+ backing_store_fd_.reset(open(backing_store_device_.c_str(), O_RDONLY));
+ if (backing_store_fd_ < 0) {
+ SNAP_PLOG(ERROR) << "Open Failed: " << backing_store_device_;
+ return false;
+ }
+
+ cow_fd_.reset(open(cow_device_.c_str(), O_RDWR));
+ if (cow_fd_ < 0) {
+ SNAP_PLOG(ERROR) << "Open Failed: " << cow_device_;
+ return false;
+ }
+
+ ctrl_fd_.reset(open(control_device_.c_str(), O_RDWR));
+ if (ctrl_fd_ < 0) {
+ SNAP_PLOG(ERROR) << "Unable to open " << control_device_;
+ return false;
+ }
+
+ return true;
+}
+
+bool WorkerThread::InitReader() {
+ reader_ = std::make_unique<CowReader>();
+ if (!reader_->InitForMerge(std::move(cow_fd_))) {
+ return false;
+ }
+
+ return true;
+}
+
+// Construct kernel COW header in memory
+// This header will be in sector 0. The IO
+// request will always be 4k. After constructing
+// the header, zero out the remaining block.
+void WorkerThread::ConstructKernelCowHeader() {
+ void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
+ CHECK(buffer != nullptr);
+
+ memset(buffer, 0, BLOCK_SZ);
+
+ struct disk_header* dh = reinterpret_cast<struct disk_header*>(buffer);
+
+ dh->magic = SNAP_MAGIC;
+ dh->valid = SNAPSHOT_VALID;
+ dh->version = SNAPSHOT_DISK_VERSION;
+ dh->chunk_size = CHUNK_SIZE;
+}
+
+// Start the replace operation. This will read the
+// internal COW format and if the block is compressed,
+// it will be de-compressed.
+bool WorkerThread::ProcessReplaceOp(const CowOperation* cow_op) {
+ if (!reader_->ReadData(*cow_op, &bufsink_)) {
+ SNAP_LOG(ERROR) << "ProcessReplaceOp failed for block " << cow_op->new_block;
+ return false;
+ }
+
+ return true;
+}
+
+// Start the copy operation. This will read the backing
+// block device which is represented by cow_op->source.
+bool WorkerThread::ProcessCopyOp(const CowOperation* cow_op) {
+ void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
+ CHECK(buffer != nullptr);
+
+ // Issue a single 4K IO. However, this can be optimized
+ // if the successive blocks are contiguous.
+ if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SZ,
+ cow_op->source * BLOCK_SZ)) {
+ SNAP_PLOG(ERROR) << "Copy-op failed. Read from backing store: " << backing_store_device_
+ << "at block :" << cow_op->source;
+ return false;
+ }
+
+ return true;
+}
+
+bool WorkerThread::ProcessZeroOp() {
+ // Zero out the entire block
+ void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SZ);
+ CHECK(buffer != nullptr);
+
+ memset(buffer, 0, BLOCK_SZ);
+ return true;
+}
+
+bool WorkerThread::ProcessCowOp(const CowOperation* cow_op) {
+ CHECK(cow_op != nullptr);
+
+ switch (cow_op->type) {
+ case kCowReplaceOp: {
+ return ProcessReplaceOp(cow_op);
+ }
+
+ case kCowZeroOp: {
+ return ProcessZeroOp();
+ }
+
+ case kCowCopyOp: {
+ return ProcessCopyOp(cow_op);
+ }
+
+ default: {
+ SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
+ }
+ }
+ return false;
+}
+
+int WorkerThread::ReadUnalignedSector(sector_t sector, size_t size,
+ std::map<sector_t, const CowOperation*>::iterator& it) {
+ size_t skip_sector_size = 0;
+
+ SNAP_LOG(DEBUG) << "ReadUnalignedSector: sector " << sector << " size: " << size
+ << " Aligned sector: " << it->second;
+
+ if (!ProcessCowOp(it->second)) {
+ SNAP_LOG(ERROR) << "ReadUnalignedSector: " << sector << " failed of size: " << size;
+ return -1;
+ }
+
+ int num_sectors_skip = sector - it->first;
+
+ if (num_sectors_skip > 0) {
+ skip_sector_size = num_sectors_skip << SECTOR_SHIFT;
+ char* buffer = reinterpret_cast<char*>(bufsink_.GetBufPtr());
+ struct dm_user_message* msg = (struct dm_user_message*)(&(buffer[0]));
+
+ memmove(msg->payload.buf, (char*)msg->payload.buf + skip_sector_size,
+ (BLOCK_SZ - skip_sector_size));
+ }
+
+ bufsink_.ResetBufferOffset();
+ return std::min(size, (BLOCK_SZ - skip_sector_size));
+}
+
+/*
+ * Read the data for a given COW Operation.
+ *
+ * The kernel can issue IO at sector granularity.
+ * Hence, an IO may end up reading partial data
+ * from a COW operation, or a single request may
+ * span two COW operations.
+ *
+ */
+int WorkerThread::ReadData(sector_t sector, size_t size) {
+ std::map<sector_t, const CowOperation*>& chunk_map = snapuserd_->GetChunkMap();
+ /*
+ * chunk_map stores COW operation at 4k granularity.
+ * If the requested IO with the sector falls on the 4k
+ * boundary, then we can read the COW op directly without
+ * any issue.
+ *
+ * However, if the requested sector is not 4K aligned,
+ * then we will have to find the nearest COW operation
+ * and chop the 4K block to fetch the requested sector.
+ */
+ std::map<sector_t, const CowOperation*>::iterator it = chunk_map.find(sector);
+ if (it == chunk_map.end()) {
+ it = chunk_map.lower_bound(sector);
+ if (it != chunk_map.begin()) {
+ --it;
+ }
+
+ /*
+ * If the IO is spanned between two COW operations,
+ * split the IO into two parts:
+ *
+ * 1: Read the first part from the single COW op
+ * 2: Read the second part from the next COW op.
+ *
+ * Ex: Let's say we have a 1024 Bytes IO request.
+ *
+ * 0 COW OP-1 4096 COW OP-2 8192
+ * |******************|*******************|
+ * |*****|*****|
+ * 3584 4608
+ * <- 1024B - >
+ *
+ * We have two COW operations which are 4k blocks.
+ * The IO is requested for 1024 Bytes which are spanned
+ * between two COW operations. We will split this IO
+ * into two parts:
+ *
+ * 1: IO of size 512B from offset 3584 bytes (COW OP-1)
+ * 2: IO of size 512B from offset 4096 bytes (COW OP-2)
+ */
+ return ReadUnalignedSector(sector, size, it);
+ }
+
+ int num_ops = DIV_ROUND_UP(size, BLOCK_SZ);
+ while (num_ops) {
+ if (!ProcessCowOp(it->second)) {
+ return -1;
+ }
+ num_ops -= 1;
+ it++;
+ // Update the buffer offset
+ bufsink_.UpdateBufferOffset(BLOCK_SZ);
+
+ SNAP_LOG(DEBUG) << "ReadData at sector: " << sector << " size: " << size;
+ }
+
+ // Reset the buffer offset
+ bufsink_.ResetBufferOffset();
+ return size;
+}
+
+/*
+ * dm-snap does prefetch reads while reading disk-exceptions.
+ * By default, prefetch value is set to 12; this means that
+ * dm-snap will issue 12 areas wherein each area is a 4k page
+ * of disk-exceptions.
+ *
+ * If, during prefetch, the chunk-id seen is beyond the
+ * actual number of metadata pages, fill the buffer with zeroes.
+ * When dm-snap starts parsing the buffer, it will stop
+ * reading metadata pages once the buffer content is zero.
+ */
+bool WorkerThread::ZerofillDiskExceptions(size_t read_size) {
+ size_t size = exceptions_per_area_ * sizeof(struct disk_exception);
+
+ if (read_size > size) {
+ return false;
+ }
+
+ void* buffer = bufsink_.GetPayloadBuffer(size);
+ CHECK(buffer != nullptr);
+
+ memset(buffer, 0, size);
+ return true;
+}
+
+/*
+ * A disk exception is a simple mapping of old_chunk to new_chunk.
+ * When the dm-snapshot device is created, the kernel requests these mappings.
+ *
+ * Each disk exception is of size 16 bytes. Thus a single 4k page can
+ * have:
+ *
+ * exceptions_per_area_ = 4096/16 = 256. This entire 4k page
+ * is considered a metadata page and it is represented by chunk ID.
+ *
+ * Convert the chunk ID to index into the vector which gives us
+ * the metadata page.
+ */
+bool WorkerThread::ReadDiskExceptions(chunk_t chunk, size_t read_size) {
+ uint32_t stride = exceptions_per_area_ + 1;
+ size_t size;
+ const std::vector<std::unique_ptr<uint8_t[]>>& vec = snapuserd_->GetMetadataVec();
+
+ // ChunkID to vector index
+ lldiv_t divresult = lldiv(chunk, stride);
+
+ if (divresult.quot < vec.size()) {
+ size = exceptions_per_area_ * sizeof(struct disk_exception);
+
+ CHECK(read_size == size);
+
+ void* buffer = bufsink_.GetPayloadBuffer(size);
+ CHECK(buffer != nullptr);
+
+ memcpy(buffer, vec[divresult.quot].get(), size);
+ } else {
+ return ZerofillDiskExceptions(read_size);
+ }
+
+ return true;
+}
+
+loff_t WorkerThread::GetMergeStartOffset(void* merged_buffer, void* unmerged_buffer,
+ int* unmerged_exceptions) {
+ loff_t offset = 0;
+ *unmerged_exceptions = 0;
+
+ while (*unmerged_exceptions <= exceptions_per_area_) {
+ struct disk_exception* merged_de =
+ reinterpret_cast<struct disk_exception*>((char*)merged_buffer + offset);
+ struct disk_exception* cow_de =
+ reinterpret_cast<struct disk_exception*>((char*)unmerged_buffer + offset);
+
+ // Unmerged op by the kernel
+ if (merged_de->old_chunk != 0 || merged_de->new_chunk != 0) {
+ CHECK(merged_de->old_chunk == cow_de->old_chunk);
+ CHECK(merged_de->new_chunk == cow_de->new_chunk);
+
+ offset += sizeof(struct disk_exception);
+ *unmerged_exceptions += 1;
+ continue;
+ }
+
+ break;
+ }
+
+ CHECK(!(*unmerged_exceptions == exceptions_per_area_));
+
+ SNAP_LOG(DEBUG) << "Unmerged_Exceptions: " << *unmerged_exceptions << " Offset: " << offset;
+ return offset;
+}
+
+int WorkerThread::GetNumberOfMergedOps(void* merged_buffer, void* unmerged_buffer, loff_t offset,
+ int unmerged_exceptions) {
+ int merged_ops_cur_iter = 0;
+ std::map<sector_t, const CowOperation*>& chunk_map = snapuserd_->GetChunkMap();
+
+ // Find the operations which are merged in this cycle.
+ while ((unmerged_exceptions + merged_ops_cur_iter) < exceptions_per_area_) {
+ struct disk_exception* merged_de =
+ reinterpret_cast<struct disk_exception*>((char*)merged_buffer + offset);
+ struct disk_exception* cow_de =
+ reinterpret_cast<struct disk_exception*>((char*)unmerged_buffer + offset);
+
+ CHECK(merged_de->new_chunk == 0);
+ CHECK(merged_de->old_chunk == 0);
+
+ if (cow_de->new_chunk != 0) {
+ merged_ops_cur_iter += 1;
+ offset += sizeof(struct disk_exception);
+ const CowOperation* cow_op = chunk_map[ChunkToSector(cow_de->new_chunk)];
+ CHECK(cow_op != nullptr);
+
+ CHECK(cow_op->new_block == cow_de->old_chunk);
+ // zero out to indicate that operation is merged.
+ cow_de->old_chunk = 0;
+ cow_de->new_chunk = 0;
+ } else if (cow_de->old_chunk == 0) {
+ // Already merged op in previous iteration or
+ // This could also represent a partially filled area.
+ //
+ // If the op was merged in previous cycle, we don't have
+ // to count them.
+ CHECK(cow_de->new_chunk == 0);
+ break;
+ } else {
+ SNAP_LOG(ERROR) << "Error in merge operation. Found invalid metadata: "
+ << " merged_de-old-chunk: " << merged_de->old_chunk
+ << " merged_de-new-chunk: " << merged_de->new_chunk
+ << " cow_de-old-chunk: " << cow_de->old_chunk
+ << " cow_de-new-chunk: " << cow_de->new_chunk
+ << " unmerged_exceptions: " << unmerged_exceptions
+ << " merged_ops_cur_iter: " << merged_ops_cur_iter
+ << " offset: " << offset;
+ return -1;
+ }
+ }
+ return merged_ops_cur_iter;
+}
+
+bool WorkerThread::ProcessMergeComplete(chunk_t chunk, void* buffer) {
+ uint32_t stride = exceptions_per_area_ + 1;
+ const std::vector<std::unique_ptr<uint8_t[]>>& vec = snapuserd_->GetMetadataVec();
+
+ // ChunkID to vector index
+ lldiv_t divresult = lldiv(chunk, stride);
+ CHECK(divresult.quot < vec.size());
+ SNAP_LOG(DEBUG) << "ProcessMergeComplete: chunk: " << chunk
+ << " Metadata-Index: " << divresult.quot;
+
+ int unmerged_exceptions = 0;
+ loff_t offset = GetMergeStartOffset(buffer, vec[divresult.quot].get(), &unmerged_exceptions);
+
+ int merged_ops_cur_iter =
+ GetNumberOfMergedOps(buffer, vec[divresult.quot].get(), offset, unmerged_exceptions);
+
+ // There should be at least one operation merged in this cycle
+ CHECK(merged_ops_cur_iter > 0);
+ if (!snapuserd_->CommitMerge(merged_ops_cur_iter)) {
+ return false;
+ }
+
+ SNAP_LOG(DEBUG) << "Merge success: " << merged_ops_cur_iter << "chunk: " << chunk;
+ return true;
+}
+
+// Read Header from dm-user misc device. This gives
+// us the sector number for which IO is issued by dm-snapshot device
+bool WorkerThread::ReadDmUserHeader() {
+ if (!android::base::ReadFully(ctrl_fd_, bufsink_.GetBufPtr(), sizeof(struct dm_user_header))) {
+ if (errno != ENOTBLK) {
+ SNAP_PLOG(ERROR) << "Control-read failed";
+ }
+ return false;
+ }
+
+ return true;
+}
+
+// Send the payload/data back to dm-user misc device.
+bool WorkerThread::WriteDmUserPayload(size_t size) {
+ if (!android::base::WriteFully(ctrl_fd_, bufsink_.GetBufPtr(),
+ sizeof(struct dm_user_header) + size)) {
+ SNAP_PLOG(ERROR) << "Write to dm-user failed size: " << size;
+ return false;
+ }
+
+ return true;
+}
+
+bool WorkerThread::ReadDmUserPayload(void* buffer, size_t size) {
+ if (!android::base::ReadFully(ctrl_fd_, buffer, size)) {
+ SNAP_PLOG(ERROR) << "ReadDmUserPayload failed size: " << size;
+ return false;
+ }
+
+ return true;
+}
+
+bool WorkerThread::DmuserWriteRequest() {
+ struct dm_user_header* header = bufsink_.GetHeaderPtr();
+
+ // Device-mapper allows targets to flush the cache when writes
+ // are completed. This is controlled per target by the
+ // "flush_supported" flag, which dm-user sets. When flush is
+ // supported, a number of zero-length bios are submitted to
+ // the target for the purpose of flushing the cache. It is the
+ // responsibility of the target driver - dm-user in this
+ // case - to remap these bios to the underlying device. Since
+ // there is no underlying device for dm-user, these zero-length
+ // bios get routed to the daemon.
+ //
+ // Flush operations are generated post merge by dm-snap by having
+ // REQ_PREFLUSH flag set. Snapuser daemon doesn't have anything
+ // to flush per se; hence, just respond back with a success message.
+ if (header->sector == 0) {
+ CHECK(header->len == 0);
+ header->type = DM_USER_RESP_SUCCESS;
+ if (!WriteDmUserPayload(0)) {
+ return false;
+ }
+ return true;
+ }
+
+ std::map<sector_t, const CowOperation*>& chunk_map = snapuserd_->GetChunkMap();
+ size_t remaining_size = header->len;
+ size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
+ CHECK(read_size == BLOCK_SZ) << "DmuserWriteRequest: read_size: " << read_size;
+
+ CHECK(header->sector > 0);
+ chunk_t chunk = SectorToChunk(header->sector);
+ CHECK(chunk_map.find(header->sector) == chunk_map.end());
+
+ void* buffer = bufsink_.GetPayloadBuffer(read_size);
+ CHECK(buffer != nullptr);
+ header->type = DM_USER_RESP_SUCCESS;
+
+ if (!ReadDmUserPayload(buffer, read_size)) {
+ SNAP_LOG(ERROR) << "ReadDmUserPayload failed for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ header->type = DM_USER_RESP_ERROR;
+ }
+
+ if (header->type == DM_USER_RESP_SUCCESS && !ProcessMergeComplete(chunk, buffer)) {
+ SNAP_LOG(ERROR) << "ProcessMergeComplete failed for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ header->type = DM_USER_RESP_ERROR;
+ } else {
+ SNAP_LOG(DEBUG) << "ProcessMergeComplete success for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ }
+
+ if (!WriteDmUserPayload(0)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool WorkerThread::DmuserReadRequest() {
+ struct dm_user_header* header = bufsink_.GetHeaderPtr();
+ size_t remaining_size = header->len;
+ loff_t offset = 0;
+ sector_t sector = header->sector;
+ std::map<sector_t, const CowOperation*>& chunk_map = snapuserd_->GetChunkMap();
+ do {
+ size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
+
+ int ret = read_size;
+ header->type = DM_USER_RESP_SUCCESS;
+ chunk_t chunk = SectorToChunk(header->sector);
+
+ // A request to sector 0 is always for the kernel
+ // representation of the COW header. This IO should happen
+ // only once, during dm-snapshot device creation; we should
+ // never see multiple such requests. Additionally, this IO
+ // will always be a single 4k.
+ if (header->sector == 0) {
+ CHECK(read_size == BLOCK_SZ) << " Sector 0 read request of size: " << read_size;
+ ConstructKernelCowHeader();
+ SNAP_LOG(DEBUG) << "Kernel header constructed";
+ } else {
+ if (!offset && (read_size == BLOCK_SZ) &&
+ chunk_map.find(header->sector) == chunk_map.end()) {
+ if (!ReadDiskExceptions(chunk, read_size)) {
+ SNAP_LOG(ERROR) << "ReadDiskExceptions failed for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ header->type = DM_USER_RESP_ERROR;
+ } else {
+ SNAP_LOG(DEBUG) << "ReadDiskExceptions success for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ }
+ } else {
+ chunk_t num_sectors_read = (offset >> SECTOR_SHIFT);
+ ret = ReadData(sector + num_sectors_read, read_size);
+ if (ret < 0) {
+ SNAP_LOG(ERROR) << "ReadData failed for chunk id: " << chunk
+ << " Sector: " << (sector + num_sectors_read)
+ << " size: " << read_size << " header-len: " << header->len;
+ header->type = DM_USER_RESP_ERROR;
+ } else {
+ SNAP_LOG(DEBUG) << "ReadData success for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ }
+ }
+ }
+
+ // Daemon will not be terminated if there is any error. We will
+ // just send the error back to dm-user.
+ if (!WriteDmUserPayload(ret)) {
+ return false;
+ }
+
+ remaining_size -= ret;
+ offset += ret;
+ } while (remaining_size > 0);
+
+ return true;
+}
+
+void WorkerThread::InitializeBufsink() {
+ // Allocate the buffer which is used to communicate between
+ // daemon and dm-user. The buffer comprises of header and a fixed payload.
+ // If the dm-user requests a big IO, the IO will be broken into chunks
+ // of PAYLOAD_SIZE.
+ size_t buf_size = sizeof(struct dm_user_header) + PAYLOAD_SIZE;
+ bufsink_.Initialize(buf_size);
+}
+
+bool WorkerThread::RunThread() {
+ InitializeBufsink();
+
+ if (!InitializeFds()) {
+ return false;
+ }
+
+ if (!InitReader()) {
+ return false;
+ }
+
+ // Start serving IO
+ while (true) {
+ if (!ProcessIORequest()) {
+ break;
+ }
+ }
+
+ CloseFds();
+ reader_->CloseCowFd();
+
+ return true;
+}
+
+bool WorkerThread::ProcessIORequest() {
+ struct dm_user_header* header = bufsink_.GetHeaderPtr();
+
+ if (!ReadDmUserHeader()) {
+ return false;
+ }
+
+ SNAP_LOG(DEBUG) << "msg->seq: " << std::hex << header->seq;
+ SNAP_LOG(DEBUG) << "msg->type: " << std::hex << header->type;
+ SNAP_LOG(DEBUG) << "msg->flags: " << std::hex << header->flags;
+ SNAP_LOG(DEBUG) << "msg->sector: " << std::hex << header->sector;
+ SNAP_LOG(DEBUG) << "msg->len: " << std::hex << header->len;
+
+ switch (header->type) {
+ case DM_USER_REQ_MAP_READ: {
+ if (!DmuserReadRequest()) {
+ return false;
+ }
+ break;
+ }
+
+ case DM_USER_REQ_MAP_WRITE: {
+ if (!DmuserWriteRequest()) {
+ return false;
+ }
+ break;
+ }
+ }
+
+ return true;
+}
+
+} // namespace snapshot
+} // namespace android