Merge "libsnapshot:snapuserd: Handle un-aligned IO request"
diff --git a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
index ed67a1c..4cc0fd3 100644
--- a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
+++ b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
@@ -239,16 +239,12 @@
ASSERT_TRUE(rnd_fd > 0);
std::unique_ptr<uint8_t[]> random_buffer_1_ = std::make_unique<uint8_t[]>(size_);
- std::unique_ptr<uint8_t[]> random_buffer_2_ = std::make_unique<uint8_t[]>(size_);
// Fill random data
for (size_t j = 0; j < (size_ / 1_MiB); j++) {
ASSERT_EQ(ReadFullyAtOffset(rnd_fd, (char*)random_buffer_1_.get() + offset, 1_MiB, 0),
true);
- ASSERT_EQ(ReadFullyAtOffset(rnd_fd, (char*)random_buffer_2_.get() + offset, 1_MiB, 0),
- true);
-
offset += 1_MiB;
}
@@ -280,7 +276,7 @@
size_t blk_random2_replace_start = blk_zero_copy_end;
- ASSERT_TRUE(writer.AddRawBlocks(blk_random2_replace_start, random_buffer_2_.get(), size_));
+ ASSERT_TRUE(writer.AddRawBlocks(blk_random2_replace_start, random_buffer_1_.get(), size_));
// Flush operations
ASSERT_TRUE(writer.Finalize());
@@ -292,7 +288,7 @@
ASSERT_EQ(android::base::ReadFullyAtOffset(base_fd_, orig_buffer_.get(), size_, size_), true);
memcpy((char*)orig_buffer_.get() + size_, random_buffer_1_.get(), size_);
memcpy((char*)orig_buffer_.get() + (size_ * 2), (void*)zero_buffer.c_str(), size_);
- memcpy((char*)orig_buffer_.get() + (size_ * 3), random_buffer_2_.get(), size_);
+ memcpy((char*)orig_buffer_.get() + (size_ * 3), random_buffer_1_.get(), size_);
}
void CowSnapuserdTest::InitCowDevice() {
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h
index e8dbe6e..7941e68 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd_kernel.h
@@ -50,6 +50,8 @@
static constexpr uint32_t BLOCK_SIZE = 4096;
static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SIZE) - 1);
+#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
+
// This structure represents the kernel COW header.
// All the below fields should be in Little Endian format.
struct disk_header {
diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp
index 4f94806..573b6a5 100644
--- a/fs_mgr/libsnapshot/snapuserd.cpp
+++ b/fs_mgr/libsnapshot/snapuserd.cpp
@@ -129,88 +129,126 @@
return true;
}
-/*
- * Read the data of size bytes from a given chunk.
- *
- * Kernel can potentially merge the blocks if the
- * successive chunks are contiguous. For chunk size of 8,
- * there can be 256 disk exceptions; and if
- * all 256 disk exceptions are contiguous, kernel can merge
- * them into a single IO.
- *
- * Since each chunk in the disk exception
- * mapping represents a 4k block, kernel can potentially
- * issue 256*4k = 1M IO in one shot.
- *
- * Even though kernel assumes that the blocks are
- * contiguous, we need to split the 1M IO into 4k chunks
- * as each operation represents 4k and it can either be:
- *
- * 1: Replace operation
- * 2: Copy operation
- * 3: Zero operation
- *
- */
-bool Snapuserd::ReadData(chunk_t chunk, size_t size) {
- size_t read_size = size;
- bool ret = true;
- chunk_t chunk_key = chunk;
+bool Snapuserd::ProcessCowOp(const CowOperation* cow_op) {
+ CHECK(cow_op != nullptr);
- if (!((read_size & (BLOCK_SIZE - 1)) == 0)) {
- SNAP_LOG(ERROR) << "ReadData - unaligned read_size: " << read_size;
- return false;
+ switch (cow_op->type) {
+ case kCowReplaceOp: {
+ return ProcessReplaceOp(cow_op);
+ }
+
+ case kCowZeroOp: {
+ return ProcessZeroOp();
+ }
+
+ case kCowCopyOp: {
+ return ProcessCopyOp(cow_op);
+ }
+
+ default: {
+ SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
+ }
+ }
+ return false;
+}
+
+int Snapuserd::ReadUnalignedSector(sector_t sector, size_t size,
+ std::map<sector_t, const CowOperation*>::iterator& it) {
+ size_t skip_sector_size = 0;
+
+ SNAP_LOG(DEBUG) << "ReadUnalignedSector: sector " << sector << " size: " << size
+ << " Aligned sector: " << it->second;
+
+ if (!ProcessCowOp(it->second)) {
+ SNAP_LOG(ERROR) << "ReadUnalignedSector: " << sector << " failed";
+ return -1;
}
- while (read_size > 0) {
- const CowOperation* cow_op = chunk_map_[chunk_key];
- CHECK(cow_op != nullptr);
+ int num_sectors_skip = sector - it->first;
- switch (cow_op->type) {
- case kCowReplaceOp: {
- ret = ProcessReplaceOp(cow_op);
- break;
- }
+ if (num_sectors_skip > 0) {
+ skip_sector_size = num_sectors_skip << SECTOR_SHIFT;
+ char* buffer = reinterpret_cast<char*>(bufsink_.GetBufPtr());
+ struct dm_user_message* msg = (struct dm_user_message*)(&(buffer[0]));
- case kCowZeroOp: {
- ret = ProcessZeroOp();
- break;
- }
+ memmove(msg->payload.buf, (char*)msg->payload.buf + skip_sector_size,
+ (BLOCK_SIZE - skip_sector_size));
+ }
- case kCowCopyOp: {
- ret = ProcessCopyOp(cow_op);
- break;
- }
+ bufsink_.ResetBufferOffset();
+ return std::min(size, (BLOCK_SIZE - skip_sector_size));
+}
- default: {
- SNAP_LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
- ret = false;
- break;
- }
+/*
+ * Read the data for a given COW Operation.
+ *
+ * Kernel can issue IO at a sector granularity.
+ * Hence, an IO may end up with reading partial
+ * data from a COW operation or we may also
+ * end up with interspersed request between
+ * two COW operations.
+ *
+ */
+int Snapuserd::ReadData(sector_t sector, size_t size) {
+ /*
+ * chunk_map stores COW operation at 4k granularity.
+ * If the requested IO with the sector falls on the 4k
+ * boundary, then we can read the COW op directly without
+ * any issue.
+ *
+ * However, if the requested sector is not 4K aligned,
+ * then we will have the find the nearest COW operation
+ * and chop the 4K block to fetch the requested sector.
+ */
+ std::map<sector_t, const CowOperation*>::iterator it = chunk_map_.find(sector);
+ if (it == chunk_map_.end()) {
+ it = chunk_map_.lower_bound(sector);
+ if (it != chunk_map_.begin()) {
+ --it;
}
- if (!ret) {
- SNAP_LOG(ERROR) << "ReadData failed for operation: " << cow_op->type;
- return false;
- }
+ /*
+ * If the IO is spanned between two COW operations,
+ * split the IO into two parts:
+ *
+ * 1: Read the first part from the single COW op
+ * 2: Read the second part from the next COW op.
+ *
+ * Ex: Let's say we have a 1024 Bytes IO request.
+ *
+ * 0 COW OP-1 4096 COW OP-2 8192
+ * |******************|*******************|
+ * |*****|*****|
+ * 3584 4608
+ * <- 1024B - >
+ *
+ * We have two COW operations which are 4k blocks.
+ * The IO is requested for 1024 Bytes which are spanned
+ * between two COW operations. We will split this IO
+ * into two parts:
+ *
+ * 1: IO of size 512B from offset 3584 bytes (COW OP-1)
+ * 2: IO of size 512B from offset 4096 bytes (COW OP-2)
+ */
+ return ReadUnalignedSector(sector, size, it);
+ }
+ int num_ops = DIV_ROUND_UP(size, BLOCK_SIZE);
+ while (num_ops) {
+ if (!ProcessCowOp(it->second)) {
+ return -1;
+ }
+ num_ops -= 1;
+ it++;
// Update the buffer offset
bufsink_.UpdateBufferOffset(BLOCK_SIZE);
- read_size -= BLOCK_SIZE;
-
- // Start iterating the chunk incrementally; Since while
- // constructing the metadata, we know that the chunk IDs
- // are contiguous
- chunk_key += 1;
-
- if (cow_op->type == kCowCopyOp) {
- CHECK(read_size == 0);
- }
+ SNAP_LOG(DEBUG) << "ReadData at sector: " << sector << " size: " << size;
}
// Reset the buffer offset
bufsink_.ResetBufferOffset();
- return ret;
+ return size;
}
/*
@@ -261,9 +299,7 @@
if (divresult.quot < vec_.size()) {
size = exceptions_per_area_ * sizeof(struct disk_exception);
- if (read_size > size) {
- return false;
- }
+ CHECK(read_size == size);
void* buffer = bufsink_.GetPayloadBuffer(size);
CHECK(buffer != nullptr);
@@ -329,7 +365,7 @@
if (cow_de->new_chunk != 0) {
merged_ops_cur_iter += 1;
offset += sizeof(struct disk_exception);
- const CowOperation* cow_op = chunk_map_[cow_de->new_chunk];
+ const CowOperation* cow_op = chunk_map_[ChunkToSector(cow_de->new_chunk)];
CHECK(cow_op != nullptr);
CHECK(cow_op->new_block == cow_de->old_chunk);
if (cow_op->type == kCowCopyOp) {
@@ -563,7 +599,7 @@
SNAP_LOG(DEBUG) << "Old-chunk: " << de->old_chunk << "New-chunk: " << de->new_chunk;
// Store operation pointer.
- chunk_map_[data_chunk_id] = cow_op;
+ chunk_map_[ChunkToSector(data_chunk_id)] = cow_op;
num_ops += 1;
offset += sizeof(struct disk_exception);
cowop_riter_->Next();
@@ -680,6 +716,126 @@
return true;
}
+bool Snapuserd::DmuserWriteRequest() {
+ struct dm_user_header* header = bufsink_.GetHeaderPtr();
+
+ // device mapper has the capability to allow
+ // targets to flush the cache when writes are completed. This
+ // is controlled by each target by a flag "flush_supported".
+ // This flag is set by dm-user. When flush is supported,
+ // a number of zero-length bio's will be submitted to
+ // the target for the purpose of flushing cache. It is the
+ // responsibility of the target driver - which is dm-user in this
+ // case, to remap these bio's to the underlying device. Since,
+ // there is no underlying device for dm-user, this zero length
+ // bio's gets routed to daemon.
+ //
+ // Flush operations are generated post merge by dm-snap by having
+ // REQ_PREFLUSH flag set. Snapuser daemon doesn't have anything
+ // to flush per se; hence, just respond back with a success message.
+ if (header->sector == 0) {
+ CHECK(header->len == 0);
+ header->type = DM_USER_RESP_SUCCESS;
+ if (!WriteDmUserPayload(0)) {
+ return false;
+ }
+ return true;
+ }
+
+ size_t remaining_size = header->len;
+ size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
+ CHECK(read_size == BLOCK_SIZE);
+
+ CHECK(header->sector > 0);
+ chunk_t chunk = SectorToChunk(header->sector);
+ CHECK(chunk_map_.find(header->sector) == chunk_map_.end());
+
+ void* buffer = bufsink_.GetPayloadBuffer(read_size);
+ CHECK(buffer != nullptr);
+ header->type = DM_USER_RESP_SUCCESS;
+
+ if (!ReadDmUserPayload(buffer, read_size)) {
+ SNAP_LOG(ERROR) << "ReadDmUserPayload failed for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ header->type = DM_USER_RESP_ERROR;
+ }
+
+ if (header->type == DM_USER_RESP_SUCCESS && !ProcessMergeComplete(chunk, buffer)) {
+ SNAP_LOG(ERROR) << "ProcessMergeComplete failed for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ header->type = DM_USER_RESP_ERROR;
+ } else {
+ SNAP_LOG(DEBUG) << "ProcessMergeComplete success for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ }
+
+ if (!WriteDmUserPayload(0)) {
+ return false;
+ }
+
+ return true;
+}
+
+bool Snapuserd::DmuserReadRequest() {
+ struct dm_user_header* header = bufsink_.GetHeaderPtr();
+ size_t remaining_size = header->len;
+ loff_t offset = 0;
+ sector_t sector = header->sector;
+ do {
+ size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
+
+ int ret = read_size;
+ header->type = DM_USER_RESP_SUCCESS;
+ chunk_t chunk = SectorToChunk(header->sector);
+
+ // Request to sector 0 is always for kernel
+ // representation of COW header. This IO should be only
+ // once during dm-snapshot device creation. We should
+ // never see multiple IO requests. Additionally this IO
+ // will always be a single 4k.
+ if (header->sector == 0) {
+ CHECK(metadata_read_done_ == true);
+ CHECK(read_size == BLOCK_SIZE);
+ ConstructKernelCowHeader();
+ SNAP_LOG(DEBUG) << "Kernel header constructed";
+ } else {
+ if (!offset && (read_size == BLOCK_SIZE) &&
+ chunk_map_.find(header->sector) == chunk_map_.end()) {
+ if (!ReadDiskExceptions(chunk, read_size)) {
+ SNAP_LOG(ERROR) << "ReadDiskExceptions failed for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ header->type = DM_USER_RESP_ERROR;
+ } else {
+ SNAP_LOG(DEBUG) << "ReadDiskExceptions success for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ }
+ } else {
+ chunk_t num_sectors_read = (offset >> SECTOR_SHIFT);
+ ret = ReadData(sector + num_sectors_read, read_size);
+ if (ret < 0) {
+ SNAP_LOG(ERROR) << "ReadData failed for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ header->type = DM_USER_RESP_ERROR;
+ } else {
+ SNAP_LOG(DEBUG) << "ReadData success for chunk id: " << chunk
+ << "Sector: " << header->sector;
+ }
+ }
+ }
+
+ // Daemon will not be terminated if there is any error. We will
+ // just send the error back to dm-user.
+ if (!WriteDmUserPayload(ret)) {
+ return false;
+ }
+
+ remaining_size -= ret;
+ offset += ret;
+ } while (remaining_size > 0);
+
+ return true;
+}
+
bool Snapuserd::Run() {
struct dm_user_header* header = bufsink_.GetHeaderPtr();
@@ -698,122 +854,16 @@
switch (header->type) {
case DM_USER_REQ_MAP_READ: {
- size_t remaining_size = header->len;
- loff_t offset = 0;
- do {
- size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
- header->type = DM_USER_RESP_SUCCESS;
-
- // Request to sector 0 is always for kernel
- // representation of COW header. This IO should be only
- // once during dm-snapshot device creation. We should
- // never see multiple IO requests. Additionally this IO
- // will always be a single 4k.
- if (header->sector == 0) {
- CHECK(metadata_read_done_ == true);
- CHECK(read_size == BLOCK_SIZE);
- ConstructKernelCowHeader();
- SNAP_LOG(DEBUG) << "Kernel header constructed";
- } else {
- // Convert the sector number to a chunk ID.
- //
- // Check if the chunk ID represents a metadata
- // page. If the chunk ID is not found in the
- // vector, then it points to a metadata page.
- chunk_t chunk = SectorToChunk(header->sector);
-
- if (chunk_map_.find(chunk) == chunk_map_.end()) {
- if (!ReadDiskExceptions(chunk, read_size)) {
- SNAP_LOG(ERROR) << "ReadDiskExceptions failed for chunk id: " << chunk
- << "Sector: " << header->sector;
- header->type = DM_USER_RESP_ERROR;
- } else {
- SNAP_LOG(DEBUG) << "ReadDiskExceptions success for chunk id: " << chunk
- << "Sector: " << header->sector;
- }
- } else {
- SNAP_LOG(DEBUG) << "ReadData: chunk: " << chunk << " len: " << header->len
- << " read_size: " << read_size << " offset: " << offset;
- chunk_t num_chunks_read = (offset >> BLOCK_SHIFT);
- if (!ReadData(chunk + num_chunks_read, read_size)) {
- SNAP_LOG(ERROR) << "ReadData failed for chunk id: " << chunk
- << "Sector: " << header->sector;
- header->type = DM_USER_RESP_ERROR;
- } else {
- SNAP_LOG(DEBUG) << "ReadData success for chunk id: " << chunk
- << "Sector: " << header->sector;
- }
- }
- }
-
- // Daemon will not be terminated if there is any error. We will
- // just send the error back to dm-user.
- if (!WriteDmUserPayload(read_size)) {
- return false;
- }
-
- remaining_size -= read_size;
- offset += read_size;
- } while (remaining_size);
-
+ if (!DmuserReadRequest()) {
+ return false;
+ }
break;
}
case DM_USER_REQ_MAP_WRITE: {
- // device mapper has the capability to allow
- // targets to flush the cache when writes are completed. This
- // is controlled by each target by a flag "flush_supported".
- // This flag is set by dm-user. When flush is supported,
- // a number of zero-length bio's will be submitted to
- // the target for the purpose of flushing cache. It is the
- // responsibility of the target driver - which is dm-user in this
- // case, to remap these bio's to the underlying device. Since,
- // there is no underlying device for dm-user, this zero length
- // bio's gets routed to daemon.
- //
- // Flush operations are generated post merge by dm-snap by having
- // REQ_PREFLUSH flag set. Snapuser daemon doesn't have anything
- // to flush per se; hence, just respond back with a success message.
- if (header->sector == 0) {
- CHECK(header->len == 0);
- header->type = DM_USER_RESP_SUCCESS;
- if (!WriteDmUserPayload(0)) {
- return false;
- }
- break;
- }
-
- size_t remaining_size = header->len;
- size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);
- CHECK(read_size == BLOCK_SIZE);
-
- CHECK(header->sector > 0);
- chunk_t chunk = SectorToChunk(header->sector);
- CHECK(chunk_map_.find(chunk) == chunk_map_.end());
-
- void* buffer = bufsink_.GetPayloadBuffer(read_size);
- CHECK(buffer != nullptr);
- header->type = DM_USER_RESP_SUCCESS;
-
- if (!ReadDmUserPayload(buffer, read_size)) {
- SNAP_LOG(ERROR) << "ReadDmUserPayload failed for chunk id: " << chunk
- << "Sector: " << header->sector;
- header->type = DM_USER_RESP_ERROR;
- }
-
- if (header->type == DM_USER_RESP_SUCCESS && !ProcessMergeComplete(chunk, buffer)) {
- SNAP_LOG(ERROR) << "ProcessMergeComplete failed for chunk id: " << chunk
- << "Sector: " << header->sector;
- header->type = DM_USER_RESP_ERROR;
- } else {
- SNAP_LOG(DEBUG) << "ProcessMergeComplete success for chunk id: " << chunk
- << "Sector: " << header->sector;
- }
-
- if (!WriteDmUserPayload(0)) {
+ if (!DmuserWriteRequest()) {
return false;
}
-
break;
}
}
diff --git a/fs_mgr/libsnapshot/snapuserd.h b/fs_mgr/libsnapshot/snapuserd.h
index eec6d45..c01fee3 100644
--- a/fs_mgr/libsnapshot/snapuserd.h
+++ b/fs_mgr/libsnapshot/snapuserd.h
@@ -22,9 +22,9 @@
#include <cstring>
#include <iostream>
#include <limits>
+#include <map>
#include <string>
#include <thread>
-#include <unordered_map>
#include <vector>
#include <android-base/file.h>
@@ -72,6 +72,9 @@
bool IsAttached() const { return ctrl_fd_ >= 0; }
private:
+ bool DmuserReadRequest();
+ bool DmuserWriteRequest();
+
bool ReadDmUserHeader();
bool ReadDmUserPayload(void* buffer, size_t size);
bool WriteDmUserPayload(size_t size);
@@ -79,10 +82,13 @@
bool ReadMetadata();
bool ZerofillDiskExceptions(size_t read_size);
bool ReadDiskExceptions(chunk_t chunk, size_t size);
- bool ReadData(chunk_t chunk, size_t size);
+ int ReadUnalignedSector(sector_t sector, size_t size,
+ std::map<sector_t, const CowOperation*>::iterator& it);
+ int ReadData(sector_t sector, size_t size);
bool IsChunkIdMetadata(chunk_t chunk);
chunk_t GetNextAllocatableChunkId(chunk_t chunk_id);
+ bool ProcessCowOp(const CowOperation* cow_op);
bool ProcessReplaceOp(const CowOperation* cow_op);
bool ProcessCopyOp(const CowOperation* cow_op);
bool ProcessZeroOp();
@@ -94,6 +100,7 @@
bool ProcessMergeComplete(chunk_t chunk, void* buffer);
sector_t ChunkToSector(chunk_t chunk) { return chunk << CHUNK_SHIFT; }
chunk_t SectorToChunk(sector_t sector) { return sector >> CHUNK_SHIFT; }
+ bool IsBlockAligned(int read_size) { return ((read_size & (BLOCK_SIZE - 1)) == 0); }
std::string cow_device_;
std::string backing_store_device_;
@@ -116,9 +123,15 @@
// mapping of old-chunk to new-chunk
std::vector<std::unique_ptr<uint8_t[]>> vec_;
- // Key - Chunk ID
+ // Key - Sector
// Value - cow operation
- std::unordered_map<chunk_t, const CowOperation*> chunk_map_;
+ //
+ // chunk_map stores the pseudo mapping of sector
+ // to COW operations. Each COW op is 4k; however,
+ // we can get a read request which are as small
+ // as 512 bytes. Hence, we need to binary search
+ // in the chunk_map to find the nearest COW op.
+ std::map<sector_t, const CowOperation*> chunk_map_;
bool metadata_read_done_ = false;
BufferSink bufsink_;