Merge "libsnapshot:Snapuserd: IO path support with dm-snapshot target"
diff --git a/fs_mgr/libsnapshot/Android.bp b/fs_mgr/libsnapshot/Android.bp
index be6db04..d11d3e4 100644
--- a/fs_mgr/libsnapshot/Android.bp
+++ b/fs_mgr/libsnapshot/Android.bp
@@ -148,12 +148,12 @@
recovery_available: true,
shared_libs: [
"libbase",
- "libcrypto",
"liblog",
],
static_libs: [
"libz",
],
+ ramdisk_available: true,
}
cc_library_static {
@@ -347,6 +347,9 @@
cc_defaults {
name: "snapuserd_defaults",
+ defaults: [
+ "fs_mgr_defaults",
+ ],
srcs: [
"snapuserd.cpp",
],
@@ -360,6 +363,8 @@
"libbase",
"liblog",
"libdm",
+ "libz",
+ "libsnapshot_cow",
],
}
@@ -375,7 +380,6 @@
ramdisk: true,
static_executable: true,
- system_shared_libs: [],
}
cc_test {
@@ -473,3 +477,32 @@
},
},
}
+
+cc_test {
+ name: "cow_snapuserd_test",
+ defaults: [
+ "fs_mgr_defaults",
+ ],
+ srcs: [
+ "cow_snapuserd_test.cpp",
+ ],
+ cflags: [
+ "-Wall",
+ "-Werror",
+ ],
+ shared_libs: [
+ "libbase",
+ "liblog",
+ "libz",
+ ],
+ static_libs: [
+ "libgtest",
+ "libsnapshot_cow",
+ ],
+ header_libs: [
+ "libstorage_literals_headers",
+ ],
+ test_min_api_level: 30,
+ auto_gen_config: true,
+ require_root: false,
+}
diff --git a/fs_mgr/libsnapshot/cow_reader.cpp b/fs_mgr/libsnapshot/cow_reader.cpp
index 86565c4..7f77aec 100644
--- a/fs_mgr/libsnapshot/cow_reader.cpp
+++ b/fs_mgr/libsnapshot/cow_reader.cpp
@@ -20,7 +20,6 @@
#include <android-base/file.h>
#include <android-base/logging.h>
#include <libsnapshot/cow_reader.h>
-#include <openssl/sha.h>
#include <zlib.h>
namespace android {
@@ -28,11 +27,13 @@
CowReader::CowReader() : fd_(-1), header_(), fd_size_(0) {}
-static void SHA256(const void* data, size_t length, uint8_t out[32]) {
+static void SHA256(const void*, size_t, uint8_t[]) {
+#if 0
SHA256_CTX c;
SHA256_Init(&c);
SHA256_Update(&c, data, length);
SHA256_Final(out, &c);
+#endif
}
bool CowReader::Parse(android::base::unique_fd&& fd) {
@@ -69,16 +70,35 @@
return false;
}
+ if (header_.magic != kCowMagicNumber) {
+ LOG(ERROR) << "Header Magic corrupted. Magic: " << header_.magic
+ << "Expected: " << kCowMagicNumber;
+ return false;
+ }
+
+ if ((header_.major_version != kCowVersionMajor) ||
+ (header_.minor_version != kCowVersionMinor)) {
+ LOG(ERROR) << "Header version mismatch";
+ LOG(ERROR) << "Major version: " << header_.major_version
+ << "Expected: " << kCowVersionMajor;
+ LOG(ERROR) << "Minor version: " << header_.minor_version
+ << "Expected: " << kCowVersionMinor;
+ return false;
+ }
+
uint8_t header_csum[32];
{
CowHeader tmp = header_;
memset(&tmp.header_checksum, 0, sizeof(tmp.header_checksum));
+ memset(header_csum, 0, sizeof(uint8_t) * 32);
+
SHA256(&tmp, sizeof(tmp), header_csum);
}
if (memcmp(header_csum, header_.header_checksum, sizeof(header_csum)) != 0) {
LOG(ERROR) << "header checksum is invalid";
return false;
}
+
return true;
}
@@ -140,6 +160,8 @@
}
uint8_t csum[32];
+ memset(csum, 0, sizeof(uint8_t) * 32);
+
SHA256(ops_buffer.get(), header_.ops_size, csum);
if (memcmp(csum, header_.ops_checksum, sizeof(csum)) != 0) {
LOG(ERROR) << "ops checksum does not match";
diff --git a/fs_mgr/libsnapshot/cow_snapuserd_test.cpp b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
new file mode 100644
index 0000000..d767022
--- /dev/null
+++ b/fs_mgr/libsnapshot/cow_snapuserd_test.cpp
@@ -0,0 +1,255 @@
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <iostream>
+#include <memory>
+#include <string_view>
+
+#include <android-base/file.h>
+#include <android-base/unique_fd.h>
+#include <gtest/gtest.h>
+#include <libsnapshot/cow_writer.h>
+#include <storage_literals/storage_literals.h>
+
+namespace android {
+namespace snapshot {
+
+using namespace android::storage_literals;
+using android::base::unique_fd;
+
+// Test fixture that owns the temporary file used as the COW image.
+class SnapuserdTest : public ::testing::Test {
+  protected:
+    // Creates the temporary COW file and fails the test when it has no valid fd.
+    void SetUp() override {
+        cow_ = std::make_unique<TemporaryFile>();
+        ASSERT_GE(cow_->fd, 0) << strerror(errno);
+    }
+
+    // Releases the TemporaryFile object.
+    void TearDown() override { cow_.reset(); }
+
+    std::unique_ptr<TemporaryFile> cow_;
+};
+
+// End-to-end check of the snapuserd IO path: builds a gz-compressed COW file
+// holding replace, copy and zero operations, stacks dm-user + dm-snapshot on
+// top of /dev/block/mapper/system_a, and verifies what is read back from the
+// snapshot device for each operation type.
+TEST_F(SnapuserdTest, ReadWrite) {
+    loff_t offset = 0;
+    size_t size = 100_MiB;
+    unique_fd rnd_fd;
+    unique_fd sys_fd;
+    unique_fd snapshot_fd;
+    unique_fd system_a_fd;
+    std::string cmd;
+
+    // 200 MiB of random data are pulled below; /dev/urandom is used because
+    // /dev/random may block while waiting for entropy.
+    rnd_fd.reset(open("/dev/urandom", O_RDONLY));
+    ASSERT_GE(rnd_fd.get(), 0) << strerror(errno);
+
+    std::unique_ptr<uint8_t[]> random_buffer_1 = std::make_unique<uint8_t[]>(size);
+    std::unique_ptr<uint8_t[]> random_buffer_2 = std::make_unique<uint8_t[]>(size);
+    std::unique_ptr<uint8_t[]> system_buffer = std::make_unique<uint8_t[]>(size);
+
+    // Fill random data
+    for (size_t j = 0; j < (size / 1_MiB); j++) {
+        ASSERT_TRUE(ReadFullyAtOffset(rnd_fd, random_buffer_1.get() + offset, 1_MiB, 0));
+
+        ASSERT_TRUE(ReadFullyAtOffset(rnd_fd, random_buffer_2.get() + offset, 1_MiB, 0));
+
+        offset += 1_MiB;
+    }
+
+    sys_fd.reset(open("/dev/block/mapper/system_a", O_RDONLY));
+    ASSERT_GE(sys_fd.get(), 0) << strerror(errno);
+
+    // Read from system partition from offset 0 of size 100MB
+    ASSERT_TRUE(ReadFullyAtOffset(sys_fd, system_buffer.get(), size, 0));
+
+    //================Create a COW file with the following operations===========
+    //
+    // Create COW file which is gz compressed
+    //
+    // 0-100 MB of replace operation with random data
+    // 100-200 MB of copy operation
+    // 200-300 MB of zero operation
+    // 300-400 MB of replace operation with random data
+
+    CowOptions options;
+    options.compression = "gz";
+    CowWriter writer(options);
+
+    ASSERT_TRUE(writer.Initialize(cow_->fd));
+
+    // Write 100MB random data to COW file which is gz compressed from block 0
+    ASSERT_TRUE(writer.AddRawBlocks(0, random_buffer_1.get(), size));
+
+    size_t num_blocks = size / options.block_size;
+    size_t blk_start_copy = num_blocks;
+    size_t blk_end_copy = blk_start_copy + num_blocks;
+    size_t source_blk = 0;
+
+    // Copy blocks - source_blk starts from 0 as snapuserd
+    // has to read from block 0 in system_a partition
+    //
+    // This initializes copy operation from block 0 of size 100 MB from
+    // /dev/block/mapper/system_a
+    for (size_t i = blk_start_copy; i < blk_end_copy; i++) {
+        ASSERT_TRUE(writer.AddCopy(i, source_blk));
+        source_blk += 1;
+    }
+
+    size_t blk_zero_copy_start = blk_end_copy;
+    size_t blk_zero_copy_end = blk_zero_copy_start + num_blocks;
+
+    // 100 MB filled with zeroes
+    ASSERT_TRUE(writer.AddZeroBlocks(blk_zero_copy_start, num_blocks));
+
+    // Final 100MB filled with random data which is gz compressed
+    size_t blk_random2_replace_start = blk_zero_copy_end;
+
+    ASSERT_TRUE(writer.AddRawBlocks(blk_random2_replace_start, random_buffer_2.get(), size));
+
+    // Flush operations
+    ASSERT_TRUE(writer.Finalize());
+
+    ASSERT_EQ(lseek(cow_->fd, 0, SEEK_SET), 0);
+
+    //================Setup dm-snapshot and start snapuserd daemon===========
+
+    // Create a COW device. It is sized like system_a, which must be able to
+    // hold at least 400MB of data
+
+    system_a_fd.reset(open("/dev/block/mapper/system_a", O_RDONLY));
+    ASSERT_GE(system_a_fd.get(), 0) << strerror(errno);
+
+    // BLKGETSIZE stores the device size (in 512-byte sectors) through an
+    // unsigned long pointer; a plain int is too small on 64-bit targets.
+    unsigned long num_sectors = 0;
+    ASSERT_EQ(ioctl(system_a_fd.get(), BLKGETSIZE, &num_sectors), 0) << strerror(errno);
+
+    cmd = "dmctl create system_cow user 0 " + std::to_string(num_sectors);
+    system(cmd.c_str());
+
+    // Start the snapuserd daemon
+    pid_t pid = fork();
+    ASSERT_GE(pid, 0) << strerror(errno);
+    if (pid == 0) {
+        const char* argv[] = {"/system/bin/snapuserd", cow_->path, "/dev/block/mapper/system_a",
+                              nullptr};
+        execv(argv[0], const_cast<char**>(argv));
+        // execv() only returns on failure. Leave the child right away so it
+        // does not keep running the rest of this test as a second process.
+        _exit(1);
+    }
+
+    cmd.clear();
+
+    cmd = "dmctl create system-snapshot -ro snapshot 0 " + std::to_string(num_sectors);
+    cmd += " /dev/block/mapper/system_a /dev/block/mapper/system_cow ";
+    cmd += "P 8";
+    system(cmd.c_str());
+
+    // Wait so that snapshot device is created
+    sleep(5);
+    std::unique_ptr<uint8_t[]> snapuserd_buffer = std::make_unique<uint8_t[]>(size);
+
+    offset = 0;
+
+    snapshot_fd.reset(open("/dev/block/mapper/system-snapshot", O_RDONLY));
+    ASSERT_GE(snapshot_fd.get(), 0) << strerror(errno);
+
+    //================Start IO operation on dm-snapshot device=================
+    // This will test the following paths:
+    //
+    // 1: IO path for all three operations and interleaving of operations.
+    // 2: Merging of blocks in kernel during metadata read
+    // 3: Bulk IO issued by kernel during merge operation
+
+    // Read from snapshot device of size 100MB from offset 0. This tests the
+    // 1st replace operation.
+    //
+    // IO path:
+    //
+    // dm-snap->dm-snap-persistent->dm-user->snapuserd->read_compressed_cow (replace
+    // op)->decompress_cow->return
+
+    ASSERT_TRUE(ReadFullyAtOffset(snapshot_fd, snapuserd_buffer.get(), size, offset));
+
+    // Update the offset
+    offset += size;
+
+    // Compare data with random_buffer_1.
+    ASSERT_EQ(memcmp(snapuserd_buffer.get(), random_buffer_1.get(), size), 0);
+
+    // Clear the buffer
+    memset(snapuserd_buffer.get(), 0, size);
+
+    // Read from snapshot device of size 100MB from offset 100MB. This tests the
+    // copy operation.
+    //
+    // IO path:
+    //
+    // dm-snap->dm-snap-persistent->dm-user->snapuserd->read_from_system_a_partition
+    // (copy op) -> return
+    ASSERT_TRUE(ReadFullyAtOffset(snapshot_fd, snapuserd_buffer.get(), size, offset));
+
+    // Update the offset
+    offset += size;
+
+    // Compare data with system_buffer.
+    ASSERT_EQ(memcmp(snapuserd_buffer.get(), system_buffer.get(), size), 0);
+
+    // Read from snapshot device of size 100MB from offset 200MB. This tests the
+    // zero operation.
+    //
+    // IO path:
+    //
+    // dm-snap->dm-snap-persistent->dm-user->snapuserd->fill_memory_with_zero
+    // (zero op) -> return
+    ASSERT_TRUE(ReadFullyAtOffset(snapshot_fd, snapuserd_buffer.get(), size, offset));
+
+    // Fill the random_buffer_1 with zero as we no longer need it
+    memset(random_buffer_1.get(), 0, size);
+
+    // Compare data with zero filled buffer
+    ASSERT_EQ(memcmp(snapuserd_buffer.get(), random_buffer_1.get(), size), 0);
+
+    // Update the offset
+    offset += size;
+
+    // Read from snapshot device of size 100MB from offset 300MB. This tests the
+    // final replace operation.
+    //
+    // IO path:
+    //
+    // dm-snap->dm-snap-persistent->dm-user->snapuserd->read_compressed_cow (replace
+    // op)->decompress_cow->return
+    ASSERT_TRUE(ReadFullyAtOffset(snapshot_fd, snapuserd_buffer.get(), size, offset));
+
+    // Compare data with random_buffer_2.
+    ASSERT_EQ(memcmp(snapuserd_buffer.get(), random_buffer_2.get(), size), 0);
+}
+
+} // namespace snapshot
+} // namespace android
+
+// Test entry point: lets googletest consume its command-line flags, then runs
+// every registered test and returns the resulting status.
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    const int status = RUN_ALL_TESTS();
+    return status;
+}
diff --git a/fs_mgr/libsnapshot/cow_writer.cpp b/fs_mgr/libsnapshot/cow_writer.cpp
index ff43997..76238c2 100644
--- a/fs_mgr/libsnapshot/cow_writer.cpp
+++ b/fs_mgr/libsnapshot/cow_writer.cpp
@@ -23,7 +23,6 @@
#include <android-base/logging.h>
#include <android-base/unique_fd.h>
#include <libsnapshot/cow_writer.h>
-#include <openssl/sha.h>
#include <zlib.h>
namespace android {
@@ -179,11 +178,15 @@
return {};
}
-static void SHA256(const void* data, size_t length, uint8_t out[32]) {
+// TODO: Fix compilation issues when linking libcrypto library
+// when snapuserd is compiled as part of ramdisk.
+static void SHA256(const void*, size_t, uint8_t[]) {
+#if 0
SHA256_CTX c;
SHA256_Init(&c);
SHA256_Update(&c, data, length);
SHA256_Final(out, &c);
+#endif
}
bool CowWriter::Finalize() {
@@ -199,6 +202,9 @@
header_.ops_offset = offs;
header_.ops_size = ops_.size();
+ memset(header_.ops_checksum, 0, sizeof(uint8_t) * 32);
+ memset(header_.header_checksum, 0, sizeof(uint8_t) * 32);
+
SHA256(ops_.data(), ops_.size(), header_.ops_checksum);
SHA256(&header_, sizeof(header_), header_.header_checksum);
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
index a3b1291..9e9f9b8 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/cow_reader.h
@@ -92,6 +92,10 @@
bool Parse(android::base::borrowed_fd fd);
bool GetHeader(CowHeader* header) override;
+
+ // Create a CowOpIter object which contains header_.num_ops
+ // CowOperation objects. Get() returns a unique CowOperation object
+ // whose lifetime depends on the CowOpIter object.
std::unique_ptr<ICowOpIter> GetOpIter() override;
bool GetRawBytes(uint64_t offset, void* buffer, size_t len) override;
bool ReadData(const CowOperation& op, IByteSink* sink) override;
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapuserd.h b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd.h
new file mode 100644
index 0000000..e757579
--- /dev/null
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapuserd.h
@@ -0,0 +1,99 @@
+// Copyright (C) 2020 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <stdint.h>
+
+namespace android {
+namespace snapshot {
+
+// Kernel COW header fields
+static constexpr uint32_t SNAP_MAGIC = 0x70416e53;
+
+static constexpr uint32_t SNAPSHOT_DISK_VERSION = 1;
+
+static constexpr uint32_t NUM_SNAPSHOT_HDR_CHUNKS = 1;
+
+static constexpr uint32_t SNAPSHOT_VALID = 1;
+
+/*
+ * The basic unit of block I/O is a sector. It is used in a number of contexts
+ * in Linux (blk, bio, genhd). The size of one sector is 512 = 2**9
+ * bytes. Variables of type sector_t represent an offset or size that is a
+ * multiple of 512 bytes. Hence this constant (log2 of the sector size).
+ */
+static constexpr uint32_t SECTOR_SHIFT = 9;
+
+typedef __u64 sector_t;
+typedef sector_t chunk_t;
+
+static constexpr uint32_t CHUNK_SIZE = 8;
+static constexpr uint32_t CHUNK_SHIFT = (__builtin_ffs(CHUNK_SIZE) - 1);
+
+static constexpr uint32_t BLOCK_SIZE = 4096;
+static constexpr uint32_t BLOCK_SHIFT = (__builtin_ffs(BLOCK_SIZE) - 1);
+
+// This structure represents the kernel COW header.
+// All the below fields should be in Little Endian format.
+struct disk_header {
+ uint32_t magic;
+
+ /*
+ * Is this snapshot valid. There is no way of recovering
+ * an invalid snapshot.
+ */
+ uint32_t valid;
+
+ /*
+ * Simple, incrementing version. no backward
+ * compatibility.
+ */
+ uint32_t version;
+
+ /* In sectors */
+ uint32_t chunk_size;
+} __packed;
+
+// A disk exception is a mapping of old_chunk to new_chunk
+// old_chunk is the chunk ID of a dm-snapshot device.
+// new_chunk is the chunk ID of the COW device.
+struct disk_exception {
+ uint64_t old_chunk;
+ uint64_t new_chunk;
+} __packed;
+
+// Control structures used to communicate with dm-user.
+// A message consists of a header followed by a payload.
+struct dm_user_header {
+ __u64 seq;
+ __u64 type;
+ __u64 flags;
+ __u64 sector;
+ __u64 len;
+ __u64 io_in_progress;
+} __attribute__((packed));
+
+struct dm_user_payload {
+ __u8 buf[];
+};
+
+// Message comprising both the header and the payload
+struct dm_user_message {
+ struct dm_user_header header;
+ struct dm_user_payload payload;
+};
+
+} // namespace snapshot
+} // namespace android
diff --git a/fs_mgr/libsnapshot/snapuserd.cpp b/fs_mgr/libsnapshot/snapuserd.cpp
index a6ff4fd..605af9b 100644
--- a/fs_mgr/libsnapshot/snapuserd.cpp
+++ b/fs_mgr/libsnapshot/snapuserd.cpp
@@ -15,102 +15,662 @@
*/
#include <linux/types.h>
+#include <stdlib.h>
+
+#include <cstring>
+#include <iostream>
+#include <limits>
+#include <string>
+#include <vector>
#include <android-base/file.h>
#include <android-base/logging.h>
#include <android-base/stringprintf.h>
#include <android-base/unique_fd.h>
#include <libdm/dm.h>
+#include <libsnapshot/cow_reader.h>
+#include <libsnapshot/cow_writer.h>
+#include <libsnapshot/snapuserd.h>
+namespace android {
+namespace snapshot {
+
+using namespace android::dm;
using android::base::unique_fd;
#define DM_USER_MAP_READ 0
#define DM_USER_MAP_WRITE 1
-struct dm_user_message {
- __u64 seq;
- __u64 type;
- __u64 flags;
- __u64 sector;
- __u64 len;
- __u8 buf[];
+static constexpr size_t PAYLOAD_SIZE = (1UL << 16);
+
+static_assert(PAYLOAD_SIZE >= BLOCK_SIZE);
+
+// IByteSink backed by a single heap buffer that is laid out like a
+// dm_user_message: a dm_user_header followed by the payload bytes.
+// Payload pointers are handed out at a running offset which the caller
+// advances with UpdateBufferOffset() and rewinds with ResetBufferOffset().
+class BufferSink : public IByteSink {
+  public:
+    // Allocates a zero-initialized buffer of |size| bytes (header included)
+    // and rewinds the payload offset.
+    void Initialize(size_t size) {
+        buffer_size_ = size;
+        buffer_offset_ = 0;
+        buffer_ = std::make_unique<uint8_t[]>(size);
+    }
+
+    // Start of the whole buffer, i.e. where the dm_user_header lives.
+    void* GetBufPtr() { return buffer_.get(); }
+
+    // Zeroes the whole buffer. Does nothing before Initialize().
+    void Clear() {
+        if (buffer_ != nullptr) memset(GetBufPtr(), 0, buffer_size_);
+    }
+
+    // Returns a pointer into the payload at the current offset, or nullptr
+    // when fewer than |size| payload bytes remain.
+    void* GetPayloadBuffer(size_t size) {
+        // The payload begins after the header, so the header bytes must not be
+        // counted as free space. Each subtraction is guarded so that the
+        // unsigned arithmetic cannot wrap around.
+        constexpr size_t kHeaderSize = sizeof(struct dm_user_header);
+        if (buffer_ == nullptr || buffer_size_ < kHeaderSize) return nullptr;
+
+        const size_t payload_capacity = buffer_size_ - kHeaderSize;
+        if (buffer_offset_ > payload_capacity) return nullptr;
+        if (size > payload_capacity - buffer_offset_) return nullptr;
+
+        auto* msg = reinterpret_cast<struct dm_user_message*>(GetBufPtr());
+        return msg->payload.buf + buffer_offset_;
+    }
+
+    // IByteSink: hands out exactly |requested| bytes or nothing at all.
+    // |actual| receives the number of bytes made available.
+    void* GetBuffer(size_t requested, size_t* actual) override {
+        void* buf = GetPayloadBuffer(requested);
+        if (!buf) {
+            *actual = 0;
+            return nullptr;
+        }
+        *actual = requested;
+        return buf;
+    }
+
+    // Advances the payload offset by |size| bytes.
+    void UpdateBufferOffset(size_t size) { buffer_offset_ += size; }
+
+    // Returns the header at the start of the buffer. Aborts when the buffer
+    // is too small to hold a header.
+    struct dm_user_header* GetHeaderPtr() {
+        CHECK(sizeof(struct dm_user_header) <= buffer_size_);
+        return reinterpret_cast<struct dm_user_header*>(GetBufPtr());
+    }
+
+    // IByteSink: the data is already in place in the buffer, nothing to do.
+    bool ReturnData(void*, size_t) override { return true; }
+
+    // Rewinds the payload offset to the start of the payload.
+    void ResetBufferOffset() { buffer_offset_ = 0; }
+
+  private:
+    std::unique_ptr<uint8_t[]> buffer_;
+    // Offset into the payload (not into the whole buffer).
+    size_t buffer_offset_ = 0;
+    // Total allocation size, header included.
+    size_t buffer_size_ = 0;
+};
-using namespace android::dm;
+class Snapuserd final {
+ public:
+ Snapuserd(const std::string& in_cow_device, const std::string& in_backing_store_device)
+ : in_cow_device_(in_cow_device),
+ in_backing_store_device_(in_backing_store_device),
+ metadata_read_done_(false) {}
-static int daemon_main(const std::string& device) {
- unique_fd block_fd(open(device.c_str(), O_RDWR));
- if (block_fd < 0) {
- PLOG(ERROR) << "Unable to open " << device;
- return 1;
+ int Run();
+ int ReadDmUserHeader();
+ int WriteDmUserPayload(size_t size);
+ int ConstructKernelCowHeader();
+ int ReadMetadata();
+ int ZerofillDiskExceptions(size_t read_size);
+ int ReadDiskExceptions(chunk_t chunk, size_t size);
+ int ReadData(chunk_t chunk, size_t size);
+
+ private:
+ int ProcessReplaceOp(const CowOperation* cow_op);
+ int ProcessCopyOp(const CowOperation* cow_op);
+ int ProcessZeroOp();
+
+ std::string in_cow_device_;
+ std::string in_backing_store_device_;
+
+ unique_fd cow_fd_;
+ unique_fd backing_store_fd_;
+ unique_fd ctrl_fd_;
+
+ uint32_t exceptions_per_area_;
+
+ std::unique_ptr<ICowOpIter> cowop_iter_;
+ std::unique_ptr<CowReader> reader_;
+
+ // Metadata areas: each entry is one buffer of disk exceptions, i.e.
+ // mappings of old-chunk to new-chunk (filled in by ReadMetadata())
+ std::vector<std::unique_ptr<uint8_t[]>> vec_;
+
+ // Index - Chunk ID
+ // Value - cow operation (nullptr for header and metadata chunks)
+ std::vector<const CowOperation*> chunk_vec_;
+
+ bool metadata_read_done_;
+ BufferSink bufsink_;
+};
+
+// Builds the kernel COW header in the payload buffer. The header sits
+// in sector 0 and the IO request for it is always 4k, so the whole
+// block is zeroed first and only the header fields are then filled in.
+// Returns BLOCK_SIZE.
+int Snapuserd::ConstructKernelCowHeader() {
+    void* payload = bufsink_.GetPayloadBuffer(BLOCK_SIZE);
+    CHECK(payload != nullptr);
+
+    memset(payload, 0, BLOCK_SIZE);
+
+    auto* hdr = reinterpret_cast<struct disk_header*>(payload);
+    hdr->chunk_size = CHUNK_SIZE;
+    hdr->version = SNAPSHOT_DISK_VERSION;
+    hdr->valid = SNAPSHOT_VALID;
+    hdr->magic = SNAP_MAGIC;
+
+    return BLOCK_SIZE;
+}
+
+// Handle a replace operation: read the block's data from the
+// internal COW format into bufsink_; if the block is compressed,
+// it will be de-compressed. Returns BLOCK_SIZE, or -EIO on failure.
+int Snapuserd::ProcessReplaceOp(const CowOperation* cow_op) {
+ if (!reader_->ReadData(*cow_op, &bufsink_)) {
+ LOG(ERROR) << "ReadData failed for chunk: " << cow_op->new_block;
+ return -EIO;
}
- unique_fd ctrl_fd(open("/dev/dm-user", O_RDWR));
- if (ctrl_fd < 0) {
- PLOG(ERROR) << "Unable to open /dev/dm-user";
- return 1;
+ return BLOCK_SIZE;
+}
+
+// Start the copy operation. This reads one block from the backing
+// block device at the block number given by cow_op->source.
+// Returns BLOCK_SIZE on success and -EIO when the read fails.
+int Snapuserd::ProcessCopyOp(const CowOperation* cow_op) {
+ void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SIZE);
+ CHECK(buffer != nullptr);
+
+ // Issue a single 4K IO. However, this can be optimized
+ // if the successive blocks are contiguous.
+ if (!android::base::ReadFullyAtOffset(backing_store_fd_, buffer, BLOCK_SIZE,
+ cow_op->source * BLOCK_SIZE)) {
+ LOG(ERROR) << "Copy-op failed. Read from backing store at: " << cow_op->source;
+ // Same errno-style code as ProcessReplaceOp() instead of a bare -1.
+ return -EIO;
}
- size_t buf_size = 1UL << 16;
- auto buf = std::make_unique<char>(buf_size);
+ return BLOCK_SIZE;
+}
- /* Just keeps pumping messages between userspace and the kernel. We won't
- * actually be doing anything, but the sequence numbers line up so it'll at
- * least make forward progress. */
- while (true) {
- struct dm_user_message* msg = (struct dm_user_message*)buf.get();
+int Snapuserd::ProcessZeroOp() {
+ // Zero operation: fill one BLOCK_SIZE block at the current payload offset with zeroes
+ void* buffer = bufsink_.GetPayloadBuffer(BLOCK_SIZE);
+ CHECK(buffer != nullptr);
- memset(buf.get(), 0, buf_size);
+ memset(buffer, 0, BLOCK_SIZE);
+ return BLOCK_SIZE;
+}
- ssize_t readed = read(ctrl_fd.get(), buf.get(), buf_size);
- if (readed < 0) {
- PLOG(ERROR) << "Control read failed, trying with more space";
- buf_size *= 2;
- buf = std::make_unique<char>(buf_size);
- continue;
- }
+/*
+ * Read the data of size bytes from a given chunk.
+ *
+ * Kernel can potentially merge the blocks if the
+ * successive chunks are contiguous. For chunk size of 8,
+ * there can be 256 disk exceptions; and if
+ * all 256 disk exceptions are contiguous, kernel can merge
+ * them into a single IO.
+ *
+ * Since each chunk in the disk exception
+ * mapping represents a 4k block, kernel can potentially
+ * issue 256*4k = 1M IO in one shot.
+ *
+ * Even though kernel assumes that the blocks are
+ * contiguous, we need to split the 1M IO into 4k chunks
+ * as each operation represents 4k and it can either be:
+ *
+ * 1: Replace operation
+ * 2: Copy operation
+ * 3: Zero operation
+ *
+ */
+int Snapuserd::ReadData(chunk_t chunk, size_t size) {
+ int ret = 0;
- LOG(DEBUG) << android::base::StringPrintf("read() from dm-user returned %d bytes:",
- (int)readed);
- LOG(DEBUG) << android::base::StringPrintf(" msg->seq: 0x%016llx", msg->seq);
- LOG(DEBUG) << android::base::StringPrintf(" msg->type: 0x%016llx", msg->type);
- LOG(DEBUG) << android::base::StringPrintf(" msg->flags: 0x%016llx", msg->flags);
- LOG(DEBUG) << android::base::StringPrintf(" msg->sector: 0x%016llx", msg->sector);
- LOG(DEBUG) << android::base::StringPrintf(" msg->len: 0x%016llx", msg->len);
+ size_t read_size = size;
- switch (msg->type) {
- case DM_USER_MAP_READ: {
- LOG(DEBUG) << android::base::StringPrintf(
- "Responding to read of sector %lld with %lld bytes data", msg->sector,
- msg->len);
+ chunk_t chunk_key = chunk;
+ uint32_t stride;
+ lldiv_t divresult;
- if ((sizeof(*msg) + msg->len) > buf_size) {
- auto old_buf = std::move(buf);
- buf_size = sizeof(*msg) + msg->len;
- buf = std::make_unique<char>(buf_size);
- memcpy(buf.get(), old_buf.get(), sizeof(*msg));
- msg = (struct dm_user_message*)buf.get();
- }
+ // Size should always be a multiple of BLOCK_SIZE
+ CHECK((read_size & (BLOCK_SIZE - 1)) == 0);
- if (lseek(block_fd.get(), msg->sector * 512, SEEK_SET) < 0) {
- PLOG(ERROR) << "lseek failed: " << device;
- return 7;
- }
- if (!android::base::ReadFully(block_fd.get(), msg->buf, msg->len)) {
- PLOG(ERROR) << "read failed: " << device;
- return 7;
- }
+ while (read_size > 0) {
+ const CowOperation* cow_op = chunk_vec_[chunk_key];
+ CHECK(cow_op != nullptr);
+ int result;
- if (!android::base::WriteFully(ctrl_fd.get(), buf.get(), sizeof(*msg) + msg->len)) {
- PLOG(ERROR) << "write control failed";
- return 3;
- }
+ switch (cow_op->type) {
+ case kCowReplaceOp: {
+ result = ProcessReplaceOp(cow_op);
break;
}
- case DM_USER_MAP_WRITE:
+ case kCowZeroOp: {
+ result = ProcessZeroOp();
+ break;
+ }
+
+ case kCowCopyOp: {
+ result = ProcessCopyOp(cow_op);
+ break;
+ }
+
+ default: {
+ LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
+ ret = -EIO;
+ goto done;
+ }
+ }
+
+ if (result < 0) {
+ ret = result;
+ goto done;
+ }
+
+ // Advance the payload offset past the block just produced
+ bufsink_.UpdateBufferOffset(BLOCK_SIZE);
+
+ read_size -= BLOCK_SIZE;
+ ret += BLOCK_SIZE;
+
+ // Start iterating the chunk incrementally; Since while
+ // constructing the metadata, we know that the chunk IDs
+ // are contiguous
+ chunk_key += 1;
+
+ // This is similar to the way when chunk IDs were assigned
+ // in ReadMetadata().
+ //
+ // Skip if the chunk id represents a metadata chunk.
+ stride = exceptions_per_area_ + 1;
+ divresult = lldiv(chunk_key, stride);
+ if (divresult.rem == NUM_SNAPSHOT_HDR_CHUNKS) {
+ // Crossing exception boundary. Kernel will never
+ // issue IO which is spanning between a data chunk
+ // and a metadata chunk. This should be perfectly aligned.
+ //
+ // Since the input read_size is 4k aligned, we will
+ // always end up reading all 256 data chunks in one area.
+ // Thus, every multiple of 4K IO represents 256 data chunks
+ CHECK(read_size == 0);
+ break;
+ }
+ }
+
+done:
+
+ // Rewind the payload offset; ret is the byte count produced or a negative error
+ bufsink_.ResetBufferOffset();
+ return ret;
+}
+
+/*
+ * dm-snap prefetches while reading disk-exceptions. By default the
+ * prefetch value is 12, which means dm-snap issues reads for 12 areas,
+ * each area being a 4k page of disk-exceptions.
+ *
+ * When a prefetched chunk-id lies beyond the actual number of metadata
+ * pages, the buffer is filled with zeroes. dm-snap stops reading
+ * metadata pages once it parses a buffer whose content is zero.
+ *
+ * Returns the number of bytes zeroed (one full area), or -EINVAL when
+ * read_size is larger than one area.
+ */
+int Snapuserd::ZerofillDiskExceptions(size_t read_size) {
+    const size_t area_bytes = exceptions_per_area_ * sizeof(struct disk_exception);
+
+    if (read_size > area_bytes) {
+        return -EINVAL;
+    }
+
+    void* area = bufsink_.GetPayloadBuffer(area_bytes);
+    CHECK(area != nullptr);
+    memset(area, 0, area_bytes);
+
+    return area_bytes;
+}
+
+/*
+ * A disk exception is a simple mapping of old_chunk to new_chunk.
+ * When dm-snapshot device is created, kernel requests these mapping.
+ *
+ * Each disk exception is of size 16 bytes. Thus a single 4k page can
+ * have:
+ *
+ * exceptions_per_area_ = 4096/16 = 256. This entire 4k page
+ * is considered a metadata page and it is represented by chunk ID.
+ *
+ * Convert the chunk ID to index into the vector which gives us
+ * the metadata page.
+ *
+ * Returns the number of bytes placed in the payload buffer (one full
+ * area), or -EINVAL when read_size is larger than one area. Chunk IDs
+ * beyond the stored areas yield a zero-filled area.
+ */
+int Snapuserd::ReadDiskExceptions(chunk_t chunk, size_t read_size) {
+    const uint32_t stride = exceptions_per_area_ + 1;
+
+    // ChunkID to vector index
+    const lldiv_t divresult = lldiv(chunk, stride);
+
+    // No stored area for this chunk: hand back zeroes. The result is returned
+    // directly as an int so a negative error code is never routed through an
+    // unsigned variable.
+    if (divresult.quot < 0 || static_cast<size_t>(divresult.quot) >= vec_.size()) {
+        return ZerofillDiskExceptions(read_size);
+    }
+
+    const size_t size = exceptions_per_area_ * sizeof(struct disk_exception);
+    if (read_size > size) return -EINVAL;
+
+    void* buffer = bufsink_.GetPayloadBuffer(size);
+    CHECK(buffer != nullptr);
+
+    memcpy(buffer, vec_[static_cast<size_t>(divresult.quot)].get(), size);
+    return static_cast<int>(size);
+}
+
+/*
+ * Read the metadata from COW device and
+ * construct the metadata as required by the kernel.
+ *
+ * Please see design on kernel COW format
+ *
+ * 1: Read the metadata from internal COW device
+ * 2: There are 3 COW operations:
+ * a: Replace op
+ * b: Copy op
+ * c: Zero op
+ * 3: For each of the 3 operations, op->new_block
+ * represents the block number in the base device
+ * for which one of the 3 operations have to be applied.
+ * This represents the old_chunk in the kernel COW format
+ * 4: We need to assign new_chunk for a corresponding old_chunk
+ * 5: The algorithm is similar to how kernel assigns chunk number
+ * while creating exceptions.
+ * 6: Use a monotonically increasing chunk number to assign the
+ * new_chunk
+ * 7: Each chunk-id represents either a: Metadata page or b: Data page
+ * 8: Chunk-id representing a data page is stored in a vector. Index is the
+ * chunk-id and value is the pointer to the CowOperation
+ * 9: Chunk-id representing a metadata page is converted into a vector
+ * index. We store this in vector as kernel requests metadata during
+ * two stage:
+ * a: When initial dm-snapshot device is created, kernel requests
+ * all the metadata and stores it in its internal data-structures.
+ * b: During merge, kernel requests the same metadata
+ * once again.
+ * In both these cases, a quick lookup based on chunk-id is done.
+ * 10: When chunk number is incremented, we need to check whether
+ * the chunk represents a metadata page and, if so, skip it.
+ * 11: Each 4k page will contain 256 disk exceptions. We call this
+ * exceptions_per_area_
+ * 12: Kernel will stop issuing metadata IO request when new-chunk ID is 0.
+ */
+int Snapuserd::ReadMetadata() {
+ reader_ = std::make_unique<CowReader>();
+ CowHeader header;
+
+ if (!reader_->Parse(cow_fd_)) {
+ LOG(ERROR) << "Failed to parse";
+ return 1;
+ }
+
+ if (!reader_->GetHeader(&header)) {
+ LOG(ERROR) << "Failed to get header";
+ return 1;
+ }
+
+ CHECK(header.block_size == BLOCK_SIZE);
+
+ LOG(DEBUG) << "Num-ops: " << std::hex << header.num_ops;
+ LOG(DEBUG) << "ops-offset: " << std::hex << header.ops_offset;
+ LOG(DEBUG) << "ops-size: " << std::hex << header.ops_size;
+
+ cowop_iter_ = reader_->GetOpIter();
+
+ if (cowop_iter_ == nullptr) {
+ LOG(ERROR) << "Failed to get cowop_iter";
+ return 1;
+ }
+
+ exceptions_per_area_ = (CHUNK_SIZE << SECTOR_SHIFT) / sizeof(struct disk_exception);
+
+ // Start from chunk number 2. Chunk 0 represents header and chunk 1
+ // represents first metadata page.
+ chunk_t next_free = NUM_SNAPSHOT_HDR_CHUNKS + 1;
+ chunk_vec_.push_back(nullptr);
+ chunk_vec_.push_back(nullptr);
+
+ loff_t offset = 0;
+ std::unique_ptr<uint8_t[]> de_ptr =
+ std::make_unique<uint8_t[]>(exceptions_per_area_ * sizeof(struct disk_exception));
+
+ // This memset is important. Kernel will stop issuing IO when new-chunk ID
+ // is 0. When an area is not filled completely with all 256 exceptions,
+ // this memset will ensure that metadata read is completed.
+ memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));
+ size_t num_ops = 0;
+
+ while (!cowop_iter_->Done()) {
+ const CowOperation* cow_op = &cowop_iter_->Get();
+ struct disk_exception* de =
+ reinterpret_cast<struct disk_exception*>((char*)de_ptr.get() + offset);
+
+ if (!(cow_op->type == kCowReplaceOp || cow_op->type == kCowZeroOp ||
+ cow_op->type == kCowCopyOp)) {
+ LOG(ERROR) << "Unknown operation-type found: " << cow_op->type;
+ return 1;
+ }
+
+ // Construct the disk-exception
+ de->old_chunk = cow_op->new_block;
+ de->new_chunk = next_free;
+
+ LOG(DEBUG) << "Old-chunk: " << de->old_chunk << "New-chunk: " << de->new_chunk;
+
+ // Store operation pointer. Note, new-chunk ID is the index
+ chunk_vec_.push_back(cow_op);
+ CHECK(next_free == (chunk_vec_.size() - 1));
+
+ offset += sizeof(struct disk_exception);
+
+ cowop_iter_->Next();
+
+ // Find the next free chunk-id to be assigned. Check if the next free
+ // chunk-id represents a metadata page. If so, skip it.
+ next_free += 1;
+ uint32_t stride = exceptions_per_area_ + 1;
+ lldiv_t divresult = lldiv(next_free, stride);
+ num_ops += 1;
+
+ if (divresult.rem == NUM_SNAPSHOT_HDR_CHUNKS) {
+ CHECK(num_ops == exceptions_per_area_);
+ // Store it in vector at the right index. This maps the chunk-id to
+ // vector index.
+ vec_.push_back(std::move(de_ptr));
+ offset = 0;
+ num_ops = 0;
+
+ chunk_t metadata_chunk = (next_free - exceptions_per_area_ - NUM_SNAPSHOT_HDR_CHUNKS);
+
+ LOG(DEBUG) << "Area: " << vec_.size() - 1;
+ LOG(DEBUG) << "Metadata-chunk: " << metadata_chunk;
+ LOG(DEBUG) << "Sector number of Metadata-chunk: " << (metadata_chunk << CHUNK_SHIFT);
+
+ // Create buffer for next area
+ de_ptr = std::make_unique<uint8_t[]>(exceptions_per_area_ *
+ sizeof(struct disk_exception));
+ memset(de_ptr.get(), 0, (exceptions_per_area_ * sizeof(struct disk_exception)));
+
+ // This chunk-id is a metadata page, so no CowOperation is stored at this index
+ chunk_vec_.push_back(nullptr);
+
+ // Find the next free chunk-id
+ next_free += 1;
+ if (cowop_iter_->Done()) {
+ vec_.push_back(std::move(de_ptr));
+ }
+ }
+ }
+
+ // Partially filled area
+ if (num_ops) {
+ LOG(DEBUG) << "Partially filled area num_ops: " << num_ops;
+ vec_.push_back(std::move(de_ptr));
+ }
+
+ return 0;
+}
+
+// Logging callback that prints only the message text, followed by a newline.
+// Messages whose severity is exactly ERROR are written to stderr; messages of
+// every other severity are written to stdout. The remaining arguments are
+// ignored.
+void MyLogger(android::base::LogId, android::base::LogSeverity severity, const char*, const char*,
+              unsigned int, const char* message) {
+    FILE* sink = (severity == android::base::ERROR) ? stderr : stdout;
+    fprintf(sink, "%s\n", message);
+}
+
+// Read one dm_user_header from the dm-user misc device into the buffer
+// returned by bufsink_.GetBufPtr(). The header tells us the sector number for
+// which IO was issued by the dm-snapshot device.
+//
+// Returns sizeof(struct dm_user_header) on success, -1 if the read fails.
+int Snapuserd::ReadDmUserHeader() {
+    const size_t header_bytes = sizeof(struct dm_user_header);
+
+    if (android::base::ReadFully(ctrl_fd_, bufsink_.GetBufPtr(), header_bytes)) {
+        return static_cast<int>(header_bytes);
+    }
+
+    PLOG(ERROR) << "Control read failed";
+    return -1;
+}
+
+// Send the header plus `size` bytes of payload/data, taken from the buffer
+// returned by bufsink_.GetBufPtr(), back to the dm-user misc device.
+//
+// Returns the total number of bytes written (header + payload) on success,
+// -1 if the write fails.
+int Snapuserd::WriteDmUserPayload(size_t size) {
+    const size_t total_bytes = sizeof(struct dm_user_header) + size;
+
+    if (android::base::WriteFully(ctrl_fd_, bufsink_.GetBufPtr(), total_bytes)) {
+        return static_cast<int>(total_bytes);
+    }
+
+    PLOG(ERROR) << "Write to dm-user failed";
+    return -1;
+}
+
+// Start the daemon: open the backing-store, COW and /dev/dm-user devices, then service dm-user requests.
+// Returns non-zero when a device cannot be opened or a request fails. TODO: Handle signals
+int Snapuserd::Run() {
+ backing_store_fd_.reset(open(in_backing_store_device_.c_str(), O_RDONLY));
+ if (backing_store_fd_ < 0) {
+ LOG(ERROR) << "Open Failed: " << in_backing_store_device_;
+ return 1;
+ }
+
+ cow_fd_.reset(open(in_cow_device_.c_str(), O_RDWR));
+ if (cow_fd_ < 0) {
+ LOG(ERROR) << "Open Failed: " << in_cow_device_;
+ return 1;
+ }
+
+ // TODO: use UUID to support multiple partitions
+ ctrl_fd_.reset(open("/dev/dm-user", O_RDWR));
+ if (ctrl_fd_ < 0) {
+ LOG(ERROR) << "Unable to open /dev/dm-user";
+ return 1;
+ }
+
+ int ret = 0;  // result of the most recent helper call: byte count, or negative on error
+
+ // Allocate the buffer which is used to communicate between the
+ // daemon and dm-user. The buffer consists of a header and a fixed payload.
+ // If dm-user requests a big IO, the IO will be broken into chunks
+ // of PAYLOAD_SIZE.
+ size_t buf_size = sizeof(struct dm_user_header) + PAYLOAD_SIZE;
+ bufsink_.Initialize(buf_size);
+
+ while (true) {
+ struct dm_user_header* header = bufsink_.GetHeaderPtr();
+
+ bufsink_.Clear();
+
+ ret = ReadDmUserHeader();
+ if (ret < 0) return ret;
+
+ LOG(DEBUG) << "dm-user returned " << ret << " bytes";
+
+ LOG(DEBUG) << "msg->seq: " << std::hex << header->seq;
+ LOG(DEBUG) << "msg->type: " << std::hex << header->type;
+ LOG(DEBUG) << "msg->flags: " << std::hex << header->flags;
+ LOG(DEBUG) << "msg->sector: " << std::hex << header->sector;
+ LOG(DEBUG) << "msg->len: " << std::hex << header->len;
+
+ switch (header->type) {
+ case DM_USER_MAP_READ: {
+ size_t remaining_size = header->len;  // bytes of this request not yet sent back
+ loff_t offset = 0;  // bytes of this request already sent back
+ header->io_in_progress = 0;
+ ret = 0;
+ do {
+ size_t read_size = std::min(PAYLOAD_SIZE, remaining_size);  // at most one payload per pass
+
+ // Request to sector 0 is always for the kernel
+ // representation of the COW header. This IO should happen only
+ // once, during dm-snapshot device creation. We should
+ // never see multiple IO requests. Additionally this IO
+ // will always be a single 4k.
+ if (header->sector == 0) {
+ // Read the metadata from internal COW device
+ // and build the in-memory data structures
+ // for all the operations in the internal COW.
+ if (!metadata_read_done_ && ReadMetadata()) {
+ LOG(ERROR) << "Metadata read failed";
+ return 1;
+ }
+ metadata_read_done_ = true;
+
+ CHECK(read_size == BLOCK_SIZE);
+ ret = ConstructKernelCowHeader();
+ if (ret < 0) return ret;
+ } else {
+ // Convert the sector number to a chunk ID.
+ //
+ // A chunk ID past the end of chunk_vec_ is answered via
+ // ZerofillDiskExceptions(); a nullptr entry is treated as a
+ // metadata page; any other entry is served by ReadData().
+ chunk_t chunk = (header->sector >> CHUNK_SHIFT);
+
+ if (chunk >= chunk_vec_.size()) {
+ ret = ZerofillDiskExceptions(read_size);
+ if (ret < 0) {
+ LOG(ERROR) << "ZerofillDiskExceptions failed";
+ return ret;
+ }
+ } else if (chunk_vec_[chunk] == nullptr) {
+ ret = ReadDiskExceptions(chunk, read_size);
+ if (ret < 0) {
+ LOG(ERROR) << "ReadDiskExceptions failed";
+ return ret;
+ }
+ } else {
+ chunk_t num_chunks_read = (offset >> BLOCK_SHIFT);  // progress so far, in BLOCK_SHIFT units
+ ret = ReadData(chunk + num_chunks_read, read_size);
+ if (ret < 0) {
+ LOG(ERROR) << "ReadData failed";
+ return ret;
+ }
+ }
+ }
+
+ ssize_t written = WriteDmUserPayload(ret);  // sends the header plus ret payload bytes
+ if (written < 0) return written;
+
+ remaining_size -= ret;
+ offset += ret;
+ if (remaining_size) {
+ LOG(DEBUG) << "Write done ret: " << ret
+ << " remaining size: " << remaining_size;
+ bufsink_.GetHeaderPtr()->io_in_progress = 1;  // set while data for this request remains
+ }
+ } while (remaining_size);
+
+ break;
+ }
+
+ case DM_USER_MAP_WRITE: {
+ // TODO: After the merge operation is completed, the kernel issues a write
+ // to flush all the exception mappings where the merge is
+ // completed. If dm-user routes the WRITE IO, we need to clear
+ // in-memory data structures representing those exception
+ // mappings.
abort();
break;
+ }
}
LOG(DEBUG) << "read() finished, next message";
@@ -119,8 +679,12 @@
return 0;
}
+} // namespace snapshot
+} // namespace android
+
+// Entry point. Expects two device-path arguments, which are forwarded
+// unchanged to the Snapuserd constructor. Returns 1 when they are missing,
+// otherwise whatever Snapuserd::Run() returns.
int main([[maybe_unused]] int argc, char** argv) {
android::base::InitLogging(argv, &android::base::KernelLogger);
- daemon_main(argv[1]);
- return 0;
+    // argv[1] and argv[2] are only valid when at least two arguments were
+    // supplied; bail out instead of handing null pointers to Snapuserd.
+    if (argc < 3) {
+        LOG(ERROR) << "snapuserd: expected two device path arguments";
+        return 1;
+    }
+
+    android::snapshot::Snapuserd snapd(argv[1], argv[2]);
+
+    return snapd.Run();
}