Restructure hash calculation in delta_performer

This patch moves DeltaPerformer::CalculateAndValidateHash to
fd_utils::ReadAndHashExtents and cleans up the code. It also adds
unittests for ReadAndHashExtents.

Bug: None
Test: unittest pass

Change-Id: I297cf79ef38a7495d5bcc0e6516a1ca783e505ea
Signed-off-by: Amin Hassani <ahassani@google.com>
diff --git a/payload_consumer/delta_performer.cc b/payload_consumer/delta_performer.cc
index 6540675..a392b40 100644
--- a/payload_consumer/delta_performer.cc
+++ b/payload_consumer/delta_performer.cc
@@ -1207,28 +1207,6 @@
   return true;
 }
 
-bool DeltaPerformer::CalculateAndValidateSourceHash(
-    const InstallOperation& operation, ErrorCode* error) {
-  const uint64_t kMaxBlocksToRead = 256;  // 1MB if block size is 4KB
-  auto total_blocks = utils::BlocksInExtents(operation.src_extents());
-  brillo::Blob buf(std::min(kMaxBlocksToRead, total_blocks) * block_size_);
-  DirectExtentReader reader;
-  TEST_AND_RETURN_FALSE(
-      reader.Init(source_fd_, operation.src_extents(), block_size_));
-  HashCalculator source_hasher;
-  while (total_blocks > 0) {
-    auto read_blocks = std::min(total_blocks, kMaxBlocksToRead);
-    TEST_AND_RETURN_FALSE(reader.Read(buf.data(), read_blocks * block_size_));
-    TEST_AND_RETURN_FALSE(
-        source_hasher.Update(buf.data(), read_blocks * block_size_));
-    total_blocks -= read_blocks;
-  }
-  TEST_AND_RETURN_FALSE(source_hasher.Finalize());
-  TEST_AND_RETURN_FALSE(ValidateSourceHash(
-      source_hasher.raw_hash(), operation, source_fd_, error));
-  return true;
-}
-
 namespace {
 
 class BsdiffExtentFile : public bsdiff::FileInterface {
@@ -1309,7 +1287,11 @@
     TEST_AND_RETURN_FALSE(operation.dst_length() % block_size_ == 0);
 
   if (operation.has_src_sha256_hash()) {
-    TEST_AND_RETURN_FALSE(CalculateAndValidateSourceHash(operation, error));
+    brillo::Blob source_hash;
+    TEST_AND_RETURN_FALSE(fd_utils::ReadAndHashExtents(
+        source_fd_, operation.src_extents(), block_size_, &source_hash));
+    TEST_AND_RETURN_FALSE(
+        ValidateSourceHash(source_hash, operation, source_fd_, error));
   }
 
   auto reader = std::make_unique<DirectExtentReader>();
@@ -1422,7 +1404,11 @@
   TEST_AND_RETURN_FALSE(buffer_.size() >= operation.data_length());
 
   if (operation.has_src_sha256_hash()) {
-    TEST_AND_RETURN_FALSE(CalculateAndValidateSourceHash(operation, error));
+    brillo::Blob source_hash;
+    TEST_AND_RETURN_FALSE(fd_utils::ReadAndHashExtents(
+        source_fd_, operation.src_extents(), block_size_, &source_hash));
+    TEST_AND_RETURN_FALSE(
+        ValidateSourceHash(source_hash, operation, source_fd_, error));
   }
 
   auto reader = std::make_unique<DirectExtentReader>();
diff --git a/payload_consumer/delta_performer.h b/payload_consumer/delta_performer.h
index 731e7f1..d5ca799 100644
--- a/payload_consumer/delta_performer.h
+++ b/payload_consumer/delta_performer.h
@@ -245,10 +245,6 @@
   // buffer.
   ErrorCode ValidateMetadataSignature(const brillo::Blob& payload);
 
-  // Calculates and validates the source hash of the operation |operation|.
-  bool CalculateAndValidateSourceHash(const InstallOperation& operation,
-                                      ErrorCode* error);
-
   // Returns true on success.
   bool PerformInstallOperation(const InstallOperation& operation);
 
diff --git a/payload_consumer/file_descriptor_utils.cc b/payload_consumer/file_descriptor_utils.cc
index 73f86df..4ffded2 100644
--- a/payload_consumer/file_descriptor_utils.cc
+++ b/payload_consumer/file_descriptor_utils.cc
@@ -33,7 +33,52 @@
 namespace {
 
 // Size of the buffer used to copy blocks.
-const int kMaxCopyBufferSize = 1024 * 1024;
+const uint64_t kMaxCopyBufferSize = 1024 * 1024;
+
+bool CommonHashExtents(FileDescriptorPtr source,
+                       const RepeatedPtrField<Extent>& src_extents,
+                       FileDescriptorPtr target,
+                       const RepeatedPtrField<Extent>& tgt_extents,
+                       uint64_t block_size,
+                       brillo::Blob* hash_out) {
+  auto total_blocks = utils::BlocksInExtents(src_extents);
+  auto buffer_blocks = kMaxCopyBufferSize / block_size;
+  // Ensure we copy at least one block at a time.
+  if (buffer_blocks < 1)
+    buffer_blocks = 1;
+  brillo::Blob buf(buffer_blocks * block_size);
+
+  DirectExtentReader reader;
+  TEST_AND_RETURN_FALSE(reader.Init(source, src_extents, block_size));
+  DirectExtentWriter writer;
+  if (target) {
+    TEST_AND_RETURN_FALSE(writer.Init(target, tgt_extents, block_size));
+    TEST_AND_RETURN_FALSE(total_blocks == utils::BlocksInExtents(tgt_extents));
+  }
+
+  HashCalculator source_hasher;
+  while (total_blocks > 0) {
+    auto read_blocks = std::min(total_blocks, buffer_blocks);
+    TEST_AND_RETURN_FALSE(reader.Read(buf.data(), read_blocks * block_size));
+    if (hash_out != nullptr) {
+      TEST_AND_RETURN_FALSE(
+          source_hasher.Update(buf.data(), read_blocks * block_size));
+    }
+    if (target) {
+      TEST_AND_RETURN_FALSE(writer.Write(buf.data(), read_blocks * block_size));
+    }
+    total_blocks -= read_blocks;
+  }
+  if (target) {
+    TEST_AND_RETURN_FALSE(writer.End());
+  }
+
+  if (hash_out != nullptr) {
+    TEST_AND_RETURN_FALSE(source_hasher.Finalize());
+    *hash_out = source_hasher.raw_hash();
+  }
+  return true;
+}
 
 }  // namespace
 
@@ -43,40 +88,21 @@
                         const RepeatedPtrField<Extent>& src_extents,
                         FileDescriptorPtr target,
                         const RepeatedPtrField<Extent>& tgt_extents,
-                        uint32_t block_size,
+                        uint64_t block_size,
                         brillo::Blob* hash_out) {
-  uint64_t total_blocks = utils::BlocksInExtents(src_extents);
-  TEST_AND_RETURN_FALSE(total_blocks == utils::BlocksInExtents(tgt_extents));
+  TEST_AND_RETURN_FALSE(target);
+  TEST_AND_RETURN_FALSE(CommonHashExtents(
+      source, src_extents, target, tgt_extents, block_size, hash_out));
+  return true;
+}
 
-  DirectExtentReader reader;
-  TEST_AND_RETURN_FALSE(reader.Init(source, src_extents, block_size));
-  DirectExtentWriter writer;
-  TEST_AND_RETURN_FALSE(writer.Init(target, tgt_extents, block_size));
-
-  uint64_t buffer_blocks = kMaxCopyBufferSize / block_size;
-  // Ensure we copy at least one block at a time.
-  if (buffer_blocks < 1)
-    buffer_blocks = 1;
-  brillo::Blob buf(buffer_blocks * block_size);
-
-  HashCalculator source_hasher;
-  uint64_t blocks_left = total_blocks;
-  while (blocks_left > 0) {
-    uint64_t read_blocks = std::min(blocks_left, buffer_blocks);
-    TEST_AND_RETURN_FALSE(reader.Read(buf.data(), read_blocks * block_size));
-    if (hash_out) {
-      TEST_AND_RETURN_FALSE(
-          source_hasher.Update(buf.data(), read_blocks * block_size));
-    }
-    TEST_AND_RETURN_FALSE(writer.Write(buf.data(), read_blocks * block_size));
-    blocks_left -= read_blocks;
-  }
-  TEST_AND_RETURN_FALSE(writer.End());
-
-  if (hash_out) {
-    TEST_AND_RETURN_FALSE(source_hasher.Finalize());
-    *hash_out = source_hasher.raw_hash();
-  }
+bool ReadAndHashExtents(FileDescriptorPtr source,
+                        const RepeatedPtrField<Extent>& extents,
+                        uint64_t block_size,
+                        brillo::Blob* hash_out) {
+  TEST_AND_RETURN_FALSE(hash_out != nullptr);
+  TEST_AND_RETURN_FALSE(
+      CommonHashExtents(source, extents, nullptr, {}, block_size, hash_out));
   return true;
 }
 
diff --git a/payload_consumer/file_descriptor_utils.h b/payload_consumer/file_descriptor_utils.h
index d1289d6..397c35e 100644
--- a/payload_consumer/file_descriptor_utils.h
+++ b/payload_consumer/file_descriptor_utils.h
@@ -39,7 +39,17 @@
     const google::protobuf::RepeatedPtrField<Extent>& src_extents,
     FileDescriptorPtr target,
     const google::protobuf::RepeatedPtrField<Extent>& tgt_extents,
-    uint32_t block_size,
+    uint64_t block_size,
+    brillo::Blob* hash_out);
+
+// Reads blocks from |source| and caculates the hash. The blocks to read are
+// specified by |extents|. Stores the hash in |hash_out| if it is not null. The
+// block sizes are passed as |block_size|. In case of error reading, it returns
+// false and the value pointed by |hash_out| is undefined.
+bool ReadAndHashExtents(
+    FileDescriptorPtr source,
+    const google::protobuf::RepeatedPtrField<Extent>& extents,
+    uint64_t block_size,
     brillo::Blob* hash_out);
 
 }  // namespace fd_utils
diff --git a/payload_consumer/file_descriptor_utils_unittest.cc b/payload_consumer/file_descriptor_utils_unittest.cc
index 8ba8ce6..79d2184 100644
--- a/payload_consumer/file_descriptor_utils_unittest.cc
+++ b/payload_consumer/file_descriptor_utils_unittest.cc
@@ -167,4 +167,32 @@
   EXPECT_EQ(expected_hash, hash_out);
 }
 
+// Failing to read from the source should fail the hash calculation.
+TEST_F(FileDescriptorUtilsTest, ReadAndHashExtentsReadFailureTest) {
+  auto extents = CreateExtentList({{0, 5}});
+  fake_source_->AddFailureRange(10, 5);
+  brillo::Blob hash_out;
+  EXPECT_FALSE(fd_utils::ReadAndHashExtents(source_, extents, 4, &hash_out));
+}
+
+// Test that if hash_out is null, then it should fail.
+TEST_F(FileDescriptorUtilsTest, ReadAndHashExtentsWithoutHashingTest) {
+  auto extents = CreateExtentList({{0, 5}});
+  EXPECT_FALSE(fd_utils::ReadAndHashExtents(source_, extents, 4, nullptr));
+}
+
+// Tests that it can calculate the hash properly.
+TEST_F(FileDescriptorUtilsTest, ReadAndHashExtentsTest) {
+  // Reorder the input as 1 4 2 3 0.
+  auto extents = CreateExtentList({{1, 1}, {4, 1}, {2, 2}, {0, 1}});
+  brillo::Blob hash_out;
+  EXPECT_TRUE(fd_utils::ReadAndHashExtents(source_, extents, 4, &hash_out));
+
+  const char kExpectedResult[] = "00010004000200030000";
+  brillo::Blob expected_hash;
+  EXPECT_TRUE(HashCalculator::RawHashOfBytes(
+      kExpectedResult, strlen(kExpectedResult), &expected_hash));
+  EXPECT_EQ(expected_hash, hash_out);
+}
+
 }  // namespace chromeos_update_engine