Consider diff operation overhead.
Storing a diff operation has more overhead over replace operation in
the manifest, we need to store an additional src_sha256_hash which is
32 bytes and not compressible, and also src_extents which could use
anywhere from a few bytes to hundreds of bytes depending on the
number of extents.
We should consider this overhead when deciding whether to use a diff
operation over a replace operation, replace operation should be
prefered if payload size is similar, because they don't rely on
existing data on disk and they could be merged with other replace
operations.
Test: brillo_update_payload generate
Test: brillo_update_payload verify
Change-Id: I850df7baf72fdd1f5b70b22506ebacadb60db58b
diff --git a/payload_generator/delta_diff_utils.cc b/payload_generator/delta_diff_utils.cc
index 255de91..4ac299d 100644
--- a/payload_generator/delta_diff_utils.cc
+++ b/payload_generator/delta_diff_utils.cc
@@ -177,6 +177,34 @@
return removed_bytes;
}
+// Storing a diff operation has more overhead over replace operation in the
+// manifest, we need to store an additional src_sha256_hash which is 32 bytes
+// and not compressible, and also src_extents which could use anywhere from a
+// few bytes to hundreds of bytes depending on the number of extents.
+// This function evaluates the overhead tradeoff and determines if it's worth to
+// use a diff operation with data blob of |diff_size| and |num_src_extents|
+// extents over an existing |op| with data blob of |old_blob_size|.
+bool IsDiffOperationBetter(const InstallOperation& op,
+ size_t old_blob_size,
+ size_t diff_size,
+ size_t num_src_extents) {
+ if (!diff_utils::IsAReplaceOperation(op.type()))
+ return diff_size < old_blob_size;
+
+ // Reference: https://developers.google.com/protocol-buffers/docs/encoding
+ // For |src_sha256_hash| we need 1 byte field number/type, 1 byte size and 32
+ // bytes data, for |src_extents| we need 1 byte field number/type and 1 byte
+ // size.
+ constexpr size_t kDiffOverhead = 1 + 1 + 32 + 1 + 1;
+ // Each extent has two variable length encoded uint64, here we use a rough
+ // estimate of 6 bytes overhead per extent, since |num_blocks| is usually
+ // very small.
+ constexpr size_t kDiffOverheadPerExtent = 6;
+
+ return diff_size + kDiffOverhead + num_src_extents * kDiffOverheadPerExtent <
+ old_blob_size;
+}
+
} // namespace
namespace diff_utils {
@@ -800,7 +828,10 @@
? InstallOperation::SOURCE_COPY
: InstallOperation::MOVE);
data_blob = brillo::Blob();
- } else {
+ } else if (IsDiffOperationBetter(
+ operation, data_blob.size(), 0, src_extents.size())) {
+ // No point in trying diff if zero blob size diff operation is
+ // still worse than replace.
if (bsdiff_allowed) {
base::FilePath patch;
TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&patch));
@@ -831,7 +862,10 @@
TEST_AND_RETURN_FALSE(utils::ReadFile(patch.value(), &bsdiff_delta));
CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0));
- if (bsdiff_delta.size() < data_blob.size()) {
+ if (IsDiffOperationBetter(operation,
+ data_blob.size(),
+ bsdiff_delta.size(),
+ src_extents.size())) {
operation.set_type(operation_type);
data_blob = std::move(bsdiff_delta);
}
@@ -867,7 +901,10 @@
temp_file_path,
&puffdiff_delta));
TEST_AND_RETURN_FALSE(puffdiff_delta.size() > 0);
- if (puffdiff_delta.size() < data_blob.size()) {
+ if (IsDiffOperationBetter(operation,
+ data_blob.size(),
+ puffdiff_delta.size(),
+ src_extents.size())) {
operation.set_type(InstallOperation::PUFFDIFF);
data_blob = std::move(puffdiff_delta);
}
diff --git a/payload_generator/delta_diff_utils_unittest.cc b/payload_generator/delta_diff_utils_unittest.cc
index e32dde2..62cdfe9 100644
--- a/payload_generator/delta_diff_utils_unittest.cc
+++ b/payload_generator/delta_diff_utils_unittest.cc
@@ -468,6 +468,37 @@
EXPECT_EQ(InstallOperation::SOURCE_BSDIFF, op.type());
}
+TEST_F(DeltaDiffUtilsTest, PreferReplaceTest) {
+ brillo::Blob data_blob(kBlockSize);
+ vector<Extent> extents = {ExtentForRange(1, 1)};
+
+ // Write something in the first 50 bytes so that REPLACE_BZ will be slightly
+ // larger than BROTLI_BSDIFF.
+ std::iota(data_blob.begin(), data_blob.begin() + 50, 0);
+ EXPECT_TRUE(WriteExtents(old_part_.path, extents, kBlockSize, data_blob));
+ // Shift the first 50 bytes in the new file by one.
+ std::iota(data_blob.begin(), data_blob.begin() + 50, 1);
+ EXPECT_TRUE(WriteExtents(new_part_.path, extents, kBlockSize, data_blob));
+
+ brillo::Blob data;
+ InstallOperation op;
+ EXPECT_TRUE(diff_utils::ReadExtentsToDiff(
+ old_part_.path,
+ new_part_.path,
+ extents,
+ extents,
+ {}, // old_deflates
+ {}, // new_deflates
+ PayloadVersion(kMaxSupportedMajorPayloadVersion,
+ kMaxSupportedMinorPayloadVersion),
+ &data,
+ &op));
+
+ EXPECT_FALSE(data.empty());
+ EXPECT_TRUE(op.has_type());
+ EXPECT_EQ(InstallOperation::REPLACE_BZ, op.type());
+}
+
TEST_F(DeltaDiffUtilsTest, IsNoopOperationTest) {
InstallOperation op;
op.set_type(InstallOperation::REPLACE_BZ);