Improve the control flow of generating diff patches
Factor out the logic that generates the smallest diff patches.
And make it easier to extend with new diff algorithms.
Test: TH
Change-Id: Ieecb1703cb6591afc83b297081c568cb76f1fdfe
diff --git a/payload_generator/delta_diff_utils.cc b/payload_generator/delta_diff_utils.cc
index b4b49e6..f3bc0bb 100644
--- a/payload_generator/delta_diff_utils.cc
+++ b/payload_generator/delta_diff_utils.cc
@@ -30,6 +30,7 @@
#include <algorithm>
#include <functional>
+#include <limits>
#include <list>
#include <map>
#include <memory>
@@ -134,10 +135,10 @@
return distances.back();
}
-static bool ShouldCreateNewOp(const std::vector<CowMergeOperation>& ops,
- size_t src_block,
- size_t dst_block,
- size_t src_offset) {
+bool ShouldCreateNewOp(const std::vector<CowMergeOperation>& ops,
+ size_t src_block,
+ size_t dst_block,
+ size_t src_offset) {
if (ops.empty()) {
return true;
}
@@ -151,10 +152,10 @@
dst_extent.start_block() + dst_extent.num_blocks() != dst_block;
}
-static void AppendXorBlock(std::vector<CowMergeOperation>* ops,
- size_t src_block,
- size_t dst_block,
- size_t src_offset) {
+void AppendXorBlock(std::vector<CowMergeOperation>* ops,
+ size_t src_block,
+ size_t dst_block,
+ size_t src_offset) {
CHECK_NE(src_block, std::numeric_limits<uint64_t>::max());
CHECK_NE(dst_block, std::numeric_limits<uint64_t>::max());
if (ShouldCreateNewOp(*ops, src_block, dst_block, src_offset)) {
@@ -177,6 +178,187 @@
} // namespace
namespace diff_utils {
+// A utility class that tries different algorithms and pick the patch with the
+// smallest size.
+class BestDiffGenerator {
+ public:
+ BestDiffGenerator(const brillo::Blob& old_data,
+ const brillo::Blob& new_data,
+ const std::vector<Extent>& src_extents,
+ const std::vector<Extent>& dst_extents,
+ const std::vector<puffin::BitExtent>& old_deflates,
+ const std::vector<puffin::BitExtent>& new_deflates,
+ const PayloadGenerationConfig& config)
+ : old_data_(old_data),
+ new_data_(new_data),
+ src_extents_(src_extents),
+ dst_extents_(dst_extents),
+ old_deflates_(old_deflates),
+ new_deflates_(new_deflates),
+ config_(config) {}
+
+ // Tries different algorithms and compares their patch sizes with
+ // |data_blob|. If the size is smaller, updates the operation type in |aop|
+ // and bytes in |data_blob|.
+ bool GenerateBestDiffOperation(AnnotatedOperation* aop,
+ brillo::Blob* data_blob);
+
+ private:
+ bool TryBsdiffAndUpdateOperation(InstallOperation_Type operation_type,
+ AnnotatedOperation* aop,
+ brillo::Blob* data_blob);
+ bool TryPuffdiffAndUpdateOperation(AnnotatedOperation* aop,
+ brillo::Blob* data_blob);
+
+ const brillo::Blob& old_data_;
+ const brillo::Blob& new_data_;
+ const std::vector<Extent>& src_extents_;
+ const std::vector<Extent>& dst_extents_;
+ const std::vector<puffin::BitExtent>& old_deflates_;
+ const std::vector<puffin::BitExtent>& new_deflates_;
+ const PayloadGenerationConfig& config_;
+};
+
+bool BestDiffGenerator::GenerateBestDiffOperation(AnnotatedOperation* aop,
+ brillo::Blob* data_blob) {
+ CHECK(aop);
+ CHECK(data_blob);
+
+ const auto& version = config_.version;
+ uint64_t input_bytes = utils::BlocksInExtents(src_extents_) * kBlockSize;
+ std::vector<std::pair<InstallOperation_Type, size_t>> diff_candidates = {
+ {InstallOperation::SOURCE_BSDIFF, kMaxBsdiffDestinationSize},
+ {InstallOperation::PUFFDIFF, kMaxPuffdiffDestinationSize},
+ };
+
+ for (auto [op_type, limit] : diff_candidates) {
+ if (!version.OperationAllowed(op_type)) {
+ continue;
+ }
+
+ // Disable the specific diff algorithm when the data is too big.
+ if (input_bytes > limit) {
+ LOG(INFO) << op_type << " ignored, data too big: " << input_bytes
+ << " bytes";
+ continue;
+ }
+
+ // Prefer BROTLI_BSDIFF as it gives smaller patch size.
+ if (op_type == InstallOperation::SOURCE_BSDIFF &&
+ version.OperationAllowed(InstallOperation::BROTLI_BSDIFF)) {
+ op_type = InstallOperation::BROTLI_BSDIFF;
+ }
+
+ switch (op_type) {
+ case InstallOperation::SOURCE_BSDIFF:
+ case InstallOperation::BROTLI_BSDIFF:
+ TEST_AND_RETURN_FALSE(
+ TryBsdiffAndUpdateOperation(op_type, aop, data_blob));
+ break;
+ case InstallOperation::PUFFDIFF:
+ TEST_AND_RETURN_FALSE(TryPuffdiffAndUpdateOperation(aop, data_blob));
+ break;
+ default:
+ NOTREACHED();
+ }
+ }
+
+ return true;
+}
+
+bool BestDiffGenerator::TryBsdiffAndUpdateOperation(
+ InstallOperation_Type operation_type,
+ AnnotatedOperation* aop,
+ brillo::Blob* data_blob) {
+ base::FilePath patch;
+ TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&patch));
+ ScopedPathUnlinker unlinker(patch.value());
+
+ std::unique_ptr<bsdiff::PatchWriterInterface> bsdiff_patch_writer;
+ if (operation_type == InstallOperation::BROTLI_BSDIFF) {
+ bsdiff_patch_writer =
+ bsdiff::CreateBSDF2PatchWriter(patch.value(),
+ bsdiff::CompressorType::kBrotli,
+ kBrotliCompressionQuality);
+ } else {
+ bsdiff_patch_writer = bsdiff::CreateBsdiffPatchWriter(patch.value());
+ }
+
+ brillo::Blob bsdiff_delta;
+ TEST_AND_RETURN_FALSE(0 == bsdiff::bsdiff(old_data_.data(),
+ old_data_.size(),
+ new_data_.data(),
+ new_data_.size(),
+ bsdiff_patch_writer.get(),
+ nullptr));
+
+ TEST_AND_RETURN_FALSE(utils::ReadFile(patch.value(), &bsdiff_delta));
+ TEST_AND_RETURN_FALSE(!bsdiff_delta.empty());
+
+ InstallOperation& operation = aop->op;
+ if (IsDiffOperationBetter(operation,
+ data_blob->size(),
+ bsdiff_delta.size(),
+ src_extents_.size())) {
+ if (config_.enable_vabc_xor) {
+ StoreExtents(src_extents_, operation.mutable_src_extents());
+ diff_utils::PopulateXorOps(aop, bsdiff_delta);
+ }
+ operation.set_type(operation_type);
+ *data_blob = std::move(bsdiff_delta);
+ }
+ return true;
+}
+
+bool BestDiffGenerator::TryPuffdiffAndUpdateOperation(AnnotatedOperation* aop,
+ brillo::Blob* data_blob) {
+ // Find all deflate positions inside the given extents and then put all
+ // deflates together because we have already read all the extents into
+ // one buffer.
+ vector<puffin::BitExtent> src_deflates;
+ TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates(
+ src_extents_, old_deflates_, &src_deflates));
+
+ vector<puffin::BitExtent> dst_deflates;
+ TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates(
+ dst_extents_, new_deflates_, &dst_deflates));
+
+ puffin::RemoveEqualBitExtents(
+ old_data_, new_data_, &src_deflates, &dst_deflates);
+
+ // See crbug.com/915559.
+ if (config_.version.minor <= kPuffdiffMinorPayloadVersion) {
+ TEST_AND_RETURN_FALSE(
+ puffin::RemoveDeflatesWithBadDistanceCaches(old_data_, &src_deflates));
+
+ TEST_AND_RETURN_FALSE(
+ puffin::RemoveDeflatesWithBadDistanceCaches(new_data_, &dst_deflates));
+ }
+
+ // Only Puffdiff if both files have at least one deflate left.
+ if (!src_deflates.empty() && !dst_deflates.empty()) {
+ brillo::Blob puffdiff_delta;
+ ScopedTempFile temp_file("puffdiff-delta.XXXXXX");
+ // Perform PuffDiff operation.
+ TEST_AND_RETURN_FALSE(puffin::PuffDiff(old_data_,
+ new_data_,
+ src_deflates,
+ dst_deflates,
+ temp_file.path(),
+ &puffdiff_delta));
+ TEST_AND_RETURN_FALSE(!puffdiff_delta.empty());
+
+ InstallOperation& operation = aop->op;
+ if (IsDiffOperationBetter(operation,
+ data_blob->size(),
+ puffdiff_delta.size(),
+ src_extents_.size())) {
+ operation.set_type(InstallOperation::PUFFDIFF);
+ *data_blob = std::move(puffdiff_delta);
+ }
+ }
+ return true;
+}
// This class encapsulates a file delta processing thread work. The
// processor computes the delta between the source and target files;
@@ -807,33 +989,15 @@
brillo::Blob* out_data,
AnnotatedOperation* out_op) {
const auto& version = config.version;
- AnnotatedOperation aop;
- InstallOperation& operation = aop.op;
// We read blocks from old_extents and write blocks to new_extents.
uint64_t blocks_to_read = utils::BlocksInExtents(old_extents);
uint64_t blocks_to_write = utils::BlocksInExtents(new_extents);
- // Disable bsdiff, and puffdiff when the data is too big.
- bool bsdiff_allowed =
- version.OperationAllowed(InstallOperation::SOURCE_BSDIFF);
- if (bsdiff_allowed &&
- blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) {
- LOG(INFO) << "bsdiff ignored, data too big: " << blocks_to_read * kBlockSize
- << " bytes";
- bsdiff_allowed = false;
- }
-
- bool puffdiff_allowed = version.OperationAllowed(InstallOperation::PUFFDIFF);
- if (puffdiff_allowed &&
- blocks_to_read * kBlockSize > kMaxPuffdiffDestinationSize) {
- LOG(INFO) << "puffdiff ignored, data too big: "
- << blocks_to_read * kBlockSize << " bytes";
- puffdiff_allowed = false;
- }
-
const vector<Extent>& src_extents = old_extents;
const vector<Extent>& dst_extents = new_extents;
+ AnnotatedOperation aop;
+ InstallOperation& operation = aop.op;
// All operations have dst_extents.
StoreExtents(dst_extents, operation.mutable_dst_extents());
@@ -872,91 +1036,16 @@
operation, data_blob.size(), 0, src_extents.size())) {
// No point in trying diff if zero blob size diff operation is
// still worse than replace.
- if (bsdiff_allowed) {
- base::FilePath patch;
- TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&patch));
- ScopedPathUnlinker unlinker(patch.value());
- std::unique_ptr<bsdiff::PatchWriterInterface> bsdiff_patch_writer;
- InstallOperation::Type operation_type = InstallOperation::SOURCE_BSDIFF;
- if (version.OperationAllowed(InstallOperation::BROTLI_BSDIFF)) {
- bsdiff_patch_writer =
- bsdiff::CreateBSDF2PatchWriter(patch.value(),
- bsdiff::CompressorType::kBrotli,
- kBrotliCompressionQuality);
- operation_type = InstallOperation::BROTLI_BSDIFF;
- } else {
- bsdiff_patch_writer = bsdiff::CreateBsdiffPatchWriter(patch.value());
- }
-
- brillo::Blob bsdiff_delta;
- TEST_AND_RETURN_FALSE(0 == bsdiff::bsdiff(old_data.data(),
- old_data.size(),
- new_data.data(),
- new_data.size(),
- bsdiff_patch_writer.get(),
- nullptr));
-
- TEST_AND_RETURN_FALSE(utils::ReadFile(patch.value(), &bsdiff_delta));
-
- CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0));
- if (IsDiffOperationBetter(operation,
- data_blob.size(),
- bsdiff_delta.size(),
- src_extents.size())) {
- if (config.enable_vabc_xor) {
- StoreExtents(src_extents, operation.mutable_src_extents());
- PopulateXorOps(&aop, bsdiff_delta);
- }
- operation.set_type(operation_type);
- data_blob = std::move(bsdiff_delta);
- }
- }
- if (puffdiff_allowed) {
- // Find all deflate positions inside the given extents and then put all
- // deflates together because we have already read all the extents into
- // one buffer.
- vector<puffin::BitExtent> src_deflates;
- TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates(
- src_extents, old_deflates, &src_deflates));
-
- vector<puffin::BitExtent> dst_deflates;
- TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates(
- dst_extents, new_deflates, &dst_deflates));
-
- puffin::RemoveEqualBitExtents(
- old_data, new_data, &src_deflates, &dst_deflates);
-
- // See crbug.com/915559.
- if (version.minor <= kPuffdiffMinorPayloadVersion) {
- TEST_AND_RETURN_FALSE(puffin::RemoveDeflatesWithBadDistanceCaches(
- old_data, &src_deflates));
-
- TEST_AND_RETURN_FALSE(puffin::RemoveDeflatesWithBadDistanceCaches(
- new_data, &dst_deflates));
- }
-
- // Only Puffdiff if both files have at least one deflate left.
- if (!src_deflates.empty() && !dst_deflates.empty()) {
- brillo::Blob puffdiff_delta;
- ScopedTempFile temp_file("puffdiff-delta.XXXXXX");
- // Perform PuffDiff operation.
- TEST_AND_RETURN_FALSE(puffin::PuffDiff(old_data,
- new_data,
- src_deflates,
- dst_deflates,
- temp_file.path(),
- &puffdiff_delta));
- TEST_AND_RETURN_FALSE(puffdiff_delta.size() > 0);
- if (IsDiffOperationBetter(operation,
- data_blob.size(),
- puffdiff_delta.size(),
- src_extents.size())) {
- operation.set_type(InstallOperation::PUFFDIFF);
- data_blob = std::move(puffdiff_delta);
- }
- }
- }
+ BestDiffGenerator best_diff_generator(old_data,
+ new_data,
+ src_extents,
+ dst_extents,
+ old_deflates,
+ new_deflates,
+ config);
+ TEST_AND_RETURN_FALSE(
+ best_diff_generator.GenerateBestDiffOperation(&aop, &data_blob));
}
}