Filter out duplicate blocks in new image
I generated 4 OTA packages on coral, and found that after
filtering out duplicate blocks from new_file_extents, OTA
packages are 0.2-0.3% smaller. Tiny improvement bust still
reduces OTA size by a bit.
There was a previous experiment that showed an increase in
OTA size is when we filter out duplicate blocks from BOTH
old_file_extents and new_file_extents.
Test: th
Bug: 177104308
Change-Id: Iff8d5948fbe5a028e356f47637e5b5a803e00334
diff --git a/payload_generator/delta_diff_utils.cc b/payload_generator/delta_diff_utils.cc
index f3bc0bb..7d36bec 100644
--- a/payload_generator/delta_diff_utils.cc
+++ b/payload_generator/delta_diff_utils.cc
@@ -30,7 +30,6 @@
#include <algorithm>
#include <functional>
-#include <limits>
#include <list>
#include <map>
#include <memory>
@@ -61,6 +60,7 @@
#include "update_engine/payload_generator/delta_diff_generator.h"
#include "update_engine/payload_generator/extent_ranges.h"
#include "update_engine/payload_generator/extent_utils.h"
+#include "update_engine/payload_generator/merge_sequence_generator.h"
#include "update_engine/payload_generator/squashfs_filesystem.h"
#include "update_engine/payload_generator/xz.h"
@@ -135,10 +135,10 @@
return distances.back();
}
-bool ShouldCreateNewOp(const std::vector<CowMergeOperation>& ops,
- size_t src_block,
- size_t dst_block,
- size_t src_offset) {
+static bool ShouldCreateNewOp(const std::vector<CowMergeOperation>& ops,
+ size_t src_block,
+ size_t dst_block,
+ size_t src_offset) {
if (ops.empty()) {
return true;
}
@@ -156,6 +156,9 @@
size_t src_block,
size_t dst_block,
size_t src_offset) {
+ if (!ops->empty() && ExtentContains(ops->back().dst_extent(), dst_block)) {
+ return;
+ }
CHECK_NE(src_block, std::numeric_limits<uint64_t>::max());
CHECK_NE(dst_block, std::numeric_limits<uint64_t>::max());
if (ShouldCreateNewOp(*ops, src_block, dst_block, src_offset)) {
@@ -178,187 +181,6 @@
} // namespace
namespace diff_utils {
-// A utility class that tries different algorithms and pick the patch with the
-// smallest size.
-class BestDiffGenerator {
- public:
- BestDiffGenerator(const brillo::Blob& old_data,
- const brillo::Blob& new_data,
- const std::vector<Extent>& src_extents,
- const std::vector<Extent>& dst_extents,
- const std::vector<puffin::BitExtent>& old_deflates,
- const std::vector<puffin::BitExtent>& new_deflates,
- const PayloadGenerationConfig& config)
- : old_data_(old_data),
- new_data_(new_data),
- src_extents_(src_extents),
- dst_extents_(dst_extents),
- old_deflates_(old_deflates),
- new_deflates_(new_deflates),
- config_(config) {}
-
- // Tries different algorithms and compares their patch sizes with
- // |data_blob|. If the size is smaller, updates the operation type in |aop|
- // and bytes in |data_blob|.
- bool GenerateBestDiffOperation(AnnotatedOperation* aop,
- brillo::Blob* data_blob);
-
- private:
- bool TryBsdiffAndUpdateOperation(InstallOperation_Type operation_type,
- AnnotatedOperation* aop,
- brillo::Blob* data_blob);
- bool TryPuffdiffAndUpdateOperation(AnnotatedOperation* aop,
- brillo::Blob* data_blob);
-
- const brillo::Blob& old_data_;
- const brillo::Blob& new_data_;
- const std::vector<Extent>& src_extents_;
- const std::vector<Extent>& dst_extents_;
- const std::vector<puffin::BitExtent>& old_deflates_;
- const std::vector<puffin::BitExtent>& new_deflates_;
- const PayloadGenerationConfig& config_;
-};
-
-bool BestDiffGenerator::GenerateBestDiffOperation(AnnotatedOperation* aop,
- brillo::Blob* data_blob) {
- CHECK(aop);
- CHECK(data_blob);
-
- const auto& version = config_.version;
- uint64_t input_bytes = utils::BlocksInExtents(src_extents_) * kBlockSize;
- std::vector<std::pair<InstallOperation_Type, size_t>> diff_candidates = {
- {InstallOperation::SOURCE_BSDIFF, kMaxBsdiffDestinationSize},
- {InstallOperation::PUFFDIFF, kMaxPuffdiffDestinationSize},
- };
-
- for (auto [op_type, limit] : diff_candidates) {
- if (!version.OperationAllowed(op_type)) {
- continue;
- }
-
- // Disable the specific diff algorithm when the data is too big.
- if (input_bytes > limit) {
- LOG(INFO) << op_type << " ignored, data too big: " << input_bytes
- << " bytes";
- continue;
- }
-
- // Prefer BROTLI_BSDIFF as it gives smaller patch size.
- if (op_type == InstallOperation::SOURCE_BSDIFF &&
- version.OperationAllowed(InstallOperation::BROTLI_BSDIFF)) {
- op_type = InstallOperation::BROTLI_BSDIFF;
- }
-
- switch (op_type) {
- case InstallOperation::SOURCE_BSDIFF:
- case InstallOperation::BROTLI_BSDIFF:
- TEST_AND_RETURN_FALSE(
- TryBsdiffAndUpdateOperation(op_type, aop, data_blob));
- break;
- case InstallOperation::PUFFDIFF:
- TEST_AND_RETURN_FALSE(TryPuffdiffAndUpdateOperation(aop, data_blob));
- break;
- default:
- NOTREACHED();
- }
- }
-
- return true;
-}
-
-bool BestDiffGenerator::TryBsdiffAndUpdateOperation(
- InstallOperation_Type operation_type,
- AnnotatedOperation* aop,
- brillo::Blob* data_blob) {
- base::FilePath patch;
- TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&patch));
- ScopedPathUnlinker unlinker(patch.value());
-
- std::unique_ptr<bsdiff::PatchWriterInterface> bsdiff_patch_writer;
- if (operation_type == InstallOperation::BROTLI_BSDIFF) {
- bsdiff_patch_writer =
- bsdiff::CreateBSDF2PatchWriter(patch.value(),
- bsdiff::CompressorType::kBrotli,
- kBrotliCompressionQuality);
- } else {
- bsdiff_patch_writer = bsdiff::CreateBsdiffPatchWriter(patch.value());
- }
-
- brillo::Blob bsdiff_delta;
- TEST_AND_RETURN_FALSE(0 == bsdiff::bsdiff(old_data_.data(),
- old_data_.size(),
- new_data_.data(),
- new_data_.size(),
- bsdiff_patch_writer.get(),
- nullptr));
-
- TEST_AND_RETURN_FALSE(utils::ReadFile(patch.value(), &bsdiff_delta));
- TEST_AND_RETURN_FALSE(!bsdiff_delta.empty());
-
- InstallOperation& operation = aop->op;
- if (IsDiffOperationBetter(operation,
- data_blob->size(),
- bsdiff_delta.size(),
- src_extents_.size())) {
- if (config_.enable_vabc_xor) {
- StoreExtents(src_extents_, operation.mutable_src_extents());
- diff_utils::PopulateXorOps(aop, bsdiff_delta);
- }
- operation.set_type(operation_type);
- *data_blob = std::move(bsdiff_delta);
- }
- return true;
-}
-
-bool BestDiffGenerator::TryPuffdiffAndUpdateOperation(AnnotatedOperation* aop,
- brillo::Blob* data_blob) {
- // Find all deflate positions inside the given extents and then put all
- // deflates together because we have already read all the extents into
- // one buffer.
- vector<puffin::BitExtent> src_deflates;
- TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates(
- src_extents_, old_deflates_, &src_deflates));
-
- vector<puffin::BitExtent> dst_deflates;
- TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates(
- dst_extents_, new_deflates_, &dst_deflates));
-
- puffin::RemoveEqualBitExtents(
- old_data_, new_data_, &src_deflates, &dst_deflates);
-
- // See crbug.com/915559.
- if (config_.version.minor <= kPuffdiffMinorPayloadVersion) {
- TEST_AND_RETURN_FALSE(
- puffin::RemoveDeflatesWithBadDistanceCaches(old_data_, &src_deflates));
-
- TEST_AND_RETURN_FALSE(
- puffin::RemoveDeflatesWithBadDistanceCaches(new_data_, &dst_deflates));
- }
-
- // Only Puffdiff if both files have at least one deflate left.
- if (!src_deflates.empty() && !dst_deflates.empty()) {
- brillo::Blob puffdiff_delta;
- ScopedTempFile temp_file("puffdiff-delta.XXXXXX");
- // Perform PuffDiff operation.
- TEST_AND_RETURN_FALSE(puffin::PuffDiff(old_data_,
- new_data_,
- src_deflates,
- dst_deflates,
- temp_file.path(),
- &puffdiff_delta));
- TEST_AND_RETURN_FALSE(!puffdiff_delta.empty());
-
- InstallOperation& operation = aop->op;
- if (IsDiffOperationBetter(operation,
- data_blob->size(),
- puffdiff_delta.size(),
- src_extents_.size())) {
- operation.set_type(InstallOperation::PUFFDIFF);
- *data_blob = std::move(puffdiff_delta);
- }
- }
- return true;
-}
// This class encapsulates a file delta processing thread work. The
// processor computes the delta between the source and target files;
@@ -493,6 +315,19 @@
return *old_file;
}
+std::vector<Extent> RemoveDuplicateBlocks(const std::vector<Extent>& extents) {
+ ExtentRanges extent_set;
+ std::vector<Extent> ret;
+ for (const auto& extent : extents) {
+ auto vec = FilterExtentRanges({extent}, extent_set);
+ ret.insert(ret.end(),
+ std::make_move_iterator(vec.begin()),
+ std::make_move_iterator(vec.end()));
+ extent_set.AddExtent(extent);
+ }
+ return ret;
+}
+
bool DeltaReadPartition(vector<AnnotatedOperation>* aops,
const PartitionConfig& old_part,
const PartitionConfig& new_part,
@@ -579,11 +414,15 @@
FilterExtentRanges(old_file.extents, old_zero_blocks);
old_visited_blocks.AddExtents(old_file_extents);
+ // TODO(b/177104308) Filtering |new_file_extents| might cause inconsistency
+ // with new_file.deflates. But we filter blocks across different InstallOps
+ // already. Investigate if computing deflates after these filtering produces
+ // better results.
file_delta_processors.emplace_back(old_part.path,
new_part.path,
config,
std::move(old_file_extents),
- std::move(new_file_extents),
+ RemoveDuplicateBlocks(new_file_extents),
old_file.deflates,
new_file.deflates,
new_file.name, // operation name
@@ -613,7 +452,7 @@
new_part.path,
config,
std::move(old_unvisited),
- std::move(new_unvisited),
+ RemoveDuplicateBlocks(new_unvisited),
vector<puffin::BitExtent>{}, // old_deflates,
vector<puffin::BitExtent>{}, // new_deflates
"<non-file-data>", // operation name
@@ -830,6 +669,7 @@
// Now, insert into the list of operations.
AnnotatedOperation aop;
+ aop.name = name;
TEST_AND_RETURN_FALSE(ReadExtentsToDiff(old_part,
new_part,
old_extents_chunk,
@@ -846,7 +686,6 @@
return false;
}
- aop.name = name;
if (static_cast<uint64_t>(chunk_blocks) < total_blocks) {
aop.name = base::StringPrintf(
"%s:%" PRIu64, name.c_str(), block_offset / chunk_blocks);
@@ -989,15 +828,33 @@
brillo::Blob* out_data,
AnnotatedOperation* out_op) {
const auto& version = config.version;
+ AnnotatedOperation& aop = *out_op;
+ InstallOperation& operation = aop.op;
// We read blocks from old_extents and write blocks to new_extents.
uint64_t blocks_to_read = utils::BlocksInExtents(old_extents);
uint64_t blocks_to_write = utils::BlocksInExtents(new_extents);
+ // Disable bsdiff, and puffdiff when the data is too big.
+ bool bsdiff_allowed =
+ version.OperationAllowed(InstallOperation::SOURCE_BSDIFF);
+ if (bsdiff_allowed &&
+ blocks_to_read * kBlockSize > kMaxBsdiffDestinationSize) {
+ LOG(INFO) << "bsdiff ignored, data too big: " << blocks_to_read * kBlockSize
+ << " bytes";
+ bsdiff_allowed = false;
+ }
+
+ bool puffdiff_allowed = version.OperationAllowed(InstallOperation::PUFFDIFF);
+ if (puffdiff_allowed &&
+ blocks_to_read * kBlockSize > kMaxPuffdiffDestinationSize) {
+ LOG(INFO) << "puffdiff ignored, data too big: "
+ << blocks_to_read * kBlockSize << " bytes";
+ puffdiff_allowed = false;
+ }
+
const vector<Extent>& src_extents = old_extents;
const vector<Extent>& dst_extents = new_extents;
- AnnotatedOperation aop;
- InstallOperation& operation = aop.op;
// All operations have dst_extents.
StoreExtents(dst_extents, operation.mutable_dst_extents());
@@ -1036,16 +893,91 @@
operation, data_blob.size(), 0, src_extents.size())) {
// No point in trying diff if zero blob size diff operation is
// still worse than replace.
+ if (bsdiff_allowed) {
+ base::FilePath patch;
+ TEST_AND_RETURN_FALSE(base::CreateTemporaryFile(&patch));
+ ScopedPathUnlinker unlinker(patch.value());
- BestDiffGenerator best_diff_generator(old_data,
- new_data,
- src_extents,
- dst_extents,
- old_deflates,
- new_deflates,
- config);
- TEST_AND_RETURN_FALSE(
- best_diff_generator.GenerateBestDiffOperation(&aop, &data_blob));
+ std::unique_ptr<bsdiff::PatchWriterInterface> bsdiff_patch_writer;
+ InstallOperation::Type operation_type = InstallOperation::SOURCE_BSDIFF;
+ if (version.OperationAllowed(InstallOperation::BROTLI_BSDIFF)) {
+ bsdiff_patch_writer =
+ bsdiff::CreateBSDF2PatchWriter(patch.value(),
+ bsdiff::CompressorType::kBrotli,
+ kBrotliCompressionQuality);
+ operation_type = InstallOperation::BROTLI_BSDIFF;
+ } else {
+ bsdiff_patch_writer = bsdiff::CreateBsdiffPatchWriter(patch.value());
+ }
+
+ brillo::Blob bsdiff_delta;
+ TEST_AND_RETURN_FALSE(0 == bsdiff::bsdiff(old_data.data(),
+ old_data.size(),
+ new_data.data(),
+ new_data.size(),
+ bsdiff_patch_writer.get(),
+ nullptr));
+
+ TEST_AND_RETURN_FALSE(utils::ReadFile(patch.value(), &bsdiff_delta));
+
+ CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0));
+ if (IsDiffOperationBetter(operation,
+ data_blob.size(),
+ bsdiff_delta.size(),
+ src_extents.size())) {
+ if (config.enable_vabc_xor) {
+ StoreExtents(src_extents, operation.mutable_src_extents());
+ PopulateXorOps(&aop, bsdiff_delta);
+ }
+ operation.set_type(operation_type);
+ data_blob = std::move(bsdiff_delta);
+ }
+ }
+ if (puffdiff_allowed) {
+ // Find all deflate positions inside the given extents and then put all
+ // deflates together because we have already read all the extents into
+ // one buffer.
+ vector<puffin::BitExtent> src_deflates;
+ TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates(
+ src_extents, old_deflates, &src_deflates));
+
+ vector<puffin::BitExtent> dst_deflates;
+ TEST_AND_RETURN_FALSE(deflate_utils::FindAndCompactDeflates(
+ dst_extents, new_deflates, &dst_deflates));
+
+ puffin::RemoveEqualBitExtents(
+ old_data, new_data, &src_deflates, &dst_deflates);
+
+ // See crbug.com/915559.
+ if (version.minor <= kPuffdiffMinorPayloadVersion) {
+ TEST_AND_RETURN_FALSE(puffin::RemoveDeflatesWithBadDistanceCaches(
+ old_data, &src_deflates));
+
+ TEST_AND_RETURN_FALSE(puffin::RemoveDeflatesWithBadDistanceCaches(
+ new_data, &dst_deflates));
+ }
+
+ // Only Puffdiff if both files have at least one deflate left.
+ if (!src_deflates.empty() && !dst_deflates.empty()) {
+ brillo::Blob puffdiff_delta;
+ ScopedTempFile temp_file("puffdiff-delta.XXXXXX");
+ // Perform PuffDiff operation.
+ TEST_AND_RETURN_FALSE(puffin::PuffDiff(old_data,
+ new_data,
+ src_deflates,
+ dst_deflates,
+ temp_file.path(),
+ &puffdiff_delta));
+ TEST_AND_RETURN_FALSE(puffdiff_delta.size() > 0);
+ if (IsDiffOperationBetter(operation,
+ data_blob.size(),
+ puffdiff_delta.size(),
+ src_extents.size())) {
+ operation.set_type(InstallOperation::PUFFDIFF);
+ data_blob = std::move(puffdiff_delta);
+ }
+ }
+ }
}
}
diff --git a/payload_generator/extent_utils.h b/payload_generator/extent_utils.h
index f5bb39c..712cc92 100644
--- a/payload_generator/extent_utils.h
+++ b/payload_generator/extent_utils.h
@@ -145,6 +145,11 @@
return std::numeric_limits<size_t>::max();
}
+constexpr bool ExtentContains(const Extent& extent, size_t block) {
+ return extent.start_block() <= block &&
+ block < extent.start_block() + extent.num_blocks();
+}
+
} // namespace chromeos_update_engine
#endif // UPDATE_ENGINE_PAYLOAD_GENERATOR_EXTENT_UTILS_H_