delta_generator: Use REPLACE_XZ in deltas when allowed.

This patch moves the generation of the best allowed full operation to a
new function which now includes ZERO, REPLACE, REPLACE_BZ and REPLACE_XZ
when allowed. This function is used to produce a full operation
whenever it is needed in the "full" payload generator and the AB "delta"
payload generator.

Bug: 24578399
TEST=brillo_update_payload generated REPLACE_XZ operations for a delta payload that was using REPLACE_BZ operations and merged them.
TEST=brillo_update_payload generated a full payload with REPLACE, REPLACE_BZ and REPLACE_XZ operations.
TEST=Updated unittests.

Change-Id: I29618094a84ef8050f8dd7ede0fd70056dc9d650
diff --git a/payload_generator/ab_generator.cc b/payload_generator/ab_generator.cc
index c9619ba..8c736a5 100644
--- a/payload_generator/ab_generator.cc
+++ b/payload_generator/ab_generator.cc
@@ -28,6 +28,7 @@
 #include "update_engine/payload_generator/delta_diff_generator.h"
 #include "update_engine/payload_generator/delta_diff_utils.h"
 
+using chromeos_update_engine::diff_utils::IsAReplaceOperation;
 using std::string;
 using std::vector;
 
@@ -55,9 +56,8 @@
                                                        blob_file));
   LOG(INFO) << "done reading " << new_part.name;
 
-  TEST_AND_RETURN_FALSE(FragmentOperations(aops,
-                                           new_part.path,
-                                           blob_file));
+  TEST_AND_RETURN_FALSE(
+      FragmentOperations(config.version, aops, new_part.path, blob_file));
   SortOperationsByDestination(aops);
 
   // Use the soft_chunk_size when merging operations to prevent merging all
@@ -68,10 +68,8 @@
     merge_chunk_blocks = hard_chunk_blocks;
   }
 
-  TEST_AND_RETURN_FALSE(MergeOperations(aops,
-                                        merge_chunk_blocks,
-                                        new_part.path,
-                                        blob_file));
+  TEST_AND_RETURN_FALSE(MergeOperations(
+      aops, config.version, merge_chunk_blocks, new_part.path, blob_file));
 
   if (config.version.minor >= kOpSrcHashMinorPayloadVersion)
     TEST_AND_RETURN_FALSE(AddSourceHash(aops, old_part.path));
@@ -84,24 +82,22 @@
   sort(aops->begin(), aops->end(), diff_utils::CompareAopsByDestination);
 }
 
-bool ABGenerator::FragmentOperations(
-    vector<AnnotatedOperation>* aops,
-    const string& target_part_path,
-    BlobFileWriter* blob_file) {
+bool ABGenerator::FragmentOperations(const PayloadVersion& version,
+                                     vector<AnnotatedOperation>* aops,
+                                     const string& target_part_path,
+                                     BlobFileWriter* blob_file) {
   vector<AnnotatedOperation> fragmented_aops;
   for (const AnnotatedOperation& aop : *aops) {
     if (aop.op.type() == InstallOperation::SOURCE_COPY) {
       TEST_AND_RETURN_FALSE(SplitSourceCopy(aop, &fragmented_aops));
-    } else if ((aop.op.type() == InstallOperation::REPLACE) ||
-               (aop.op.type() == InstallOperation::REPLACE_BZ)) {
-      TEST_AND_RETURN_FALSE(SplitReplaceOrReplaceBz(aop, &fragmented_aops,
-                                                    target_part_path,
-                                                    blob_file));
+    } else if (IsAReplaceOperation(aop.op.type())) {
+      TEST_AND_RETURN_FALSE(SplitAReplaceOp(
+          version, aop, target_part_path, &fragmented_aops, blob_file));
     } else {
       fragmented_aops.push_back(aop);
     }
   }
-  *aops = fragmented_aops;
+  *aops = std::move(fragmented_aops);
   return true;
 }
 
@@ -158,15 +154,14 @@
   return true;
 }
 
-bool ABGenerator::SplitReplaceOrReplaceBz(
-    const AnnotatedOperation& original_aop,
-    vector<AnnotatedOperation>* result_aops,
-    const string& target_part_path,
-    BlobFileWriter* blob_file) {
+bool ABGenerator::SplitAReplaceOp(const PayloadVersion& version,
+                                  const AnnotatedOperation& original_aop,
+                                  const string& target_part_path,
+                                  vector<AnnotatedOperation>* result_aops,
+                                  BlobFileWriter* blob_file) {
   InstallOperation original_op = original_aop.op;
+  TEST_AND_RETURN_FALSE(IsAReplaceOperation(original_op.type()));
   const bool is_replace = original_op.type() == InstallOperation::REPLACE;
-  TEST_AND_RETURN_FALSE(is_replace ||
-                        original_op.type() == InstallOperation::REPLACE_BZ);
 
   uint32_t data_offset = original_op.data_offset();
   for (int i = 0; i < original_op.dst_extents_size(); i++) {
@@ -187,8 +182,8 @@
     AnnotatedOperation new_aop;
     new_aop.op = new_op;
     new_aop.name = base::StringPrintf("%s:%d", original_aop.name.c_str(), i);
-    TEST_AND_RETURN_FALSE(AddDataAndSetType(&new_aop, target_part_path,
-                                            blob_file));
+    TEST_AND_RETURN_FALSE(
+        AddDataAndSetType(&new_aop, version, target_part_path, blob_file));
 
     result_aops->push_back(new_aop);
   }
@@ -196,6 +191,7 @@
 }
 
 bool ABGenerator::MergeOperations(vector<AnnotatedOperation>* aops,
+                                  const PayloadVersion& version,
                                   size_t chunk_blocks,
                                   const string& target_part_path,
                                   BlobFileWriter* blob_file) {
@@ -206,6 +202,7 @@
       continue;
     }
     AnnotatedOperation& last_aop = new_aops.back();
+    bool last_is_a_replace = IsAReplaceOperation(last_aop.op.type());
 
     if (last_aop.op.dst_extents_size() <= 0 ||
         curr_aop.op.dst_extents_size() <= 0) {
@@ -220,11 +217,11 @@
     uint32_t combined_block_count =
         last_aop.op.dst_extents(last_dst_idx).num_blocks() +
         curr_aop.op.dst_extents(0).num_blocks();
-    bool good_op_type = curr_aop.op.type() == InstallOperation::SOURCE_COPY ||
-                        curr_aop.op.type() == InstallOperation::REPLACE ||
-                        curr_aop.op.type() == InstallOperation::REPLACE_BZ;
-    if (good_op_type &&
-        last_aop.op.type() == curr_aop.op.type() &&
+    bool is_a_replace = IsAReplaceOperation(curr_aop.op.type());
+
+    bool is_delta_op = curr_aop.op.type() == InstallOperation::SOURCE_COPY;
+    if (((is_delta_op && (last_aop.op.type() == curr_aop.op.type())) ||
+         (is_a_replace && last_is_a_replace)) &&
         last_end_block == curr_start_block &&
         combined_block_count <= chunk_blocks) {
       // If the operations have the same type (which is a type that we can
@@ -235,34 +232,34 @@
                                          last_aop.name.c_str(),
                                          curr_aop.name.c_str());
 
-      ExtendExtents(last_aop.op.mutable_src_extents(),
-                    curr_aop.op.src_extents());
-      if (curr_aop.op.src_length() > 0)
-        last_aop.op.set_src_length(last_aop.op.src_length() +
-                                   curr_aop.op.src_length());
+      if (is_delta_op) {
+        ExtendExtents(last_aop.op.mutable_src_extents(),
+                      curr_aop.op.src_extents());
+        if (curr_aop.op.src_length() > 0)
+          last_aop.op.set_src_length(last_aop.op.src_length() +
+                                     curr_aop.op.src_length());
+      }
       ExtendExtents(last_aop.op.mutable_dst_extents(),
                     curr_aop.op.dst_extents());
       if (curr_aop.op.dst_length() > 0)
         last_aop.op.set_dst_length(last_aop.op.dst_length() +
                                    curr_aop.op.dst_length());
       // Set the data length to zero so we know to add the blob later.
-      if (curr_aop.op.type() == InstallOperation::REPLACE ||
-          curr_aop.op.type() == InstallOperation::REPLACE_BZ) {
+      if (is_a_replace)
         last_aop.op.set_data_length(0);
-      }
     } else {
       // Otherwise just include the extent as is.
       new_aops.push_back(curr_aop);
     }
   }
 
-  // Set the blobs for REPLACE/REPLACE_BZ operations that have been merged.
+  // Set the blobs for REPLACE/REPLACE_BZ/REPLACE_XZ operations that have been
+  // merged.
   for (AnnotatedOperation& curr_aop : new_aops) {
     if (curr_aop.op.data_length() == 0 &&
-        (curr_aop.op.type() == InstallOperation::REPLACE ||
-         curr_aop.op.type() == InstallOperation::REPLACE_BZ)) {
-      TEST_AND_RETURN_FALSE(AddDataAndSetType(&curr_aop, target_part_path,
-                                              blob_file));
+        IsAReplaceOperation(curr_aop.op.type())) {
+      TEST_AND_RETURN_FALSE(
+          AddDataAndSetType(&curr_aop, version, target_part_path, blob_file));
     }
   }
 
@@ -271,10 +268,10 @@
 }
 
 bool ABGenerator::AddDataAndSetType(AnnotatedOperation* aop,
+                                    const PayloadVersion& version,
                                     const string& target_part_path,
                                     BlobFileWriter* blob_file) {
-  TEST_AND_RETURN_FALSE(aop->op.type() == InstallOperation::REPLACE ||
-                        aop->op.type() == InstallOperation::REPLACE_BZ);
+  TEST_AND_RETURN_FALSE(IsAReplaceOperation(aop->op.type()));
 
   brillo::Blob data(aop->op.dst_length());
   vector<Extent> dst_extents;
@@ -285,25 +282,16 @@
                                            data.size(),
                                            kBlockSize));
 
-  brillo::Blob data_bz;
-  TEST_AND_RETURN_FALSE(BzipCompress(data, &data_bz));
-  CHECK(!data_bz.empty());
+  brillo::Blob blob;
+  InstallOperation_Type op_type;
+  TEST_AND_RETURN_FALSE(
+      diff_utils::GenerateBestFullOperation(data, version, &blob, &op_type));
 
-  brillo::Blob* data_p = nullptr;
-  InstallOperation_Type new_op_type;
-  if (data_bz.size() < data.size()) {
-    new_op_type = InstallOperation::REPLACE_BZ;
-    data_p = &data_bz;
-  } else {
-    new_op_type = InstallOperation::REPLACE;
-    data_p = &data;
-  }
-
-  // If the operation doesn't point to a data blob, then we add it.
-  if (aop->op.type() != new_op_type ||
-      aop->op.data_length() != data_p->size()) {
-    aop->op.set_type(new_op_type);
-    aop->SetOperationBlob(data_p, blob_file);
+  // If the operation doesn't point to a data blob or points to a data blob of
+  // a different type then we add it.
+  if (aop->op.type() != op_type || aop->op.data_length() != blob.size()) {
+    aop->op.set_type(op_type);
+    aop->SetOperationBlob(blob, blob_file);
   }
 
   return true;
diff --git a/payload_generator/ab_generator.h b/payload_generator/ab_generator.h
index c2837c0..77afb87 100644
--- a/payload_generator/ab_generator.h
+++ b/payload_generator/ab_generator.h
@@ -62,7 +62,8 @@
   // The |target_part_path| is the filename of the new image, where the
   // destination extents refer to. The blobs of the operations in |aops| should
   // reference |blob_file|. |blob_file| are updated if needed.
-  static bool FragmentOperations(std::vector<AnnotatedOperation>* aops,
+  static bool FragmentOperations(const PayloadVersion& version,
+                                 std::vector<AnnotatedOperation>* aops,
                                  const std::string& target_part_path,
                                  BlobFileWriter* blob_file);
 
@@ -84,25 +85,27 @@
   static bool SplitSourceCopy(const AnnotatedOperation& original_aop,
                               std::vector<AnnotatedOperation>* result_aops);
 
-  // Takes a REPLACE/REPLACE_BZ operation |aop|, and adds one operation for each
-  // dst extent in |aop| to |ops|. The new operations added to |ops| will have
-  // only one dst extent each, and may be either a REPLACE or REPLACE_BZ
-  // depending on whether compression is advantageous.
-  static bool SplitReplaceOrReplaceBz(
-      const AnnotatedOperation& original_aop,
-      std::vector<AnnotatedOperation>* result_aops,
-      const std::string& target_part,
-      BlobFileWriter* blob_file);
+  // Takes a REPLACE, REPLACE_BZ or REPLACE_XZ operation |aop|, and adds one
+  // operation for each dst extent in |aop| to |ops|. The new operations added
+  // to |ops| will have only one dst extent each, and may be of a different
+  // type depending on whether compression is advantageous.
+  static bool SplitAReplaceOp(const PayloadVersion& version,
+                              const AnnotatedOperation& original_aop,
+                              const std::string& target_part,
+                              std::vector<AnnotatedOperation>* result_aops,
+                              BlobFileWriter* blob_file);
 
   // Takes a sorted (by first destination extent) vector of operations |aops|
-  // and merges SOURCE_COPY, REPLACE, and REPLACE_BZ operations in that vector.
+  // and merges SOURCE_COPY, REPLACE, REPLACE_BZ and REPLACE_XZ, operations in
+  // that vector.
   // It will merge two operations if:
-  //   - They are of the same type.
-  //   - They are contiguous.
+  //   - They are both REPLACE_*, or they are both SOURCE_COPY,
+  //   - Their destination blocks are contiguous.
   //   - Their combined blocks do not exceed |chunk_blocks| blocks.
   // Note that unlike other methods, you can't pass a negative number in
   // |chunk_blocks|.
   static bool MergeOperations(std::vector<AnnotatedOperation>* aops,
+                              const PayloadVersion& version,
                               size_t chunk_blocks,
                               const std::string& target_part,
                               BlobFileWriter* blob_file);
@@ -113,14 +116,15 @@
                             const std::string& source_part_path);
 
  private:
-  // Adds the data payload for a REPLACE/REPLACE_BZ operation |aop| by reading
-  // its output extents from |target_part_path| and appending a corresponding
-  // data blob to |data_fd|. The blob will be compressed if this is smaller than
-  // the uncompressed form, and the operation type will be set accordingly.
-  // |*blob_file| will be updated as well. If the operation happens to have
-  // the right type and already points to a data blob, nothing is written.
-  // Caller should only set type and data blob if it's valid.
+  // Adds the data payload for a REPLACE/REPLACE_BZ/REPLACE_XZ operation |aop|
+  // by reading its output extents from |target_part_path| and appending a
+  // corresponding data blob to |blob_file|. The blob will be compressed if this
+  // is smaller than the uncompressed form, and the operation type will be set
+  // accordingly. |*blob_file| will be updated as well. If the operation happens
+  // to have the right type and already points to a data blob, nothing is
+  // written. Caller should only set type and data blob if it's valid.
   static bool AddDataAndSetType(AnnotatedOperation* aop,
+                                const PayloadVersion& version,
                                 const std::string& target_part_path,
                                 BlobFileWriter* blob_file);
 
diff --git a/payload_generator/ab_generator_unittest.cc b/payload_generator/ab_generator_unittest.cc
index 60bdf26..224880d 100644
--- a/payload_generator/ab_generator_unittest.cc
+++ b/payload_generator/ab_generator_unittest.cc
@@ -20,8 +20,8 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
-#include <string>
 #include <random>
+#include <string>
 #include <vector>
 
 #include <gtest/gtest.h>
@@ -122,8 +122,10 @@
 
   // Split the operation.
   vector<AnnotatedOperation> result_ops;
-  ASSERT_TRUE(ABGenerator::SplitReplaceOrReplaceBz(
-          aop, &result_ops, part_path, &blob_file));
+  PayloadVersion version(kChromeOSMajorPayloadVersion,
+                         kSourceMinorPayloadVersion);
+  ASSERT_TRUE(ABGenerator::SplitAReplaceOp(
+      version, aop, part_path, &result_ops, &blob_file));
 
   // Check the result.
   InstallOperation_Type expected_type =
@@ -288,8 +290,10 @@
   BlobFileWriter blob_file(data_fd, &data_file_size);
 
   // Merge the operations.
-  EXPECT_TRUE(ABGenerator::MergeOperations(
-      &aops, 5, part_path, &blob_file));
+  PayloadVersion version(kChromeOSMajorPayloadVersion,
+                         kSourceMinorPayloadVersion);
+  EXPECT_TRUE(
+      ABGenerator::MergeOperations(&aops, version, 5, part_path, &blob_file));
 
   // Check the result.
   InstallOperation_Type expected_op_type =
@@ -475,7 +479,9 @@
   aops.push_back(third_aop);
 
   BlobFileWriter blob_file(0, nullptr);
-  EXPECT_TRUE(ABGenerator::MergeOperations(&aops, 5, "", &blob_file));
+  PayloadVersion version(kChromeOSMajorPayloadVersion,
+                         kSourceMinorPayloadVersion);
+  EXPECT_TRUE(ABGenerator::MergeOperations(&aops, version, 5, "", &blob_file));
 
   EXPECT_EQ(1U, aops.size());
   InstallOperation first_result_op = aops[0].op;
@@ -512,9 +518,8 @@
   // Test to make sure we don't merge operations that shouldn't be merged.
   vector<AnnotatedOperation> aops;
   InstallOperation first_op;
-  first_op.set_type(InstallOperation::REPLACE_BZ);
+  first_op.set_type(InstallOperation::ZERO);
   *(first_op.add_dst_extents()) = ExtentForRange(0, 1);
-  first_op.set_data_length(kBlockSize);
   AnnotatedOperation first_aop;
   first_aop.op = first_op;
   aops.push_back(first_aop);
@@ -547,7 +552,9 @@
   aops.push_back(fourth_aop);
 
   BlobFileWriter blob_file(0, nullptr);
-  EXPECT_TRUE(ABGenerator::MergeOperations(&aops, 4, "", &blob_file));
+  PayloadVersion version(kChromeOSMajorPayloadVersion,
+                         kSourceMinorPayloadVersion);
+  EXPECT_TRUE(ABGenerator::MergeOperations(&aops, version, 4, "", &blob_file));
 
   // No operations were merged, the number of ops is the same.
   EXPECT_EQ(4U, aops.size());
diff --git a/payload_generator/annotated_operation.cc b/payload_generator/annotated_operation.cc
index 984f921..b7f3434 100644
--- a/payload_generator/annotated_operation.cc
+++ b/payload_generator/annotated_operation.cc
@@ -36,12 +36,12 @@
 }
 }  // namespace
 
-bool AnnotatedOperation::SetOperationBlob(brillo::Blob* blob,
+bool AnnotatedOperation::SetOperationBlob(const brillo::Blob& blob,
                                           BlobFileWriter* blob_file) {
-  off_t data_offset = blob_file->StoreBlob(*blob);
+  off_t data_offset = blob_file->StoreBlob(blob);
   TEST_AND_RETURN_FALSE(data_offset != -1);
   op.set_data_offset(data_offset);
-  op.set_data_length(blob->size());
+  op.set_data_length(blob.size());
   return true;
 }
 
diff --git a/payload_generator/annotated_operation.h b/payload_generator/annotated_operation.h
index 4076070..653bab0 100644
--- a/payload_generator/annotated_operation.h
+++ b/payload_generator/annotated_operation.h
@@ -35,10 +35,10 @@
   // The InstallOperation, as defined by the protobuf.
   InstallOperation op;
 
-  // Writes |blob| to the end of |data_fd|, and updates |data_file_size| to
-  // match the new size of |data_fd|. It sets the data_offset and data_length
-  // in AnnotatedOperation to match the offset and size of |blob| in |data_fd|.
-  bool SetOperationBlob(brillo::Blob* blob, BlobFileWriter* blob_file);
+  // Writes |blob| to the end of |blob_file|. It sets the data_offset and
+  // data_length in AnnotatedOperation to match the offset and size of |blob|
+  // in |blob_file|.
+  bool SetOperationBlob(const brillo::Blob& blob, BlobFileWriter* blob_file);
 };
 
 // For logging purposes.
diff --git a/payload_generator/delta_diff_utils.cc b/payload_generator/delta_diff_utils.cc
index f7b69f5..e6751f1 100644
--- a/payload_generator/delta_diff_utils.cc
+++ b/payload_generator/delta_diff_utils.cc
@@ -34,6 +34,7 @@
 #include "update_engine/payload_generator/delta_diff_generator.h"
 #include "update_engine/payload_generator/extent_ranges.h"
 #include "update_engine/payload_generator/extent_utils.h"
+#include "update_engine/payload_generator/xz.h"
 
 using std::map;
 using std::string;
@@ -484,7 +485,7 @@
     // Write the data
     if (operation.type() != InstallOperation::MOVE &&
         operation.type() != InstallOperation::SOURCE_COPY) {
-      TEST_AND_RETURN_FALSE(aop.SetOperationBlob(&data, blob_file));
+      TEST_AND_RETURN_FALSE(aop.SetOperationBlob(data, blob_file));
     } else {
       TEST_AND_RETURN_FALSE(blob_file->StoreBlob(data) != -1);
     }
@@ -493,6 +494,60 @@
   return true;
 }
 
+bool GenerateBestFullOperation(const brillo::Blob& new_data,
+                               const PayloadVersion& version,
+                               brillo::Blob* out_blob,
+                               InstallOperation_Type* out_type) {
+  if (new_data.empty())
+    return false;
+
+  if (version.OperationAllowed(InstallOperation::ZERO) &&
+      std::all_of(
+          new_data.begin(), new_data.end(), [](uint8_t x) { return x == 0; })) {
+    // The read buffer is all zeros, so produce a ZERO operation. No need to
+    // check other types of operations in this case.
+    *out_blob = brillo::Blob();
+    *out_type = InstallOperation::ZERO;
+    return true;
+  }
+
+  bool out_blob_set = false;
+
+  // Try compressing |new_data| with xz first.
+  if (version.OperationAllowed(InstallOperation::REPLACE_XZ)) {
+    brillo::Blob new_data_xz;
+    if (XzCompress(new_data, &new_data_xz) && !new_data_xz.empty()) {
+      *out_type = InstallOperation::REPLACE_XZ;
+      *out_blob = std::move(new_data_xz);
+      out_blob_set = true;
+    }
+  }
+
+  // Try compressing it with bzip2.
+  if (version.OperationAllowed(InstallOperation::REPLACE_BZ)) {
+    brillo::Blob new_data_bz;
+    // TODO(deymo): Implement some heuristic to determine if it is worth trying
+    // to compress the blob with bzip2 if we already have a good REPLACE_XZ.
+    if (BzipCompress(new_data, &new_data_bz) && !new_data_bz.empty() &&
+        (!out_blob_set || out_blob->size() > new_data_bz.size())) {
+      // A REPLACE_BZ is better or nothing else was set.
+      *out_type = InstallOperation::REPLACE_BZ;
+      *out_blob = std::move(new_data_bz);
+      out_blob_set = true;
+    }
+  }
+
+  // If nothing else worked or it was badly compressed we try a REPLACE.
+  if (!out_blob_set || out_blob->size() >= new_data.size()) {
+    *out_type = InstallOperation::REPLACE;
+    // This needs to make a copy of the data in the case bzip or xz didn't
+    // compress well, which is not the common case so the performance hit is
+    // low.
+    *out_blob = new_data;
+  }
+  return true;
+}
+
 bool ReadExtentsToDiff(const string& old_part,
                        const string& new_part,
                        const vector<Extent>& old_extents,
@@ -501,8 +556,6 @@
                        brillo::Blob* out_data,
                        InstallOperation* out_op) {
   InstallOperation operation;
-  // Data blob that will be written to delta file.
-  const brillo::Blob* data_blob = nullptr;
 
   // We read blocks from old_extents and write blocks to new_extents.
   uint64_t blocks_to_read = BlocksInExtents(old_extents);
@@ -540,25 +593,17 @@
                                            kBlockSize));
   TEST_AND_RETURN_FALSE(!new_data.empty());
 
+  // Data blob that will be written to delta file.
+  brillo::Blob data_blob;
 
-  // Using a REPLACE is always an option.
-  operation.set_type(InstallOperation::REPLACE);
-  data_blob = &new_data;
-
-  // Try compressing it with bzip2.
-  brillo::Blob new_data_bz;
-  TEST_AND_RETURN_FALSE(BzipCompress(new_data, &new_data_bz));
-  CHECK(!new_data_bz.empty());
-  if (new_data_bz.size() < data_blob->size()) {
-    // A REPLACE_BZ is better.
-    operation.set_type(InstallOperation::REPLACE_BZ);
-    data_blob = &new_data_bz;
-  }
+  // Try generating a full operation for the given new data, regardless of the
+  // old_data.
+  InstallOperation_Type op_type;
+  TEST_AND_RETURN_FALSE(
+      GenerateBestFullOperation(new_data, version, &data_blob, &op_type));
+  operation.set_type(op_type);
 
   brillo::Blob old_data;
-  brillo::Blob empty_blob;
-  brillo::Blob bsdiff_delta;
-  brillo::Blob imgdiff_delta;
   if (blocks_to_read > 0) {
     // Read old data.
     TEST_AND_RETURN_FALSE(
@@ -569,7 +614,7 @@
       operation.set_type(version.OperationAllowed(InstallOperation::SOURCE_COPY)
                              ? InstallOperation::SOURCE_COPY
                              : InstallOperation::MOVE);
-      data_blob = &empty_blob;
+      data_blob = brillo::Blob();
     } else if (bsdiff_allowed || imgdiff_allowed) {
       // If the source file is considered bsdiff safe (no bsdiff bugs
       // triggered), see if BSDIFF encoding is smaller.
@@ -585,18 +630,20 @@
           new_chunk.value().c_str(), new_data.data(), new_data.size()));
 
       if (bsdiff_allowed) {
+        brillo::Blob bsdiff_delta;
         TEST_AND_RETURN_FALSE(DiffFiles(
             kBsdiffPath, old_chunk.value(), new_chunk.value(), &bsdiff_delta));
         CHECK_GT(bsdiff_delta.size(), static_cast<brillo::Blob::size_type>(0));
-        if (bsdiff_delta.size() < data_blob->size()) {
+        if (bsdiff_delta.size() < data_blob.size()) {
           operation.set_type(
               version.OperationAllowed(InstallOperation::SOURCE_BSDIFF)
                   ? InstallOperation::SOURCE_BSDIFF
                   : InstallOperation::BSDIFF);
-          data_blob = &bsdiff_delta;
+          data_blob = std::move(bsdiff_delta);
         }
       }
       if (imgdiff_allowed && ContainsGZip(old_data) && ContainsGZip(new_data)) {
+        brillo::Blob imgdiff_delta;
         // Imgdiff might fail in some cases, only use the result if it succeed,
         // otherwise print the extents to analyze.
         if (DiffFiles(kImgdiffPath,
@@ -604,9 +651,9 @@
                       new_chunk.value(),
                       &imgdiff_delta) &&
             imgdiff_delta.size() > 0) {
-          if (imgdiff_delta.size() < data_blob->size()) {
+          if (imgdiff_delta.size() < data_blob.size()) {
             operation.set_type(InstallOperation::IMGDIFF);
-            data_blob = &imgdiff_delta;
+            data_blob = std::move(imgdiff_delta);
           }
         } else {
           LOG(ERROR) << "Imgdiff failed with source extents: "
@@ -633,13 +680,12 @@
   StoreExtents(dst_extents, operation.mutable_dst_extents());
 
   // Replace operations should not have source extents.
-  if (operation.type() == InstallOperation::REPLACE ||
-      operation.type() == InstallOperation::REPLACE_BZ) {
+  if (IsAReplaceOperation(operation.type())) {
     operation.clear_src_extents();
     operation.clear_src_length();
   }
 
-  *out_data = std::move(*data_blob);
+  *out_data = std::move(data_blob);
   *out_op = operation;
 
   return true;
@@ -675,6 +721,12 @@
   return true;
 }
 
+bool IsAReplaceOperation(InstallOperation_Type op_type) {
+  return (op_type == InstallOperation::REPLACE ||
+          op_type == InstallOperation::REPLACE_BZ ||
+          op_type == InstallOperation::REPLACE_XZ);
+}
+
 // Returns true if |op| is a no-op operation that doesn't do any useful work
 // (e.g., a move operation that copies blocks onto themselves).
 bool IsNoopOperation(const InstallOperation& op) {
diff --git a/payload_generator/delta_diff_utils.h b/payload_generator/delta_diff_utils.h
index db88e6c..4cc85fc 100644
--- a/payload_generator/delta_diff_utils.h
+++ b/payload_generator/delta_diff_utils.h
@@ -110,6 +110,18 @@
                const std::string& new_file,
                brillo::Blob* out);
 
+// Generates the best allowed full operation to produce |new_data|. The allowed
+// operations are based on |payload_version|. The operation blob will be stored
+// in |out_blob| and the resulting operation type in |out_type|. Returns whether
+// a valid full operation was generated.
+bool GenerateBestFullOperation(const brillo::Blob& new_data,
+                               const PayloadVersion& version,
+                               brillo::Blob* out_blob,
+                               InstallOperation_Type* out_type);
+
+// Returns whether op_type is one of the REPLACE full operations.
+bool IsAReplaceOperation(InstallOperation_Type op_type);
+
 // Returns true if |op| is a no-op operation that doesn't do any useful work
 // (e.g., a move operation that copies blocks onto themselves).
 bool IsNoopOperation(const InstallOperation& op);
diff --git a/payload_generator/full_update_generator.cc b/payload_generator/full_update_generator.cc
index 6d419c3..3240606 100644
--- a/payload_generator/full_update_generator.cc
+++ b/payload_generator/full_update_generator.cc
@@ -31,8 +31,7 @@
 #include <brillo/secure_blob.h>
 
 #include "update_engine/common/utils.h"
-#include "update_engine/payload_generator/bzip.h"
-#include "update_engine/payload_generator/xz.h"
+#include "update_engine/payload_generator/delta_diff_utils.h"
 
 using std::vector;
 
@@ -103,35 +102,14 @@
                                         &bytes_read));
   TEST_AND_RETURN_FALSE(bytes_read == static_cast<ssize_t>(size_));
 
-  if (version_.OperationAllowed(InstallOperation::ZERO) &&
-      std::all_of(buffer_in_.begin(),
-                  buffer_in_.end(),
-                  [](uint8_t x) {return x == 0;})) {
-    // The read buffer is all zeros, so produce a ZERO operation. No need to
-    // check other types of operations in this case.
-    aop_->op.set_type(InstallOperation::ZERO);
-    return true;
-  }
+  InstallOperation_Type op_type;
+  TEST_AND_RETURN_FALSE(diff_utils::GenerateBestFullOperation(
+      buffer_in_, version_, &op_blob, &op_type));
 
-  TEST_AND_RETURN_FALSE(BzipCompress(buffer_in_, &op_blob));
-  InstallOperation_Type op_type = InstallOperation::REPLACE_BZ;
-
-  if (version_.OperationAllowed(InstallOperation::REPLACE_XZ)) {
-    brillo::Blob xz_blob;
-    if (XzCompress(buffer_in_, &xz_blob) && xz_blob.size() < op_blob.size()) {
-      op_blob = std::move(xz_blob);
-      op_type = InstallOperation::REPLACE_XZ;
-    }
-  }
-
-  if (op_blob.size() >= buffer_in_.size()) {
-    // A REPLACE is cheaper than a REPLACE_BZ in this case.
-    op_blob = std::move(buffer_in_);
-    op_type = InstallOperation::REPLACE;
-  }
-
-  TEST_AND_RETURN_FALSE(aop_->SetOperationBlob(&op_blob, blob_file_));
   aop_->op.set_type(op_type);
+  if (!op_blob.empty()) {
+    TEST_AND_RETURN_FALSE(aop_->SetOperationBlob(op_blob, blob_file_));
+  }
   return true;
 }