update_engine: Implement soft/hard chunk size limits.

The current approach uses a single chunk_size value, that defaults to
-1 (or "whole files") for delta payloads. The "whole files" default is
the preferred value since BSDIFF and SOURCE_BSDIFF operations (the most
common operations in real delta payloads) will benefit from having
access to the whole file in the old and new partition, in particular in
cases like the chrome binary (~120 MiB in size).

On the other hand, very big chunks have a big memory footprint in most
cases. Current implementations of BSDIFF, REPLACE_BZ and REPLACE will
require as much private memory as the destination chunk_size or more.
Because of this and due to the lack of old data, a small chunk_size
(1 MiB) is used for full payloads.

To break this tension between having a big chunk_size for operations
that will benefit from it versus having a small chunk_size for cases
where it doesn't change anything, this patch introduces two chunk
size values: a hard and soft limit.

The hard_chunk_size has the same meaning as the old chunk_size: no
operation should have a destination bigger than the provided hard
limit. The soft_chunk_size is the preferred chunk size for an
operation when a bigger chunk will not significantly benefit the
final payload size. Having a small chunk size for operations like
REPLACE_BZ, REPLACE, SOURCE_COPY and MOVE is important to keep the
memory footprint low when the extra memory is not required.

The new soft_chunk_size limit is used when merging operations
(previously hard-coded to 1 MiB) and when generating new
operations for zeroed and moved blocks.

BUG=chromium:485397
TEST=Run cros_generate_update_payload for full and delta payloads.
Inspected the chunk size in the resulting payloads.

Change-Id: I370048a81913ad23a151cfef6690627b7fff7277
Reviewed-on: https://chromium-review.googlesource.com/286568
Reviewed-by: Gilad Arnold <garnold@chromium.org>
Tested-by: Alex Deymo <deymo@chromium.org>
Trybot-Ready: Alex Deymo <deymo@chromium.org>
Commit-Queue: Alex Deymo <deymo@chromium.org>
diff --git a/delta_performer.cc b/delta_performer.cc
index 1006f17..725ca00 100644
--- a/delta_performer.cc
+++ b/delta_performer.cc
@@ -58,7 +58,6 @@
 
 const uint32_t kInPlaceMinorPayloadVersion = 1;
 const uint32_t kSourceMinorPayloadVersion = 2;
-const size_t kDefaultChunkSize = 1024 * 1024;
 
 namespace {
 const int kUpdateStateOperationInvalid = -1;
diff --git a/delta_performer.h b/delta_performer.h
index ba479af..40ab55f 100644
--- a/delta_performer.h
+++ b/delta_performer.h
@@ -30,9 +30,6 @@
 // The minor version used by the A to B delta generator algorithm.
 extern const uint32_t kSourceMinorPayloadVersion;
 
-// Chunk size used for payloads during test.
-extern const size_t kDefaultChunkSize;
-
 class PrefsInterface;
 
 // This class performs the actions in a delta update synchronously. The delta
diff --git a/delta_performer_unittest.cc b/delta_performer_unittest.cc
index e6e9a1e..a0cb72f 100644
--- a/delta_performer_unittest.cc
+++ b/delta_performer_unittest.cc
@@ -314,7 +314,7 @@
 static void GenerateDeltaFile(bool full_kernel,
                               bool full_rootfs,
                               bool noop,
-                              off_t chunk_size,
+                              ssize_t chunk_size,
                               SignatureTest signature_test,
                               DeltaState *state,
                               uint32_t minor_version) {
@@ -529,7 +529,7 @@
 
     PayloadGenerationConfig payload_config;
     payload_config.is_delta = !full_rootfs;
-    payload_config.chunk_size = chunk_size;
+    payload_config.hard_chunk_size = chunk_size;
     payload_config.rootfs_partition_size = kRootFSPartitionSize;
     payload_config.minor_version = minor_version;
     if (!full_rootfs) {
@@ -541,8 +541,9 @@
       EXPECT_TRUE(payload_config.source.rootfs.OpenFilesystem());
       EXPECT_TRUE(payload_config.source.kernel.OpenFilesystem());
     } else {
-      if (payload_config.chunk_size == -1)
-        payload_config.chunk_size = kDefaultChunkSize;
+      if (payload_config.hard_chunk_size == -1)
+        // Use 1 MiB chunk size for the full unittests.
+        payload_config.hard_chunk_size = 1024 * 1024;
     }
     payload_config.target.rootfs.path = state->b_img;
     payload_config.target.kernel.path = state->new_kernel;
@@ -906,7 +907,7 @@
 }
 
 void DoSmallImageTest(bool full_kernel, bool full_rootfs, bool noop,
-                      off_t chunk_size,
+                      ssize_t chunk_size,
                       SignatureTest signature_test,
                       bool hash_checks_mandatory, uint32_t minor_version) {
   DeltaState state;
diff --git a/payload_generator/ab_generator.cc b/payload_generator/ab_generator.cc
index 8352927..1f7b14e 100644
--- a/payload_generator/ab_generator.cc
+++ b/payload_generator/ab_generator.cc
@@ -27,15 +27,17 @@
     vector<AnnotatedOperation>* rootfs_ops,
     vector<AnnotatedOperation>* kernel_ops) {
 
-  off_t chunk_blocks = (config.chunk_size == -1 ? -1 :
-                        config.chunk_size / config.block_size);
+  ssize_t hard_chunk_blocks = (config.hard_chunk_size == -1 ? -1 :
+                               config.hard_chunk_size / config.block_size);
+  size_t soft_chunk_blocks = config.soft_chunk_size / config.block_size;
 
   rootfs_ops->clear();
   TEST_AND_RETURN_FALSE(diff_utils::DeltaReadPartition(
       rootfs_ops,
       config.source.rootfs,
       config.target.rootfs,
-      chunk_blocks,
+      hard_chunk_blocks,
+      soft_chunk_blocks,
       data_file_fd,
       data_file_size,
       true));  // src_ops_allowed
@@ -46,7 +48,8 @@
       kernel_ops,
       config.source.kernel,
       config.target.kernel,
-      chunk_blocks,
+      hard_chunk_blocks,
+      soft_chunk_blocks,
       data_file_fd,
       data_file_size,
       true));  // src_ops_allowed
@@ -62,15 +65,22 @@
                                            data_file_size));
   SortOperationsByDestination(rootfs_ops);
   SortOperationsByDestination(kernel_ops);
-  // TODO(alliewood): Change merge operations to use config.chunk_size once
-  // specifying chunk_size on the command line works. crbug/485397.
+
+  // Use the soft_chunk_size when merging operations to prevent merging all
+  // the operations into a huge one if there's no hard limit.
+  size_t merge_chunk_blocks = soft_chunk_blocks;
+  if (hard_chunk_blocks != -1 &&
+      static_cast<size_t>(hard_chunk_blocks) < soft_chunk_blocks) {
+    merge_chunk_blocks = hard_chunk_blocks;
+  }
+
   TEST_AND_RETURN_FALSE(MergeOperations(rootfs_ops,
-                                        kDefaultChunkSize,
+                                        merge_chunk_blocks,
                                         config.target.rootfs.path,
                                         data_file_fd,
                                         data_file_size));
   TEST_AND_RETURN_FALSE(MergeOperations(kernel_ops,
-                                        kDefaultChunkSize,
+                                        merge_chunk_blocks,
                                         config.target.kernel.path,
                                         data_file_fd,
                                         data_file_size));
@@ -203,7 +213,7 @@
 }
 
 bool ABGenerator::MergeOperations(vector<AnnotatedOperation>* aops,
-                                  off_t chunk_size,
+                                  size_t chunk_blocks,
                                   const string& target_part_path,
                                   int data_fd,
                                   off_t* data_file_size) {
@@ -238,7 +248,7 @@
     if (good_op_type &&
         last_aop.op.type() == curr_aop.op.type() &&
         last_end_block == curr_start_block &&
-        static_cast<off_t>(combined_block_count * kBlockSize) <= chunk_size) {
+        combined_block_count <= chunk_blocks) {
       // If the operations have the same type (which is a type that we can
       // merge), are contiguous, are fragmented to have one destination extent,
       // and their combined block count would be less than chunk size, merge
diff --git a/payload_generator/ab_generator.h b/payload_generator/ab_generator.h
index d027f22..6450776 100644
--- a/payload_generator/ab_generator.h
+++ b/payload_generator/ab_generator.h
@@ -92,9 +92,11 @@
   // It will merge two operations if:
   //   - They are of the same type.
   //   - They are contiguous.
-  //   - Their combined blocks do not exceed |chunk_size|.
+  //   - Their combined blocks do not exceed |chunk_blocks| blocks.
+  // Note that unlike other methods, you can't pass a negative number in
+  // |chunk_blocks|.
   static bool MergeOperations(std::vector<AnnotatedOperation>* aops,
-                              off_t chunk_size,
+                              size_t chunk_blocks,
                               const std::string& target_part,
                               int data_fd,
                               off_t* data_file_size);
diff --git a/payload_generator/ab_generator_unittest.cc b/payload_generator/ab_generator_unittest.cc
index 9241701..466b699 100644
--- a/payload_generator/ab_generator_unittest.cc
+++ b/payload_generator/ab_generator_unittest.cc
@@ -278,7 +278,7 @@
 
   // Merge the operations.
   EXPECT_TRUE(ABGenerator::MergeOperations(
-      &aops, 5 * kBlockSize, part_path, data_fd, &data_file_size));
+      &aops, 5, part_path, data_fd, &data_file_size));
 
   // Check the result.
   DeltaArchiveManifest_InstallOperation_Type expected_op_type =
@@ -471,8 +471,7 @@
   third_aop.name = "3";
   aops.push_back(third_aop);
 
-  EXPECT_TRUE(ABGenerator::MergeOperations(&aops, 5 * kBlockSize,
-                                           "", 0, nullptr));
+  EXPECT_TRUE(ABGenerator::MergeOperations(&aops, 5, "", 0, nullptr));
 
   EXPECT_EQ(aops.size(), 1);
   DeltaArchiveManifest_InstallOperation first_result_op = aops[0].op;
@@ -548,8 +547,7 @@
   fourth_aop.op = fourth_op;
   aops.push_back(fourth_aop);
 
-  EXPECT_TRUE(ABGenerator::MergeOperations(
-      &aops, 4 * kBlockSize, "", 0, nullptr));
+  EXPECT_TRUE(ABGenerator::MergeOperations(&aops, 4, "", 0, nullptr));
 
   // No operations were merged, the number of ops is the same.
   EXPECT_EQ(aops.size(), 4);
diff --git a/payload_generator/delta_diff_utils.cc b/payload_generator/delta_diff_utils.cc
index 35f3e7f..53396b0 100644
--- a/payload_generator/delta_diff_utils.cc
+++ b/payload_generator/delta_diff_utils.cc
@@ -142,7 +142,8 @@
     vector<AnnotatedOperation>* aops,
     const PartitionConfig& old_part,
     const PartitionConfig& new_part,
-    off_t chunk_blocks,
+    ssize_t hard_chunk_blocks,
+    size_t soft_chunk_blocks,
     int data_fd,
     off_t* data_file_size,
     bool src_ops_allowed) {
@@ -155,7 +156,7 @@
       new_part.path,
       old_part.fs_interface ? old_part.fs_interface->GetBlockCount() : 0,
       new_part.fs_interface->GetBlockCount(),
-      chunk_blocks,
+      soft_chunk_blocks,
       src_ops_allowed,
       data_fd,
       data_file_size,
@@ -215,7 +216,7 @@
         old_file_extents,
         new_file_extents,
         new_file.name,  // operation name
-        chunk_blocks,
+        hard_chunk_blocks,
         data_fd,
         data_file_size,
         src_ops_allowed));
@@ -235,7 +236,11 @@
   }
 
   LOG(INFO) << "Scanning " << BlocksInExtents(new_unvisited)
-            << " unwritten blocks";
+            << " unwritten blocks using chunk size of "
+            << soft_chunk_blocks << " blocks.";
+  // We use the soft_chunk_blocks limit for the <non-file-data> as we don't
+  // really know the structure of this data and we should not expect it to have
+  // redundancy between partitions.
   TEST_AND_RETURN_FALSE(DeltaReadFile(
       aops,
       old_part.path,
@@ -243,7 +248,7 @@
       old_unvisited,
       new_unvisited,
       "<non-file-data>",  // operation name
-      chunk_blocks,
+      soft_chunk_blocks,
       data_fd,
       data_file_size,
       src_ops_allowed));
@@ -257,7 +262,7 @@
     const string& new_part,
     size_t old_num_blocks,
     size_t new_num_blocks,
-    off_t chunk_blocks,
+    ssize_t chunk_blocks,
     bool src_ops_allowed,
     int data_fd,
     off_t* data_file_size,
@@ -404,7 +409,7 @@
     const vector<Extent>& old_extents,
     const vector<Extent>& new_extents,
     const string& name,
-    off_t chunk_blocks,
+    ssize_t chunk_blocks,
     int data_fd,
     off_t* data_file_size,
     bool src_ops_allowed) {
diff --git a/payload_generator/delta_diff_utils.h b/payload_generator/delta_diff_utils.h
index ee8979c..dc837b0 100644
--- a/payload_generator/delta_diff_utils.h
+++ b/payload_generator/delta_diff_utils.h
@@ -25,10 +25,16 @@
 // blocks in that partition (if available) to determine the best way to compress
 // the new files (REPLACE, REPLACE_BZ, COPY, BSDIFF) and writes any necessary
 // data to the end of |data_fd| updating |data_file_size| accordingly.
+// |hard_chunk_blocks| and |soft_chunk_blocks| are the hard and soft chunk
+// limits in number of blocks respectively. The soft chunk limit is used to
+// split MOVE and SOURCE_COPY operations and REPLACE_BZ of zeroed blocks, while
+// the hard limit is used to split a file when generating other operations. A
+// value of -1 in |hard_chunk_blocks| means whole files.
 bool DeltaReadPartition(std::vector<AnnotatedOperation>* aops,
                         const PartitionConfig& old_part,
                         const PartitionConfig& new_part,
-                        off_t chunk_blocks,
+                        ssize_t hard_chunk_blocks,
+                        size_t soft_chunk_blocks,
                         int data_fd,
                         off_t* data_file_size,
                         bool src_ops_allowed);
@@ -49,7 +55,7 @@
                              const std::string& new_part,
                              size_t old_num_blocks,
                              size_t new_num_blocks,
-                             off_t chunk_blocks,
+                             ssize_t chunk_blocks,
                              bool src_ops_allowed,
                              int data_fd,
                              off_t* data_file_size,
@@ -70,7 +76,7 @@
                    const std::vector<Extent>& old_extents,
                    const std::vector<Extent>& new_extents,
                    const std::string& name,
-                   off_t chunk_blocks,
+                   ssize_t chunk_blocks,
                    int data_fd,
                    off_t* data_file_size,
                    bool src_ops_allowed);
diff --git a/payload_generator/delta_diff_utils_unittest.cc b/payload_generator/delta_diff_utils_unittest.cc
index 3b1d44d..9566fec 100644
--- a/payload_generator/delta_diff_utils_unittest.cc
+++ b/payload_generator/delta_diff_utils_unittest.cc
@@ -113,7 +113,7 @@
 
   // Helper function to call DeltaMovedAndZeroBlocks() using this class' data
   // members. This simply avoid repeating all the arguments that never change.
-  bool RunDeltaMovedAndZeroBlocks(off_t chunk_blocks,
+  bool RunDeltaMovedAndZeroBlocks(ssize_t chunk_blocks,
                                   bool src_ops_allowed) {
     return diff_utils::DeltaMovedAndZeroBlocks(
         &aops_,
diff --git a/payload_generator/full_update_generator.cc b/payload_generator/full_update_generator.cc
index c28f7e8..b1e479d 100644
--- a/payload_generator/full_update_generator.cc
+++ b/payload_generator/full_update_generator.cc
@@ -122,10 +122,15 @@
 
   // FullUpdateGenerator requires a positive chunk_size, otherwise there will
   // be only one operation with the whole partition which should not be allowed.
-  size_t full_chunk_size = kDefaultFullChunkSize;
-  if (config.chunk_size >= 0) {
-    full_chunk_size = config.chunk_size;
+  // For performance reasons, we force a small default hard limit of 1 MiB. This
+  // limit can be changed in the config, and we will use the smaller of the two
+  // soft/hard limits.
+  size_t full_chunk_size;
+  if (config.hard_chunk_size >= 0) {
+    full_chunk_size = std::min(static_cast<size_t>(config.hard_chunk_size),
+                               config.soft_chunk_size);
   } else {
+    full_chunk_size = std::min(kDefaultFullChunkSize, config.soft_chunk_size);
     LOG(INFO) << "No chunk_size provided, using the default chunk_size for the "
               << "full operations: " << full_chunk_size << " bytes.";
   }
diff --git a/payload_generator/full_update_generator_unittest.cc b/payload_generator/full_update_generator_unittest.cc
index 0a04e4e..8afbb03 100644
--- a/payload_generator/full_update_generator_unittest.cc
+++ b/payload_generator/full_update_generator_unittest.cc
@@ -24,7 +24,7 @@
   void SetUp() override {
     config_.is_delta = false;
     config_.minor_version = DeltaPerformer::kFullPayloadMinorVersion;
-    config_.chunk_size = 128 * 1024;
+    config_.hard_chunk_size = 128 * 1024;
     config_.block_size = 4096;
 
     EXPECT_TRUE(utils::MakeTempFile("FullUpdateTest_rootfs.XXXXXX",
@@ -85,14 +85,14 @@
                                             &rootfs_ops,
                                             &kernel_ops));
   int64_t target_rootfs_chunks =
-      config_.target.rootfs.size / config_.chunk_size;
+      config_.target.rootfs.size / config_.hard_chunk_size;
   EXPECT_EQ(target_rootfs_chunks, rootfs_ops.size());
-  EXPECT_EQ(new_kern.size() / config_.chunk_size, kernel_ops.size());
+  EXPECT_EQ(new_kern.size() / config_.hard_chunk_size, kernel_ops.size());
   for (off_t i = 0; i < target_rootfs_chunks; ++i) {
     EXPECT_EQ(1, rootfs_ops[i].op.dst_extents_size());
-    EXPECT_EQ(i * config_.chunk_size / config_.block_size,
+    EXPECT_EQ(i * config_.hard_chunk_size / config_.block_size,
               rootfs_ops[i].op.dst_extents(0).start_block()) << "i = " << i;
-    EXPECT_EQ(config_.chunk_size / config_.block_size,
+    EXPECT_EQ(config_.hard_chunk_size / config_.block_size,
               rootfs_ops[i].op.dst_extents(0).num_blocks());
     if (rootfs_ops[i].op.type() !=
         DeltaArchiveManifest_InstallOperation_Type_REPLACE) {
@@ -105,7 +105,8 @@
 // Test that if the chunk size is not a divisor of the image size, it handles
 // correctly the last chunk of each partition.
 TEST_F(FullUpdateGeneratorTest, ChunkSizeTooBig) {
-  config_.chunk_size = 1024 * 1024;
+  config_.hard_chunk_size = 1024 * 1024;
+  config_.soft_chunk_size = config_.hard_chunk_size;
   chromeos::Blob new_root(1536 * 1024);  // 1.5 MiB
   chromeos::Blob new_kern(128 * 1024);
   config_.rootfs_partition_size = new_root.size();
@@ -128,9 +129,9 @@
                                             &kernel_ops));
   // rootfs has one chunk and a half.
   EXPECT_EQ(2, rootfs_ops.size());
-  EXPECT_EQ(config_.chunk_size / config_.block_size,
+  EXPECT_EQ(config_.hard_chunk_size / config_.block_size,
             BlocksInExtents(rootfs_ops[0].op.dst_extents()));
-  EXPECT_EQ((new_root.size() - config_.chunk_size) / config_.block_size,
+  EXPECT_EQ((new_root.size() - config_.hard_chunk_size) / config_.block_size,
             BlocksInExtents(rootfs_ops[1].op.dst_extents()));
 
   // kernel has less than one chunk.
diff --git a/payload_generator/generate_delta_main.cc b/payload_generator/generate_delta_main.cc
index d32221d..e3122df 100644
--- a/payload_generator/generate_delta_main.cc
+++ b/payload_generator/generate_delta_main.cc
@@ -345,7 +345,8 @@
   payload_config.target.rootfs.path = FLAGS_new_image;
   payload_config.target.kernel.path = FLAGS_new_kernel;
 
-  payload_config.chunk_size = FLAGS_chunk_size;
+  // Use the default soft_chunk_size defined in the config.
+  payload_config.hard_chunk_size = FLAGS_chunk_size;
   payload_config.block_size = kBlockSize;
 
   // The kernel and rootfs size is never passed to the delta_generator, so we
diff --git a/payload_generator/inplace_generator.cc b/payload_generator/inplace_generator.cc
index a7f7b55..dc4802a 100644
--- a/payload_generator/inplace_generator.cc
+++ b/payload_generator/inplace_generator.cc
@@ -797,7 +797,8 @@
     const PartitionConfig& new_part,
     uint64_t partition_size,
     size_t block_size,
-    off_t chunk_blocks,
+    ssize_t hard_chunk_blocks,
+    size_t soft_chunk_blocks,
     int data_file_fd,
     off_t* data_file_size,
     vector<AnnotatedOperation>* aops) {
@@ -807,7 +808,8 @@
       diff_utils::DeltaReadPartition(aops,
                                      old_part,
                                      new_part,
-                                     chunk_blocks,
+                                     hard_chunk_blocks,
+                                     soft_chunk_blocks,
                                      data_file_fd,
                                      data_file_size,
                                      false));  // src_ops_allowed
@@ -830,15 +832,17 @@
     off_t* data_file_size,
     vector<AnnotatedOperation>* rootfs_ops,
     vector<AnnotatedOperation>* kernel_ops) {
-  off_t chunk_blocks = (config.chunk_size == -1 ? -1 :
-                        config.chunk_size / config.block_size);
+  ssize_t hard_chunk_blocks = (config.hard_chunk_size == -1 ? -1 :
+                               config.hard_chunk_size / config.block_size);
+  size_t soft_chunk_blocks = config.soft_chunk_size / config.block_size;
 
   TEST_AND_RETURN_FALSE(GenerateOperationsForPartition(
       config.source.rootfs,
       config.target.rootfs,
       config.rootfs_partition_size,
       config.block_size,
-      chunk_blocks,
+      hard_chunk_blocks,
+      soft_chunk_blocks,
       data_file_fd,
       data_file_size,
       rootfs_ops));
@@ -848,7 +852,8 @@
       config.target.kernel,
       config.target.kernel.size,  // kernel "filesystem" is the whole partition.
       config.block_size,
-      chunk_blocks,
+      hard_chunk_blocks,
+      soft_chunk_blocks,
       data_file_fd,
       data_file_size,
       kernel_ops));
diff --git a/payload_generator/inplace_generator.h b/payload_generator/inplace_generator.h
index 0c9fc2a..a1245f3 100644
--- a/payload_generator/inplace_generator.h
+++ b/payload_generator/inplace_generator.h
@@ -223,7 +223,8 @@
       const PartitionConfig& new_part,
       uint64_t partition_size,
       size_t block_size,
-      off_t chunk_blocks,
+      ssize_t hard_chunk_blocks,
+      size_t soft_chunk_blocks,
       int data_file_fd,
       off_t* data_file_size,
       std::vector<AnnotatedOperation>* aops);
diff --git a/payload_generator/payload_generation_config.cc b/payload_generator/payload_generation_config.cc
index 0229b61..cfe1367 100644
--- a/payload_generator/payload_generation_config.cc
+++ b/payload_generator/payload_generation_config.cc
@@ -142,7 +142,9 @@
   TEST_AND_RETURN_FALSE(target.rootfs.size % block_size == 0);
   TEST_AND_RETURN_FALSE(target.kernel.size % block_size == 0);
 
-  TEST_AND_RETURN_FALSE(chunk_size == -1 || chunk_size % block_size == 0);
+  TEST_AND_RETURN_FALSE(hard_chunk_size == -1 ||
+                        hard_chunk_size % block_size == 0);
+  TEST_AND_RETURN_FALSE(soft_chunk_size % block_size == 0);
 
   TEST_AND_RETURN_FALSE(rootfs_partition_size % block_size == 0);
   TEST_AND_RETURN_FALSE(rootfs_partition_size >= target.rootfs.size);
diff --git a/payload_generator/payload_generation_config.h b/payload_generator/payload_generation_config.h
index 4057f75..1846e5b 100644
--- a/payload_generator/payload_generation_config.h
+++ b/payload_generator/payload_generation_config.h
@@ -113,10 +113,20 @@
   // after the partition used to store the verity hashes and or the bootcache.
   uint64_t rootfs_partition_size = 0;
 
-  // The chunk size is the maximum size that a single operation should write in
-  // the destination. Operations bigger than chunk_size should be split. A value
-  // of -1 means no chunk_size limit.
-  off_t chunk_size = -1;
+  // The |hard_chunk_size| is the maximum size that a single operation should
+  // write in the destination. Operations bigger than chunk_size should be
+  // split. A value of -1 means no hard chunk size limit. A very low limit
+  // means more operations, and less of a chance to reuse the data.
+  ssize_t hard_chunk_size = -1;
+
+  // The |soft_chunk_size| is the preferred chunk size to use when there's no
+  // significant impact to the operations. For example, REPLACE, MOVE and
+  // SOURCE_COPY operations are not significantly impacted by the chunk size,
+  // except for a few bytes overhead in the manifest to describe extra
+  // operations. On the other hand, splitting BSDIFF operations impacts the
+  // payload size since it is not possible to use the redundancy *between*
+  // chunks.
+  size_t soft_chunk_size = 2 * 1024 * 1024;
 
   // TODO(deymo): Remove the block_size member and maybe replace it with a
   // minimum alignment size for blocks (if needed). Algorithms should be able to