Merge "Add multithread support to call imgdiff with block-limit"
diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py
index c184c42..6bca99e 100644
--- a/tools/releasetools/blockimgdiff.py
+++ b/tools/releasetools/blockimgdiff.py
@@ -16,6 +16,7 @@
 
 import array
 import common
+import copy
 import functools
 import heapq
 import itertools
@@ -204,6 +205,18 @@
     self.id = len(by_id)
     by_id.append(self)
 
+    self._patch = None
+
+  @property
+  def patch(self):
+    return self._patch
+
+  @patch.setter
+  def patch(self, patch):
+    if patch:
+      assert self.style == "diff"
+    self._patch = patch
+
   def NetStashChange(self):
     return (sum(sr.size() for (_, sr) in self.stash_before) -
             sum(sr.size() for (_, sr) in self.use_stash))
@@ -213,6 +226,7 @@
     self.use_stash = []
     self.style = "new"
     self.src_ranges = RangeSet()
+    self.patch = None
 
   def __str__(self):
     return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
@@ -675,6 +689,7 @@
             # These are identical; we don't need to generate a patch,
             # just issue copy commands on the device.
             xf.style = "move"
+            xf.patch = None
             tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
             if xf.src_ranges != xf.tgt_ranges:
               print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
@@ -683,24 +698,33 @@
                       xf.tgt_name + " (from " + xf.src_name + ")"),
                   str(xf.tgt_ranges), str(xf.src_ranges)))
           else:
-            # For files in zip format (eg, APKs, JARs, etc.) we would
-            # like to use imgdiff -z if possible (because it usually
-            # produces significantly smaller patches than bsdiff).
-            # This is permissible if:
-            #
-            #  - imgdiff is not disabled, and
-            #  - the source and target files are monotonic (ie, the
-            #    data is stored with blocks in increasing order), and
-            #  - we haven't removed any blocks from the source set.
-            #
-            # If these conditions are satisfied then appending all the
-            # blocks in the set together in order will produce a valid
-            # zip file (plus possibly extra zeros in the last block),
-            # which is what imgdiff needs to operate.  (imgdiff is
-            # fine with extra zeros at the end of the file.)
-            imgdiff = (not self.disable_imgdiff and xf.intact and
-                       xf.tgt_name.split(".")[-1].lower()
-                       in ("apk", "jar", "zip"))
+            if xf.patch:
+              # We have already generated the patch with imgdiff. Check if the
+              # transfer is intact.
+              assert not self.disable_imgdiff
+              imgdiff = True
+              if not xf.intact:
+                imgdiff = False
+                xf.patch = None
+            else:
+              # For files in zip format (eg, APKs, JARs, etc.) we would
+              # like to use imgdiff -z if possible (because it usually
+              # produces significantly smaller patches than bsdiff).
+              # This is permissible if:
+              #
+              #  - imgdiff is not disabled, and
+              #  - the source and target files are monotonic (ie, the
+              #    data is stored with blocks in increasing order), and
+              #  - we haven't removed any blocks from the source set.
+              #
+              # If these conditions are satisfied then appending all the
+              # blocks in the set together in order will produce a valid
+              # zip file (plus possibly extra zeros in the last block),
+              # which is what imgdiff needs to operate.  (imgdiff is
+              # fine with extra zeros at the end of the file.)
+              imgdiff = (not self.disable_imgdiff and xf.intact and
+                         xf.tgt_name.split(".")[-1].lower()
+                         in ("apk", "jar", "zip"))
             xf.style = "imgdiff" if imgdiff else "bsdiff"
             diff_queue.append((index, imgdiff, patch_num))
             patch_num += 1
@@ -738,48 +762,51 @@
             xf_index, imgdiff, patch_index = diff_queue.pop()
 
           xf = self.transfers[xf_index]
-          src_ranges = xf.src_ranges
-          tgt_ranges = xf.tgt_ranges
+          patch = xf.patch
+          if not patch:
+            src_ranges = xf.src_ranges
+            tgt_ranges = xf.tgt_ranges
 
-          # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
-          with lock:
-            src_file = common.MakeTempFile(prefix="src-")
-            with open(src_file, "wb") as fd:
-              self.src.WriteRangeDataToFd(src_ranges, fd)
-
-            tgt_file = common.MakeTempFile(prefix="tgt-")
-            with open(tgt_file, "wb") as fd:
-              self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
-
-          message = []
-          try:
-            patch = compute_patch(src_file, tgt_file, imgdiff)
-          except ValueError as e:
-            message.append(
-                "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
-                "imgdiff" if imgdiff else "bsdiff",
-                xf.tgt_name if xf.tgt_name == xf.src_name else
-                    xf.tgt_name + " (from " + xf.src_name + ")",
-                xf.tgt_ranges, xf.src_ranges, e.message))
-            # TODO(b/68016761): Better handle the holes in mke2fs created images.
-            if imgdiff:
-              try:
-                patch = compute_patch(src_file, tgt_file, imgdiff=False)
-                message.append(
-                    "Fell back and generated with bsdiff instead for %s" % (
-                    xf.tgt_name,))
-                xf.style = "bsdiff"
-                with lock:
-                  warning_messages.extend(message)
-                del message[:]
-              except ValueError as e:
-                message.append(
-                    "Also failed to generate with bsdiff for %s:\n%s" % (
-                    xf.tgt_name, e.message))
-
-          if message:
+            # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
             with lock:
-              error_messages.extend(message)
+              src_file = common.MakeTempFile(prefix="src-")
+              with open(src_file, "wb") as fd:
+                self.src.WriteRangeDataToFd(src_ranges, fd)
+
+              tgt_file = common.MakeTempFile(prefix="tgt-")
+              with open(tgt_file, "wb") as fd:
+                self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
+
+            message = []
+            try:
+              patch = compute_patch(src_file, tgt_file, imgdiff)
+            except ValueError as e:
+              message.append(
+                  "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
+                  "imgdiff" if imgdiff else "bsdiff",
+                  xf.tgt_name if xf.tgt_name == xf.src_name else
+                      xf.tgt_name + " (from " + xf.src_name + ")",
+                  xf.tgt_ranges, xf.src_ranges, e.message))
+              # TODO(b/68016761): Better handle the holes in mke2fs created
+              # images.
+              if imgdiff:
+                try:
+                  patch = compute_patch(src_file, tgt_file, imgdiff=False)
+                  message.append(
+                      "Fell back and generated with bsdiff instead for %s" % (
+                      xf.tgt_name,))
+                  xf.style = "bsdiff"
+                  with lock:
+                    warning_messages.extend(message)
+                  del message[:]
+                except ValueError as e:
+                  message.append(
+                      "Also failed to generate with bsdiff for %s:\n%s" % (
+                      xf.tgt_name, e.message))
+
+            if message:
+              with lock:
+                error_messages.extend(message)
 
           with lock:
             patches[patch_index] = (xf_index, patch)
@@ -1167,12 +1194,9 @@
       Compared to the fixed 1024-block limit, it reduces the overall package
       size by 30% for volantis, and 20% for angler and bullhead."""
 
+      assert style == "diff"
       # Possibly split large files into smaller chunks.
       pieces = 0
-      cache_size = common.OPTIONS.cache_size
-      split_threshold = 0.125
-      max_blocks_per_transfer = int(cache_size * split_threshold /
-                                    self.tgt.blocksize)
 
       # Change nothing for small files.
       if (tgt_ranges.size() <= max_blocks_per_transfer and
@@ -1182,6 +1206,14 @@
                  style, by_id)
         return
 
+      if tgt_name.split(".")[-1].lower() in ("apk", "jar", "zip"):
+        split_enable = (not self.disable_imgdiff and src_ranges.monotonic and
+                        tgt_ranges.monotonic)
+        if split_enable and (self.tgt.RangeSha1(tgt_ranges) !=
+                             self.src.RangeSha1(src_ranges)):
+          large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
+          return
+
       while (tgt_ranges.size() > max_blocks_per_transfer and
              src_ranges.size() > max_blocks_per_transfer):
         tgt_split_name = "%s-%d" % (tgt_name, pieces)
@@ -1275,8 +1307,136 @@
       AddSplitTransfers(
           tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
 
+    def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
+                                  split_info):
+      """Parse the split_info and return a list of info tuples.
+
+      Args:
+        patch_size: total size of the patch file.
+        tgt_ranges: Ranges of the target file within the original image.
+        src_ranges: Ranges of the source file within the original image.
+        split_info format:
+          imgdiff version#
+          count of pieces
+          <patch_size_1> <tgt_size_1> <src_ranges_1>
+          ...
+          <patch_size_n> <tgt_size_n> <src_ranges_n>
+
+      Returns:
+        [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
+      """
+
+      version = int(split_info[0])
+      assert version == 2
+      count = int(split_info[1])
+      assert len(split_info) - 2 == count
+
+      split_info_list = []
+      patch_start = 0
+      tgt_remain = copy.deepcopy(tgt_ranges)
+      # each line has the format <patch_size>, <tgt_size>, <src_ranges>
+      for line in split_info[2:]:
+        info = line.split()
+        assert len(info) == 3
+        patch_length = int(info[0])
+
+        split_tgt_size = int(info[1])
+        assert split_tgt_size % 4096 == 0
+        assert split_tgt_size / 4096 <= tgt_remain.size()
+        split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
+        tgt_remain = tgt_remain.subtract(split_tgt_ranges)
+
+        # Find the split_src_ranges within the image file from its relative
+        # position in file.
+        split_src_indices = RangeSet.parse_raw(info[2])
+        split_src_ranges = RangeSet()
+        for r in split_src_indices:
+          curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
+          assert not split_src_ranges.overlaps(curr_range)
+          split_src_ranges = split_src_ranges.union(curr_range)
+
+        split_info_list.append((patch_start, patch_length,
+                                split_tgt_ranges, split_src_ranges))
+        patch_start += patch_length
+
+      # Check that the sizes of all the split pieces add up to the final file
+      # size for patch and target.
+      assert tgt_remain.size() == 0
+      assert patch_start == patch_size
+      return split_info_list
+
+    def AddSplitTransferForLargeApks():
+      """Create split transfers for large apk files.
+
+      Example: Chrome.apk will be split into
+        src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
+        src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
+        ...
+
+      After the split, the target pieces are continuous and block aligned; and
+      the source pieces are mutually exclusive. During the split, we also
+      generate and save the image patch between src-X & tgt-X. This patch will
+      be valid because the block ranges of src-X & tgt-X will always stay the
+      same afterwards; but there's a chance we don't use the patch if we
+      convert the "diff" command into "new" or "move" later.
+      """
+
+      while True:
+        with transfer_lock:
+          if not large_apks:
+            return
+          tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
+
+        src_file = common.MakeTempFile(prefix="src-")
+        tgt_file = common.MakeTempFile(prefix="tgt-")
+        with transfer_lock:
+          with open(src_file, "wb") as src_fd:
+            self.src.WriteRangeDataToFd(src_ranges, src_fd)
+          with open(tgt_file, "wb") as tgt_fd:
+            self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
+
+        patch_file = common.MakeTempFile(prefix="patch-")
+        patch_info_file = common.MakeTempFile(prefix="split_info-")
+        cmd = ["imgdiff", "-z",
+               "--block-limit={}".format(max_blocks_per_transfer),
+               "--split-info=" + patch_info_file,
+               src_file, tgt_file, patch_file]
+        p = common.Run(cmd, stdout=subprocess.PIPE)
+        p.communicate()
+        # TODO(xunchang) fall back to the normal split if imgdiff fails.
+        if p.returncode != 0:
+          raise ValueError("Failed to create patch between {} and {}".format(
+              src_name, tgt_name))
+
+        with open(patch_info_file) as patch_info:
+          lines = patch_info.readlines()
+
+        patch_size_total = os.path.getsize(patch_file)
+        split_info_list = ParseAndValidateSplitInfo(patch_size_total,
+                                                    tgt_ranges, src_ranges,
+                                                    lines)
+        for index, (patch_start, patch_length, split_tgt_ranges,
+            split_src_ranges) in enumerate(split_info_list):
+          with open(patch_file) as f:
+            f.seek(patch_start)
+            patch_content = f.read(patch_length)
+
+          split_src_name = "{}-{}".format(src_name, index)
+          split_tgt_name = "{}-{}".format(tgt_name, index)
+          transfer_split = Transfer(split_tgt_name, split_src_name,
+                                    split_tgt_ranges, split_src_ranges,
+                                    self.tgt.RangeSha1(split_tgt_ranges),
+                                    self.src.RangeSha1(split_src_ranges),
+                                    "diff", self.transfers)
+          transfer_split.patch = patch_content
+
     print("Finding transfers...")
 
+    large_apks = []
+    cache_size = common.OPTIONS.cache_size
+    split_threshold = 0.125
+    max_blocks_per_transfer = int(cache_size * split_threshold /
+                                  self.tgt.blocksize)
     empty = RangeSet()
     for tgt_fn, tgt_ranges in self.tgt.file_map.items():
       if tgt_fn == "__ZERO":
@@ -1321,6 +1481,14 @@
 
       AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
 
+    transfer_lock = threading.Lock()
+    threads = [threading.Thread(target=AddSplitTransferForLargeApks)
+               for _ in range(self.threads)]
+    for th in threads:
+      th.start()
+    while threads:
+      threads.pop().join()
+
   def AbbreviateSourceNames(self):
     for k in self.src.file_map.keys():
       b = os.path.basename(k)