Merge "Add multithread support to call imgdiff with block-limit"
diff --git a/tools/releasetools/blockimgdiff.py b/tools/releasetools/blockimgdiff.py
index c184c42..6bca99e 100644
--- a/tools/releasetools/blockimgdiff.py
+++ b/tools/releasetools/blockimgdiff.py
@@ -16,6 +16,7 @@
import array
import common
+import copy
import functools
import heapq
import itertools
@@ -204,6 +205,18 @@
self.id = len(by_id)
by_id.append(self)
+ self._patch = None
+
+ @property
+ def patch(self):
+ return self._patch
+
+ @patch.setter
+ def patch(self, patch):
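+ # Only "diff" style transfers may carry a precomputed patch; the
+ # assertion below enforces that.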
+ if patch:
+ assert self.style == "diff"
+ self._patch = patch
+
def NetStashChange(self):
return (sum(sr.size() for (_, sr) in self.stash_before) -
sum(sr.size() for (_, sr) in self.use_stash))
@@ -213,6 +226,7 @@
self.use_stash = []
self.style = "new"
self.src_ranges = RangeSet()
+ self.patch = None
def __str__(self):
return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
@@ -675,6 +689,7 @@
# These are identical; we don't need to generate a patch,
# just issue copy commands on the device.
xf.style = "move"
+ xf.patch = None
tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
if xf.src_ranges != xf.tgt_ranges:
print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
@@ -683,24 +698,33 @@
xf.tgt_name + " (from " + xf.src_name + ")"),
str(xf.tgt_ranges), str(xf.src_ranges)))
else:
- # For files in zip format (eg, APKs, JARs, etc.) we would
- # like to use imgdiff -z if possible (because it usually
- # produces significantly smaller patches than bsdiff).
- # This is permissible if:
- #
- # - imgdiff is not disabled, and
- # - the source and target files are monotonic (ie, the
- # data is stored with blocks in increasing order), and
- # - we haven't removed any blocks from the source set.
- #
- # If these conditions are satisfied then appending all the
- # blocks in the set together in order will produce a valid
- # zip file (plus possibly extra zeros in the last block),
- # which is what imgdiff needs to operate. (imgdiff is
- # fine with extra zeros at the end of the file.)
- imgdiff = (not self.disable_imgdiff and xf.intact and
- xf.tgt_name.split(".")[-1].lower()
- in ("apk", "jar", "zip"))
+ if xf.patch:
+ # We have already generated the patch with imgdiff. Check if the
+ # transfer is intact.
+ assert not self.disable_imgdiff
+ imgdiff = True
+ if not xf.intact:
+ imgdiff = False
+ xf.patch = None
+ else:
+ # For files in zip format (eg, APKs, JARs, etc.) we would
+ # like to use imgdiff -z if possible (because it usually
+ # produces significantly smaller patches than bsdiff).
+ # This is permissible if:
+ #
+ # - imgdiff is not disabled, and
+ # - the source and target files are monotonic (ie, the
+ # data is stored with blocks in increasing order), and
+ # - we haven't removed any blocks from the source set.
+ #
+ # If these conditions are satisfied then appending all the
+ # blocks in the set together in order will produce a valid
+ # zip file (plus possibly extra zeros in the last block),
+ # which is what imgdiff needs to operate. (imgdiff is
+ # fine with extra zeros at the end of the file.)
+ imgdiff = (not self.disable_imgdiff and xf.intact and
+ xf.tgt_name.split(".")[-1].lower()
+ in ("apk", "jar", "zip"))
xf.style = "imgdiff" if imgdiff else "bsdiff"
diff_queue.append((index, imgdiff, patch_num))
patch_num += 1
@@ -738,48 +762,51 @@
xf_index, imgdiff, patch_index = diff_queue.pop()
xf = self.transfers[xf_index]
- src_ranges = xf.src_ranges
- tgt_ranges = xf.tgt_ranges
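+ # A patch may have been precomputed while splitting a large APK with
+ # imgdiff; reuse it instead of running the diff tool again.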
+ patch = xf.patch
+ if not patch:
+ src_ranges = xf.src_ranges
+ tgt_ranges = xf.tgt_ranges
- # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
- with lock:
- src_file = common.MakeTempFile(prefix="src-")
- with open(src_file, "wb") as fd:
- self.src.WriteRangeDataToFd(src_ranges, fd)
-
- tgt_file = common.MakeTempFile(prefix="tgt-")
- with open(tgt_file, "wb") as fd:
- self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
-
- message = []
- try:
- patch = compute_patch(src_file, tgt_file, imgdiff)
- except ValueError as e:
- message.append(
- "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
- "imgdiff" if imgdiff else "bsdiff",
- xf.tgt_name if xf.tgt_name == xf.src_name else
- xf.tgt_name + " (from " + xf.src_name + ")",
- xf.tgt_ranges, xf.src_ranges, e.message))
- # TODO(b/68016761): Better handle the holes in mke2fs created images.
- if imgdiff:
- try:
- patch = compute_patch(src_file, tgt_file, imgdiff=False)
- message.append(
- "Fell back and generated with bsdiff instead for %s" % (
- xf.tgt_name,))
- xf.style = "bsdiff"
- with lock:
- warning_messages.extend(message)
- del message[:]
- except ValueError as e:
- message.append(
- "Also failed to generate with bsdiff for %s:\n%s" % (
- xf.tgt_name, e.message))
-
- if message:
+ # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
with lock:
- error_messages.extend(message)
+ src_file = common.MakeTempFile(prefix="src-")
+ with open(src_file, "wb") as fd:
+ self.src.WriteRangeDataToFd(src_ranges, fd)
+
+ tgt_file = common.MakeTempFile(prefix="tgt-")
+ with open(tgt_file, "wb") as fd:
+ self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
+
+ message = []
+ try:
+ patch = compute_patch(src_file, tgt_file, imgdiff)
+ except ValueError as e:
+ message.append(
+ "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
+ "imgdiff" if imgdiff else "bsdiff",
+ xf.tgt_name if xf.tgt_name == xf.src_name else
+ xf.tgt_name + " (from " + xf.src_name + ")",
+ xf.tgt_ranges, xf.src_ranges, e.message))
+ # TODO(b/68016761): Better handle the holes in mke2fs created
+ # images.
+ if imgdiff:
+ try:
+ patch = compute_patch(src_file, tgt_file, imgdiff=False)
+ message.append(
+ "Fell back and generated with bsdiff instead for %s" % (
+ xf.tgt_name,))
+ xf.style = "bsdiff"
+ with lock:
+ warning_messages.extend(message)
+ del message[:]
+ except ValueError as e:
+ message.append(
+ "Also failed to generate with bsdiff for %s:\n%s" % (
+ xf.tgt_name, e.message))
+
+ if message:
+ with lock:
+ error_messages.extend(message)
with lock:
patches[patch_index] = (xf_index, patch)
@@ -1167,12 +1194,9 @@
Compared to the fixed 1024-block limit, it reduces the overall package
size by 30% for volantis, and 20% for angler and bullhead."""
+ assert style == "diff"
# Possibly split large files into smaller chunks.
pieces = 0
- cache_size = common.OPTIONS.cache_size
- split_threshold = 0.125
- max_blocks_per_transfer = int(cache_size * split_threshold /
- self.tgt.blocksize)
# Change nothing for small files.
if (tgt_ranges.size() <= max_blocks_per_transfer and
@@ -1182,6 +1206,14 @@
style, by_id)
return
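+ # Large zip-based files (APKs, JARs, etc.) are collected and later split
+ # by imgdiff itself (via --block-limit), provided imgdiff is enabled,
+ # both range sets are monotonic, and the contents actually differ.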
+ if tgt_name.split(".")[-1].lower() in ("apk", "jar", "zip"):
+ split_enable = (not self.disable_imgdiff and src_ranges.monotonic and
+ tgt_ranges.monotonic)
+ if split_enable and (self.tgt.RangeSha1(tgt_ranges) !=
+ self.src.RangeSha1(src_ranges)):
+ large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
+ return
+
while (tgt_ranges.size() > max_blocks_per_transfer and
src_ranges.size() > max_blocks_per_transfer):
tgt_split_name = "%s-%d" % (tgt_name, pieces)
@@ -1275,8 +1307,136 @@
AddSplitTransfers(
tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
+ def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
+ split_info):
+ """Parse the split_info and return a list of info tuples.
+
+ Args:
+ patch_size: Total size of the patch file.
+ tgt_ranges: Ranges of the target file within the original image.
+ src_ranges: Ranges of the source file within the original image.
+ split_info: Lines of the split info generated by imgdiff, in the format:
+ imgdiff version#
+ count of pieces
+ <patch_size_1> <tgt_size_1> <src_ranges_1>
+ ...
+ <patch_size_n> <tgt_size_n> <src_ranges_n>
+
+ Returns:
+ A list of (patch_start, patch_length, split_tgt_ranges,
+ split_src_ranges) tuples.
+ """
+
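+ # For example (hypothetical numbers), with patch_size == 300, a
+ # three-block tgt_ranges, and a split_info of:
+ #   2
+ #   2
+ #   120 4096 2,0,1
+ #   180 8192 2,1,3
+ # we would return two tuples: patch bytes [0, 120) mapped to the first
+ # target block and the first source block, and patch bytes [120, 300)
+ # mapped to the remaining two target blocks and the second and third
+ # source blocks.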
+ version = int(split_info[0])
+ assert version == 2
+ count = int(split_info[1])
+ assert len(split_info) - 2 == count
+
+ split_info_list = []
+ patch_start = 0
+ tgt_remain = copy.deepcopy(tgt_ranges)
+ # Each line has the format: <patch_size> <tgt_size> <src_ranges>
+ for line in split_info[2:]:
+ info = line.split()
+ assert len(info) == 3
+ patch_length = int(info[0])
+
+ split_tgt_size = int(info[1])
+ assert split_tgt_size % 4096 == 0
+ assert split_tgt_size / 4096 <= tgt_remain.size()
+ split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
+ tgt_remain = tgt_remain.subtract(split_tgt_ranges)
+
+ # Map the block indices, which are relative to the source file, to the
+ # split_src_ranges within the original image.
+ split_src_indices = RangeSet.parse_raw(info[2])
+ split_src_ranges = RangeSet()
+ for r in split_src_indices:
+ curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
+ assert not split_src_ranges.overlaps(curr_range)
+ split_src_ranges = split_src_ranges.union(curr_range)
+
+ split_info_list.append((patch_start, patch_length,
+ split_tgt_ranges, split_src_ranges))
+ patch_start += patch_length
+
+ # Check that the split pieces add up to the total patch size and the
+ # full target size.
+ assert tgt_remain.size() == 0
+ assert patch_start == patch_size
+ return split_info_list
+
+ def AddSplitTransferForLargeApks():
+ """Create split transfers for large apk files.
+
+ Example: Chrome.apk will be split into
+ src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
+ src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
+ ...
+
+ After the split, the target pieces are contiguous and block-aligned, and
+ the source pieces are mutually exclusive. During the split, we also
+ generate and save the image patch between src-X and tgt-X. This patch
+ remains valid because the block ranges of src-X and tgt-X never change
+ afterwards; but we may end up not using the patch if the "diff" command
+ is later converted into "new" or "move".
+ """
+
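+ # large_apks is shared among the worker threads; pop entries under the
+ # lock until the list is empty.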
+ while True:
+ with transfer_lock:
+ if not large_apks:
+ return
+ tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
+
+ src_file = common.MakeTempFile(prefix="src-")
+ tgt_file = common.MakeTempFile(prefix="tgt-")
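+ # WriteRangeDataToFd() is stateful (it seeks on the shared image file),
+ # so serialize the writes across worker threads.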
+ with transfer_lock:
+ with open(src_file, "wb") as src_fd:
+ self.src.WriteRangeDataToFd(src_ranges, src_fd)
+ with open(tgt_file, "wb") as tgt_fd:
+ self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
+
+ patch_file = common.MakeTempFile(prefix="patch-")
+ patch_info_file = common.MakeTempFile(prefix="split_info-")
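+ # Ask imgdiff to split the patch itself: --block-limit caps the blocks
+ # in each piece, and --split-info records the per-piece patch sizes and
+ # source ranges that we parse below.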
+ cmd = ["imgdiff", "-z",
+ "--block-limit={}".format(max_blocks_per_transfer),
+ "--split-info=" + patch_info_file,
+ src_file, tgt_file, patch_file]
+ p = common.Run(cmd, stdout=subprocess.PIPE)
+ p.communicate()
+ # TODO(xunchang) fall back to the normal split if imgdiff fails.
+ if p.returncode != 0:
+ raise ValueError("Failed to create patch between {} and {}".format(
+ src_name, tgt_name))
+
+ with open(patch_info_file) as patch_info:
+ lines = patch_info.readlines()
+
+ patch_size_total = os.path.getsize(patch_file)
+ split_info_list = ParseAndValidateSplitInfo(patch_size_total,
+ tgt_ranges, src_ranges,
+ lines)
+ for index, (patch_start, patch_length, split_tgt_ranges,
+ split_src_ranges) in enumerate(split_info_list):
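+ # Read this piece's bytes out of the combined patch file.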
+ with open(patch_file) as f:
+ f.seek(patch_start)
+ patch_content = f.read(patch_length)
+
+ split_src_name = "{}-{}".format(src_name, index)
+ split_tgt_name = "{}-{}".format(tgt_name, index)
+ transfer_split = Transfer(split_tgt_name, split_src_name,
+ split_tgt_ranges, split_src_ranges,
+ self.tgt.RangeSha1(split_tgt_ranges),
+ self.src.RangeSha1(split_src_ranges),
+ "diff", self.transfers)
+ transfer_split.patch = patch_content
+
print("Finding transfers...")
+ large_apks = []
+ cache_size = common.OPTIONS.cache_size
+ split_threshold = 0.125
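+ # E.g. with a (hypothetical) 512 MiB cache and 4096-byte blocks, at most
+ # int(512 * 1024 * 1024 * 0.125 / 4096) = 16384 blocks go into one
+ # transfer.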
+ max_blocks_per_transfer = int(cache_size * split_threshold /
+ self.tgt.blocksize)
empty = RangeSet()
for tgt_fn, tgt_ranges in self.tgt.file_map.items():
if tgt_fn == "__ZERO":
@@ -1321,6 +1481,14 @@
AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
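+ # Split the collected large APKs in parallel; each worker precomputes
+ # the imgdiff patches and appends the resulting transfers.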
+ transfer_lock = threading.Lock()
+ threads = [threading.Thread(target=AddSplitTransferForLargeApks)
+ for _ in range(self.threads)]
+ for th in threads:
+ th.start()
+ while threads:
+ threads.pop().join()
+
def AbbreviateSourceNames(self):
for k in self.src.file_map.keys():
b = os.path.basename(k)