blob: 72f065d19d6e7cf136d2d51a8f8458d88ae7a173 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tianjie Xu25366072017-09-08 17:19:02 -070018import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
Tao Bao32fcdab2018-10-12 10:30:39 -070022import logging
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
xunchang21531832018-12-06 14:20:05 -080029import zlib
30from collections import deque, namedtuple, OrderedDict
Tao Bao508b0872018-02-09 13:44:43 -080031
32import common
Tianjie Xu41976c72019-07-03 13:57:01 -070033from images import EmptyImage
Dan Albert8b72aef2015-03-23 19:13:21 -070034from rangelib import RangeSet
35
Tianjie Xu41976c72019-07-03 13:57:01 -070036__all__ = ["BlockImageDiff"]
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070037
Tao Bao32fcdab2018-10-12 10:30:39 -070038logger = logging.getLogger(__name__)
39
xunchang21531832018-12-06 14:20:05 -080040# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
41PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])
42
Dan Albert8b72aef2015-03-23 19:13:21 -070043
Tao Bao183e56e2017-03-05 17:05:09 -080044def compute_patch(srcfile, tgtfile, imgdiff=False):
xunchang21531832018-12-06 14:20:05 -080045 """Calls bsdiff|imgdiff to compute the patch data, returns a PatchInfo."""
Tianjie Xub59c17f2016-10-28 17:55:53 -070046 patchfile = common.MakeTempFile(prefix='patch-')
Doug Zongkerfc44a512014-08-26 13:10:25 -070047
Tianjie Xub59c17f2016-10-28 17:55:53 -070048 cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
49 cmd.extend([srcfile, tgtfile, patchfile])
Doug Zongkerfc44a512014-08-26 13:10:25 -070050
Tao Bao39451582017-05-04 11:10:47 -070051 # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
52 # here, since they contain temp filenames only.
Tao Bao73dd4f42018-10-04 16:25:33 -070053 proc = common.Run(cmd, verbose=False)
54 output, _ = proc.communicate()
Doug Zongkerfc44a512014-08-26 13:10:25 -070055
Tao Bao73dd4f42018-10-04 16:25:33 -070056 if proc.returncode != 0:
Tianjie Xub59c17f2016-10-28 17:55:53 -070057 raise ValueError(output)
58
59 with open(patchfile, 'rb') as f:
xunchang21531832018-12-06 14:20:05 -080060 return PatchInfo(imgdiff, f.read())
Doug Zongkerfc44a512014-08-26 13:10:25 -070061
Dan Albert8b72aef2015-03-23 19:13:21 -070062
Doug Zongkerfc44a512014-08-26 13:10:25 -070063class Transfer(object):
Tao Bao183e56e2017-03-05 17:05:09 -080064 def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
65 src_sha1, style, by_id):
Doug Zongkerfc44a512014-08-26 13:10:25 -070066 self.tgt_name = tgt_name
67 self.src_name = src_name
68 self.tgt_ranges = tgt_ranges
69 self.src_ranges = src_ranges
Tao Bao183e56e2017-03-05 17:05:09 -080070 self.tgt_sha1 = tgt_sha1
71 self.src_sha1 = src_sha1
Doug Zongkerfc44a512014-08-26 13:10:25 -070072 self.style = style
Tao Baob8c87172015-03-19 19:42:12 -070073
74 # We use OrderedDict rather than dict so that the output is repeatable;
75 # otherwise it would depend on the hash values of the Transfer objects.
76 self.goes_before = OrderedDict()
77 self.goes_after = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -070078
Doug Zongker62338182014-09-08 08:29:55 -070079 self.stash_before = []
80 self.use_stash = []
81
Doug Zongkerfc44a512014-08-26 13:10:25 -070082 self.id = len(by_id)
83 by_id.append(self)
84
xunchang21531832018-12-06 14:20:05 -080085 self._patch_info = None
Tianjie Xu25366072017-09-08 17:19:02 -070086
87 @property
xunchang21531832018-12-06 14:20:05 -080088 def patch_info(self):
89 return self._patch_info
Tianjie Xu25366072017-09-08 17:19:02 -070090
xunchang21531832018-12-06 14:20:05 -080091 @patch_info.setter
92 def patch_info(self, info):
93 if info:
Tianjie Xu25366072017-09-08 17:19:02 -070094 assert self.style == "diff"
xunchang21531832018-12-06 14:20:05 -080095 self._patch_info = info
Tianjie Xu25366072017-09-08 17:19:02 -070096
Doug Zongker62338182014-09-08 08:29:55 -070097 def NetStashChange(self):
98 return (sum(sr.size() for (_, sr) in self.stash_before) -
99 sum(sr.size() for (_, sr) in self.use_stash))
100
Tao Bao82c47982015-08-17 09:45:13 -0700101 def ConvertToNew(self):
102 assert self.style != "new"
103 self.use_stash = []
104 self.style = "new"
105 self.src_ranges = RangeSet()
xunchang21531832018-12-06 14:20:05 -0800106 self.patch_info = None
Tao Bao82c47982015-08-17 09:45:13 -0700107
Doug Zongkerfc44a512014-08-26 13:10:25 -0700108 def __str__(self):
109 return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
110 " to " + str(self.tgt_ranges) + ">")
111
112
Doug Zongker6ab2a502016-02-09 08:28:09 -0800113@functools.total_ordering
114class HeapItem(object):
115 def __init__(self, item):
116 self.item = item
Tao Bao186ec992017-12-23 11:50:52 -0800117 # Negate the score since python's heap is a min-heap and we want the
118 # maximum score.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800119 self.score = -item.score
Tao Bao186ec992017-12-23 11:50:52 -0800120
Doug Zongker6ab2a502016-02-09 08:28:09 -0800121 def clear(self):
122 self.item = None
Tao Bao186ec992017-12-23 11:50:52 -0800123
Doug Zongker6ab2a502016-02-09 08:28:09 -0800124 def __bool__(self):
Tao Bao186ec992017-12-23 11:50:52 -0800125 return self.item is not None
126
127 # Python 2 uses __nonzero__, while Python 3 uses __bool__.
128 __nonzero__ = __bool__
129
130 # The rest operations are generated by functools.total_ordering decorator.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800131 def __eq__(self, other):
132 return self.score == other.score
Tao Bao186ec992017-12-23 11:50:52 -0800133
Doug Zongker6ab2a502016-02-09 08:28:09 -0800134 def __le__(self, other):
135 return self.score <= other.score
136
137
Tao Bao294651d2018-02-08 23:21:52 -0800138class ImgdiffStats(object):
139 """A class that collects imgdiff stats.
140
141 It keeps track of the files that will be applied imgdiff while generating
142 BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
143 reasons. The stats is only meaningful when imgdiff not being disabled by the
144 caller of BlockImageDiff. In addition, only files with supported types
145 (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
Tao Bao294651d2018-02-08 23:21:52 -0800146 """
147
148 USED_IMGDIFF = "APK files diff'd with imgdiff"
149 USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"
150
151 # Reasons for not applying imgdiff on APKs.
Tao Bao294651d2018-02-08 23:21:52 -0800152 SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
Tao Baoe709b092018-02-07 12:40:00 -0800153 SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
Tao Bao4ccea852018-02-06 15:16:41 -0800154 SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"
Tao Bao294651d2018-02-08 23:21:52 -0800155
156 # The list of valid reasons, which will also be the dumped order in a report.
157 REASONS = (
158 USED_IMGDIFF,
159 USED_IMGDIFF_LARGE_APK,
Tao Bao294651d2018-02-08 23:21:52 -0800160 SKIPPED_NONMONOTONIC,
Tao Baoe709b092018-02-07 12:40:00 -0800161 SKIPPED_SHARED_BLOCKS,
Tao Bao4ccea852018-02-06 15:16:41 -0800162 SKIPPED_INCOMPLETE,
Tao Bao294651d2018-02-08 23:21:52 -0800163 )
164
165 def __init__(self):
166 self.stats = {}
167
168 def Log(self, filename, reason):
169 """Logs why imgdiff can or cannot be applied to the given filename.
170
171 Args:
172 filename: The filename string.
173 reason: One of the reason constants listed in REASONS.
174
175 Raises:
176 AssertionError: On unsupported filetypes or invalid reason.
177 """
178 assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
179 assert reason in self.REASONS
180
181 if reason not in self.stats:
182 self.stats[reason] = set()
183 self.stats[reason].add(filename)
184
185 def Report(self):
186 """Prints a report of the collected imgdiff stats."""
187
188 def print_header(header, separator):
Tao Bao32fcdab2018-10-12 10:30:39 -0700189 logger.info(header)
Tao Baob8131202019-06-19 14:15:34 -0700190 logger.info('%s\n', separator * len(header))
Tao Bao294651d2018-02-08 23:21:52 -0800191
192 print_header(' Imgdiff Stats Report ', '=')
193 for key in self.REASONS:
194 if key not in self.stats:
195 continue
196 values = self.stats[key]
197 section_header = ' {} (count: {}) '.format(key, len(values))
198 print_header(section_header, '-')
Tao Bao32fcdab2018-10-12 10:30:39 -0700199 logger.info(''.join([' {}\n'.format(name) for name in values]))
Tao Bao294651d2018-02-08 23:21:52 -0800200
201
Doug Zongkerfc44a512014-08-26 13:10:25 -0700202class BlockImageDiff(object):
Tao Bao76def242017-11-21 09:25:31 -0800203 """Generates the diff of two block image objects.
204
205 BlockImageDiff works on two image objects. An image object is anything that
206 provides the following attributes:
207
208 blocksize: the size in bytes of a block, currently must be 4096.
209
210 total_blocks: the total size of the partition/image, in blocks.
211
212 care_map: a RangeSet containing which blocks (in the range [0,
213 total_blocks) we actually care about; i.e. which blocks contain data.
214
215 file_map: a dict that partitions the blocks contained in care_map into
216 smaller domains that are useful for doing diffs on. (Typically a domain
217 is a file, and the key in file_map is the pathname.)
218
219 clobbered_blocks: a RangeSet containing which blocks contain data but may
220 be altered by the FS. They need to be excluded when verifying the
221 partition integrity.
222
223 ReadRangeSet(): a function that takes a RangeSet and returns the data
224 contained in the image blocks of that RangeSet. The data is returned as
225 a list or tuple of strings; concatenating the elements together should
226 produce the requested data. Implementations are free to break up the
227 data into list/tuple elements in any way that is convenient.
228
229 RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
230 all the data in the specified range.
231
232 TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
233 all the data in the image (ie, all the blocks in the care_map minus
234 clobbered_blocks, or including the clobbered blocks if
235 include_clobbered_blocks is True).
236
237 When creating a BlockImageDiff, the src image may be None, in which case the
238 list of transfers produced will never read from the original image.
239 """
240
Tao Bao293fd132016-06-11 12:19:23 -0700241 def __init__(self, tgt, src=None, threads=None, version=4,
242 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700243 if threads is None:
244 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700245 if threads == 0:
246 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700247 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700248 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700249 self.transfers = []
250 self.src_basenames = {}
251 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800252 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700253 self.touched_src_ranges = RangeSet()
254 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700255 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800256 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700257
Tao Bao8fad03e2017-03-01 14:36:26 -0800258 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259
260 self.tgt = tgt
261 if src is None:
262 src = EmptyImage()
263 self.src = src
264
265 # The updater code that installs the patch always uses 4k blocks.
266 assert tgt.blocksize == 4096
267 assert src.blocksize == 4096
268
269 # The range sets in each filemap should comprise a partition of
270 # the care map.
271 self.AssertPartition(src.care_map, src.file_map.values())
272 self.AssertPartition(tgt.care_map, tgt.file_map.values())
273
Tao Baob4cfca52016-02-04 14:26:02 -0800274 @property
275 def max_stashed_size(self):
276 return self._max_stashed_size
277
Tao Baocb73aed2018-01-31 17:32:40 -0800278 @staticmethod
279 def FileTypeSupportedByImgdiff(filename):
280 """Returns whether the file type is supported by imgdiff."""
281 return filename.lower().endswith(('.apk', '.jar', '.zip'))
282
Tao Bao294651d2018-02-08 23:21:52 -0800283 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800284 """Checks whether we can apply imgdiff for the given RangeSets.
285
286 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
287 'imgdiff -z' if possible. Because it usually produces significantly smaller
288 patches than bsdiff.
289
290 This is permissible if all of the following conditions hold.
291 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
292 - The file type is supported by imgdiff;
293 - The source and target blocks are monotonic (i.e. the data is stored with
294 blocks in increasing order);
Tao Baoe709b092018-02-07 12:40:00 -0800295 - Both files don't contain shared blocks;
Tao Bao4ccea852018-02-06 15:16:41 -0800296 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800297 - We haven't removed any blocks from the source set.
298
299 If all these conditions are satisfied, concatenating all the blocks in the
300 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
301 in the last block). imgdiff is fine with extra zeros at the end of the file.
302
303 Args:
304 name: The filename to be diff'd.
305 tgt_ranges: The target RangeSet.
306 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800307 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800308
309 Returns:
310 A boolean result.
311 """
Tao Bao508b0872018-02-09 13:44:43 -0800312 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800313 return False
314
315 if not tgt_ranges.monotonic or not src_ranges.monotonic:
316 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
317 return False
318
Tao Baoe709b092018-02-07 12:40:00 -0800319 if (tgt_ranges.extra.get('uses_shared_blocks') or
320 src_ranges.extra.get('uses_shared_blocks')):
321 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
322 return False
323
Tao Bao4ccea852018-02-06 15:16:41 -0800324 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
325 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
326 return False
327
Tao Bao294651d2018-02-08 23:21:52 -0800328 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
329 else ImgdiffStats.USED_IMGDIFF)
330 self.imgdiff_stats.Log(name, reason)
331 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800332
Doug Zongkerfc44a512014-08-26 13:10:25 -0700333 def Compute(self, prefix):
334 # When looking for a source file to use as the diff input for a
335 # target file, we try:
336 # 1) an exact path match if available, otherwise
337 # 2) a exact basename match if available, otherwise
338 # 3) a basename match after all runs of digits are replaced by
339 # "#" if available, otherwise
340 # 4) we have no source for this target.
341 self.AbbreviateSourceNames()
342 self.FindTransfers()
343
xunchang21531832018-12-06 14:20:05 -0800344 self.FindSequenceForTransfers()
Doug Zongker62338182014-09-08 08:29:55 -0700345
Tao Bao82c47982015-08-17 09:45:13 -0700346 # Ensure the runtime stash size is under the limit.
Tao Bao8fad03e2017-03-01 14:36:26 -0800347 if common.OPTIONS.cache_size is not None:
xunchang3df4d5e2018-12-06 15:03:45 -0800348 stash_limit = (common.OPTIONS.cache_size *
349 common.OPTIONS.stash_threshold / self.tgt.blocksize)
350 # Ignore the stash limit and calculate the maximum simultaneously stashed
351 # blocks needed.
352 _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)
353
354 # We cannot stash more blocks than the stash limit simultaneously. As a
355 # result, some 'diff' commands will be converted to new; leading to an
356 # unintended large package. To mitigate this issue, we can carefully
357 # choose the transfers for conversion. The number '1024' can be further
358 # tweaked here to balance the package size and build time.
359 if max_stashed_blocks > stash_limit + 1024:
xunchangb6105dc2018-12-06 16:39:46 -0800360 self.SelectAndConvertDiffTransfersToNew(
361 max_stashed_blocks - stash_limit)
xunchang3df4d5e2018-12-06 15:03:45 -0800362 # Regenerate the sequence as the graph has changed.
363 self.FindSequenceForTransfers()
364
365 # Revise the stash size again to keep the size under limit.
Tao Bao82c47982015-08-17 09:45:13 -0700366 self.ReviseStashSize()
367
Doug Zongkerfc44a512014-08-26 13:10:25 -0700368 # Double-check our work.
369 self.AssertSequenceGood()
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800370 self.AssertSha1Good()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700371
372 self.ComputePatches(prefix)
373 self.WriteTransfers(prefix)
374
Tao Bao294651d2018-02-08 23:21:52 -0800375 # Report the imgdiff stats.
Tao Bao32fcdab2018-10-12 10:30:39 -0700376 if not self.disable_imgdiff:
Tao Bao294651d2018-02-08 23:21:52 -0800377 self.imgdiff_stats.Report()
378
Doug Zongkerfc44a512014-08-26 13:10:25 -0700379 def WriteTransfers(self, prefix):
Tianjie Xu37e29302016-06-23 16:10:35 -0700380 def WriteSplitTransfers(out, style, target_blocks):
381 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
Tianjie Xubb848c52016-06-21 15:54:09 -0700382
383 This prevents the target size of one command from being too large; and
384 might help to avoid fsync errors on some devices."""
385
Tao Bao3a2e3502016-12-28 09:14:39 -0800386 assert style == "new" or style == "zero"
Tianjie Xu37e29302016-06-23 16:10:35 -0700387 blocks_limit = 1024
Tianjie Xubb848c52016-06-21 15:54:09 -0700388 total = 0
Tianjie Xu37e29302016-06-23 16:10:35 -0700389 while target_blocks:
390 blocks_to_write = target_blocks.first(blocks_limit)
391 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
392 total += blocks_to_write.size()
393 target_blocks = target_blocks.subtract(blocks_to_write)
Tianjie Xubb848c52016-06-21 15:54:09 -0700394 return total
395
Doug Zongkerfc44a512014-08-26 13:10:25 -0700396 out = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700397 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700398
Tao Bao3a2e3502016-12-28 09:14:39 -0800399 # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
400 # id. 'stashes' records the map from 'hash' to the ref count. The stash
401 # will be freed only if the count decrements to zero.
Doug Zongker62338182014-09-08 08:29:55 -0700402 stashes = {}
403 stashed_blocks = 0
404 max_stashed_blocks = 0
405
Doug Zongkerfc44a512014-08-26 13:10:25 -0700406 for xf in self.transfers:
407
Tao Bao8fad03e2017-03-01 14:36:26 -0800408 for _, sr in xf.stash_before:
409 sh = self.src.RangeSha1(sr)
410 if sh in stashes:
411 stashes[sh] += 1
Sami Tolvanendd67a292014-12-09 16:40:34 +0000412 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800413 stashes[sh] = 1
414 stashed_blocks += sr.size()
415 self.touched_src_ranges = self.touched_src_ranges.union(sr)
416 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700417
418 if stashed_blocks > max_stashed_blocks:
419 max_stashed_blocks = stashed_blocks
420
Jesse Zhao7b985f62015-03-02 16:53:08 -0800421 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800422 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800423
Tao Bao8fad03e2017-03-01 14:36:26 -0800424 # <# blocks> <src ranges>
425 # OR
426 # <# blocks> <src ranges> <src locs> <stash refs...>
427 # OR
428 # <# blocks> - <stash refs...>
Doug Zongker62338182014-09-08 08:29:55 -0700429
Tao Bao8fad03e2017-03-01 14:36:26 -0800430 size = xf.src_ranges.size()
Tao Bao508b0872018-02-09 13:44:43 -0800431 src_str_buffer = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700432
Tao Bao8fad03e2017-03-01 14:36:26 -0800433 unstashed_src_ranges = xf.src_ranges
434 mapped_stashes = []
435 for _, sr in xf.use_stash:
436 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
437 sh = self.src.RangeSha1(sr)
438 sr = xf.src_ranges.map_within(sr)
439 mapped_stashes.append(sr)
440 assert sh in stashes
Tao Bao508b0872018-02-09 13:44:43 -0800441 src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
Tao Bao8fad03e2017-03-01 14:36:26 -0800442 stashes[sh] -= 1
443 if stashes[sh] == 0:
444 free_string.append("free %s\n" % (sh,))
445 free_size += sr.size()
446 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700447
Tao Bao8fad03e2017-03-01 14:36:26 -0800448 if unstashed_src_ranges:
Tao Bao508b0872018-02-09 13:44:43 -0800449 src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800450 if xf.use_stash:
451 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Tao Bao508b0872018-02-09 13:44:43 -0800452 src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800453 mapped_stashes.append(mapped_unstashed)
Doug Zongker62338182014-09-08 08:29:55 -0700454 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Tao Bao8fad03e2017-03-01 14:36:26 -0800455 else:
Tao Bao508b0872018-02-09 13:44:43 -0800456 src_str_buffer.insert(1, "-")
Tao Bao8fad03e2017-03-01 14:36:26 -0800457 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Doug Zongker62338182014-09-08 08:29:55 -0700458
Tao Bao508b0872018-02-09 13:44:43 -0800459 src_str = " ".join(src_str_buffer)
Doug Zongker62338182014-09-08 08:29:55 -0700460
Tao Bao8fad03e2017-03-01 14:36:26 -0800461 # version 3+:
Doug Zongker62338182014-09-08 08:29:55 -0700462 # zero <rangeset>
463 # new <rangeset>
464 # erase <rangeset>
Dan Albert8b72aef2015-03-23 19:13:21 -0700465 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
466 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
467 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700468
469 tgt_size = xf.tgt_ranges.size()
470
471 if xf.style == "new":
472 assert xf.tgt_ranges
Tianjie Xu37e29302016-06-23 16:10:35 -0700473 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700474 total += tgt_size
475 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700476 assert xf.tgt_ranges
477 assert xf.src_ranges.size() == tgt_size
478 if xf.src_ranges != xf.tgt_ranges:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100479 # take into account automatic stashing of overlapping blocks
480 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700481 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100482 if temp_stash_usage > max_stashed_blocks:
483 max_stashed_blocks = temp_stash_usage
484
Tao Baod522bdc2016-04-12 15:53:16 -0700485 self.touched_src_ranges = self.touched_src_ranges.union(
486 xf.src_ranges)
487
Tao Bao8fad03e2017-03-01 14:36:26 -0800488 out.append("%s %s %s %s\n" % (
Sami Tolvanendd67a292014-12-09 16:40:34 +0000489 xf.style,
Tao Bao183e56e2017-03-05 17:05:09 -0800490 xf.tgt_sha1,
Dan Albert8b72aef2015-03-23 19:13:21 -0700491 xf.tgt_ranges.to_string_raw(), src_str))
Tao Bao8fad03e2017-03-01 14:36:26 -0800492 total += tgt_size
493 elif xf.style in ("bsdiff", "imgdiff"):
494 assert xf.tgt_ranges
495 assert xf.src_ranges
496 # take into account automatic stashing of overlapping blocks
497 if xf.src_ranges.overlaps(xf.tgt_ranges):
498 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
499 if temp_stash_usage > max_stashed_blocks:
500 max_stashed_blocks = temp_stash_usage
501
502 self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
503
504 out.append("%s %d %d %s %s %s %s\n" % (
505 xf.style,
506 xf.patch_start, xf.patch_len,
507 xf.src_sha1,
508 xf.tgt_sha1,
509 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700510 total += tgt_size
511 elif xf.style == "zero":
512 assert xf.tgt_ranges
513 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
Tianjie Xu37e29302016-06-23 16:10:35 -0700514 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
Tianjie Xubb848c52016-06-21 15:54:09 -0700515 total += to_zero.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700516 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700517 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700518
Sami Tolvanendd67a292014-12-09 16:40:34 +0000519 if free_string:
520 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800521 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000522
Tao Bao8fad03e2017-03-01 14:36:26 -0800523 if common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700524 # Sanity check: abort if we're going to need more stash space than
525 # the allowed size (cache_size * threshold). There are two purposes
526 # of having a threshold here. a) Part of the cache may have been
527 # occupied by some recovery logs. b) It will buy us some time to deal
528 # with the oversize issue.
529 cache_size = common.OPTIONS.cache_size
530 stash_threshold = common.OPTIONS.stash_threshold
531 max_allowed = cache_size * stash_threshold
Tao Baoe8c68a02017-02-26 10:48:11 -0800532 assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
Tao Bao8dcf7382015-05-21 14:09:49 -0700533 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
534 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
535 self.tgt.blocksize, max_allowed, cache_size,
536 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700537
Tao Bao8fad03e2017-03-01 14:36:26 -0800538 self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
Tao Baod522bdc2016-04-12 15:53:16 -0700539
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700540 if self.tgt.hashtree_info:
541 out.append("compute_hash_tree {} {} {} {} {}\n".format(
542 self.tgt.hashtree_info.hashtree_range.to_string_raw(),
543 self.tgt.hashtree_info.filesystem_range.to_string_raw(),
544 self.tgt.hashtree_info.hash_algorithm,
545 self.tgt.hashtree_info.salt,
546 self.tgt.hashtree_info.root_hash))
547
Tao Baoe9b61912015-07-09 17:37:49 -0700548 # Zero out extended blocks as a workaround for bug 20881595.
549 if self.tgt.extended:
Tianjie Xu37e29302016-06-23 16:10:35 -0700550 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
Tianjie Xubb848c52016-06-21 15:54:09 -0700551 self.tgt.extended.size())
Tao Baob32d56e2015-09-09 11:55:01 -0700552 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700553
554 # We erase all the blocks on the partition that a) don't contain useful
Tao Bao66f1fa62016-05-03 10:02:01 -0700555 # data in the new image; b) will not be touched by dm-verity. Out of those
556 # blocks, we erase the ones that won't be used in this update at the
557 # beginning of an update. The rest would be erased at the end. This is to
558 # work around the eMMC issue observed on some devices, which may otherwise
559 # get starving for clean blocks and thus fail the update. (b/28347095)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700560 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700561 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
562 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
Tao Bao66f1fa62016-05-03 10:02:01 -0700563
564 erase_first = new_dontcare.subtract(self.touched_src_ranges)
565 if erase_first:
566 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
567
568 erase_last = new_dontcare.subtract(erase_first)
569 if erase_last:
570 out.append("erase %s\n" % (erase_last.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700571
572 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700573 out.insert(1, "%d\n" % (total,))
Tao Bao8fad03e2017-03-01 14:36:26 -0800574 # v3+: the number of stash slots is unused.
575 out.insert(2, "0\n")
576 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700577
Tao Baob8131202019-06-19 14:15:34 -0700578 with open(prefix + ".transfer.list", "w") as f:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700579 for i in out:
580 f.write(i)
581
Tao Bao8fad03e2017-03-01 14:36:26 -0800582 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
583 OPTIONS = common.OPTIONS
584 if OPTIONS.cache_size is not None:
585 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
Tao Bao32fcdab2018-10-12 10:30:39 -0700586 logger.info(
587 "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
588 max_stashed_blocks, self._max_stashed_size, max_allowed,
589 self._max_stashed_size * 100.0 / max_allowed)
Tao Bao8fad03e2017-03-01 14:36:26 -0800590 else:
Tao Bao32fcdab2018-10-12 10:30:39 -0700591 logger.info(
592 "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
593 max_stashed_blocks, self._max_stashed_size)
Doug Zongker62338182014-09-08 08:29:55 -0700594
xunchang3df4d5e2018-12-06 15:03:45 -0800595 def ReviseStashSize(self, ignore_stash_limit=False):
596 """ Revises the transfers to keep the stash size within the size limit.
597
598 Iterates through the transfer list and calculates the stash size each
599 transfer generates. Converts the affected transfers to new if we reach the
600 stash limit.
601
602 Args:
603 ignore_stash_limit: Ignores the stash limit and calculates the max
604 simultaneous stashed blocks instead. No change will be made to the
605 transfer list with this flag.
606
607 Return:
608 A tuple of (tgt blocks converted to new, max stashed blocks)
609 """
Tao Bao32fcdab2018-10-12 10:30:39 -0700610 logger.info("Revising stash size...")
Tao Baoe27acfd2016-12-16 11:13:55 -0800611 stash_map = {}
Tao Bao82c47982015-08-17 09:45:13 -0700612
613 # Create the map between a stash and its def/use points. For example, for a
Tao Bao3c5a16d2017-02-13 11:42:50 -0800614 # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
Tao Bao82c47982015-08-17 09:45:13 -0700615 for xf in self.transfers:
616 # Command xf defines (stores) all the stashes in stash_before.
Tao Bao3a2e3502016-12-28 09:14:39 -0800617 for stash_raw_id, sr in xf.stash_before:
618 stash_map[stash_raw_id] = (sr, xf)
Tao Bao82c47982015-08-17 09:45:13 -0700619
620 # Record all the stashes command xf uses.
Tao Bao3a2e3502016-12-28 09:14:39 -0800621 for stash_raw_id, _ in xf.use_stash:
622 stash_map[stash_raw_id] += (xf,)
Tao Bao82c47982015-08-17 09:45:13 -0700623
xunchang3df4d5e2018-12-06 15:03:45 -0800624 max_allowed_blocks = None
625 if not ignore_stash_limit:
626 # Compute the maximum blocks available for stash based on /cache size and
627 # the threshold.
628 cache_size = common.OPTIONS.cache_size
629 stash_threshold = common.OPTIONS.stash_threshold
630 max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize
Tao Bao82c47982015-08-17 09:45:13 -0700631
Tao Bao3a2e3502016-12-28 09:14:39 -0800632 # See the comments for 'stashes' in WriteTransfers().
Tao Baoe27acfd2016-12-16 11:13:55 -0800633 stashes = {}
Tao Bao82c47982015-08-17 09:45:13 -0700634 stashed_blocks = 0
Tao Bao9a5caf22015-08-25 15:10:10 -0700635 new_blocks = 0
xunchang3df4d5e2018-12-06 15:03:45 -0800636 max_stashed_blocks = 0
Tao Bao82c47982015-08-17 09:45:13 -0700637
638 # Now go through all the commands. Compute the required stash size on the
639 # fly. If a command requires excess stash than available, it deletes the
640 # stash by replacing the command that uses the stash with a "new" command
641 # instead.
642 for xf in self.transfers:
643 replaced_cmds = []
644
645 # xf.stash_before generates explicit stash commands.
Tao Bao3a2e3502016-12-28 09:14:39 -0800646 for stash_raw_id, sr in xf.stash_before:
Tao Baoe27acfd2016-12-16 11:13:55 -0800647 # Check the post-command stashed_blocks.
648 stashed_blocks_after = stashed_blocks
Tao Bao8fad03e2017-03-01 14:36:26 -0800649 sh = self.src.RangeSha1(sr)
650 if sh not in stashes:
Tao Baoe27acfd2016-12-16 11:13:55 -0800651 stashed_blocks_after += sr.size()
Tao Baoe27acfd2016-12-16 11:13:55 -0800652
xunchang3df4d5e2018-12-06 15:03:45 -0800653 if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
Tao Bao82c47982015-08-17 09:45:13 -0700654 # We cannot stash this one for a later command. Find out the command
655 # that will use this stash and replace the command with "new".
Tao Bao3a2e3502016-12-28 09:14:39 -0800656 use_cmd = stash_map[stash_raw_id][2]
Tao Bao82c47982015-08-17 09:45:13 -0700657 replaced_cmds.append(use_cmd)
Tao Bao32fcdab2018-10-12 10:30:39 -0700658 logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)
Tao Bao82c47982015-08-17 09:45:13 -0700659 else:
Tao Bao3c5a16d2017-02-13 11:42:50 -0800660 # Update the stashes map.
Tao Bao8fad03e2017-03-01 14:36:26 -0800661 if sh in stashes:
662 stashes[sh] += 1
Tao Bao3c5a16d2017-02-13 11:42:50 -0800663 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800664 stashes[sh] = 1
Tao Baoe27acfd2016-12-16 11:13:55 -0800665 stashed_blocks = stashed_blocks_after
xunchang3df4d5e2018-12-06 15:03:45 -0800666 max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)
Tao Bao82c47982015-08-17 09:45:13 -0700667
668 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
669 # ComputePatches(), they both have the style of "diff".
Tao Bao8fad03e2017-03-01 14:36:26 -0800670 if xf.style == "diff":
Tao Bao82c47982015-08-17 09:45:13 -0700671 assert xf.tgt_ranges and xf.src_ranges
672 if xf.src_ranges.overlaps(xf.tgt_ranges):
xunchang3df4d5e2018-12-06 15:03:45 -0800673 if (max_allowed_blocks and
674 stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
Tao Bao82c47982015-08-17 09:45:13 -0700675 replaced_cmds.append(xf)
Tao Bao32fcdab2018-10-12 10:30:39 -0700676 logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)
xunchang3df4d5e2018-12-06 15:03:45 -0800677 else:
678 # The whole source ranges will be stashed for implicit stashes.
679 max_stashed_blocks = max(max_stashed_blocks,
680 stashed_blocks + xf.src_ranges.size())
Tao Bao82c47982015-08-17 09:45:13 -0700681
682 # Replace the commands in replaced_cmds with "new"s.
683 for cmd in replaced_cmds:
684 # It no longer uses any commands in "use_stash". Remove the def points
685 # for all those stashes.
Tao Bao3a2e3502016-12-28 09:14:39 -0800686 for stash_raw_id, sr in cmd.use_stash:
687 def_cmd = stash_map[stash_raw_id][1]
688 assert (stash_raw_id, sr) in def_cmd.stash_before
689 def_cmd.stash_before.remove((stash_raw_id, sr))
Tao Bao82c47982015-08-17 09:45:13 -0700690
Tianjie Xuebe39a02016-01-14 14:12:26 -0800691 # Add up blocks that violates space limit and print total number to
692 # screen later.
693 new_blocks += cmd.tgt_ranges.size()
Tao Bao82c47982015-08-17 09:45:13 -0700694 cmd.ConvertToNew()
695
Tao Bao3a2e3502016-12-28 09:14:39 -0800696 # xf.use_stash may generate free commands.
Tao Bao8fad03e2017-03-01 14:36:26 -0800697 for _, sr in xf.use_stash:
698 sh = self.src.RangeSha1(sr)
699 assert sh in stashes
700 stashes[sh] -= 1
701 if stashes[sh] == 0:
Tao Baoe27acfd2016-12-16 11:13:55 -0800702 stashed_blocks -= sr.size()
Tao Bao8fad03e2017-03-01 14:36:26 -0800703 stashes.pop(sh)
Tao Baoe27acfd2016-12-16 11:13:55 -0800704
Tianjie Xuebe39a02016-01-14 14:12:26 -0800705 num_of_bytes = new_blocks * self.tgt.blocksize
Tao Bao32fcdab2018-10-12 10:30:39 -0700706 logger.info(
707 " Total %d blocks (%d bytes) are packed as new blocks due to "
xunchangb6105dc2018-12-06 16:39:46 -0800708 "insufficient cache size. Maximum blocks stashed simultaneously: %d",
709 new_blocks, num_of_bytes, max_stashed_blocks)
xunchang3df4d5e2018-12-06 15:03:45 -0800710 return new_blocks, max_stashed_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700711
Doug Zongkerfc44a512014-08-26 13:10:25 -0700712 def ComputePatches(self, prefix):
Tao Bao32fcdab2018-10-12 10:30:39 -0700713 logger.info("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800714 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700715 patch_num = 0
716 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800717 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700718 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700719 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao32fcdab2018-10-12 10:30:39 -0700720 logger.info(
721 "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
722 xf.style, xf.tgt_name, str(xf.tgt_ranges))
Tao Bao08c85832016-09-19 22:26:30 -0700723
Doug Zongkerfc44a512014-08-26 13:10:25 -0700724 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800725 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700726 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao32fcdab2018-10-12 10:30:39 -0700727 logger.info(
728 "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
729 xf.style, xf.tgt_name, str(xf.tgt_ranges))
Tao Bao08c85832016-09-19 22:26:30 -0700730
Doug Zongkerfc44a512014-08-26 13:10:25 -0700731 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700732 # We can't compare src and tgt directly because they may have
733 # the same content but be broken up into blocks differently, eg:
734 #
735 # ["he", "llo"] vs ["h", "ello"]
736 #
737 # We want those to compare equal, ideally without having to
738 # actually concatenate the strings (these may be tens of
739 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800740 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700741 # These are identical; we don't need to generate a patch,
742 # just issue copy commands on the device.
743 xf.style = "move"
xunchang21531832018-12-06 14:20:05 -0800744 xf.patch_info = None
Tao Bao183e56e2017-03-05 17:05:09 -0800745 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700746 if xf.src_ranges != xf.tgt_ranges:
Tao Bao32fcdab2018-10-12 10:30:39 -0700747 logger.info(
748 "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size, tgt_size,
749 100.0, xf.style,
Tao Bao08c85832016-09-19 22:26:30 -0700750 xf.tgt_name if xf.tgt_name == xf.src_name else (
751 xf.tgt_name + " (from " + xf.src_name + ")"),
Tao Bao32fcdab2018-10-12 10:30:39 -0700752 str(xf.tgt_ranges), str(xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700753 else:
xunchang21531832018-12-06 14:20:05 -0800754 if xf.patch_info:
755 # We have already generated the patch (e.g. during split of large
756 # APKs or reduction of stash size)
757 imgdiff = xf.patch_info.imgdiff
Tianjie Xu25366072017-09-08 17:19:02 -0700758 else:
Tao Baocb73aed2018-01-31 17:32:40 -0800759 imgdiff = self.CanUseImgdiff(
760 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700761 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800762 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700763 patch_num += 1
764
765 else:
766 assert False, "unknown style " + xf.style
767
xunchang21531832018-12-06 14:20:05 -0800768 patches = self.ComputePatchesForInputList(diff_queue, False)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700769
Tao Bao183e56e2017-03-05 17:05:09 -0800770 offset = 0
771 with open(prefix + ".patch.dat", "wb") as patch_fd:
xunchang21531832018-12-06 14:20:05 -0800772 for index, patch_info, _ in patches:
Tao Bao183e56e2017-03-05 17:05:09 -0800773 xf = self.transfers[index]
xunchang21531832018-12-06 14:20:05 -0800774 xf.patch_len = len(patch_info.content)
Tao Bao183e56e2017-03-05 17:05:09 -0800775 xf.patch_start = offset
776 offset += xf.patch_len
xunchang21531832018-12-06 14:20:05 -0800777 patch_fd.write(patch_info.content)
Tao Bao183e56e2017-03-05 17:05:09 -0800778
Tao Bao32fcdab2018-10-12 10:30:39 -0700779 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
780 logger.info(
781 "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
782 xf.patch_len * 100.0 / tgt_size, xf.style,
783 xf.tgt_name if xf.tgt_name == xf.src_name else (
784 xf.tgt_name + " (from " + xf.src_name + ")"),
785 xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700786
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800787 def AssertSha1Good(self):
788 """Check the SHA-1 of the src & tgt blocks in the transfer list.
789
790 Double check the SHA-1 value to avoid the issue in b/71908713, where
791 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
792 environment. That specific problem has been fixed by protecting the
793 underlying generator function 'SparseImage._GetRangeData()' with lock.
794 """
795 for xf in self.transfers:
796 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
797 assert xf.tgt_sha1 == tgt_sha1
798 if xf.style == "diff":
799 src_sha1 = self.src.RangeSha1(xf.src_ranges)
800 assert xf.src_sha1 == src_sha1
801
Doug Zongkerfc44a512014-08-26 13:10:25 -0700802 def AssertSequenceGood(self):
803 # Simulate the sequences of transfers we will output, and check that:
804 # - we never read a block after writing it, and
805 # - we write every block we care about exactly once.
806
807 # Start with no blocks having been touched yet.
Tao Baob8131202019-06-19 14:15:34 -0700808 touched = array.array("B", b"\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700809
810 # Imagine processing the transfers in order.
811 for xf in self.transfers:
812 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700813
814 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800815 for _, sr in xf.use_stash:
816 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700817
Doug Zongker6ab2a502016-02-09 08:28:09 -0800818 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800819 # Source image could be larger. Don't check the blocks that are in the
820 # source image only. Since they are not in 'touched', and won't ever
821 # be touched.
822 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800823 assert touched[i] == 0
824
825 # Check that the output blocks for this transfer haven't yet
826 # been touched, and touch all the blocks written by this
827 # transfer.
828 for s, e in xf.tgt_ranges:
829 for i in range(s, e):
830 assert touched[i] == 0
831 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700832
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700833 if self.tgt.hashtree_info:
834 for s, e in self.tgt.hashtree_info.hashtree_range:
835 for i in range(s, e):
836 assert touched[i] == 0
837 touched[i] = 1
838
Doug Zongkerfc44a512014-08-26 13:10:25 -0700839 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800840 for s, e in self.tgt.care_map:
841 for i in range(s, e):
842 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700843
xunchang21531832018-12-06 14:20:05 -0800844 def FindSequenceForTransfers(self):
845 """Finds a sequence for the given transfers.
846
847 The goal is to minimize the violation of order dependencies between these
848 transfers, so that fewer blocks are stashed when applying the update.
849 """
850
851 # Clear the existing dependency between transfers
852 for xf in self.transfers:
853 xf.goes_before = OrderedDict()
854 xf.goes_after = OrderedDict()
855
856 xf.stash_before = []
857 xf.use_stash = []
858
859 # Find the ordering dependencies among transfers (this is O(n^2)
860 # in the number of transfers).
861 self.GenerateDigraph()
862 # Find a sequence of transfers that satisfies as many ordering
863 # dependencies as possible (heuristically).
864 self.FindVertexSequence()
865 # Fix up the ordering dependencies that the sequence didn't
866 # satisfy.
867 self.ReverseBackwardEdges()
868 self.ImproveVertexSequence()
869
Doug Zongker62338182014-09-08 08:29:55 -0700870 def ImproveVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -0700871 logger.info("Improving vertex order...")
Doug Zongker62338182014-09-08 08:29:55 -0700872
873 # At this point our digraph is acyclic; we reversed any edges that
874 # were backwards in the heuristically-generated sequence. The
875 # previously-generated order is still acceptable, but we hope to
876 # find a better order that needs less memory for stashed data.
877 # Now we do a topological sort to generate a new vertex order,
878 # using a greedy algorithm to choose which vertex goes next
879 # whenever we have a choice.
880
881 # Make a copy of the edge set; this copy will get destroyed by the
882 # algorithm.
883 for xf in self.transfers:
884 xf.incoming = xf.goes_after.copy()
885 xf.outgoing = xf.goes_before.copy()
886
887 L = [] # the new vertex order
888
889 # S is the set of sources in the remaining graph; we always choose
890 # the one that leaves the least amount of stashed data after it's
891 # executed.
892 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
893 if not u.incoming]
894 heapq.heapify(S)
895
896 while S:
897 _, _, xf = heapq.heappop(S)
898 L.append(xf)
899 for u in xf.outgoing:
900 del u.incoming[xf]
901 if not u.incoming:
902 heapq.heappush(S, (u.NetStashChange(), u.order, u))
903
904 # if this fails then our graph had a cycle.
905 assert len(L) == len(self.transfers)
906
907 self.transfers = L
908 for i, xf in enumerate(L):
909 xf.order = i
910
Doug Zongker62338182014-09-08 08:29:55 -0700911 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -0800912 """Reverse unsatisfying edges and compute pairs of stashed blocks.
913
914 For each transfer, make sure it properly stashes the blocks it touches and
915 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
916 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
917 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
918 it is possible to have multiple pairs with different 'stash_raw_id's. Each
919 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
920 blocks will be written to the same stash slot in WriteTransfers().
921 """
922
Tao Bao32fcdab2018-10-12 10:30:39 -0700923 logger.info("Reversing backward edges...")
Doug Zongker62338182014-09-08 08:29:55 -0700924 in_order = 0
925 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -0800926 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -0700927 stash_size = 0
928
929 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700930 for u in xf.goes_before.copy():
931 # xf should go before u
932 if xf.order < u.order:
933 # it does, hurray!
934 in_order += 1
935 else:
936 # it doesn't, boo. modify u to stash the blocks that it
937 # writes that xf wants to read, and then require u to go
938 # before xf.
939 out_of_order += 1
940
941 overlap = xf.src_ranges.intersect(u.tgt_ranges)
942 assert overlap
943
Tao Bao3a2e3502016-12-28 09:14:39 -0800944 u.stash_before.append((stash_raw_id, overlap))
945 xf.use_stash.append((stash_raw_id, overlap))
946 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -0700947 stash_size += overlap.size()
948
949 # reverse the edge direction; now xf must go after u
950 del xf.goes_before[u]
951 del u.goes_after[xf]
952 xf.goes_after[u] = None # value doesn't matter
953 u.goes_before[xf] = None
954
Tao Bao32fcdab2018-10-12 10:30:39 -0700955 logger.info(
956 " %d/%d dependencies (%.2f%%) were violated; %d source blocks "
957 "stashed.", out_of_order, in_order + out_of_order,
958 (out_of_order * 100.0 / (in_order + out_of_order)) if (
959 in_order + out_of_order) else 0.0,
960 stash_size)
Doug Zongker62338182014-09-08 08:29:55 -0700961
Doug Zongkerfc44a512014-08-26 13:10:25 -0700962 def FindVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -0700963 logger.info("Finding vertex sequence...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700964
965 # This is based on "A Fast & Effective Heuristic for the Feedback
966 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
967 # it as starting with the digraph G and moving all the vertices to
968 # be on a horizontal line in some order, trying to minimize the
969 # number of edges that end up pointing to the left. Left-pointing
970 # edges will get removed to turn the digraph into a DAG. In this
971 # case each edge has a weight which is the number of source blocks
972 # we'll lose if that edge is removed; we try to minimize the total
973 # weight rather than just the number of edges.
974
975 # Make a copy of the edge set; this copy will get destroyed by the
976 # algorithm.
977 for xf in self.transfers:
978 xf.incoming = xf.goes_after.copy()
979 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -0800980 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -0700981
982 # We use an OrderedDict instead of just a set so that the output
983 # is repeatable; otherwise it would depend on the hash values of
984 # the transfer objects.
985 G = OrderedDict()
986 for xf in self.transfers:
987 G[xf] = None
988 s1 = deque() # the left side of the sequence, built from left to right
989 s2 = deque() # the right side of the sequence, built from right to left
990
Doug Zongker6ab2a502016-02-09 08:28:09 -0800991 heap = []
992 for xf in self.transfers:
993 xf.heap_item = HeapItem(xf)
994 heap.append(xf.heap_item)
995 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700996
Tao Bao33482282016-10-24 16:49:08 -0700997 # Use OrderedDict() instead of set() to preserve the insertion order. Need
998 # to use 'sinks[key] = None' to add key into the set. sinks will look like
999 # { key1: None, key2: None, ... }.
1000 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1001 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001002
1003 def adjust_score(iu, delta):
1004 iu.score += delta
1005 iu.heap_item.clear()
1006 iu.heap_item = HeapItem(iu)
1007 heapq.heappush(heap, iu.heap_item)
1008
1009 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001010 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001011 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001012 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001013 for u in sinks:
Tao Bao508b0872018-02-09 13:44:43 -08001014 if u not in G:
1015 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001016 s2.appendleft(u)
1017 del G[u]
1018 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001019 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001020 if not iu.outgoing:
1021 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001022 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001023
1024 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001025 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001026 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001027 for u in sources:
Tao Bao508b0872018-02-09 13:44:43 -08001028 if u not in G:
1029 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001030 s1.append(u)
1031 del G[u]
1032 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001033 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001034 if not iu.incoming:
1035 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001036 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001037
Tao Bao508b0872018-02-09 13:44:43 -08001038 if not G:
1039 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001040
1041 # Find the "best" vertex to put next. "Best" is the one that
1042 # maximizes the net difference in source blocks saved we get by
1043 # pretending it's a source rather than a sink.
1044
Doug Zongker6ab2a502016-02-09 08:28:09 -08001045 while True:
1046 u = heapq.heappop(heap)
1047 if u and u.item in G:
1048 u = u.item
1049 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001050
Doug Zongkerfc44a512014-08-26 13:10:25 -07001051 s1.append(u)
1052 del G[u]
1053 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001054 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001055 if not iu.incoming:
1056 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001057
Doug Zongkerfc44a512014-08-26 13:10:25 -07001058 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001059 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001060 if not iu.outgoing:
1061 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001062
1063 # Now record the sequence in the 'order' field of each transfer,
1064 # and by rearranging self.transfers to be in the chosen sequence.
1065
1066 new_transfers = []
1067 for x in itertools.chain(s1, s2):
1068 x.order = len(new_transfers)
1069 new_transfers.append(x)
1070 del x.incoming
1071 del x.outgoing
1072
1073 self.transfers = new_transfers
1074
1075 def GenerateDigraph(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001076 logger.info("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001077
1078 # Each item of source_ranges will be:
1079 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001080 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001081 source_ranges = []
1082 for b in self.transfers:
1083 for s, e in b.src_ranges:
1084 if e > len(source_ranges):
1085 source_ranges.extend([None] * (e-len(source_ranges)))
1086 for i in range(s, e):
1087 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001088 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001089 else:
Tao Bao33482282016-10-24 16:49:08 -07001090 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001091
Doug Zongkerfc44a512014-08-26 13:10:25 -07001092 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001093 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001094 for s, e in a.tgt_ranges:
1095 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001096 if i >= len(source_ranges):
1097 break
Tao Bao33482282016-10-24 16:49:08 -07001098 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1099 if source_ranges[i] is not None:
1100 for j in source_ranges[i]:
1101 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001102
1103 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001104 if a is b:
1105 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001106
1107 # If the blocks written by A are read by B, then B needs to go before A.
1108 i = a.tgt_ranges.intersect(b.src_ranges)
1109 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001110 if b.src_name == "__ZERO":
1111 # the cost of removing source blocks for the __ZERO domain
1112 # is (nearly) zero.
1113 size = 0
1114 else:
1115 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001116 b.goes_before[a] = size
1117 a.goes_after[b] = size
1118
xunchang21531832018-12-06 14:20:05 -08001119 def ComputePatchesForInputList(self, diff_queue, compress_target):
1120 """Returns a list of patch information for the input list of transfers.
1121
1122 Args:
1123 diff_queue: a list of transfers with style 'diff'
1124 compress_target: If True, compresses the target ranges of each
1125 transfers; and save the size.
1126
1127 Returns:
1128 A list of (transfer order, patch_info, compressed_size) tuples.
1129 """
1130
1131 if not diff_queue:
1132 return []
1133
1134 if self.threads > 1:
1135 logger.info("Computing patches (using %d threads)...", self.threads)
1136 else:
1137 logger.info("Computing patches...")
1138
1139 diff_total = len(diff_queue)
1140 patches = [None] * diff_total
1141 error_messages = []
1142
1143 # Using multiprocessing doesn't give additional benefits, due to the
1144 # pattern of the code. The diffing work is done by subprocess.call, which
1145 # already runs in a separate process (not affected much by the GIL -
1146 # Global Interpreter Lock). Using multiprocess also requires either a)
1147 # writing the diff input files in the main process before forking, or b)
1148 # reopening the image file (SparseImage) in the worker processes. Doing
1149 # neither of them further improves the performance.
1150 lock = threading.Lock()
1151
1152 def diff_worker():
1153 while True:
1154 with lock:
1155 if not diff_queue:
1156 return
1157 xf_index, imgdiff, patch_index = diff_queue.pop()
1158 xf = self.transfers[xf_index]
1159
1160 message = []
1161 compressed_size = None
1162
1163 patch_info = xf.patch_info
1164 if not patch_info:
1165 src_file = common.MakeTempFile(prefix="src-")
1166 with open(src_file, "wb") as fd:
1167 self.src.WriteRangeDataToFd(xf.src_ranges, fd)
1168
1169 tgt_file = common.MakeTempFile(prefix="tgt-")
1170 with open(tgt_file, "wb") as fd:
1171 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)
1172
1173 try:
1174 patch_info = compute_patch(src_file, tgt_file, imgdiff)
1175 except ValueError as e:
1176 message.append(
1177 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
1178 "imgdiff" if imgdiff else "bsdiff",
1179 xf.tgt_name if xf.tgt_name == xf.src_name else
1180 xf.tgt_name + " (from " + xf.src_name + ")",
1181 xf.tgt_ranges, xf.src_ranges, e.message))
1182
1183 if compress_target:
1184 tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
1185 try:
1186 # Compresses with the default level
1187 compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
1188 compressed_data = (compress_obj.compress("".join(tgt_data))
1189 + compress_obj.flush())
1190 compressed_size = len(compressed_data)
1191 except zlib.error as e:
1192 message.append(
1193 "Failed to compress the data in target range {} for {}:\n"
1194 "{}".format(xf.tgt_ranges, xf.tgt_name, e.message))
1195
1196 if message:
1197 with lock:
1198 error_messages.extend(message)
1199
1200 with lock:
1201 patches[patch_index] = (xf_index, patch_info, compressed_size)
1202
1203 threads = [threading.Thread(target=diff_worker)
1204 for _ in range(self.threads)]
1205 for th in threads:
1206 th.start()
1207 while threads:
1208 threads.pop().join()
1209
1210 if error_messages:
1211 logger.error('ERROR:')
1212 logger.error('\n'.join(error_messages))
1213 logger.error('\n\n\n')
1214 sys.exit(1)
1215
1216 return patches
1217
xunchangb6105dc2018-12-06 16:39:46 -08001218 def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):
xunchang3df4d5e2018-12-06 15:03:45 -08001219 """Converts the diff transfers to reduce the max simultaneous stash.
1220
1221 Since the 'new' data is compressed with deflate, we can select the 'diff'
1222 transfers for conversion by comparing its patch size with the size of the
1223 compressed data. Ideally, we want to convert the transfers with a small
1224 size increase, but using a large number of stashed blocks.
1225 """
xunchangb6105dc2018-12-06 16:39:46 -08001226 TransferSizeScore = namedtuple("TransferSizeScore",
1227 "xf, used_stash_blocks, score")
xunchang3df4d5e2018-12-06 15:03:45 -08001228
1229 logger.info("Selecting diff commands to convert to new.")
1230 diff_queue = []
1231 for xf in self.transfers:
1232 if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
1233 use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
1234 xf.src_ranges)
1235 diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))
1236
1237 # Remove the 'move' transfers, and compute the patch & compressed size
1238 # for the remaining.
1239 result = self.ComputePatchesForInputList(diff_queue, True)
1240
xunchangb6105dc2018-12-06 16:39:46 -08001241 conversion_candidates = []
xunchang3df4d5e2018-12-06 15:03:45 -08001242 for xf_index, patch_info, compressed_size in result:
1243 xf = self.transfers[xf_index]
1244 if not xf.patch_info:
1245 xf.patch_info = patch_info
1246
1247 size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
1248 diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
xunchangb6105dc2018-12-06 16:39:46 -08001249 logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"
xunchang3df4d5e2018-12-06 15:03:45 -08001250 " compression_size: %d, ratio %.2f%%", xf.tgt_name,
1251 xf.tgt_ranges.size(), diff_style,
1252 len(xf.patch_info.content), compressed_size, size_ratio)
1253
xunchangb6105dc2018-12-06 16:39:46 -08001254 used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)
xunchang3df4d5e2018-12-06 15:03:45 -08001255 # Convert the transfer to new if the compressed size is smaller or equal.
1256 # We don't need to maintain the stash_before lists here because the
1257 # graph will be regenerated later.
1258 if len(xf.patch_info.content) >= compressed_size:
xunchangb6105dc2018-12-06 16:39:46 -08001259 # Add the transfer to the candidate list with negative score. And it
1260 # will be converted later.
1261 conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
1262 -1))
1263 elif used_stash_blocks > 0:
1264 # This heuristic represents the size increase in the final package to
1265 # remove per unit of stashed data.
1266 score = ((compressed_size - len(xf.patch_info.content)) * 100.0
1267 / used_stash_blocks)
1268 conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
1269 score))
1270 # Transfers with lower score (i.e. less expensive to convert) will be
1271 # converted first.
1272 conversion_candidates.sort(key=lambda x: x.score)
xunchang3df4d5e2018-12-06 15:03:45 -08001273
xunchangb6105dc2018-12-06 16:39:46 -08001274 # TODO(xunchang), improve the logic to find the transfers to convert, e.g.
1275 # convert the ones that contribute to the max stash, run ReviseStashSize
1276 # multiple times etc.
1277 removed_stashed_blocks = 0
1278 for xf, used_stash_blocks, _ in conversion_candidates:
1279 logger.info("Converting %s to new", xf.tgt_name)
1280 xf.ConvertToNew()
1281 removed_stashed_blocks += used_stash_blocks
1282 # Experiments show that we will get a smaller package size if we remove
1283 # slightly more stashed blocks than the violated stash blocks.
1284 if removed_stashed_blocks >= violated_stash_blocks:
1285 break
xunchang3df4d5e2018-12-06 15:03:45 -08001286
1287 logger.info("Removed %d stashed blocks", removed_stashed_blocks)
1288
Doug Zongkerfc44a512014-08-26 13:10:25 -07001289 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001290 """Parse the file_map to generate all the transfers."""
1291
Tianjie Xu41390d42017-11-22 11:35:18 -08001292 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1293 src_ranges, style, by_id):
1294 """Add one or multiple Transfer()s by splitting large files.
1295
1296 For BBOTA v3, we need to stash source blocks for resumable feature.
1297 However, with the growth of file size and the shrink of the cache
1298 partition source blocks are too large to be stashed. If a file occupies
1299 too many blocks, we split it into smaller pieces by getting multiple
1300 Transfer()s.
1301
1302 The downside is that after splitting, we may increase the package size
1303 since the split pieces don't align well. According to our experiments,
1304 1/8 of the cache size as the per-piece limit appears to be optimal.
1305 Compared to the fixed 1024-block limit, it reduces the overall package
1306 size by 30% for volantis, and 20% for angler and bullhead."""
1307
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001308 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001309 while (tgt_ranges.size() > max_blocks_per_transfer and
1310 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001311 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1312 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001313 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1314 src_first = src_ranges.first(max_blocks_per_transfer)
1315
Tao Bao183e56e2017-03-05 17:05:09 -08001316 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1317 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1318 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001319
1320 tgt_ranges = tgt_ranges.subtract(tgt_first)
1321 src_ranges = src_ranges.subtract(src_first)
1322 pieces += 1
1323
1324 # Handle remaining blocks.
1325 if tgt_ranges.size() or src_ranges.size():
1326 # Must be both non-empty.
1327 assert tgt_ranges.size() and src_ranges.size()
1328 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1329 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001330 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1331 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1332 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001333
Tianjie Xu41390d42017-11-22 11:35:18 -08001334 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1335 by_id):
1336 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001337
Tianjie Xu41390d42017-11-22 11:35:18 -08001338 This function will construct a list of zip archives, which will later be
1339 split by imgdiff to reduce the final patch size. For the other files,
1340 we will plainly split them based on a fixed chunk size with the potential
1341 patch size penalty.
1342 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001343
1344 assert style == "diff"
1345
1346 # Change nothing for small files.
1347 if (tgt_ranges.size() <= max_blocks_per_transfer and
1348 src_ranges.size() <= max_blocks_per_transfer):
1349 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1350 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1351 style, by_id)
1352 return
1353
Tao Baocb73aed2018-01-31 17:32:40 -08001354 # Split large APKs with imgdiff, if possible. We're intentionally checking
1355 # file types one more time (CanUseImgdiff() checks that as well), before
1356 # calling the costly RangeSha1()s.
1357 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1358 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001359 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001360 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1361 return
1362
Tianjie Xu41390d42017-11-22 11:35:18 -08001363 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1364 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001365
Tao Bao08c85832016-09-19 22:26:30 -07001366 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1367 split=False):
1368 """Wrapper function for adding a Transfer()."""
1369
1370 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1371 # otherwise add the Transfer() as is.
1372 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001373 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1374 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1375 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001376 return
1377
1378 # Handle .odex files specially to analyze the block-wise difference. If
1379 # most of the blocks are identical with only few changes (e.g. header),
1380 # we will patch the changed blocks only. This avoids stashing unchanged
1381 # blocks while patching. We limit the analysis to files without size
1382 # changes only. This is to avoid sacrificing the OTA generation cost too
1383 # much.
1384 if (tgt_name.split(".")[-1].lower() == 'odex' and
1385 tgt_ranges.size() == src_ranges.size()):
1386
1387 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1388 # few blocks remain identical, we lose the opportunity to use imgdiff
1389 # that may have better compression ratio than bsdiff.
1390 crop_threshold = 0.5
1391
1392 tgt_skipped = RangeSet()
1393 src_skipped = RangeSet()
1394 tgt_size = tgt_ranges.size()
1395 tgt_changed = 0
1396 for src_block, tgt_block in zip(src_ranges.next_item(),
1397 tgt_ranges.next_item()):
1398 src_rs = RangeSet(str(src_block))
1399 tgt_rs = RangeSet(str(tgt_block))
1400 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1401 tgt_skipped = tgt_skipped.union(tgt_rs)
1402 src_skipped = src_skipped.union(src_rs)
1403 else:
1404 tgt_changed += tgt_rs.size()
1405
1406 # Terminate early if no clear sign of benefits.
1407 if tgt_changed > tgt_size * crop_threshold:
1408 break
1409
1410 if tgt_changed < tgt_size * crop_threshold:
1411 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao32fcdab2018-10-12 10:30:39 -07001412 logger.info(
1413 '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
1414 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
Tianjie Xu41390d42017-11-22 11:35:18 -08001415 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001416 "%s-skipped" % (tgt_name,),
1417 "%s-skipped" % (src_name,),
1418 tgt_skipped, src_skipped, style, by_id)
1419
1420 # Intentionally change the file extension to avoid being imgdiff'd as
1421 # the files are no longer in their original format.
1422 tgt_name = "%s-cropped" % (tgt_name,)
1423 src_name = "%s-cropped" % (src_name,)
1424 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1425 src_ranges = src_ranges.subtract(src_skipped)
1426
1427 # Possibly having no changed blocks.
1428 if not tgt_ranges:
1429 return
1430
1431 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001432 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001433 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1434
Tianjie Xu25366072017-09-08 17:19:02 -07001435 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1436 split_info):
1437 """Parse the split_info and return a list of info tuples.
1438
1439 Args:
1440 patch_size: total size of the patch file.
1441 tgt_ranges: Ranges of the target file within the original image.
1442 src_ranges: Ranges of the source file within the original image.
1443 split_info format:
1444 imgdiff version#
1445 count of pieces
1446 <patch_size_1> <tgt_size_1> <src_ranges_1>
1447 ...
1448 <patch_size_n> <tgt_size_n> <src_ranges_n>
1449
1450 Returns:
1451 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1452 """
1453
1454 version = int(split_info[0])
1455 assert version == 2
1456 count = int(split_info[1])
1457 assert len(split_info) - 2 == count
1458
1459 split_info_list = []
1460 patch_start = 0
1461 tgt_remain = copy.deepcopy(tgt_ranges)
1462 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1463 for line in split_info[2:]:
1464 info = line.split()
1465 assert len(info) == 3
1466 patch_length = int(info[0])
1467
1468 split_tgt_size = int(info[1])
1469 assert split_tgt_size % 4096 == 0
Tao Baob8131202019-06-19 14:15:34 -07001470 assert split_tgt_size // 4096 <= tgt_remain.size()
1471 split_tgt_ranges = tgt_remain.first(split_tgt_size // 4096)
Tianjie Xu25366072017-09-08 17:19:02 -07001472 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1473
1474 # Find the split_src_ranges within the image file from its relative
1475 # position in file.
1476 split_src_indices = RangeSet.parse_raw(info[2])
1477 split_src_ranges = RangeSet()
1478 for r in split_src_indices:
1479 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1480 assert not split_src_ranges.overlaps(curr_range)
1481 split_src_ranges = split_src_ranges.union(curr_range)
1482
1483 split_info_list.append((patch_start, patch_length,
1484 split_tgt_ranges, split_src_ranges))
1485 patch_start += patch_length
1486
1487 # Check that the sizes of all the split pieces add up to the final file
1488 # size for patch and target.
1489 assert tgt_remain.size() == 0
1490 assert patch_start == patch_size
1491 return split_info_list
1492
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001493 def SplitLargeApks():
1494 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001495
1496 Example: Chrome.apk will be split into
1497 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1498 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1499 ...
1500
1501 After the split, the target pieces are continuous and block aligned; and
1502 the source pieces are mutually exclusive. During the split, we also
1503 generate and save the image patch between src-X & tgt-X. This patch will
1504 be valid because the block ranges of src-X & tgt-X will always stay the
1505 same afterwards; but there's a chance we don't use the patch if we
1506 convert the "diff" command into "new" or "move" later.
1507 """
1508
1509 while True:
1510 with transfer_lock:
1511 if not large_apks:
1512 return
1513 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1514
1515 src_file = common.MakeTempFile(prefix="src-")
1516 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001517 with open(src_file, "wb") as src_fd:
1518 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1519 with open(tgt_file, "wb") as tgt_fd:
1520 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001521
1522 patch_file = common.MakeTempFile(prefix="patch-")
1523 patch_info_file = common.MakeTempFile(prefix="split_info-")
1524 cmd = ["imgdiff", "-z",
1525 "--block-limit={}".format(max_blocks_per_transfer),
1526 "--split-info=" + patch_info_file,
1527 src_file, tgt_file, patch_file]
Tao Bao73dd4f42018-10-04 16:25:33 -07001528 proc = common.Run(cmd)
1529 imgdiff_output, _ = proc.communicate()
1530 assert proc.returncode == 0, \
Tao Bao4ccea852018-02-06 15:16:41 -08001531 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1532 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001533
1534 with open(patch_info_file) as patch_info:
1535 lines = patch_info.readlines()
1536
1537 patch_size_total = os.path.getsize(patch_file)
1538 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1539 tgt_ranges, src_ranges,
1540 lines)
1541 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001542 split_src_ranges) in enumerate(split_info_list):
Tao Baob8131202019-06-19 14:15:34 -07001543 with open(patch_file, 'rb') as f:
Tianjie Xu25366072017-09-08 17:19:02 -07001544 f.seek(patch_start)
1545 patch_content = f.read(patch_length)
1546
1547 split_src_name = "{}-{}".format(src_name, index)
1548 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001549 split_large_apks.append((split_tgt_name,
1550 split_src_name,
1551 split_tgt_ranges,
1552 split_src_ranges,
1553 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001554
Tao Bao32fcdab2018-10-12 10:30:39 -07001555 logger.info("Finding transfers...")
Tao Bao08c85832016-09-19 22:26:30 -07001556
Tianjie Xu25366072017-09-08 17:19:02 -07001557 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001558 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001559 cache_size = common.OPTIONS.cache_size
1560 split_threshold = 0.125
1561 max_blocks_per_transfer = int(cache_size * split_threshold /
1562 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001563 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001564 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001565 if tgt_fn == "__ZERO":
1566 # the special "__ZERO" domain is all the blocks not contained
1567 # in any file and that are filled with zeros. We have a
1568 # special transfer style for zero blocks.
1569 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001570 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1571 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001572 continue
1573
Tao Baoff777812015-05-12 11:42:31 -07001574 elif tgt_fn == "__COPY":
1575 # "__COPY" domain includes all the blocks not contained in any
1576 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001577 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001578 continue
1579
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001580 elif tgt_fn == "__HASHTREE":
1581 continue
1582
Doug Zongkerfc44a512014-08-26 13:10:25 -07001583 elif tgt_fn in self.src.file_map:
1584 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001585 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001586 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001587 continue
1588
1589 b = os.path.basename(tgt_fn)
1590 if b in self.src_basenames:
1591 # Look for an exact basename match in the source.
1592 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001593 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001594 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001595 continue
1596
1597 b = re.sub("[0-9]+", "#", b)
1598 if b in self.src_numpatterns:
1599 # Look for a 'number pattern' match (a basename match after
1600 # all runs of digits are replaced by "#"). (This is useful
1601 # for .so files that contain version numbers in the filename
1602 # that get bumped.)
1603 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001604 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001605 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001606 continue
1607
Tao Bao9a5caf22015-08-25 15:10:10 -07001608 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001609
Tianjie Xu25366072017-09-08 17:19:02 -07001610 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001611 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001612 for _ in range(self.threads)]
1613 for th in threads:
1614 th.start()
1615 while threads:
1616 threads.pop().join()
1617
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001618 # Sort the split transfers for large apks to generate a determinate package.
1619 split_large_apks.sort()
1620 for (tgt_name, src_name, tgt_ranges, src_ranges,
1621 patch) in split_large_apks:
1622 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1623 self.tgt.RangeSha1(tgt_ranges),
1624 self.src.RangeSha1(src_ranges),
1625 "diff", self.transfers)
xunchang21531832018-12-06 14:20:05 -08001626 transfer_split.patch_info = PatchInfo(True, patch)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001627
Doug Zongkerfc44a512014-08-26 13:10:25 -07001628 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001629 for k in self.src.file_map.keys():
1630 b = os.path.basename(k)
1631 self.src_basenames[b] = k
1632 b = re.sub("[0-9]+", "#", b)
1633 self.src_numpatterns[b] = k
1634
1635 @staticmethod
1636 def AssertPartition(total, seq):
1637 """Assert that all the RangeSets in 'seq' form a partition of the
1638 'total' RangeSet (ie, they are nonintersecting and their union
1639 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001640
Doug Zongkerfc44a512014-08-26 13:10:25 -07001641 so_far = RangeSet()
1642 for i in seq:
1643 assert not so_far.overlaps(i)
1644 so_far = so_far.union(i)
1645 assert so_far == total