blob: 8087fcde290d3aa628aa641047a8e2fa131ff384 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tianjie Xu25366072017-09-08 17:19:02 -070018import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
Tao Bao32fcdab2018-10-12 10:30:39 -070022import logging
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
xunchang21531832018-12-06 14:20:05 -080029import zlib
30from collections import deque, namedtuple, OrderedDict
Tao Bao508b0872018-02-09 13:44:43 -080031
32import common
Tianjie Xu41976c72019-07-03 13:57:01 -070033from images import EmptyImage
Dan Albert8b72aef2015-03-23 19:13:21 -070034from rangelib import RangeSet
35
# Public API of this module; everything else is an implementation detail.
__all__ = ["BlockImageDiff"]

logger = logging.getLogger(__name__)

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])
42
Dan Albert8b72aef2015-03-23 19:13:21 -070043
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Runs bsdiff (or imgdiff when requested) over the two given files.

  Args:
    srcfile: Path to the source file.
    tgtfile: Path to the target file.
    imgdiff: Whether to use 'imgdiff -z' instead of 'bsdiff'.

  Returns:
    A PatchInfo carrying the patch style flag and the raw patch bytes.

  Raises:
    ValueError: If the diff tool exits with a non-zero status; the tool's
        output is carried in the exception.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # The command line only carries temp filenames, so dumping it would add
  # noise without value; keep the runner quiet.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()
  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())
Doug Zongkerfc44a512014-08-26 13:10:25 -070061
Dan Albert8b72aef2015-03-23 19:13:21 -070062
class Transfer(object):
  """One command (new/zero/move/diff) in the generated transfer list."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # OrderedDict (rather than dict) keeps the output repeatable; otherwise
    # it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register this transfer in the shared id -> Transfer list.
    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    """The PatchInfo computed for this transfer, or None."""
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    # Only "diff" style transfers may carry patch data.
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
    """Returns blocks newly stashed minus blocks unstashed by this command."""
    gained = sum(sr.size() for _, sr in self.stash_before)
    released = sum(sr.size() for _, sr in self.use_stash)
    return gained - released

  def ConvertToNew(self):
    """Rewrites this transfer as a "new" command that reads no source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch_info = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
112
@functools.total_ordering
class HeapItem(object):
  """Wraps a scored item so that heapq (a min-heap) pops the max score."""

  def __init__(self, item):
    self.item = item
    # heapq implements a min-heap; store the negated score so the item with
    # the highest score sorts first.
    self.score = -item.score

  def clear(self):
    """Drops the wrapped item, marking this heap entry as dead."""
    self.item = None

  def __bool__(self):
    # An entry is truthy while it still wraps a live item.
    return self.item is not None

  # Python 2 consults __nonzero__ for truthiness; keep it as an alias.
  __nonzero__ = __bool__

  # functools.total_ordering derives the remaining comparison operators
  # from __eq__ plus __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
137
class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files that will be applied imgdiff while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats is only meaningful when imgdiff not being disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason constant to the set of filenames logged under it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS
    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info('%s\n', separator * len(header))

    print_header(' Imgdiff Stats Report ', '=')
    for reason in self.REASONS:
      if reason not in self.stats:
        continue
      filenames = self.stats[reason]
      print_header(' {} (count: {}) '.format(reason, len(filenames)), '-')
      logger.info(''.join([' {}\n'.format(name) for name in filenames]))
201
class BlockImageDiff(object):
  """Generates the diff of two block image objects.

  BlockImageDiff works on two image objects. An image object is anything that
  provides the following attributes:

     blocksize: the size in bytes of a block, currently must be 4096.

     total_blocks: the total size of the partition/image, in blocks.

     care_map: a RangeSet containing which blocks (in the range [0,
       total_blocks) we actually care about; i.e. which blocks contain data.

     file_map: a dict that partitions the blocks contained in care_map into
         smaller domains that are useful for doing diffs on. (Typically a
         domain is a file, and the key in file_map is the pathname.)

     clobbered_blocks: a RangeSet containing which blocks contain data but may
         be altered by the FS. They need to be excluded when verifying the
         partition integrity.

     ReadRangeSet(): a function that takes a RangeSet and returns the data
         contained in the image blocks of that RangeSet. The data is returned
         as a list or tuple of strings; concatenating the elements together
         should produce the requested data. Implementations are free to break
         up the data into list/tuple elements in any way that is convenient.

     RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
         all the data in the specified range.

     TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
         all the data in the image (ie, all the blocks in the care_map minus
         clobbered_blocks, or including the clobbered blocks if
         include_clobbered_blocks is True).

  When creating a BlockImageDiff, the src image may be None, in which case the
  list of transfers produced will never read from the original image.
  """
240
Tao Bao293fd132016-06-11 12:19:23 -0700241 def __init__(self, tgt, src=None, threads=None, version=4,
242 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700243 if threads is None:
244 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700245 if threads == 0:
246 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700247 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700248 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700249 self.transfers = []
250 self.src_basenames = {}
251 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800252 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700253 self.touched_src_ranges = RangeSet()
254 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700255 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800256 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700257
Tao Bao8fad03e2017-03-01 14:36:26 -0800258 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259
260 self.tgt = tgt
261 if src is None:
262 src = EmptyImage()
263 self.src = src
264
265 # The updater code that installs the patch always uses 4k blocks.
266 assert tgt.blocksize == 4096
267 assert src.blocksize == 4096
268
269 # The range sets in each filemap should comprise a partition of
270 # the care map.
271 self.AssertPartition(src.care_map, src.file_map.values())
272 self.AssertPartition(tgt.care_map, tgt.file_map.values())
273
  @property
  def max_stashed_size(self):
    """Returns the peak stash usage in bytes recorded by WriteTransfers()."""
    return self._max_stashed_size
277
Tao Baocb73aed2018-01-31 17:32:40 -0800278 @staticmethod
279 def FileTypeSupportedByImgdiff(filename):
280 """Returns whether the file type is supported by imgdiff."""
281 return filename.lower().endswith(('.apk', '.jar', '.zip'))
282
Tao Bao294651d2018-02-08 23:21:52 -0800283 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800284 """Checks whether we can apply imgdiff for the given RangeSets.
285
286 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
287 'imgdiff -z' if possible. Because it usually produces significantly smaller
288 patches than bsdiff.
289
290 This is permissible if all of the following conditions hold.
291 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
292 - The file type is supported by imgdiff;
293 - The source and target blocks are monotonic (i.e. the data is stored with
294 blocks in increasing order);
Tao Baoe709b092018-02-07 12:40:00 -0800295 - Both files don't contain shared blocks;
Tao Bao4ccea852018-02-06 15:16:41 -0800296 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800297 - We haven't removed any blocks from the source set.
298
299 If all these conditions are satisfied, concatenating all the blocks in the
300 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
301 in the last block). imgdiff is fine with extra zeros at the end of the file.
302
303 Args:
304 name: The filename to be diff'd.
305 tgt_ranges: The target RangeSet.
306 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800307 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800308
309 Returns:
310 A boolean result.
311 """
Tao Bao508b0872018-02-09 13:44:43 -0800312 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800313 return False
314
315 if not tgt_ranges.monotonic or not src_ranges.monotonic:
316 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
317 return False
318
Tao Baoe709b092018-02-07 12:40:00 -0800319 if (tgt_ranges.extra.get('uses_shared_blocks') or
320 src_ranges.extra.get('uses_shared_blocks')):
321 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
322 return False
323
Tao Bao4ccea852018-02-06 15:16:41 -0800324 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
325 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
326 return False
327
Tao Bao294651d2018-02-08 23:21:52 -0800328 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
329 else ImgdiffStats.USED_IMGDIFF)
330 self.imgdiff_stats.Log(name, reason)
331 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800332
Doug Zongkerfc44a512014-08-26 13:10:25 -0700333 def Compute(self, prefix):
334 # When looking for a source file to use as the diff input for a
335 # target file, we try:
336 # 1) an exact path match if available, otherwise
337 # 2) a exact basename match if available, otherwise
338 # 3) a basename match after all runs of digits are replaced by
339 # "#" if available, otherwise
340 # 4) we have no source for this target.
341 self.AbbreviateSourceNames()
342 self.FindTransfers()
343
xunchang21531832018-12-06 14:20:05 -0800344 self.FindSequenceForTransfers()
Doug Zongker62338182014-09-08 08:29:55 -0700345
Tao Bao82c47982015-08-17 09:45:13 -0700346 # Ensure the runtime stash size is under the limit.
Tao Bao8fad03e2017-03-01 14:36:26 -0800347 if common.OPTIONS.cache_size is not None:
xunchang3df4d5e2018-12-06 15:03:45 -0800348 stash_limit = (common.OPTIONS.cache_size *
349 common.OPTIONS.stash_threshold / self.tgt.blocksize)
350 # Ignore the stash limit and calculate the maximum simultaneously stashed
351 # blocks needed.
352 _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)
353
354 # We cannot stash more blocks than the stash limit simultaneously. As a
355 # result, some 'diff' commands will be converted to new; leading to an
356 # unintended large package. To mitigate this issue, we can carefully
357 # choose the transfers for conversion. The number '1024' can be further
358 # tweaked here to balance the package size and build time.
359 if max_stashed_blocks > stash_limit + 1024:
xunchangb6105dc2018-12-06 16:39:46 -0800360 self.SelectAndConvertDiffTransfersToNew(
361 max_stashed_blocks - stash_limit)
xunchang3df4d5e2018-12-06 15:03:45 -0800362 # Regenerate the sequence as the graph has changed.
363 self.FindSequenceForTransfers()
364
365 # Revise the stash size again to keep the size under limit.
Tao Bao82c47982015-08-17 09:45:13 -0700366 self.ReviseStashSize()
367
Doug Zongkerfc44a512014-08-26 13:10:25 -0700368 # Double-check our work.
369 self.AssertSequenceGood()
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800370 self.AssertSha1Good()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700371
372 self.ComputePatches(prefix)
373 self.WriteTransfers(prefix)
374
Tao Bao294651d2018-02-08 23:21:52 -0800375 # Report the imgdiff stats.
Tao Bao32fcdab2018-10-12 10:30:39 -0700376 if not self.disable_imgdiff:
Tao Bao294651d2018-02-08 23:21:52 -0800377 self.imgdiff_stats.Report()
378
  def WriteTransfers(self, prefix):
    """Writes the ordered transfers to "<prefix>.transfer.list".

    Walks self.transfers in order, emitting stash/free bookkeeping plus one
    command line per transfer, then prepends the header lines (format
    version, total blocks written, stash info) and the early "erase"
    command. As side effects, updates self.touched_src_ranges,
    self.touched_src_sha1 and self._max_stashed_size.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit the explicit stash commands this transfer requires. Identical
      # contents share one slot; only the first reference writes a "stash"
      # command and consumes stash space.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # Format of the source string, one of:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      # Split the source into blocks read directly from the image and blocks
      # pulled from stash slots. Decrement a slot's refcount as it is
      # consumed, queueing a "free" once it drops to zero.
      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # Command formats for version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op; emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      # Release fully-consumed stash slots right after the command that
      # used them.
      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Validation check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "w") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
          max_stashed_blocks, self._max_stashed_size, max_allowed,
          self._max_stashed_size * 100.0 / max_allowed)
    else:
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
          max_stashed_blocks, self._max_stashed_size)
Doug Zongker62338182014-09-08 08:29:55 -0700586
  def ReviseStashSize(self, ignore_stash_limit=False):
    """Revises the transfers to keep the stash size within the size limit.

    Iterates through the transfer list and calculates the stash size each
    transfer generates. Converts the affected transfers to new if we reach the
    stash limit.

    Args:
      ignore_stash_limit: Ignores the stash limit and calculates the max
          simultaneous stashed blocks instead. No change will be made to the
          transfer list with this flag.

    Returns:
      A tuple of (tgt blocks converted to new, max stashed blocks)
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses. Each raw id is defined by
      # exactly one transfer and consumed by exactly one transfer, so the
      # tuple ends up as (sr, def_cmd, use_cmd).
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    max_allowed_blocks = None
    if not ignore_stash_limit:
      # Compute the maximum blocks available for stash based on /cache size and
      # the threshold.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0
    max_stashed_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        # Stashes are deduplicated by content hash; identical content shares
        # one slot and only new content increases the block count.
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        # When max_allowed_blocks is None (ignore_stash_limit), this guard
        # never fires and no transfer is converted.
        if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d  %9s  %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after
          max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if (max_allowed_blocks and
              stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
            replaced_cmds.append(xf)
            logger.info("%10d  %9s  %s", xf.src_ranges.size(), "implicit", xf)
          else:
            # The whole source ranges will be stashed for implicit stashes.
            max_stashed_blocks = max(max_stashed_blocks,
                                     stashed_blocks + xf.src_ranges.size())

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        # Only release the blocks once the last user of this content is done.
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        "  Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size. Maximum blocks stashed simultaneously: %d",
        new_blocks, num_of_bytes, max_stashed_blocks)
    return new_blocks, max_stashed_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700703
  def ComputePatches(self, prefix):
    """Computes patches for the 'diff' transfers; writes the output data files.

    Streams the data of every 'new' transfer into '{prefix}.new.dat', and the
    concatenated patches for the 'diff' transfers into '{prefix}.patch.dat'.
    A 'diff' transfer whose source and target hashes already match is
    downgraded to a 'move' command (no patch needed); the remaining ones get
    a bsdiff or imgdiff patch, with (patch_start, patch_len) recorded on each
    transfer to locate its patch in '{prefix}.patch.dat'.

    Args:
      prefix: Output path prefix for the '.new.dat' and '.patch.dat' files.
    """
    logger.info("Reticulating splines...")
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #   ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch_info = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              # NOTE(review): the format string ends with "(from %s)" while
              # the name expression may also append "(from <src_name>)" when
              # the names differ, so the source can be printed twice —
              # confirm this is intended.
              logger.info(
                  "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size,
                  tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges))
          else:
            if xf.patch_info:
              # We have already generated the patch (e.g. during split of large
              # APKs or reduction of stash size)
              imgdiff = xf.patch_info.imgdiff
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    patches = self.ComputePatchesForInputList(diff_queue, False)

    # Concatenate the patches and record each transfer's offset/length into
    # the combined '.patch.dat' file.
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch_info, _ in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch_info.content)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch_info.content)

        tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
        logger.info(
            "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
            xf.patch_len * 100.0 / tgt_size, xf.style,
            xf.tgt_name if xf.tgt_name == xf.src_name else (
                xf.tgt_name + " (from " + xf.src_name + ")"),
            xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700778
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800779 def AssertSha1Good(self):
780 """Check the SHA-1 of the src & tgt blocks in the transfer list.
781
782 Double check the SHA-1 value to avoid the issue in b/71908713, where
783 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
784 environment. That specific problem has been fixed by protecting the
785 underlying generator function 'SparseImage._GetRangeData()' with lock.
786 """
787 for xf in self.transfers:
788 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
789 assert xf.tgt_sha1 == tgt_sha1
790 if xf.style == "diff":
791 src_sha1 = self.src.RangeSha1(xf.src_ranges)
792 assert xf.src_sha1 == src_sha1
793
Doug Zongkerfc44a512014-08-26 13:10:25 -0700794 def AssertSequenceGood(self):
795 # Simulate the sequences of transfers we will output, and check that:
796 # - we never read a block after writing it, and
797 # - we write every block we care about exactly once.
798
799 # Start with no blocks having been touched yet.
Tao Baob8131202019-06-19 14:15:34 -0700800 touched = array.array("B", b"\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700801
802 # Imagine processing the transfers in order.
803 for xf in self.transfers:
804 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700805
806 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800807 for _, sr in xf.use_stash:
808 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700809
Doug Zongker6ab2a502016-02-09 08:28:09 -0800810 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800811 # Source image could be larger. Don't check the blocks that are in the
812 # source image only. Since they are not in 'touched', and won't ever
813 # be touched.
814 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800815 assert touched[i] == 0
816
817 # Check that the output blocks for this transfer haven't yet
818 # been touched, and touch all the blocks written by this
819 # transfer.
820 for s, e in xf.tgt_ranges:
821 for i in range(s, e):
822 assert touched[i] == 0
823 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700824
825 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800826 for s, e in self.tgt.care_map:
827 for i in range(s, e):
828 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700829
xunchang21531832018-12-06 14:20:05 -0800830 def FindSequenceForTransfers(self):
831 """Finds a sequence for the given transfers.
832
833 The goal is to minimize the violation of order dependencies between these
834 transfers, so that fewer blocks are stashed when applying the update.
835 """
836
837 # Clear the existing dependency between transfers
838 for xf in self.transfers:
839 xf.goes_before = OrderedDict()
840 xf.goes_after = OrderedDict()
841
842 xf.stash_before = []
843 xf.use_stash = []
844
845 # Find the ordering dependencies among transfers (this is O(n^2)
846 # in the number of transfers).
847 self.GenerateDigraph()
848 # Find a sequence of transfers that satisfies as many ordering
849 # dependencies as possible (heuristically).
850 self.FindVertexSequence()
851 # Fix up the ordering dependencies that the sequence didn't
852 # satisfy.
853 self.ReverseBackwardEdges()
854 self.ImproveVertexSequence()
855
Doug Zongker62338182014-09-08 08:29:55 -0700856 def ImproveVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -0700857 logger.info("Improving vertex order...")
Doug Zongker62338182014-09-08 08:29:55 -0700858
859 # At this point our digraph is acyclic; we reversed any edges that
860 # were backwards in the heuristically-generated sequence. The
861 # previously-generated order is still acceptable, but we hope to
862 # find a better order that needs less memory for stashed data.
863 # Now we do a topological sort to generate a new vertex order,
864 # using a greedy algorithm to choose which vertex goes next
865 # whenever we have a choice.
866
867 # Make a copy of the edge set; this copy will get destroyed by the
868 # algorithm.
869 for xf in self.transfers:
870 xf.incoming = xf.goes_after.copy()
871 xf.outgoing = xf.goes_before.copy()
872
873 L = [] # the new vertex order
874
875 # S is the set of sources in the remaining graph; we always choose
876 # the one that leaves the least amount of stashed data after it's
877 # executed.
878 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
879 if not u.incoming]
880 heapq.heapify(S)
881
882 while S:
883 _, _, xf = heapq.heappop(S)
884 L.append(xf)
885 for u in xf.outgoing:
886 del u.incoming[xf]
887 if not u.incoming:
888 heapq.heappush(S, (u.NetStashChange(), u.order, u))
889
890 # if this fails then our graph had a cycle.
891 assert len(L) == len(self.transfers)
892
893 self.transfers = L
894 for i, xf in enumerate(L):
895 xf.order = i
896
  def ReverseBackwardEdges(self):
    """Reverse unsatisfying edges and compute pairs of stashed blocks.

    For each transfer, make sure it properly stashes the blocks it touches and
    will be used by later transfers. It uses pairs of (stash_raw_id, range) to
    record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
    identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
    it is possible to have multiple pairs with different 'stash_raw_id's. Each
    'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
    blocks will be written to the same stash slot in WriteTransfers().
    """

    logger.info("Reversing backward edges...")
    in_order = 0        # dependencies already satisfied by the current order
    out_of_order = 0    # dependencies violated (and reversed below)
    stash_raw_id = 0    # next unique id for a (stash_raw_id, range) pair
    stash_size = 0      # total source blocks stashed by this pass

    for xf in self.transfers:
      # Iterate over a copy: the loop body deletes entries from
      # xf.goes_before (and inserts into xf.goes_after).
      for u in xf.goes_before.copy():
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo. modify u to stash the blocks that it
          # writes that xf wants to read, and then require u to go
          # before xf.
          out_of_order += 1

          # The blocks that u overwrites but xf still needs to read.
          overlap = xf.src_ranges.intersect(u.tgt_ranges)
          assert overlap

          u.stash_before.append((stash_raw_id, overlap))
          xf.use_stash.append((stash_raw_id, overlap))
          stash_raw_id += 1
          stash_size += overlap.size()

          # reverse the edge direction; now xf must go after u
          del xf.goes_before[u]
          del u.goes_after[xf]
          xf.goes_after[u] = None    # value doesn't matter
          u.goes_before[xf] = None

    logger.info(
        "  %d/%d dependencies (%.2f%%) were violated; %d source blocks "
        "stashed.", out_of_order, in_order + out_of_order,
        (out_of_order * 100.0 / (in_order + out_of_order)) if (
            in_order + out_of_order) else 0.0,
        stash_size)
Doug Zongker62338182014-09-08 08:29:55 -0700947
  def FindVertexSequence(self):
    """Heuristically orders the transfers to minimize violated edge weight."""
    logger.info("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = (weight saved if treated as a source) - (weight lost if
      # treated as a sink); higher is better to place early.
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Heap entries are immutable once pushed; invalidate the old one and
      # push a fresh item carrying the updated score.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until a live entry is found: adjust_score() clears superseded
      # heap items (a cleared item presumably evaluates false — see
      # HeapItem), and popped vertices may already have left G.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1060
1061 def GenerateDigraph(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001062 logger.info("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001063
1064 # Each item of source_ranges will be:
1065 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001066 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001067 source_ranges = []
1068 for b in self.transfers:
1069 for s, e in b.src_ranges:
1070 if e > len(source_ranges):
1071 source_ranges.extend([None] * (e-len(source_ranges)))
1072 for i in range(s, e):
1073 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001074 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001075 else:
Tao Bao33482282016-10-24 16:49:08 -07001076 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001077
Doug Zongkerfc44a512014-08-26 13:10:25 -07001078 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001079 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001080 for s, e in a.tgt_ranges:
1081 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001082 if i >= len(source_ranges):
1083 break
Tao Bao33482282016-10-24 16:49:08 -07001084 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1085 if source_ranges[i] is not None:
1086 for j in source_ranges[i]:
1087 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001088
1089 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001090 if a is b:
1091 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001092
1093 # If the blocks written by A are read by B, then B needs to go before A.
1094 i = a.tgt_ranges.intersect(b.src_ranges)
1095 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001096 if b.src_name == "__ZERO":
1097 # the cost of removing source blocks for the __ZERO domain
1098 # is (nearly) zero.
1099 size = 0
1100 else:
1101 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001102 b.goes_before[a] = size
1103 a.goes_after[b] = size
1104
xunchang21531832018-12-06 14:20:05 -08001105 def ComputePatchesForInputList(self, diff_queue, compress_target):
1106 """Returns a list of patch information for the input list of transfers.
1107
1108 Args:
1109 diff_queue: a list of transfers with style 'diff'
1110 compress_target: If True, compresses the target ranges of each
1111 transfers; and save the size.
1112
1113 Returns:
1114 A list of (transfer order, patch_info, compressed_size) tuples.
1115 """
1116
1117 if not diff_queue:
1118 return []
1119
1120 if self.threads > 1:
1121 logger.info("Computing patches (using %d threads)...", self.threads)
1122 else:
1123 logger.info("Computing patches...")
1124
1125 diff_total = len(diff_queue)
1126 patches = [None] * diff_total
1127 error_messages = []
1128
1129 # Using multiprocessing doesn't give additional benefits, due to the
1130 # pattern of the code. The diffing work is done by subprocess.call, which
1131 # already runs in a separate process (not affected much by the GIL -
1132 # Global Interpreter Lock). Using multiprocess also requires either a)
1133 # writing the diff input files in the main process before forking, or b)
1134 # reopening the image file (SparseImage) in the worker processes. Doing
1135 # neither of them further improves the performance.
1136 lock = threading.Lock()
1137
1138 def diff_worker():
1139 while True:
1140 with lock:
1141 if not diff_queue:
1142 return
1143 xf_index, imgdiff, patch_index = diff_queue.pop()
1144 xf = self.transfers[xf_index]
1145
1146 message = []
1147 compressed_size = None
1148
1149 patch_info = xf.patch_info
1150 if not patch_info:
1151 src_file = common.MakeTempFile(prefix="src-")
1152 with open(src_file, "wb") as fd:
1153 self.src.WriteRangeDataToFd(xf.src_ranges, fd)
1154
1155 tgt_file = common.MakeTempFile(prefix="tgt-")
1156 with open(tgt_file, "wb") as fd:
1157 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)
1158
1159 try:
1160 patch_info = compute_patch(src_file, tgt_file, imgdiff)
1161 except ValueError as e:
1162 message.append(
1163 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
1164 "imgdiff" if imgdiff else "bsdiff",
1165 xf.tgt_name if xf.tgt_name == xf.src_name else
1166 xf.tgt_name + " (from " + xf.src_name + ")",
1167 xf.tgt_ranges, xf.src_ranges, e.message))
1168
1169 if compress_target:
1170 tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
1171 try:
1172 # Compresses with the default level
1173 compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
luoqiangwei13e3456b2022-11-11 00:05:56 +08001174 compressed_data = (compress_obj.compress(b"".join(tgt_data))
xunchang21531832018-12-06 14:20:05 -08001175 + compress_obj.flush())
1176 compressed_size = len(compressed_data)
1177 except zlib.error as e:
1178 message.append(
1179 "Failed to compress the data in target range {} for {}:\n"
1180 "{}".format(xf.tgt_ranges, xf.tgt_name, e.message))
1181
1182 if message:
1183 with lock:
1184 error_messages.extend(message)
1185
1186 with lock:
1187 patches[patch_index] = (xf_index, patch_info, compressed_size)
1188
1189 threads = [threading.Thread(target=diff_worker)
1190 for _ in range(self.threads)]
1191 for th in threads:
1192 th.start()
1193 while threads:
1194 threads.pop().join()
1195
1196 if error_messages:
1197 logger.error('ERROR:')
1198 logger.error('\n'.join(error_messages))
1199 logger.error('\n\n\n')
1200 sys.exit(1)
1201
1202 return patches
1203
  def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):
    """Converts the diff transfers to reduce the max simultaneous stash.

    Since the 'new' data is compressed with deflate, we can select the 'diff'
    transfers for conversion by comparing its patch size with the size of the
    compressed data. Ideally, we want to convert the transfers with a small
    size increase, but using a large number of stashed blocks.

    Args:
      violated_stash_blocks: The number of stashed blocks to eliminate;
          conversion stops once at least this many blocks' worth of stash
          use has been removed.
    """
    TransferSizeScore = namedtuple("TransferSizeScore",
                                   "xf, used_stash_blocks, score")

    logger.info("Selecting diff commands to convert to new.")
    diff_queue = []
    # Queue every real 'diff' transfer; ones whose hashes already match
    # would become 'move' commands and need no patch.
    for xf in self.transfers:
      if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
        use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
                                         xf.src_ranges)
        diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))

    # Remove the 'move' transfers, and compute the patch & compressed size
    # for the remaining.
    result = self.ComputePatchesForInputList(diff_queue, True)

    conversion_candidates = []
    for xf_index, patch_info, compressed_size in result:
      xf = self.transfers[xf_index]
      if not xf.patch_info:
        xf.patch_info = patch_info

      size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
      diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
      logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"
                  " compression_size: %d, ratio %.2f%%", xf.tgt_name,
                  xf.tgt_ranges.size(), diff_style,
                  len(xf.patch_info.content), compressed_size, size_ratio)

      used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)
      # Convert the transfer to new if the compressed size is smaller or equal.
      # We don't need to maintain the stash_before lists here because the
      # graph will be regenerated later.
      if len(xf.patch_info.content) >= compressed_size:
        # Add the transfer to the candidate list with negative score. And it
        # will be converted later.
        conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
                                                       -1))
      elif used_stash_blocks > 0:
        # This heuristic represents the size increase in the final package to
        # remove per unit of stashed data.
        score = ((compressed_size - len(xf.patch_info.content)) * 100.0
                 / used_stash_blocks)
        conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
                                                       score))
    # Transfers with lower score (i.e. less expensive to convert) will be
    # converted first.
    conversion_candidates.sort(key=lambda x: x.score)

    # TODO(xunchang), improve the logic to find the transfers to convert, e.g.
    # convert the ones that contribute to the max stash, run ReviseStashSize
    # multiple times etc.
    removed_stashed_blocks = 0
    for xf, used_stash_blocks, _ in conversion_candidates:
      logger.info("Converting %s to new", xf.tgt_name)
      xf.ConvertToNew()
      removed_stashed_blocks += used_stash_blocks
      # Experiments show that we will get a smaller package size if we remove
      # slightly more stashed blocks than the violated stash blocks.
      if removed_stashed_blocks >= violated_stash_blocks:
        break

    logger.info("Removed %d stashed blocks", removed_stashed_blocks)
1274
Doug Zongkerfc44a512014-08-26 13:10:25 -07001275 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001276 """Parse the file_map to generate all the transfers."""
1277
Tianjie Xu41390d42017-11-22 11:35:18 -08001278 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1279 src_ranges, style, by_id):
1280 """Add one or multiple Transfer()s by splitting large files.
1281
1282 For BBOTA v3, we need to stash source blocks for resumable feature.
1283 However, with the growth of file size and the shrink of the cache
1284 partition source blocks are too large to be stashed. If a file occupies
1285 too many blocks, we split it into smaller pieces by getting multiple
1286 Transfer()s.
1287
1288 The downside is that after splitting, we may increase the package size
1289 since the split pieces don't align well. According to our experiments,
1290 1/8 of the cache size as the per-piece limit appears to be optimal.
1291 Compared to the fixed 1024-block limit, it reduces the overall package
1292 size by 30% for volantis, and 20% for angler and bullhead."""
1293
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001294 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001295 while (tgt_ranges.size() > max_blocks_per_transfer and
1296 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001297 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1298 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001299 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1300 src_first = src_ranges.first(max_blocks_per_transfer)
1301
Tao Bao183e56e2017-03-05 17:05:09 -08001302 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1303 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1304 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001305
1306 tgt_ranges = tgt_ranges.subtract(tgt_first)
1307 src_ranges = src_ranges.subtract(src_first)
1308 pieces += 1
1309
1310 # Handle remaining blocks.
1311 if tgt_ranges.size() or src_ranges.size():
1312 # Must be both non-empty.
1313 assert tgt_ranges.size() and src_ranges.size()
1314 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1315 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001316 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1317 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1318 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001319
Tianjie Xu41390d42017-11-22 11:35:18 -08001320 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1321 by_id):
1322 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001323
Tianjie Xu41390d42017-11-22 11:35:18 -08001324 This function will construct a list of zip archives, which will later be
1325 split by imgdiff to reduce the final patch size. For the other files,
1326 we will plainly split them based on a fixed chunk size with the potential
1327 patch size penalty.
1328 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001329
1330 assert style == "diff"
1331
1332 # Change nothing for small files.
1333 if (tgt_ranges.size() <= max_blocks_per_transfer and
1334 src_ranges.size() <= max_blocks_per_transfer):
1335 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1336 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1337 style, by_id)
1338 return
1339
Tao Baocb73aed2018-01-31 17:32:40 -08001340 # Split large APKs with imgdiff, if possible. We're intentionally checking
1341 # file types one more time (CanUseImgdiff() checks that as well), before
1342 # calling the costly RangeSha1()s.
1343 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1344 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001345 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001346 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1347 return
1348
Tianjie Xu41390d42017-11-22 11:35:18 -08001349 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1350 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001351
Tao Bao08c85832016-09-19 22:26:30 -07001352 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1353 split=False):
1354 """Wrapper function for adding a Transfer()."""
1355
1356 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1357 # otherwise add the Transfer() as is.
1358 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001359 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1360 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1361 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001362 return
1363
1364 # Handle .odex files specially to analyze the block-wise difference. If
1365 # most of the blocks are identical with only few changes (e.g. header),
1366 # we will patch the changed blocks only. This avoids stashing unchanged
1367 # blocks while patching. We limit the analysis to files without size
1368 # changes only. This is to avoid sacrificing the OTA generation cost too
1369 # much.
1370 if (tgt_name.split(".")[-1].lower() == 'odex' and
1371 tgt_ranges.size() == src_ranges.size()):
1372
1373 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1374 # few blocks remain identical, we lose the opportunity to use imgdiff
1375 # that may have better compression ratio than bsdiff.
1376 crop_threshold = 0.5
1377
1378 tgt_skipped = RangeSet()
1379 src_skipped = RangeSet()
1380 tgt_size = tgt_ranges.size()
1381 tgt_changed = 0
1382 for src_block, tgt_block in zip(src_ranges.next_item(),
1383 tgt_ranges.next_item()):
1384 src_rs = RangeSet(str(src_block))
1385 tgt_rs = RangeSet(str(tgt_block))
1386 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1387 tgt_skipped = tgt_skipped.union(tgt_rs)
1388 src_skipped = src_skipped.union(src_rs)
1389 else:
1390 tgt_changed += tgt_rs.size()
1391
1392 # Terminate early if no clear sign of benefits.
1393 if tgt_changed > tgt_size * crop_threshold:
1394 break
1395
1396 if tgt_changed < tgt_size * crop_threshold:
1397 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao32fcdab2018-10-12 10:30:39 -07001398 logger.info(
1399 '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
1400 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
Tianjie Xu41390d42017-11-22 11:35:18 -08001401 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001402 "%s-skipped" % (tgt_name,),
1403 "%s-skipped" % (src_name,),
1404 tgt_skipped, src_skipped, style, by_id)
1405
1406 # Intentionally change the file extension to avoid being imgdiff'd as
1407 # the files are no longer in their original format.
1408 tgt_name = "%s-cropped" % (tgt_name,)
1409 src_name = "%s-cropped" % (src_name,)
1410 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1411 src_ranges = src_ranges.subtract(src_skipped)
1412
1413 # Possibly having no changed blocks.
1414 if not tgt_ranges:
1415 return
1416
1417 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001418 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001419 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1420
Tianjie Xu25366072017-09-08 17:19:02 -07001421 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1422 split_info):
1423 """Parse the split_info and return a list of info tuples.
1424
1425 Args:
1426 patch_size: total size of the patch file.
1427 tgt_ranges: Ranges of the target file within the original image.
1428 src_ranges: Ranges of the source file within the original image.
1429 split_info format:
1430 imgdiff version#
1431 count of pieces
1432 <patch_size_1> <tgt_size_1> <src_ranges_1>
1433 ...
1434 <patch_size_n> <tgt_size_n> <src_ranges_n>
1435
1436 Returns:
1437 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1438 """
1439
1440 version = int(split_info[0])
1441 assert version == 2
1442 count = int(split_info[1])
1443 assert len(split_info) - 2 == count
1444
1445 split_info_list = []
1446 patch_start = 0
1447 tgt_remain = copy.deepcopy(tgt_ranges)
1448 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1449 for line in split_info[2:]:
1450 info = line.split()
1451 assert len(info) == 3
1452 patch_length = int(info[0])
1453
1454 split_tgt_size = int(info[1])
1455 assert split_tgt_size % 4096 == 0
Tao Baob8131202019-06-19 14:15:34 -07001456 assert split_tgt_size // 4096 <= tgt_remain.size()
1457 split_tgt_ranges = tgt_remain.first(split_tgt_size // 4096)
Tianjie Xu25366072017-09-08 17:19:02 -07001458 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1459
1460 # Find the split_src_ranges within the image file from its relative
1461 # position in file.
1462 split_src_indices = RangeSet.parse_raw(info[2])
1463 split_src_ranges = RangeSet()
1464 for r in split_src_indices:
1465 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1466 assert not split_src_ranges.overlaps(curr_range)
1467 split_src_ranges = split_src_ranges.union(curr_range)
1468
1469 split_info_list.append((patch_start, patch_length,
1470 split_tgt_ranges, split_src_ranges))
1471 patch_start += patch_length
1472
1473 # Check that the sizes of all the split pieces add up to the final file
1474 # size for patch and target.
1475 assert tgt_remain.size() == 0
1476 assert patch_start == patch_size
1477 return split_info_list
1478
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001479 def SplitLargeApks():
1480 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001481
1482 Example: Chrome.apk will be split into
1483 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1484 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1485 ...
1486
1487 After the split, the target pieces are continuous and block aligned; and
1488 the source pieces are mutually exclusive. During the split, we also
1489 generate and save the image patch between src-X & tgt-X. This patch will
1490 be valid because the block ranges of src-X & tgt-X will always stay the
1491 same afterwards; but there's a chance we don't use the patch if we
1492 convert the "diff" command into "new" or "move" later.
1493 """
1494
1495 while True:
1496 with transfer_lock:
1497 if not large_apks:
1498 return
1499 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1500
1501 src_file = common.MakeTempFile(prefix="src-")
1502 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001503 with open(src_file, "wb") as src_fd:
1504 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1505 with open(tgt_file, "wb") as tgt_fd:
1506 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001507
1508 patch_file = common.MakeTempFile(prefix="patch-")
1509 patch_info_file = common.MakeTempFile(prefix="split_info-")
1510 cmd = ["imgdiff", "-z",
1511 "--block-limit={}".format(max_blocks_per_transfer),
1512 "--split-info=" + patch_info_file,
1513 src_file, tgt_file, patch_file]
Tao Bao73dd4f42018-10-04 16:25:33 -07001514 proc = common.Run(cmd)
1515 imgdiff_output, _ = proc.communicate()
1516 assert proc.returncode == 0, \
Tao Bao4ccea852018-02-06 15:16:41 -08001517 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1518 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001519
1520 with open(patch_info_file) as patch_info:
1521 lines = patch_info.readlines()
1522
1523 patch_size_total = os.path.getsize(patch_file)
1524 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1525 tgt_ranges, src_ranges,
1526 lines)
1527 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001528 split_src_ranges) in enumerate(split_info_list):
Tao Baob8131202019-06-19 14:15:34 -07001529 with open(patch_file, 'rb') as f:
Tianjie Xu25366072017-09-08 17:19:02 -07001530 f.seek(patch_start)
1531 patch_content = f.read(patch_length)
1532
1533 split_src_name = "{}-{}".format(src_name, index)
1534 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001535 split_large_apks.append((split_tgt_name,
1536 split_src_name,
1537 split_tgt_ranges,
1538 split_src_ranges,
1539 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001540
Tao Bao32fcdab2018-10-12 10:30:39 -07001541 logger.info("Finding transfers...")
Tao Bao08c85832016-09-19 22:26:30 -07001542
Tianjie Xu25366072017-09-08 17:19:02 -07001543 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001544 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001545 cache_size = common.OPTIONS.cache_size
1546 split_threshold = 0.125
Yifan Hong65afc072020-04-17 10:08:10 -07001547 assert cache_size is not None
Tianjie Xu25366072017-09-08 17:19:02 -07001548 max_blocks_per_transfer = int(cache_size * split_threshold /
1549 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001550 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001551 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001552 if tgt_fn == "__ZERO":
1553 # the special "__ZERO" domain is all the blocks not contained
1554 # in any file and that are filled with zeros. We have a
1555 # special transfer style for zero blocks.
1556 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001557 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1558 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001559 continue
1560
Tao Baoff777812015-05-12 11:42:31 -07001561 elif tgt_fn == "__COPY":
1562 # "__COPY" domain includes all the blocks not contained in any
1563 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001564 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001565 continue
1566
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001567 elif tgt_fn == "__HASHTREE":
1568 continue
1569
Doug Zongkerfc44a512014-08-26 13:10:25 -07001570 elif tgt_fn in self.src.file_map:
1571 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001572 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001573 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001574 continue
1575
1576 b = os.path.basename(tgt_fn)
1577 if b in self.src_basenames:
1578 # Look for an exact basename match in the source.
1579 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001580 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001581 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001582 continue
1583
1584 b = re.sub("[0-9]+", "#", b)
1585 if b in self.src_numpatterns:
1586 # Look for a 'number pattern' match (a basename match after
1587 # all runs of digits are replaced by "#"). (This is useful
1588 # for .so files that contain version numbers in the filename
1589 # that get bumped.)
1590 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001591 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001592 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001593 continue
1594
Tao Bao9a5caf22015-08-25 15:10:10 -07001595 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001596
Tianjie Xu25366072017-09-08 17:19:02 -07001597 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001598 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001599 for _ in range(self.threads)]
1600 for th in threads:
1601 th.start()
1602 while threads:
1603 threads.pop().join()
1604
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001605 # Sort the split transfers for large apks to generate a determinate package.
1606 split_large_apks.sort()
1607 for (tgt_name, src_name, tgt_ranges, src_ranges,
1608 patch) in split_large_apks:
1609 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1610 self.tgt.RangeSha1(tgt_ranges),
1611 self.src.RangeSha1(src_ranges),
1612 "diff", self.transfers)
xunchang21531832018-12-06 14:20:05 -08001613 transfer_split.patch_info = PatchInfo(True, patch)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001614
Doug Zongkerfc44a512014-08-26 13:10:25 -07001615 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001616 for k in self.src.file_map.keys():
1617 b = os.path.basename(k)
1618 self.src_basenames[b] = k
1619 b = re.sub("[0-9]+", "#", b)
1620 self.src_numpatterns[b] = k
1621
1622 @staticmethod
1623 def AssertPartition(total, seq):
1624 """Assert that all the RangeSets in 'seq' form a partition of the
1625 'total' RangeSet (ie, they are nonintersecting and their union
1626 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001627
Doug Zongkerfc44a512014-08-26 13:10:25 -07001628 so_far = RangeSet()
1629 for i in seq:
1630 assert not so_far.overlaps(i)
1631 so_far = so_far.union(i)
1632 assert so_far == total