blob: 2d20e23fd9188bd9d31523fa24b675bc8e2ab5a0 [file] [log] [blame]
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import array
import copy
import functools
import heapq
import itertools
import logging
import multiprocessing
import os
import os.path
import re
import sys
import threading
from collections import deque, OrderedDict
from hashlib import sha1

import common
from rangelib import RangeSet
34
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Tao Bao32fcdab2018-10-12 10:30:39 -070037logger = logging.getLogger(__name__)
38
Dan Albert8b72aef2015-03-23 19:13:21 -070039
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Returns the patch bytes that transform srcfile into tgtfile.

  Runs 'imgdiff -z' (zip-aware) when imgdiff is True, otherwise 'bsdiff',
  and reads back the generated patch file.

  Raises:
    ValueError: If the diff tool exits with a non-zero status.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Suppress dumping the bsdiff/imgdiff command line; it only contains temp
  # filenames, which are not useful here.
  diff_process = common.Run(cmd, verbose=False)
  stdout, _ = diff_process.communicate()

  if diff_process.returncode != 0:
    raise ValueError(stdout)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
class Image(object):
  """Abstract interface for the image objects consumed by BlockImageDiff."""

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data within the given ranges."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns the data contained in the given ranges."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the whole image."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data within the given ranges to the file object fd."""
    raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    # An empty image has nothing to care about, clobber, or extend.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # Every range of an empty image hashes to the SHA-1 of zero bytes.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Initializes a DataImage.

    Args:
      data: The string holding the image contents.
      trim: Whether to drop a trailing partial block.
      pad: Whether to zero-fill a trailing partial block. At most one of
          trim and pad may be set.

    Raises:
      ValueError: If data is not a multiple of the block size and neither
          trim nor pad is specified.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int under Python 3 as
    # well; true division would produce a float and break RangeSet and
    # range() below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    # Classify each (unpadded) block as all-zero or not, building the
    # rangeset pair lists expected by RangeSet(data=...).
    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Generates the data for the given ranges, one blob per range."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data within the given ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    """Returns the data in the given ranges as a one-element list."""
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the image, optionally with clobbered blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data within the given ranges to the file object fd."""
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """A single transfer command in the generated transfer list."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # OrderedDict keeps the output repeatable; with a plain dict the
    # ordering would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register ourselves and take the next sequential id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    """The patch data attached to this transfer, if any."""
    return self._patch

  @patch.setter
  def patch(self, patch):
    if patch:
      # Only diff-style transfers may carry patch data.
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Returns the net number of blocks this transfer adds to the stash."""
    stashed = sum(sr.size() for _, sr in self.stash_before)
    freed = sum(sr.size() for _, sr in self.use_stash)
    return stashed - freed

  def ConvertToNew(self):
    """Rewrites this transfer as a plain 'new' command with no source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%d: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
232
233
@functools.total_ordering
class HeapItem(object):
  """Wraps an object exposing a 'score' attribute for use in a heapq heap."""

  def __init__(self, item):
    self.item = item
    # heapq implements a min-heap, so store the negated score to make pops
    # return the highest-scoring item first.
    self.score = -item.score

  def clear(self):
    """Drops the wrapped item, making this heap entry falsy."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 spells the truth-value hook __nonzero__.
  __nonzero__ = __bool__

  # functools.total_ordering derives the remaining comparisons from
  # __eq__ and __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
257
258
class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files that will be applied imgdiff while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats is only meaningful when imgdiff not being disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason constant to the set of filenames logged under it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for reason in self.REASONS:
      if reason not in self.stats:
        continue
      names = self.stats[reason]
      print_header(' {} (count: {}) '.format(reason, len(names)), '-')
      logger.info(''.join(' {}\n'.format(name) for name in names))
Tao Bao294651d2018-02-08 23:21:52 -0800321
322
Doug Zongkerfc44a512014-08-26 13:10:25 -0700323class BlockImageDiff(object):
Tao Bao76def242017-11-21 09:25:31 -0800324 """Generates the diff of two block image objects.
325
326 BlockImageDiff works on two image objects. An image object is anything that
327 provides the following attributes:
328
329 blocksize: the size in bytes of a block, currently must be 4096.
330
331 total_blocks: the total size of the partition/image, in blocks.
332
333 care_map: a RangeSet containing which blocks (in the range [0,
334 total_blocks) we actually care about; i.e. which blocks contain data.
335
336 file_map: a dict that partitions the blocks contained in care_map into
337 smaller domains that are useful for doing diffs on. (Typically a domain
338 is a file, and the key in file_map is the pathname.)
339
340 clobbered_blocks: a RangeSet containing which blocks contain data but may
341 be altered by the FS. They need to be excluded when verifying the
342 partition integrity.
343
344 ReadRangeSet(): a function that takes a RangeSet and returns the data
345 contained in the image blocks of that RangeSet. The data is returned as
346 a list or tuple of strings; concatenating the elements together should
347 produce the requested data. Implementations are free to break up the
348 data into list/tuple elements in any way that is convenient.
349
350 RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
351 all the data in the specified range.
352
353 TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
354 all the data in the image (ie, all the blocks in the care_map minus
355 clobbered_blocks, or including the clobbered blocks if
356 include_clobbered_blocks is True).
357
358 When creating a BlockImageDiff, the src image may be None, in which case the
359 list of transfers produced will never read from the original image.
360 """
361
Tao Bao293fd132016-06-11 12:19:23 -0700362 def __init__(self, tgt, src=None, threads=None, version=4,
363 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700364 if threads is None:
365 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700366 if threads == 0:
367 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700368 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700369 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700370 self.transfers = []
371 self.src_basenames = {}
372 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800373 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700374 self.touched_src_ranges = RangeSet()
375 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700376 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800377 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700378
Tao Bao8fad03e2017-03-01 14:36:26 -0800379 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700380
381 self.tgt = tgt
382 if src is None:
383 src = EmptyImage()
384 self.src = src
385
386 # The updater code that installs the patch always uses 4k blocks.
387 assert tgt.blocksize == 4096
388 assert src.blocksize == 4096
389
390 # The range sets in each filemap should comprise a partition of
391 # the care map.
392 self.AssertPartition(src.care_map, src.file_map.values())
393 self.AssertPartition(tgt.care_map, tgt.file_map.values())
394
  @property
  def max_stashed_size(self):
    # Read-only view of the peak stash usage in bytes; populated by
    # WriteTransfers() (max stashed blocks * target block size).
    return self._max_stashed_size
398
Tao Baocb73aed2018-01-31 17:32:40 -0800399 @staticmethod
400 def FileTypeSupportedByImgdiff(filename):
401 """Returns whether the file type is supported by imgdiff."""
402 return filename.lower().endswith(('.apk', '.jar', '.zip'))
403
Tao Bao294651d2018-02-08 23:21:52 -0800404 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800405 """Checks whether we can apply imgdiff for the given RangeSets.
406
407 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
408 'imgdiff -z' if possible. Because it usually produces significantly smaller
409 patches than bsdiff.
410
411 This is permissible if all of the following conditions hold.
412 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
413 - The file type is supported by imgdiff;
414 - The source and target blocks are monotonic (i.e. the data is stored with
415 blocks in increasing order);
Tao Baoe709b092018-02-07 12:40:00 -0800416 - Both files don't contain shared blocks;
Tao Bao4ccea852018-02-06 15:16:41 -0800417 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800418 - We haven't removed any blocks from the source set.
419
420 If all these conditions are satisfied, concatenating all the blocks in the
421 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
422 in the last block). imgdiff is fine with extra zeros at the end of the file.
423
424 Args:
425 name: The filename to be diff'd.
426 tgt_ranges: The target RangeSet.
427 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800428 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800429
430 Returns:
431 A boolean result.
432 """
Tao Bao508b0872018-02-09 13:44:43 -0800433 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800434 return False
435
436 if not tgt_ranges.monotonic or not src_ranges.monotonic:
437 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
438 return False
439
Tao Baoe709b092018-02-07 12:40:00 -0800440 if (tgt_ranges.extra.get('uses_shared_blocks') or
441 src_ranges.extra.get('uses_shared_blocks')):
442 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
443 return False
444
Tao Bao4ccea852018-02-06 15:16:41 -0800445 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
446 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
447 return False
448
Tao Bao294651d2018-02-08 23:21:52 -0800449 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
450 else ImgdiffStats.USED_IMGDIFF)
451 self.imgdiff_stats.Log(name, reason)
452 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800453
  def Compute(self, prefix):
    """Computes the block diff and writes the output files named by prefix.

    Main entry point: finds the transfers, orders them subject to the stash
    limit, verifies the result, then emits the patch data and the
    <prefix>.transfer.list file.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()
490
Doug Zongkerfc44a512014-08-26 13:10:25 -0700491 def WriteTransfers(self, prefix):
Tianjie Xu37e29302016-06-23 16:10:35 -0700492 def WriteSplitTransfers(out, style, target_blocks):
493 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
Tianjie Xubb848c52016-06-21 15:54:09 -0700494
495 This prevents the target size of one command from being too large; and
496 might help to avoid fsync errors on some devices."""
497
Tao Bao3a2e3502016-12-28 09:14:39 -0800498 assert style == "new" or style == "zero"
Tianjie Xu37e29302016-06-23 16:10:35 -0700499 blocks_limit = 1024
Tianjie Xubb848c52016-06-21 15:54:09 -0700500 total = 0
Tianjie Xu37e29302016-06-23 16:10:35 -0700501 while target_blocks:
502 blocks_to_write = target_blocks.first(blocks_limit)
503 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
504 total += blocks_to_write.size()
505 target_blocks = target_blocks.subtract(blocks_to_write)
Tianjie Xubb848c52016-06-21 15:54:09 -0700506 return total
507
Doug Zongkerfc44a512014-08-26 13:10:25 -0700508 out = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700509 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700510
Tao Bao3a2e3502016-12-28 09:14:39 -0800511 # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
512 # id. 'stashes' records the map from 'hash' to the ref count. The stash
513 # will be freed only if the count decrements to zero.
Doug Zongker62338182014-09-08 08:29:55 -0700514 stashes = {}
515 stashed_blocks = 0
516 max_stashed_blocks = 0
517
Doug Zongkerfc44a512014-08-26 13:10:25 -0700518 for xf in self.transfers:
519
Tao Bao8fad03e2017-03-01 14:36:26 -0800520 for _, sr in xf.stash_before:
521 sh = self.src.RangeSha1(sr)
522 if sh in stashes:
523 stashes[sh] += 1
Sami Tolvanendd67a292014-12-09 16:40:34 +0000524 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800525 stashes[sh] = 1
526 stashed_blocks += sr.size()
527 self.touched_src_ranges = self.touched_src_ranges.union(sr)
528 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700529
530 if stashed_blocks > max_stashed_blocks:
531 max_stashed_blocks = stashed_blocks
532
Jesse Zhao7b985f62015-03-02 16:53:08 -0800533 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800534 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800535
Tao Bao8fad03e2017-03-01 14:36:26 -0800536 # <# blocks> <src ranges>
537 # OR
538 # <# blocks> <src ranges> <src locs> <stash refs...>
539 # OR
540 # <# blocks> - <stash refs...>
Doug Zongker62338182014-09-08 08:29:55 -0700541
Tao Bao8fad03e2017-03-01 14:36:26 -0800542 size = xf.src_ranges.size()
Tao Bao508b0872018-02-09 13:44:43 -0800543 src_str_buffer = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700544
Tao Bao8fad03e2017-03-01 14:36:26 -0800545 unstashed_src_ranges = xf.src_ranges
546 mapped_stashes = []
547 for _, sr in xf.use_stash:
548 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
549 sh = self.src.RangeSha1(sr)
550 sr = xf.src_ranges.map_within(sr)
551 mapped_stashes.append(sr)
552 assert sh in stashes
Tao Bao508b0872018-02-09 13:44:43 -0800553 src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
Tao Bao8fad03e2017-03-01 14:36:26 -0800554 stashes[sh] -= 1
555 if stashes[sh] == 0:
556 free_string.append("free %s\n" % (sh,))
557 free_size += sr.size()
558 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700559
Tao Bao8fad03e2017-03-01 14:36:26 -0800560 if unstashed_src_ranges:
Tao Bao508b0872018-02-09 13:44:43 -0800561 src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800562 if xf.use_stash:
563 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Tao Bao508b0872018-02-09 13:44:43 -0800564 src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800565 mapped_stashes.append(mapped_unstashed)
Doug Zongker62338182014-09-08 08:29:55 -0700566 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Tao Bao8fad03e2017-03-01 14:36:26 -0800567 else:
Tao Bao508b0872018-02-09 13:44:43 -0800568 src_str_buffer.insert(1, "-")
Tao Bao8fad03e2017-03-01 14:36:26 -0800569 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Doug Zongker62338182014-09-08 08:29:55 -0700570
Tao Bao508b0872018-02-09 13:44:43 -0800571 src_str = " ".join(src_str_buffer)
Doug Zongker62338182014-09-08 08:29:55 -0700572
Tao Bao8fad03e2017-03-01 14:36:26 -0800573 # version 3+:
Doug Zongker62338182014-09-08 08:29:55 -0700574 # zero <rangeset>
575 # new <rangeset>
576 # erase <rangeset>
Dan Albert8b72aef2015-03-23 19:13:21 -0700577 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
578 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
579 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700580
581 tgt_size = xf.tgt_ranges.size()
582
583 if xf.style == "new":
584 assert xf.tgt_ranges
Tianjie Xu37e29302016-06-23 16:10:35 -0700585 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700586 total += tgt_size
587 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700588 assert xf.tgt_ranges
589 assert xf.src_ranges.size() == tgt_size
590 if xf.src_ranges != xf.tgt_ranges:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100591 # take into account automatic stashing of overlapping blocks
592 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700593 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100594 if temp_stash_usage > max_stashed_blocks:
595 max_stashed_blocks = temp_stash_usage
596
Tao Baod522bdc2016-04-12 15:53:16 -0700597 self.touched_src_ranges = self.touched_src_ranges.union(
598 xf.src_ranges)
599
Tao Bao8fad03e2017-03-01 14:36:26 -0800600 out.append("%s %s %s %s\n" % (
Sami Tolvanendd67a292014-12-09 16:40:34 +0000601 xf.style,
Tao Bao183e56e2017-03-05 17:05:09 -0800602 xf.tgt_sha1,
Dan Albert8b72aef2015-03-23 19:13:21 -0700603 xf.tgt_ranges.to_string_raw(), src_str))
Tao Bao8fad03e2017-03-01 14:36:26 -0800604 total += tgt_size
605 elif xf.style in ("bsdiff", "imgdiff"):
606 assert xf.tgt_ranges
607 assert xf.src_ranges
608 # take into account automatic stashing of overlapping blocks
609 if xf.src_ranges.overlaps(xf.tgt_ranges):
610 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
611 if temp_stash_usage > max_stashed_blocks:
612 max_stashed_blocks = temp_stash_usage
613
614 self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
615
616 out.append("%s %d %d %s %s %s %s\n" % (
617 xf.style,
618 xf.patch_start, xf.patch_len,
619 xf.src_sha1,
620 xf.tgt_sha1,
621 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700622 total += tgt_size
623 elif xf.style == "zero":
624 assert xf.tgt_ranges
625 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
Tianjie Xu37e29302016-06-23 16:10:35 -0700626 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
Tianjie Xubb848c52016-06-21 15:54:09 -0700627 total += to_zero.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700628 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700629 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700630
Sami Tolvanendd67a292014-12-09 16:40:34 +0000631 if free_string:
632 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800633 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000634
Tao Bao8fad03e2017-03-01 14:36:26 -0800635 if common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700636 # Sanity check: abort if we're going to need more stash space than
637 # the allowed size (cache_size * threshold). There are two purposes
638 # of having a threshold here. a) Part of the cache may have been
639 # occupied by some recovery logs. b) It will buy us some time to deal
640 # with the oversize issue.
641 cache_size = common.OPTIONS.cache_size
642 stash_threshold = common.OPTIONS.stash_threshold
643 max_allowed = cache_size * stash_threshold
Tao Baoe8c68a02017-02-26 10:48:11 -0800644 assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
Tao Bao8dcf7382015-05-21 14:09:49 -0700645 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
646 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
647 self.tgt.blocksize, max_allowed, cache_size,
648 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700649
Tao Bao8fad03e2017-03-01 14:36:26 -0800650 self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
Tao Baod522bdc2016-04-12 15:53:16 -0700651
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700652 if self.tgt.hashtree_info:
653 out.append("compute_hash_tree {} {} {} {} {}\n".format(
654 self.tgt.hashtree_info.hashtree_range.to_string_raw(),
655 self.tgt.hashtree_info.filesystem_range.to_string_raw(),
656 self.tgt.hashtree_info.hash_algorithm,
657 self.tgt.hashtree_info.salt,
658 self.tgt.hashtree_info.root_hash))
659
Tao Baoe9b61912015-07-09 17:37:49 -0700660 # Zero out extended blocks as a workaround for bug 20881595.
661 if self.tgt.extended:
Tianjie Xu37e29302016-06-23 16:10:35 -0700662 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
Tianjie Xubb848c52016-06-21 15:54:09 -0700663 self.tgt.extended.size())
Tao Baob32d56e2015-09-09 11:55:01 -0700664 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700665
666 # We erase all the blocks on the partition that a) don't contain useful
Tao Bao66f1fa62016-05-03 10:02:01 -0700667 # data in the new image; b) will not be touched by dm-verity. Out of those
668 # blocks, we erase the ones that won't be used in this update at the
669 # beginning of an update. The rest would be erased at the end. This is to
670 # work around the eMMC issue observed on some devices, which may otherwise
671 # get starving for clean blocks and thus fail the update. (b/28347095)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700672 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700673 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
674 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
Tao Bao66f1fa62016-05-03 10:02:01 -0700675
676 erase_first = new_dontcare.subtract(self.touched_src_ranges)
677 if erase_first:
678 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
679
680 erase_last = new_dontcare.subtract(erase_first)
681 if erase_last:
682 out.append("erase %s\n" % (erase_last.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700683
684 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700685 out.insert(1, "%d\n" % (total,))
Tao Bao8fad03e2017-03-01 14:36:26 -0800686 # v3+: the number of stash slots is unused.
687 out.insert(2, "0\n")
688 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700689
690 with open(prefix + ".transfer.list", "wb") as f:
691 for i in out:
692 f.write(i)
693
Tao Bao8fad03e2017-03-01 14:36:26 -0800694 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
695 OPTIONS = common.OPTIONS
696 if OPTIONS.cache_size is not None:
697 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
Tao Bao32fcdab2018-10-12 10:30:39 -0700698 logger.info(
699 "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
700 max_stashed_blocks, self._max_stashed_size, max_allowed,
701 self._max_stashed_size * 100.0 / max_allowed)
Tao Bao8fad03e2017-03-01 14:36:26 -0800702 else:
Tao Bao32fcdab2018-10-12 10:30:39 -0700703 logger.info(
704 "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
705 max_stashed_blocks, self._max_stashed_size)
Doug Zongker62338182014-09-08 08:29:55 -0700706
  def ReviseStashSize(self):
    """Revises the transfer list so stash usage fits within the cache limit.

    Walks self.transfers in execution order, simulating the number of blocks
    stashed at any point (both explicit stashes from stash_before and the
    implicit stashes of overlapping "diff" commands). Whenever a command would
    push the stash usage past cache_size * stash_threshold, the command that
    consumes the offending stash is converted to a "new" command instead,
    trading package size for cache space.

    Returns:
      The total number of target blocks that were converted to "new" due to
      insufficient cache space.
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          # Only a new (not yet stashed) SHA-1 consumes additional space;
          # identical blocks share one stash slot in BBOTA v3+.
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d  %9s  %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            logger.info("%10d  %9s  %s", xf.src_ranges.size(), "implicit", xf)

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last consumer of this stash slot; its blocks are freed.
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        "  Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size.", new_blocks, num_of_bytes)
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700799
Doug Zongkerfc44a512014-08-26 13:10:25 -0700800 def ComputePatches(self, prefix):
Tao Bao32fcdab2018-10-12 10:30:39 -0700801 logger.info("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800802 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700803 patch_num = 0
804 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800805 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700806 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700807 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao32fcdab2018-10-12 10:30:39 -0700808 logger.info(
809 "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
810 xf.style, xf.tgt_name, str(xf.tgt_ranges))
Tao Bao08c85832016-09-19 22:26:30 -0700811
Doug Zongkerfc44a512014-08-26 13:10:25 -0700812 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800813 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700814 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao32fcdab2018-10-12 10:30:39 -0700815 logger.info(
816 "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
817 xf.style, xf.tgt_name, str(xf.tgt_ranges))
Tao Bao08c85832016-09-19 22:26:30 -0700818
Doug Zongkerfc44a512014-08-26 13:10:25 -0700819 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700820 # We can't compare src and tgt directly because they may have
821 # the same content but be broken up into blocks differently, eg:
822 #
823 # ["he", "llo"] vs ["h", "ello"]
824 #
825 # We want those to compare equal, ideally without having to
826 # actually concatenate the strings (these may be tens of
827 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800828 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700829 # These are identical; we don't need to generate a patch,
830 # just issue copy commands on the device.
831 xf.style = "move"
Tianjie Xu25366072017-09-08 17:19:02 -0700832 xf.patch = None
Tao Bao183e56e2017-03-05 17:05:09 -0800833 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700834 if xf.src_ranges != xf.tgt_ranges:
Tao Bao32fcdab2018-10-12 10:30:39 -0700835 logger.info(
836 "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size, tgt_size,
837 100.0, xf.style,
Tao Bao08c85832016-09-19 22:26:30 -0700838 xf.tgt_name if xf.tgt_name == xf.src_name else (
839 xf.tgt_name + " (from " + xf.src_name + ")"),
Tao Bao32fcdab2018-10-12 10:30:39 -0700840 str(xf.tgt_ranges), str(xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700841 else:
Tianjie Xu25366072017-09-08 17:19:02 -0700842 if xf.patch:
Tao Bao5bab0dd2018-07-10 17:44:52 -0700843 # We have already generated the patch with imgdiff, while
844 # splitting large APKs (i.e. in FindTransfers()).
Tianjie Xu25366072017-09-08 17:19:02 -0700845 assert not self.disable_imgdiff
846 imgdiff = True
Tianjie Xu25366072017-09-08 17:19:02 -0700847 else:
Tao Baocb73aed2018-01-31 17:32:40 -0800848 imgdiff = self.CanUseImgdiff(
849 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700850 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800851 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700852 patch_num += 1
853
854 else:
855 assert False, "unknown style " + xf.style
856
Tao Bao183e56e2017-03-05 17:05:09 -0800857 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700858 if self.threads > 1:
Tao Bao32fcdab2018-10-12 10:30:39 -0700859 logger.info("Computing patches (using %d threads)...", self.threads)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700860 else:
Tao Bao32fcdab2018-10-12 10:30:39 -0700861 logger.info("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700862
Tao Bao183e56e2017-03-05 17:05:09 -0800863 diff_total = len(diff_queue)
864 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700865 error_messages = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700866
Tao Bao183e56e2017-03-05 17:05:09 -0800867 # Using multiprocessing doesn't give additional benefits, due to the
868 # pattern of the code. The diffing work is done by subprocess.call, which
869 # already runs in a separate process (not affected much by the GIL -
870 # Global Interpreter Lock). Using multiprocess also requires either a)
871 # writing the diff input files in the main process before forking, or b)
872 # reopening the image file (SparseImage) in the worker processes. Doing
873 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700874 lock = threading.Lock()
875 def diff_worker():
876 while True:
877 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800878 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700879 return
Tao Bao183e56e2017-03-05 17:05:09 -0800880 xf_index, imgdiff, patch_index = diff_queue.pop()
Tao Bao97395142018-02-12 12:08:05 -0800881 xf = self.transfers[xf_index]
Tao Bao183e56e2017-03-05 17:05:09 -0800882
Tianjie Xu25366072017-09-08 17:19:02 -0700883 patch = xf.patch
884 if not patch:
885 src_ranges = xf.src_ranges
886 tgt_ranges = xf.tgt_ranges
Tao Bao183e56e2017-03-05 17:05:09 -0800887
Tianjie Xudf1166e2018-01-27 17:35:41 -0800888 src_file = common.MakeTempFile(prefix="src-")
889 with open(src_file, "wb") as fd:
890 self.src.WriteRangeDataToFd(src_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700891
Tianjie Xudf1166e2018-01-27 17:35:41 -0800892 tgt_file = common.MakeTempFile(prefix="tgt-")
893 with open(tgt_file, "wb") as fd:
894 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700895
896 message = []
897 try:
898 patch = compute_patch(src_file, tgt_file, imgdiff)
899 except ValueError as e:
900 message.append(
901 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
Tao Bao508b0872018-02-09 13:44:43 -0800902 "imgdiff" if imgdiff else "bsdiff",
903 xf.tgt_name if xf.tgt_name == xf.src_name else
Tianjie Xu25366072017-09-08 17:19:02 -0700904 xf.tgt_name + " (from " + xf.src_name + ")",
Tao Bao508b0872018-02-09 13:44:43 -0800905 xf.tgt_ranges, xf.src_ranges, e.message))
Tianjie Xu25366072017-09-08 17:19:02 -0700906 if message:
907 with lock:
908 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800909
910 with lock:
911 patches[patch_index] = (xf_index, patch)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700912
913 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700914 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700915 for th in threads:
916 th.start()
917 while threads:
918 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800919
Tianjie Xub59c17f2016-10-28 17:55:53 -0700920 if error_messages:
Tao Bao32fcdab2018-10-12 10:30:39 -0700921 logger.error('ERROR:')
922 logger.error('\n'.join(error_messages))
923 logger.error('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700924 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700925 else:
926 patches = []
927
Tao Bao183e56e2017-03-05 17:05:09 -0800928 offset = 0
929 with open(prefix + ".patch.dat", "wb") as patch_fd:
930 for index, patch in patches:
931 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700932 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800933 xf.patch_start = offset
934 offset += xf.patch_len
935 patch_fd.write(patch)
936
Tao Bao32fcdab2018-10-12 10:30:39 -0700937 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
938 logger.info(
939 "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
940 xf.patch_len * 100.0 / tgt_size, xf.style,
941 xf.tgt_name if xf.tgt_name == xf.src_name else (
942 xf.tgt_name + " (from " + xf.src_name + ")"),
943 xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700944
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800945 def AssertSha1Good(self):
946 """Check the SHA-1 of the src & tgt blocks in the transfer list.
947
948 Double check the SHA-1 value to avoid the issue in b/71908713, where
949 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
950 environment. That specific problem has been fixed by protecting the
951 underlying generator function 'SparseImage._GetRangeData()' with lock.
952 """
953 for xf in self.transfers:
954 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
955 assert xf.tgt_sha1 == tgt_sha1
956 if xf.style == "diff":
957 src_sha1 = self.src.RangeSha1(xf.src_ranges)
958 assert xf.src_sha1 == src_sha1
959
Doug Zongkerfc44a512014-08-26 13:10:25 -0700960 def AssertSequenceGood(self):
961 # Simulate the sequences of transfers we will output, and check that:
962 # - we never read a block after writing it, and
963 # - we write every block we care about exactly once.
964
965 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800966 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700967
968 # Imagine processing the transfers in order.
969 for xf in self.transfers:
970 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700971
972 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800973 for _, sr in xf.use_stash:
974 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700975
Doug Zongker6ab2a502016-02-09 08:28:09 -0800976 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800977 # Source image could be larger. Don't check the blocks that are in the
978 # source image only. Since they are not in 'touched', and won't ever
979 # be touched.
980 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800981 assert touched[i] == 0
982
983 # Check that the output blocks for this transfer haven't yet
984 # been touched, and touch all the blocks written by this
985 # transfer.
986 for s, e in xf.tgt_ranges:
987 for i in range(s, e):
988 assert touched[i] == 0
989 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700990
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700991 if self.tgt.hashtree_info:
992 for s, e in self.tgt.hashtree_info.hashtree_range:
993 for i in range(s, e):
994 assert touched[i] == 0
995 touched[i] = 1
996
Doug Zongkerfc44a512014-08-26 13:10:25 -0700997 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800998 for s, e in self.tgt.care_map:
999 for i in range(s, e):
1000 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001001
Doug Zongker62338182014-09-08 08:29:55 -07001002 def ImproveVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001003 logger.info("Improving vertex order...")
Doug Zongker62338182014-09-08 08:29:55 -07001004
1005 # At this point our digraph is acyclic; we reversed any edges that
1006 # were backwards in the heuristically-generated sequence. The
1007 # previously-generated order is still acceptable, but we hope to
1008 # find a better order that needs less memory for stashed data.
1009 # Now we do a topological sort to generate a new vertex order,
1010 # using a greedy algorithm to choose which vertex goes next
1011 # whenever we have a choice.
1012
1013 # Make a copy of the edge set; this copy will get destroyed by the
1014 # algorithm.
1015 for xf in self.transfers:
1016 xf.incoming = xf.goes_after.copy()
1017 xf.outgoing = xf.goes_before.copy()
1018
1019 L = [] # the new vertex order
1020
1021 # S is the set of sources in the remaining graph; we always choose
1022 # the one that leaves the least amount of stashed data after it's
1023 # executed.
1024 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1025 if not u.incoming]
1026 heapq.heapify(S)
1027
1028 while S:
1029 _, _, xf = heapq.heappop(S)
1030 L.append(xf)
1031 for u in xf.outgoing:
1032 del u.incoming[xf]
1033 if not u.incoming:
1034 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1035
1036 # if this fails then our graph had a cycle.
1037 assert len(L) == len(self.transfers)
1038
1039 self.transfers = L
1040 for i, xf in enumerate(L):
1041 xf.order = i
1042
Doug Zongker62338182014-09-08 08:29:55 -07001043 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001044 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1045
1046 For each transfer, make sure it properly stashes the blocks it touches and
1047 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1048 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1049 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1050 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1051 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1052 blocks will be written to the same stash slot in WriteTransfers().
1053 """
1054
Tao Bao32fcdab2018-10-12 10:30:39 -07001055 logger.info("Reversing backward edges...")
Doug Zongker62338182014-09-08 08:29:55 -07001056 in_order = 0
1057 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001058 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001059 stash_size = 0
1060
1061 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001062 for u in xf.goes_before.copy():
1063 # xf should go before u
1064 if xf.order < u.order:
1065 # it does, hurray!
1066 in_order += 1
1067 else:
1068 # it doesn't, boo. modify u to stash the blocks that it
1069 # writes that xf wants to read, and then require u to go
1070 # before xf.
1071 out_of_order += 1
1072
1073 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1074 assert overlap
1075
Tao Bao3a2e3502016-12-28 09:14:39 -08001076 u.stash_before.append((stash_raw_id, overlap))
1077 xf.use_stash.append((stash_raw_id, overlap))
1078 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001079 stash_size += overlap.size()
1080
1081 # reverse the edge direction; now xf must go after u
1082 del xf.goes_before[u]
1083 del u.goes_after[xf]
1084 xf.goes_after[u] = None # value doesn't matter
1085 u.goes_before[xf] = None
1086
Tao Bao32fcdab2018-10-12 10:30:39 -07001087 logger.info(
1088 " %d/%d dependencies (%.2f%%) were violated; %d source blocks "
1089 "stashed.", out_of_order, in_order + out_of_order,
1090 (out_of_order * 100.0 / (in_order + out_of_order)) if (
1091 in_order + out_of_order) else 0.0,
1092 stash_size)
Doug Zongker62338182014-09-08 08:29:55 -07001093
  def FindVertexSequence(self):
    """Orders self.transfers to heuristically minimize lost source blocks.

    Produces a linear order of the (possibly cyclic) transfer digraph and
    records it in each transfer's 'order' field, rearranging self.transfers
    accordingly. Edges left pointing "backwards" by this order will be
    reversed later (losing their weight in source blocks), so the heuristic
    tries to minimize the total weight of such edges.
    """
    logger.info("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # Net benefit of treating xf as a source rather than a sink.
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Scores can't be updated in place in a heap, so invalidate the old
      # entry (HeapItem.clear() marks it stale) and push a fresh one; stale
      # entries are skipped when popped below.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until we find a live heap entry (cleared entries are falsy)
      # whose vertex is still in G.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1206
1207 def GenerateDigraph(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001208 logger.info("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001209
1210 # Each item of source_ranges will be:
1211 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001212 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001213 source_ranges = []
1214 for b in self.transfers:
1215 for s, e in b.src_ranges:
1216 if e > len(source_ranges):
1217 source_ranges.extend([None] * (e-len(source_ranges)))
1218 for i in range(s, e):
1219 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001220 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001221 else:
Tao Bao33482282016-10-24 16:49:08 -07001222 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001223
Doug Zongkerfc44a512014-08-26 13:10:25 -07001224 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001225 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001226 for s, e in a.tgt_ranges:
1227 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001228 if i >= len(source_ranges):
1229 break
Tao Bao33482282016-10-24 16:49:08 -07001230 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1231 if source_ranges[i] is not None:
1232 for j in source_ranges[i]:
1233 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001234
1235 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001236 if a is b:
1237 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001238
1239 # If the blocks written by A are read by B, then B needs to go before A.
1240 i = a.tgt_ranges.intersect(b.src_ranges)
1241 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001242 if b.src_name == "__ZERO":
1243 # the cost of removing source blocks for the __ZERO domain
1244 # is (nearly) zero.
1245 size = 0
1246 else:
1247 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001248 b.goes_before[a] = size
1249 a.goes_after[b] = size
1250
1251 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001252 """Parse the file_map to generate all the transfers."""
1253
Tianjie Xu41390d42017-11-22 11:35:18 -08001254 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1255 src_ranges, style, by_id):
1256 """Add one or multiple Transfer()s by splitting large files.
1257
1258 For BBOTA v3, we need to stash source blocks for resumable feature.
1259 However, with the growth of file size and the shrink of the cache
1260 partition source blocks are too large to be stashed. If a file occupies
1261 too many blocks, we split it into smaller pieces by getting multiple
1262 Transfer()s.
1263
1264 The downside is that after splitting, we may increase the package size
1265 since the split pieces don't align well. According to our experiments,
1266 1/8 of the cache size as the per-piece limit appears to be optimal.
1267 Compared to the fixed 1024-block limit, it reduces the overall package
1268 size by 30% for volantis, and 20% for angler and bullhead."""
1269
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001270 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001271 while (tgt_ranges.size() > max_blocks_per_transfer and
1272 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001273 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1274 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001275 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1276 src_first = src_ranges.first(max_blocks_per_transfer)
1277
Tao Bao183e56e2017-03-05 17:05:09 -08001278 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1279 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1280 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001281
1282 tgt_ranges = tgt_ranges.subtract(tgt_first)
1283 src_ranges = src_ranges.subtract(src_first)
1284 pieces += 1
1285
1286 # Handle remaining blocks.
1287 if tgt_ranges.size() or src_ranges.size():
1288 # Must be both non-empty.
1289 assert tgt_ranges.size() and src_ranges.size()
1290 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1291 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001292 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1293 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1294 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001295
Tianjie Xu41390d42017-11-22 11:35:18 -08001296 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1297 by_id):
1298 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001299
Tianjie Xu41390d42017-11-22 11:35:18 -08001300 This function will construct a list of zip archives, which will later be
1301 split by imgdiff to reduce the final patch size. For the other files,
1302 we will plainly split them based on a fixed chunk size with the potential
1303 patch size penalty.
1304 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001305
1306 assert style == "diff"
1307
1308 # Change nothing for small files.
1309 if (tgt_ranges.size() <= max_blocks_per_transfer and
1310 src_ranges.size() <= max_blocks_per_transfer):
1311 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1312 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1313 style, by_id)
1314 return
1315
Tao Baocb73aed2018-01-31 17:32:40 -08001316 # Split large APKs with imgdiff, if possible. We're intentionally checking
1317 # file types one more time (CanUseImgdiff() checks that as well), before
1318 # calling the costly RangeSha1()s.
1319 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1320 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001321 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001322 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1323 return
1324
Tianjie Xu41390d42017-11-22 11:35:18 -08001325 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1326 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001327
Tao Bao08c85832016-09-19 22:26:30 -07001328 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1329 split=False):
1330 """Wrapper function for adding a Transfer()."""
1331
1332 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1333 # otherwise add the Transfer() as is.
1334 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001335 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1336 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1337 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001338 return
1339
1340 # Handle .odex files specially to analyze the block-wise difference. If
1341 # most of the blocks are identical with only few changes (e.g. header),
1342 # we will patch the changed blocks only. This avoids stashing unchanged
1343 # blocks while patching. We limit the analysis to files without size
1344 # changes only. This is to avoid sacrificing the OTA generation cost too
1345 # much.
1346 if (tgt_name.split(".")[-1].lower() == 'odex' and
1347 tgt_ranges.size() == src_ranges.size()):
1348
1349 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1350 # few blocks remain identical, we lose the opportunity to use imgdiff
1351 # that may have better compression ratio than bsdiff.
1352 crop_threshold = 0.5
1353
1354 tgt_skipped = RangeSet()
1355 src_skipped = RangeSet()
1356 tgt_size = tgt_ranges.size()
1357 tgt_changed = 0
1358 for src_block, tgt_block in zip(src_ranges.next_item(),
1359 tgt_ranges.next_item()):
1360 src_rs = RangeSet(str(src_block))
1361 tgt_rs = RangeSet(str(tgt_block))
1362 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1363 tgt_skipped = tgt_skipped.union(tgt_rs)
1364 src_skipped = src_skipped.union(src_rs)
1365 else:
1366 tgt_changed += tgt_rs.size()
1367
1368 # Terminate early if no clear sign of benefits.
1369 if tgt_changed > tgt_size * crop_threshold:
1370 break
1371
1372 if tgt_changed < tgt_size * crop_threshold:
1373 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao32fcdab2018-10-12 10:30:39 -07001374 logger.info(
1375 '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
1376 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
Tianjie Xu41390d42017-11-22 11:35:18 -08001377 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001378 "%s-skipped" % (tgt_name,),
1379 "%s-skipped" % (src_name,),
1380 tgt_skipped, src_skipped, style, by_id)
1381
1382 # Intentionally change the file extension to avoid being imgdiff'd as
1383 # the files are no longer in their original format.
1384 tgt_name = "%s-cropped" % (tgt_name,)
1385 src_name = "%s-cropped" % (src_name,)
1386 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1387 src_ranges = src_ranges.subtract(src_skipped)
1388
1389 # Possibly having no changed blocks.
1390 if not tgt_ranges:
1391 return
1392
1393 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001394 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001395 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1396
Tianjie Xu25366072017-09-08 17:19:02 -07001397 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1398 split_info):
1399 """Parse the split_info and return a list of info tuples.
1400
1401 Args:
1402 patch_size: total size of the patch file.
1403 tgt_ranges: Ranges of the target file within the original image.
1404 src_ranges: Ranges of the source file within the original image.
1405 split_info format:
1406 imgdiff version#
1407 count of pieces
1408 <patch_size_1> <tgt_size_1> <src_ranges_1>
1409 ...
1410 <patch_size_n> <tgt_size_n> <src_ranges_n>
1411
1412 Returns:
1413 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1414 """
1415
1416 version = int(split_info[0])
1417 assert version == 2
1418 count = int(split_info[1])
1419 assert len(split_info) - 2 == count
1420
1421 split_info_list = []
1422 patch_start = 0
1423 tgt_remain = copy.deepcopy(tgt_ranges)
1424 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1425 for line in split_info[2:]:
1426 info = line.split()
1427 assert len(info) == 3
1428 patch_length = int(info[0])
1429
1430 split_tgt_size = int(info[1])
1431 assert split_tgt_size % 4096 == 0
1432 assert split_tgt_size / 4096 <= tgt_remain.size()
1433 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1434 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1435
1436 # Find the split_src_ranges within the image file from its relative
1437 # position in file.
1438 split_src_indices = RangeSet.parse_raw(info[2])
1439 split_src_ranges = RangeSet()
1440 for r in split_src_indices:
1441 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1442 assert not split_src_ranges.overlaps(curr_range)
1443 split_src_ranges = split_src_ranges.union(curr_range)
1444
1445 split_info_list.append((patch_start, patch_length,
1446 split_tgt_ranges, split_src_ranges))
1447 patch_start += patch_length
1448
1449 # Check that the sizes of all the split pieces add up to the final file
1450 # size for patch and target.
1451 assert tgt_remain.size() == 0
1452 assert patch_start == patch_size
1453 return split_info_list
1454
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001455 def SplitLargeApks():
1456 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001457
1458 Example: Chrome.apk will be split into
1459 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1460 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1461 ...
1462
1463 After the split, the target pieces are continuous and block aligned; and
1464 the source pieces are mutually exclusive. During the split, we also
1465 generate and save the image patch between src-X & tgt-X. This patch will
1466 be valid because the block ranges of src-X & tgt-X will always stay the
1467 same afterwards; but there's a chance we don't use the patch if we
1468 convert the "diff" command into "new" or "move" later.
1469 """
1470
1471 while True:
1472 with transfer_lock:
1473 if not large_apks:
1474 return
1475 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1476
1477 src_file = common.MakeTempFile(prefix="src-")
1478 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001479 with open(src_file, "wb") as src_fd:
1480 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1481 with open(tgt_file, "wb") as tgt_fd:
1482 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001483
1484 patch_file = common.MakeTempFile(prefix="patch-")
1485 patch_info_file = common.MakeTempFile(prefix="split_info-")
1486 cmd = ["imgdiff", "-z",
1487 "--block-limit={}".format(max_blocks_per_transfer),
1488 "--split-info=" + patch_info_file,
1489 src_file, tgt_file, patch_file]
Tao Bao73dd4f42018-10-04 16:25:33 -07001490 proc = common.Run(cmd)
1491 imgdiff_output, _ = proc.communicate()
1492 assert proc.returncode == 0, \
Tao Bao4ccea852018-02-06 15:16:41 -08001493 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1494 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001495
1496 with open(patch_info_file) as patch_info:
1497 lines = patch_info.readlines()
1498
1499 patch_size_total = os.path.getsize(patch_file)
1500 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1501 tgt_ranges, src_ranges,
1502 lines)
1503 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001504 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001505 with open(patch_file) as f:
1506 f.seek(patch_start)
1507 patch_content = f.read(patch_length)
1508
1509 split_src_name = "{}-{}".format(src_name, index)
1510 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001511 split_large_apks.append((split_tgt_name,
1512 split_src_name,
1513 split_tgt_ranges,
1514 split_src_ranges,
1515 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001516
Tao Bao32fcdab2018-10-12 10:30:39 -07001517 logger.info("Finding transfers...")
Tao Bao08c85832016-09-19 22:26:30 -07001518
Tianjie Xu25366072017-09-08 17:19:02 -07001519 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001520 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001521 cache_size = common.OPTIONS.cache_size
1522 split_threshold = 0.125
1523 max_blocks_per_transfer = int(cache_size * split_threshold /
1524 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001525 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001526 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001527 if tgt_fn == "__ZERO":
1528 # the special "__ZERO" domain is all the blocks not contained
1529 # in any file and that are filled with zeros. We have a
1530 # special transfer style for zero blocks.
1531 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001532 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1533 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001534 continue
1535
Tao Baoff777812015-05-12 11:42:31 -07001536 elif tgt_fn == "__COPY":
1537 # "__COPY" domain includes all the blocks not contained in any
1538 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001539 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001540 continue
1541
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001542 elif tgt_fn == "__HASHTREE":
1543 continue
1544
Doug Zongkerfc44a512014-08-26 13:10:25 -07001545 elif tgt_fn in self.src.file_map:
1546 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001547 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001548 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001549 continue
1550
1551 b = os.path.basename(tgt_fn)
1552 if b in self.src_basenames:
1553 # Look for an exact basename match in the source.
1554 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001555 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001556 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001557 continue
1558
1559 b = re.sub("[0-9]+", "#", b)
1560 if b in self.src_numpatterns:
1561 # Look for a 'number pattern' match (a basename match after
1562 # all runs of digits are replaced by "#"). (This is useful
1563 # for .so files that contain version numbers in the filename
1564 # that get bumped.)
1565 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001566 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001567 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001568 continue
1569
Tao Bao9a5caf22015-08-25 15:10:10 -07001570 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001571
Tianjie Xu25366072017-09-08 17:19:02 -07001572 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001573 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001574 for _ in range(self.threads)]
1575 for th in threads:
1576 th.start()
1577 while threads:
1578 threads.pop().join()
1579
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001580 # Sort the split transfers for large apks to generate a determinate package.
1581 split_large_apks.sort()
1582 for (tgt_name, src_name, tgt_ranges, src_ranges,
1583 patch) in split_large_apks:
1584 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1585 self.tgt.RangeSha1(tgt_ranges),
1586 self.src.RangeSha1(src_ranges),
1587 "diff", self.transfers)
1588 transfer_split.patch = patch
1589
Doug Zongkerfc44a512014-08-26 13:10:25 -07001590 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001591 for k in self.src.file_map.keys():
1592 b = os.path.basename(k)
1593 self.src_basenames[b] = k
1594 b = re.sub("[0-9]+", "#", b)
1595 self.src_numpatterns[b] = k
1596
1597 @staticmethod
1598 def AssertPartition(total, seq):
1599 """Assert that all the RangeSets in 'seq' form a partition of the
1600 'total' RangeSet (ie, they are nonintersecting and their union
1601 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001602
Doug Zongkerfc44a512014-08-26 13:10:25 -07001603 so_far = RangeSet()
1604 for i in seq:
1605 assert not so_far.overlaps(i)
1606 so_far = so_far.union(i)
1607 assert so_far == total