blob: aeb43798a435a0aab06f02c13ab556d83b73e907 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tianjie Xu25366072017-09-08 17:19:02 -070018import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Tao Bao3a2e3502016-12-28 09:14:39 -080024import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070025import re
26import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Tao Bao3a2e3502016-12-28 09:14:39 -080029from collections import deque, OrderedDict
30from hashlib import sha1
Tao Bao508b0872018-02-09 13:44:43 -080031
32import common
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Returns the patch bytes that transform srcfile into tgtfile.

  Runs 'imgdiff -z' (for zip-based files) or 'bsdiff', letting the tool write
  the patch to a temp file, then reads the patch back.

  Args:
    srcfile: Path to the source file.
    tgtfile: Path to the target file.
    imgdiff: If True, use imgdiff instead of bsdiff.

  Raises:
    ValueError: If the diff tool exits with a non-zero status; the tool's
        combined stdout/stderr is carried in the exception.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070055
Dan Albert8b72aef2015-03-23 19:13:21 -070056
class Image(object):
  """Abstract interface for the image objects that BlockImageDiff works on.

  Concrete implementations must also expose the attributes described in the
  BlockImageDiff docstring (blocksize, total_blocks, care_map, file_map,
  clobbered_blocks, extended).
  """

  def RangeSha1(self, ranges):
    # Returns (as a hex string) the SHA-1 hash of the data in 'ranges'.
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    # Returns the data in 'ranges' as a list/tuple of strings whose
    # concatenation is the requested data.
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    # Returns (as a hex string) the SHA-1 hash over the whole image,
    # optionally including the clobbered blocks.
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    # Writes the data in 'ranges' to the file object 'fd'.
    raise NotImplementedError
69
Dan Albert8b72aef2015-03-23 19:13:21 -070070
class EmptyImage(Image):
  """A zero-length image: no blocks, no data."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    # All the range attributes are empty; there is nothing in this image.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # The hash of zero bytes, regardless of the requested ranges.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
96
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070097
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wraps 'data' as an image of 4096-byte blocks.

    Args:
      data: The raw image contents.
      trim: If True, drop a trailing partial block.
      pad: If True, zero-fill a trailing partial block (at most one of trim
          and pad may be set).

    Raises:
      ValueError: If the data is not block-aligned and neither trim nor pad
          is specified.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Floor division keeps total_blocks an int on both Python 2 and 3; plain
    # '/' would yield a float on Python 3 and break the RangeSet block-index
    # constructions below. (Matches the '//' usage elsewhere in this file.)
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # NOTE(review): stored as a plain list here (not a RangeSet), while
    # TotalSha1() passes it to care_map.subtract() — confirm rangelib
    # accepts this, or wrap in RangeSet(data=...).
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify every block (except a padded final block, which is already in
    # clobbered_blocks) as all-zero or not, as (start, end) pair entries.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yields the data chunk for each (start, end) pair in 'ranges'."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data in 'ranges'."""
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)
181
Doug Zongkerfc44a512014-08-26 13:10:25 -0700182
class Transfer(object):
  """One command in the transfer list: produce tgt_ranges in the new image,
  possibly reading src_ranges from the source image."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # OrderedDict rather than dict keeps the output repeatable; otherwise it
    # would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    if patch:
      # Only "diff" transfers may carry patch data.
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Returns the net blocks stashed by this transfer (stashed - used)."""
    stashed = sum(sr.size() for _, sr in self.stash_before)
    used = sum(sr.size() for _, sr in self.use_stash)
    return stashed - used

  def ConvertToNew(self):
    """Rewrites this transfer as a full write of its target ranges."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "{}: <{} {} to {}>".format(
        self.id, self.src_ranges, self.style, self.tgt_ranges)
231
232
@functools.total_ordering
class HeapItem(object):
  """Wraps an item carrying a 'score' attribute for use in a heapq heap.

  A cleared wrapper is falsy, allowing stale heap entries to be skipped
  without removing them from the heap.
  """

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    """Marks this entry as stale; the wrapper becomes falsy."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__, while Python 3 uses __bool__.
  __nonzero__ = __bool__

  # functools.total_ordering generates the remaining comparison operators
  # from __eq__ and __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
256
257
class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files that will be applied imgdiff while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats is only meaningful when imgdiff not being disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason string to the set of filenames logged under it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS
    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      print(header)
      print(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      if key not in self.stats:
        continue
      values = self.stats[key]
      print_header(' {} (count: {}) '.format(key, len(values)), '-')
      print(''.join('  {}\n'.format(name) for name in values))
320
321
Doug Zongkerfc44a512014-08-26 13:10:25 -0700322class BlockImageDiff(object):
Tao Bao76def242017-11-21 09:25:31 -0800323 """Generates the diff of two block image objects.
324
325 BlockImageDiff works on two image objects. An image object is anything that
326 provides the following attributes:
327
328 blocksize: the size in bytes of a block, currently must be 4096.
329
330 total_blocks: the total size of the partition/image, in blocks.
331
332 care_map: a RangeSet containing which blocks (in the range [0,
333 total_blocks) we actually care about; i.e. which blocks contain data.
334
335 file_map: a dict that partitions the blocks contained in care_map into
336 smaller domains that are useful for doing diffs on. (Typically a domain
337 is a file, and the key in file_map is the pathname.)
338
339 clobbered_blocks: a RangeSet containing which blocks contain data but may
340 be altered by the FS. They need to be excluded when verifying the
341 partition integrity.
342
343 ReadRangeSet(): a function that takes a RangeSet and returns the data
344 contained in the image blocks of that RangeSet. The data is returned as
345 a list or tuple of strings; concatenating the elements together should
346 produce the requested data. Implementations are free to break up the
347 data into list/tuple elements in any way that is convenient.
348
349 RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
350 all the data in the specified range.
351
352 TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
353 all the data in the image (ie, all the blocks in the care_map minus
354 clobbered_blocks, or including the clobbered blocks if
355 include_clobbered_blocks is True).
356
357 When creating a BlockImageDiff, the src image may be None, in which case the
358 list of transfers produced will never read from the original image.
359 """
360
Tao Bao293fd132016-06-11 12:19:23 -0700361 def __init__(self, tgt, src=None, threads=None, version=4,
362 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700363 if threads is None:
364 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700365 if threads == 0:
366 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700367 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700368 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700369 self.transfers = []
370 self.src_basenames = {}
371 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800372 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700373 self.touched_src_ranges = RangeSet()
374 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700375 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800376 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700377
Tao Bao8fad03e2017-03-01 14:36:26 -0800378 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700379
380 self.tgt = tgt
381 if src is None:
382 src = EmptyImage()
383 self.src = src
384
385 # The updater code that installs the patch always uses 4k blocks.
386 assert tgt.blocksize == 4096
387 assert src.blocksize == 4096
388
389 # The range sets in each filemap should comprise a partition of
390 # the care map.
391 self.AssertPartition(src.care_map, src.file_map.values())
392 self.AssertPartition(tgt.care_map, tgt.file_map.values())
393
  @property
  def max_stashed_size(self):
    """Returns the peak stash usage in bytes, as computed by WriteTransfers()."""
    return self._max_stashed_size
397
Tao Baocb73aed2018-01-31 17:32:40 -0800398 @staticmethod
399 def FileTypeSupportedByImgdiff(filename):
400 """Returns whether the file type is supported by imgdiff."""
401 return filename.lower().endswith(('.apk', '.jar', '.zip'))
402
  def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
    """Checks whether we can apply imgdiff for the given RangeSets.

    For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
    'imgdiff -z' if possible. Because it usually produces significantly smaller
    patches than bsdiff.

    This is permissible if all of the following conditions hold.
      - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
      - The file type is supported by imgdiff;
      - The source and target blocks are monotonic (i.e. the data is stored
        with blocks in increasing order);
      - Both files don't contain shared blocks;
      - Both files have complete lists of blocks;
      - We haven't removed any blocks from the source set.

    If all these conditions are satisfied, concatenating all the blocks in the
    RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
    in the last block). imgdiff is fine with extra zeros at the end of the file.

    Args:
      name: The filename to be diff'd.
      tgt_ranges: The target RangeSet.
      src_ranges: The source RangeSet.
      large_apk: Whether this is to split a large APK.

    Returns:
      A boolean result.
    """
    if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
      # Nothing is logged for this case: when imgdiff is disabled
      # self.imgdiff_stats is None, and the stats only admit supported
      # file types anyway.
      return False

    # Each subsequent guard logs its specific rejection reason, so the
    # order of these checks determines which reason gets reported.
    if not tgt_ranges.monotonic or not src_ranges.monotonic:
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
      return False

    if (tgt_ranges.extra.get('uses_shared_blocks') or
        src_ranges.extra.get('uses_shared_blocks')):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
      return False

    if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
      return False

    reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
              else ImgdiffStats.USED_IMGDIFF)
    self.imgdiff_stats.Log(name, reason)
    return True
Tao Baocb73aed2018-01-31 17:32:40 -0800452
  def Compute(self, prefix):
    """Drives the whole diff pipeline and writes the '<prefix>.*' outputs.

    Finds the transfers, orders them under the stash-size limit, verifies
    the result, then emits the patch data (ComputePatches) and the transfer
    list (WriteTransfers).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) a exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if common.OPTIONS.verbose and not self.disable_imgdiff:
      self.imgdiff_stats.Report()
489
  def WriteTransfers(self, prefix):
    """Writes the ordered transfer commands to '<prefix>.transfer.list'.

    Emits one command per transfer (new/move/bsdiff/imgdiff/zero) along with
    the stash/free bookkeeping around them, then prepends the initial erase
    and the 4-line header (version, total blocks written, '0', max stashed
    blocks). Tracks the peak stash usage, records it in _max_stashed_size,
    and asserts it stays within the cache-size limit when one is configured.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          # Already stashed under this hash; just bump the ref count.
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # Format of the source spec built below:
      # <# blocks> <src ranges>
      #   OR
      # <# blocks> <src ranges> <src locs> <stash refs...>
      #   OR
      # <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference: emit the free after this transfer's command.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op: emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      # Frees are emitted only after the command that consumed the stashes.
      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    if self.tgt.hashtree_info:
      out.append("compute_hash_tree {} {} {} {} {}\n".format(
          self.tgt.hashtree_info.hashtree_range.to_string_raw(),
          self.tgt.hashtree_info.filesystem_range.to_string_raw(),
          self.tgt.hashtree_info.hash_algorithm,
          self.tgt.hashtree_info.salt,
          self.tgt.hashtree_info.root_hash))

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    # The 4-line header goes in front of everything, including the initial
    # erase inserted just above.
    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    # NOTE(review): opened in binary mode but fed str entries — relies on
    # Python 2 str/bytes equivalence, which this file targets.
    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700704
  def ReviseStashSize(self):
    """Revises the transfer list so the stash usage stays within the limit.

    Simulates the stash usage of the command sequence on the fly. Whenever a
    stash (explicit via stash_before, or implicit via an overlapping "diff")
    would push the total stashed blocks over the /cache budget, the command
    that consumes that stash is converted to a plain "new" command instead
    (dropping the stash entirely).

    Returns:
      The total number of target blocks that got converted to "new" due to
      insufficient cache size.
    """
    print("Revising stash size...")
    # Maps stash_raw_id -> (range, def_cmd[, use_cmd]); built below.
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    # Maps source-range SHA-1 -> refcount; identical contents share one slot.
    stashes = {}
    stashed_blocks = 0  # blocks currently stashed in the simulation
    new_blocks = 0      # target blocks converted to "new" so far

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        # Only new content grows the stash; duplicates reuse an existing slot.
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        # Last user gone: the slot is freed and its blocks reclaimed.
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700796
Doug Zongkerfc44a512014-08-26 13:10:25 -0700797 def ComputePatches(self, prefix):
798 print("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800799 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700800 patch_num = 0
801 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800802 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700803 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700804 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
805 print("%10d %10d (%6.2f%%) %7s %s %s" % (
806 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
807 str(xf.tgt_ranges)))
808
Doug Zongkerfc44a512014-08-26 13:10:25 -0700809 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800810 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700811 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
812 print("%10d %10d (%6.2f%%) %7s %s %s" % (
813 tgt_size, tgt_size, 100.0, xf.style,
814 xf.tgt_name, str(xf.tgt_ranges)))
815
Doug Zongkerfc44a512014-08-26 13:10:25 -0700816 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700817 # We can't compare src and tgt directly because they may have
818 # the same content but be broken up into blocks differently, eg:
819 #
820 # ["he", "llo"] vs ["h", "ello"]
821 #
822 # We want those to compare equal, ideally without having to
823 # actually concatenate the strings (these may be tens of
824 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800825 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700826 # These are identical; we don't need to generate a patch,
827 # just issue copy commands on the device.
828 xf.style = "move"
Tianjie Xu25366072017-09-08 17:19:02 -0700829 xf.patch = None
Tao Bao183e56e2017-03-05 17:05:09 -0800830 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700831 if xf.src_ranges != xf.tgt_ranges:
832 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
833 tgt_size, tgt_size, 100.0, xf.style,
834 xf.tgt_name if xf.tgt_name == xf.src_name else (
835 xf.tgt_name + " (from " + xf.src_name + ")"),
836 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700837 else:
Tianjie Xu25366072017-09-08 17:19:02 -0700838 if xf.patch:
Tao Bao5bab0dd2018-07-10 17:44:52 -0700839 # We have already generated the patch with imgdiff, while
840 # splitting large APKs (i.e. in FindTransfers()).
Tianjie Xu25366072017-09-08 17:19:02 -0700841 assert not self.disable_imgdiff
842 imgdiff = True
Tianjie Xu25366072017-09-08 17:19:02 -0700843 else:
Tao Baocb73aed2018-01-31 17:32:40 -0800844 imgdiff = self.CanUseImgdiff(
845 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700846 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800847 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700848 patch_num += 1
849
850 else:
851 assert False, "unknown style " + xf.style
852
Tao Bao183e56e2017-03-05 17:05:09 -0800853 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700854 if self.threads > 1:
855 print("Computing patches (using %d threads)..." % (self.threads,))
856 else:
857 print("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700858
Tao Bao183e56e2017-03-05 17:05:09 -0800859 diff_total = len(diff_queue)
860 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700861 error_messages = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700862
Tao Bao183e56e2017-03-05 17:05:09 -0800863 # Using multiprocessing doesn't give additional benefits, due to the
864 # pattern of the code. The diffing work is done by subprocess.call, which
865 # already runs in a separate process (not affected much by the GIL -
866 # Global Interpreter Lock). Using multiprocess also requires either a)
867 # writing the diff input files in the main process before forking, or b)
868 # reopening the image file (SparseImage) in the worker processes. Doing
869 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700870 lock = threading.Lock()
871 def diff_worker():
872 while True:
873 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800874 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700875 return
Tao Bao183e56e2017-03-05 17:05:09 -0800876 xf_index, imgdiff, patch_index = diff_queue.pop()
Tao Bao97395142018-02-12 12:08:05 -0800877 xf = self.transfers[xf_index]
Tao Bao183e56e2017-03-05 17:05:09 -0800878
Tao Bao97395142018-02-12 12:08:05 -0800879 if sys.stdout.isatty():
880 diff_left = len(diff_queue)
881 progress = (diff_total - diff_left) * 100 / diff_total
882 # '\033[K' is to clear to EOL.
883 print(' [%3d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
884 sys.stdout.flush()
885
Tianjie Xu25366072017-09-08 17:19:02 -0700886 patch = xf.patch
887 if not patch:
888 src_ranges = xf.src_ranges
889 tgt_ranges = xf.tgt_ranges
Tao Bao183e56e2017-03-05 17:05:09 -0800890
Tianjie Xudf1166e2018-01-27 17:35:41 -0800891 src_file = common.MakeTempFile(prefix="src-")
892 with open(src_file, "wb") as fd:
893 self.src.WriteRangeDataToFd(src_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700894
Tianjie Xudf1166e2018-01-27 17:35:41 -0800895 tgt_file = common.MakeTempFile(prefix="tgt-")
896 with open(tgt_file, "wb") as fd:
897 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700898
899 message = []
900 try:
901 patch = compute_patch(src_file, tgt_file, imgdiff)
902 except ValueError as e:
903 message.append(
904 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
Tao Bao508b0872018-02-09 13:44:43 -0800905 "imgdiff" if imgdiff else "bsdiff",
906 xf.tgt_name if xf.tgt_name == xf.src_name else
Tianjie Xu25366072017-09-08 17:19:02 -0700907 xf.tgt_name + " (from " + xf.src_name + ")",
Tao Bao508b0872018-02-09 13:44:43 -0800908 xf.tgt_ranges, xf.src_ranges, e.message))
Tianjie Xu25366072017-09-08 17:19:02 -0700909 if message:
910 with lock:
911 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800912
913 with lock:
914 patches[patch_index] = (xf_index, patch)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700915
916 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700917 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700918 for th in threads:
919 th.start()
920 while threads:
921 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800922
923 if sys.stdout.isatty():
924 print('\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700925
926 if error_messages:
Tao Baob937ead2017-10-19 16:51:53 -0700927 print('ERROR:')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700928 print('\n'.join(error_messages))
Tao Baob937ead2017-10-19 16:51:53 -0700929 print('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700930 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700931 else:
932 patches = []
933
Tao Bao183e56e2017-03-05 17:05:09 -0800934 offset = 0
935 with open(prefix + ".patch.dat", "wb") as patch_fd:
936 for index, patch in patches:
937 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700938 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800939 xf.patch_start = offset
940 offset += xf.patch_len
941 patch_fd.write(patch)
942
943 if common.OPTIONS.verbose:
944 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
945 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
Tao Bao508b0872018-02-09 13:44:43 -0800946 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
947 xf.style,
948 xf.tgt_name if xf.tgt_name == xf.src_name else (
949 xf.tgt_name + " (from " + xf.src_name + ")"),
950 xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700951
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800952 def AssertSha1Good(self):
953 """Check the SHA-1 of the src & tgt blocks in the transfer list.
954
955 Double check the SHA-1 value to avoid the issue in b/71908713, where
956 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
957 environment. That specific problem has been fixed by protecting the
958 underlying generator function 'SparseImage._GetRangeData()' with lock.
959 """
960 for xf in self.transfers:
961 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
962 assert xf.tgt_sha1 == tgt_sha1
963 if xf.style == "diff":
964 src_sha1 = self.src.RangeSha1(xf.src_ranges)
965 assert xf.src_sha1 == src_sha1
966
Doug Zongkerfc44a512014-08-26 13:10:25 -0700967 def AssertSequenceGood(self):
968 # Simulate the sequences of transfers we will output, and check that:
969 # - we never read a block after writing it, and
970 # - we write every block we care about exactly once.
971
972 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800973 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700974
975 # Imagine processing the transfers in order.
976 for xf in self.transfers:
977 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700978
979 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800980 for _, sr in xf.use_stash:
981 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700982
Doug Zongker6ab2a502016-02-09 08:28:09 -0800983 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800984 # Source image could be larger. Don't check the blocks that are in the
985 # source image only. Since they are not in 'touched', and won't ever
986 # be touched.
987 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800988 assert touched[i] == 0
989
990 # Check that the output blocks for this transfer haven't yet
991 # been touched, and touch all the blocks written by this
992 # transfer.
993 for s, e in xf.tgt_ranges:
994 for i in range(s, e):
995 assert touched[i] == 0
996 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700997
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700998 if self.tgt.hashtree_info:
999 for s, e in self.tgt.hashtree_info.hashtree_range:
1000 for i in range(s, e):
1001 assert touched[i] == 0
1002 touched[i] = 1
1003
Doug Zongkerfc44a512014-08-26 13:10:25 -07001004 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001005 for s, e in self.tgt.care_map:
1006 for i in range(s, e):
1007 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001008
Doug Zongker62338182014-09-08 08:29:55 -07001009 def ImproveVertexSequence(self):
1010 print("Improving vertex order...")
1011
1012 # At this point our digraph is acyclic; we reversed any edges that
1013 # were backwards in the heuristically-generated sequence. The
1014 # previously-generated order is still acceptable, but we hope to
1015 # find a better order that needs less memory for stashed data.
1016 # Now we do a topological sort to generate a new vertex order,
1017 # using a greedy algorithm to choose which vertex goes next
1018 # whenever we have a choice.
1019
1020 # Make a copy of the edge set; this copy will get destroyed by the
1021 # algorithm.
1022 for xf in self.transfers:
1023 xf.incoming = xf.goes_after.copy()
1024 xf.outgoing = xf.goes_before.copy()
1025
1026 L = [] # the new vertex order
1027
1028 # S is the set of sources in the remaining graph; we always choose
1029 # the one that leaves the least amount of stashed data after it's
1030 # executed.
1031 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1032 if not u.incoming]
1033 heapq.heapify(S)
1034
1035 while S:
1036 _, _, xf = heapq.heappop(S)
1037 L.append(xf)
1038 for u in xf.outgoing:
1039 del u.incoming[xf]
1040 if not u.incoming:
1041 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1042
1043 # if this fails then our graph had a cycle.
1044 assert len(L) == len(self.transfers)
1045
1046 self.transfers = L
1047 for i, xf in enumerate(L):
1048 xf.order = i
1049
Doug Zongker62338182014-09-08 08:29:55 -07001050 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001051 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1052
1053 For each transfer, make sure it properly stashes the blocks it touches and
1054 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1055 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1056 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1057 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1058 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1059 blocks will be written to the same stash slot in WriteTransfers().
1060 """
1061
Doug Zongker62338182014-09-08 08:29:55 -07001062 print("Reversing backward edges...")
1063 in_order = 0
1064 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001065 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001066 stash_size = 0
1067
1068 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001069 for u in xf.goes_before.copy():
1070 # xf should go before u
1071 if xf.order < u.order:
1072 # it does, hurray!
1073 in_order += 1
1074 else:
1075 # it doesn't, boo. modify u to stash the blocks that it
1076 # writes that xf wants to read, and then require u to go
1077 # before xf.
1078 out_of_order += 1
1079
1080 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1081 assert overlap
1082
Tao Bao3a2e3502016-12-28 09:14:39 -08001083 u.stash_before.append((stash_raw_id, overlap))
1084 xf.use_stash.append((stash_raw_id, overlap))
1085 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001086 stash_size += overlap.size()
1087
1088 # reverse the edge direction; now xf must go after u
1089 del xf.goes_before[u]
1090 del u.goes_after[xf]
1091 xf.goes_after[u] = None # value doesn't matter
1092 u.goes_before[xf] = None
1093
1094 print((" %d/%d dependencies (%.2f%%) were violated; "
1095 "%d source blocks stashed.") %
1096 (out_of_order, in_order + out_of_order,
1097 (out_of_order * 100.0 / (in_order + out_of_order))
1098 if (in_order + out_of_order) else 0.0,
1099 stash_size))
1100
  def FindVertexSequence(self):
    """Orders the transfers to minimize the weight of removed (backward) edges.

    Produces an ordering of self.transfers (recorded in each transfer's
    'order' field and by rearranging the list) such that the total weight of
    left-pointing edges -- i.e. source blocks that will have to be dropped or
    stashed -- is heuristically minimized.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = (weight saved if treated as a source) - (lost if a sink).
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque() # the left side of the sequence, built from left to right
    s2 = deque() # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Lazy heap update: invalidate the old entry and push a fresh one
      # (HeapItem.clear() presumably marks the stale entry so the pop loop
      # below can skip it -- see the 'if u and ...' check).
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          # May have been removed from G by an earlier pass.
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      while True:
        # Skip stale (cleared) heap entries and entries for vertices
        # already removed from G.
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      # The working edge copies are no longer needed.
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1213
1214 def GenerateDigraph(self):
1215 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001216
1217 # Each item of source_ranges will be:
1218 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001219 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001220 source_ranges = []
1221 for b in self.transfers:
1222 for s, e in b.src_ranges:
1223 if e > len(source_ranges):
1224 source_ranges.extend([None] * (e-len(source_ranges)))
1225 for i in range(s, e):
1226 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001227 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001228 else:
Tao Bao33482282016-10-24 16:49:08 -07001229 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001230
Doug Zongkerfc44a512014-08-26 13:10:25 -07001231 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001232 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001233 for s, e in a.tgt_ranges:
1234 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001235 if i >= len(source_ranges):
1236 break
Tao Bao33482282016-10-24 16:49:08 -07001237 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1238 if source_ranges[i] is not None:
1239 for j in source_ranges[i]:
1240 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001241
1242 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001243 if a is b:
1244 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001245
1246 # If the blocks written by A are read by B, then B needs to go before A.
1247 i = a.tgt_ranges.intersect(b.src_ranges)
1248 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001249 if b.src_name == "__ZERO":
1250 # the cost of removing source blocks for the __ZERO domain
1251 # is (nearly) zero.
1252 size = 0
1253 else:
1254 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001255 b.goes_before[a] = size
1256 a.goes_after[b] = size
1257
1258 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001259 """Parse the file_map to generate all the transfers."""
1260
Tianjie Xu41390d42017-11-22 11:35:18 -08001261 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1262 src_ranges, style, by_id):
1263 """Add one or multiple Transfer()s by splitting large files.
1264
1265 For BBOTA v3, we need to stash source blocks for resumable feature.
1266 However, with the growth of file size and the shrink of the cache
1267 partition source blocks are too large to be stashed. If a file occupies
1268 too many blocks, we split it into smaller pieces by getting multiple
1269 Transfer()s.
1270
1271 The downside is that after splitting, we may increase the package size
1272 since the split pieces don't align well. According to our experiments,
1273 1/8 of the cache size as the per-piece limit appears to be optimal.
1274 Compared to the fixed 1024-block limit, it reduces the overall package
1275 size by 30% for volantis, and 20% for angler and bullhead."""
1276
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001277 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001278 while (tgt_ranges.size() > max_blocks_per_transfer and
1279 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001280 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1281 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001282 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1283 src_first = src_ranges.first(max_blocks_per_transfer)
1284
Tao Bao183e56e2017-03-05 17:05:09 -08001285 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1286 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1287 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001288
1289 tgt_ranges = tgt_ranges.subtract(tgt_first)
1290 src_ranges = src_ranges.subtract(src_first)
1291 pieces += 1
1292
1293 # Handle remaining blocks.
1294 if tgt_ranges.size() or src_ranges.size():
1295 # Must be both non-empty.
1296 assert tgt_ranges.size() and src_ranges.size()
1297 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1298 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001299 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1300 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1301 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001302
Tianjie Xu41390d42017-11-22 11:35:18 -08001303 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1304 by_id):
1305 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001306
Tianjie Xu41390d42017-11-22 11:35:18 -08001307 This function will construct a list of zip archives, which will later be
1308 split by imgdiff to reduce the final patch size. For the other files,
1309 we will plainly split them based on a fixed chunk size with the potential
1310 patch size penalty.
1311 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001312
1313 assert style == "diff"
1314
1315 # Change nothing for small files.
1316 if (tgt_ranges.size() <= max_blocks_per_transfer and
1317 src_ranges.size() <= max_blocks_per_transfer):
1318 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1319 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1320 style, by_id)
1321 return
1322
Tao Baocb73aed2018-01-31 17:32:40 -08001323 # Split large APKs with imgdiff, if possible. We're intentionally checking
1324 # file types one more time (CanUseImgdiff() checks that as well), before
1325 # calling the costly RangeSha1()s.
1326 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1327 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001328 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001329 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1330 return
1331
Tianjie Xu41390d42017-11-22 11:35:18 -08001332 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1333 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001334
Tao Bao08c85832016-09-19 22:26:30 -07001335 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1336 split=False):
1337 """Wrapper function for adding a Transfer()."""
1338
1339 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1340 # otherwise add the Transfer() as is.
1341 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001342 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1343 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1344 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001345 return
1346
1347 # Handle .odex files specially to analyze the block-wise difference. If
1348 # most of the blocks are identical with only few changes (e.g. header),
1349 # we will patch the changed blocks only. This avoids stashing unchanged
1350 # blocks while patching. We limit the analysis to files without size
1351 # changes only. This is to avoid sacrificing the OTA generation cost too
1352 # much.
1353 if (tgt_name.split(".")[-1].lower() == 'odex' and
1354 tgt_ranges.size() == src_ranges.size()):
1355
1356 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1357 # few blocks remain identical, we lose the opportunity to use imgdiff
1358 # that may have better compression ratio than bsdiff.
1359 crop_threshold = 0.5
1360
1361 tgt_skipped = RangeSet()
1362 src_skipped = RangeSet()
1363 tgt_size = tgt_ranges.size()
1364 tgt_changed = 0
1365 for src_block, tgt_block in zip(src_ranges.next_item(),
1366 tgt_ranges.next_item()):
1367 src_rs = RangeSet(str(src_block))
1368 tgt_rs = RangeSet(str(tgt_block))
1369 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1370 tgt_skipped = tgt_skipped.union(tgt_rs)
1371 src_skipped = src_skipped.union(src_rs)
1372 else:
1373 tgt_changed += tgt_rs.size()
1374
1375 # Terminate early if no clear sign of benefits.
1376 if tgt_changed > tgt_size * crop_threshold:
1377 break
1378
1379 if tgt_changed < tgt_size * crop_threshold:
1380 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao508b0872018-02-09 13:44:43 -08001381 print('%10d %10d (%6.2f%%) %s' % (
1382 tgt_skipped.size(), tgt_size,
1383 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001384 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001385 "%s-skipped" % (tgt_name,),
1386 "%s-skipped" % (src_name,),
1387 tgt_skipped, src_skipped, style, by_id)
1388
1389 # Intentionally change the file extension to avoid being imgdiff'd as
1390 # the files are no longer in their original format.
1391 tgt_name = "%s-cropped" % (tgt_name,)
1392 src_name = "%s-cropped" % (src_name,)
1393 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1394 src_ranges = src_ranges.subtract(src_skipped)
1395
1396 # Possibly having no changed blocks.
1397 if not tgt_ranges:
1398 return
1399
1400 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001401 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001402 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1403
Tianjie Xu25366072017-09-08 17:19:02 -07001404 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1405 split_info):
1406 """Parse the split_info and return a list of info tuples.
1407
1408 Args:
1409 patch_size: total size of the patch file.
1410 tgt_ranges: Ranges of the target file within the original image.
1411 src_ranges: Ranges of the source file within the original image.
1412 split_info format:
1413 imgdiff version#
1414 count of pieces
1415 <patch_size_1> <tgt_size_1> <src_ranges_1>
1416 ...
1417 <patch_size_n> <tgt_size_n> <src_ranges_n>
1418
1419 Returns:
1420 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1421 """
1422
1423 version = int(split_info[0])
1424 assert version == 2
1425 count = int(split_info[1])
1426 assert len(split_info) - 2 == count
1427
1428 split_info_list = []
1429 patch_start = 0
1430 tgt_remain = copy.deepcopy(tgt_ranges)
1431 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1432 for line in split_info[2:]:
1433 info = line.split()
1434 assert len(info) == 3
1435 patch_length = int(info[0])
1436
1437 split_tgt_size = int(info[1])
1438 assert split_tgt_size % 4096 == 0
1439 assert split_tgt_size / 4096 <= tgt_remain.size()
1440 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1441 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1442
1443 # Find the split_src_ranges within the image file from its relative
1444 # position in file.
1445 split_src_indices = RangeSet.parse_raw(info[2])
1446 split_src_ranges = RangeSet()
1447 for r in split_src_indices:
1448 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1449 assert not split_src_ranges.overlaps(curr_range)
1450 split_src_ranges = split_src_ranges.union(curr_range)
1451
1452 split_info_list.append((patch_start, patch_length,
1453 split_tgt_ranges, split_src_ranges))
1454 patch_start += patch_length
1455
1456 # Check that the sizes of all the split pieces add up to the final file
1457 # size for patch and target.
1458 assert tgt_remain.size() == 0
1459 assert patch_start == patch_size
1460 return split_info_list
1461
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001462 def SplitLargeApks():
1463 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001464
1465 Example: Chrome.apk will be split into
1466 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1467 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1468 ...
1469
1470 After the split, the target pieces are continuous and block aligned; and
1471 the source pieces are mutually exclusive. During the split, we also
1472 generate and save the image patch between src-X & tgt-X. This patch will
1473 be valid because the block ranges of src-X & tgt-X will always stay the
1474 same afterwards; but there's a chance we don't use the patch if we
1475 convert the "diff" command into "new" or "move" later.
1476 """
1477
1478 while True:
1479 with transfer_lock:
1480 if not large_apks:
1481 return
1482 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1483
1484 src_file = common.MakeTempFile(prefix="src-")
1485 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001486 with open(src_file, "wb") as src_fd:
1487 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1488 with open(tgt_file, "wb") as tgt_fd:
1489 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001490
1491 patch_file = common.MakeTempFile(prefix="patch-")
1492 patch_info_file = common.MakeTempFile(prefix="split_info-")
1493 cmd = ["imgdiff", "-z",
1494 "--block-limit={}".format(max_blocks_per_transfer),
1495 "--split-info=" + patch_info_file,
1496 src_file, tgt_file, patch_file]
Tao Bao4ccea852018-02-06 15:16:41 -08001497 p = common.Run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
1498 imgdiff_output, _ = p.communicate()
1499 assert p.returncode == 0, \
1500 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1501 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001502
1503 with open(patch_info_file) as patch_info:
1504 lines = patch_info.readlines()
1505
1506 patch_size_total = os.path.getsize(patch_file)
1507 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1508 tgt_ranges, src_ranges,
1509 lines)
1510 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001511 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001512 with open(patch_file) as f:
1513 f.seek(patch_start)
1514 patch_content = f.read(patch_length)
1515
1516 split_src_name = "{}-{}".format(src_name, index)
1517 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001518 split_large_apks.append((split_tgt_name,
1519 split_src_name,
1520 split_tgt_ranges,
1521 split_src_ranges,
1522 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001523
Tao Bao08c85832016-09-19 22:26:30 -07001524 print("Finding transfers...")
1525
Tianjie Xu25366072017-09-08 17:19:02 -07001526 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001527 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001528 cache_size = common.OPTIONS.cache_size
1529 split_threshold = 0.125
1530 max_blocks_per_transfer = int(cache_size * split_threshold /
1531 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001532 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001533 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001534 if tgt_fn == "__ZERO":
1535 # the special "__ZERO" domain is all the blocks not contained
1536 # in any file and that are filled with zeros. We have a
1537 # special transfer style for zero blocks.
1538 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001539 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1540 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001541 continue
1542
Tao Baoff777812015-05-12 11:42:31 -07001543 elif tgt_fn == "__COPY":
1544 # "__COPY" domain includes all the blocks not contained in any
1545 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001546 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001547 continue
1548
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001549 elif tgt_fn == "__HASHTREE":
1550 continue
1551
Doug Zongkerfc44a512014-08-26 13:10:25 -07001552 elif tgt_fn in self.src.file_map:
1553 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001554 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001555 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001556 continue
1557
1558 b = os.path.basename(tgt_fn)
1559 if b in self.src_basenames:
1560 # Look for an exact basename match in the source.
1561 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001562 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001563 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001564 continue
1565
1566 b = re.sub("[0-9]+", "#", b)
1567 if b in self.src_numpatterns:
1568 # Look for a 'number pattern' match (a basename match after
1569 # all runs of digits are replaced by "#"). (This is useful
1570 # for .so files that contain version numbers in the filename
1571 # that get bumped.)
1572 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001573 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001574 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001575 continue
1576
Tao Bao9a5caf22015-08-25 15:10:10 -07001577 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001578
Tianjie Xu25366072017-09-08 17:19:02 -07001579 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001580 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001581 for _ in range(self.threads)]
1582 for th in threads:
1583 th.start()
1584 while threads:
1585 threads.pop().join()
1586
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001587 # Sort the split transfers for large apks to generate a determinate package.
1588 split_large_apks.sort()
1589 for (tgt_name, src_name, tgt_ranges, src_ranges,
1590 patch) in split_large_apks:
1591 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1592 self.tgt.RangeSha1(tgt_ranges),
1593 self.src.RangeSha1(src_ranges),
1594 "diff", self.transfers)
1595 transfer_split.patch = patch
1596
Doug Zongkerfc44a512014-08-26 13:10:25 -07001597 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001598 for k in self.src.file_map.keys():
1599 b = os.path.basename(k)
1600 self.src_basenames[b] = k
1601 b = re.sub("[0-9]+", "#", b)
1602 self.src_numpatterns[b] = k
1603
1604 @staticmethod
1605 def AssertPartition(total, seq):
1606 """Assert that all the RangeSets in 'seq' form a partition of the
1607 'total' RangeSet (ie, they are nonintersecting and their union
1608 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001609
Doug Zongkerfc44a512014-08-26 13:10:25 -07001610 so_far = RangeSet()
1611 for i in seq:
1612 assert not so_far.overlaps(i)
1613 so_far = so_far.union(i)
1614 assert so_far == total