blob: 189dba28e3ecf6b2a779c3c21ad9612b8961012b [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tianjie Xu25366072017-09-08 17:19:02 -070018import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Tao Bao3a2e3502016-12-28 09:14:39 -080024import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070025import re
Tao Bao183e56e2017-03-05 17:05:09 -080026import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070027import threading
Tao Bao3a2e3502016-12-28 09:14:39 -080028from collections import deque, OrderedDict
29from hashlib import sha1
Tao Bao508b0872018-02-09 13:44:43 -080030
31import common
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070034__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
35
Dan Albert8b72aef2015-03-23 19:13:21 -070036
Tao Bao183e56e2017-03-05 17:05:09 -080037def compute_patch(srcfile, tgtfile, imgdiff=False):
Tianjie Xub59c17f2016-10-28 17:55:53 -070038 patchfile = common.MakeTempFile(prefix='patch-')
Doug Zongkerfc44a512014-08-26 13:10:25 -070039
Tianjie Xub59c17f2016-10-28 17:55:53 -070040 cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
41 cmd.extend([srcfile, tgtfile, patchfile])
Doug Zongkerfc44a512014-08-26 13:10:25 -070042
Tao Bao39451582017-05-04 11:10:47 -070043 # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
44 # here, since they contain temp filenames only.
Tao Bao73dd4f42018-10-04 16:25:33 -070045 proc = common.Run(cmd, verbose=False)
46 output, _ = proc.communicate()
Doug Zongkerfc44a512014-08-26 13:10:25 -070047
Tao Bao73dd4f42018-10-04 16:25:33 -070048 if proc.returncode != 0:
Tianjie Xub59c17f2016-10-28 17:55:53 -070049 raise ValueError(output)
50
51 with open(patchfile, 'rb') as f:
Tao Bao183e56e2017-03-05 17:05:09 -080052 return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070053
Dan Albert8b72aef2015-03-23 19:13:21 -070054
55class Image(object):
Tao Bao183e56e2017-03-05 17:05:09 -080056 def RangeSha1(self, ranges):
57 raise NotImplementedError
58
Dan Albert8b72aef2015-03-23 19:13:21 -070059 def ReadRangeSet(self, ranges):
60 raise NotImplementedError
61
Tao Bao68658c02015-06-01 13:40:49 -070062 def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert8b72aef2015-03-23 19:13:21 -070063 raise NotImplementedError
64
Tao Bao183e56e2017-03-05 17:05:09 -080065 def WriteRangeDataToFd(self, ranges, fd):
66 raise NotImplementedError
67
Dan Albert8b72aef2015-03-23 19:13:21 -070068
69class EmptyImage(Image):
Doug Zongkerfc44a512014-08-26 13:10:25 -070070 """A zero-length image."""
Tao Bao183e56e2017-03-05 17:05:09 -080071
72 def __init__(self):
73 self.blocksize = 4096
74 self.care_map = RangeSet()
75 self.clobbered_blocks = RangeSet()
76 self.extended = RangeSet()
77 self.total_blocks = 0
78 self.file_map = {}
79
80 def RangeSha1(self, ranges):
81 return sha1().hexdigest()
82
Doug Zongkerfc44a512014-08-26 13:10:25 -070083 def ReadRangeSet(self, ranges):
84 return ()
Tao Bao183e56e2017-03-05 17:05:09 -080085
Tao Bao68658c02015-06-01 13:40:49 -070086 def TotalSha1(self, include_clobbered_blocks=False):
87 # EmptyImage always carries empty clobbered_blocks, so
88 # include_clobbered_blocks can be ignored.
89 assert self.clobbered_blocks.size() == 0
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070090 return sha1().hexdigest()
91
Tao Bao183e56e2017-03-05 17:05:09 -080092 def WriteRangeDataToFd(self, ranges, fd):
93 raise ValueError("Can't write data from EmptyImage to file")
94
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070095
Dan Albert8b72aef2015-03-23 19:13:21 -070096class DataImage(Image):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070097 """An image wrapped around a single string of data."""
98
99 def __init__(self, data, trim=False, pad=False):
100 self.data = data
101 self.blocksize = 4096
102
103 assert not (trim and pad)
104
105 partial = len(self.data) % self.blocksize
Tao Bao7589e962015-09-05 20:35:32 -0700106 padded = False
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700107 if partial > 0:
108 if trim:
109 self.data = self.data[:-partial]
110 elif pad:
111 self.data += '\0' * (self.blocksize - partial)
Tao Bao7589e962015-09-05 20:35:32 -0700112 padded = True
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700113 else:
114 raise ValueError(("data for DataImage must be multiple of %d bytes "
115 "unless trim or pad is specified") %
116 (self.blocksize,))
117
118 assert len(self.data) % self.blocksize == 0
119
120 self.total_blocks = len(self.data) / self.blocksize
121 self.care_map = RangeSet(data=(0, self.total_blocks))
Tao Bao7589e962015-09-05 20:35:32 -0700122 # When the last block is padded, we always write the whole block even for
123 # incremental OTAs. Because otherwise the last block may get skipped if
124 # unchanged for an incremental, but would fail the post-install
125 # verification if it has non-zero contents in the padding bytes.
126 # Bug: 23828506
127 if padded:
Tao Bao42206c32015-09-08 13:39:40 -0700128 clobbered_blocks = [self.total_blocks-1, self.total_blocks]
Tao Bao7589e962015-09-05 20:35:32 -0700129 else:
Tao Bao42206c32015-09-08 13:39:40 -0700130 clobbered_blocks = []
131 self.clobbered_blocks = clobbered_blocks
Tao Baoe9b61912015-07-09 17:37:49 -0700132 self.extended = RangeSet()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700133
134 zero_blocks = []
135 nonzero_blocks = []
136 reference = '\0' * self.blocksize
137
Tao Bao7589e962015-09-05 20:35:32 -0700138 for i in range(self.total_blocks-1 if padded else self.total_blocks):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700139 d = self.data[i*self.blocksize : (i+1)*self.blocksize]
140 if d == reference:
141 zero_blocks.append(i)
142 zero_blocks.append(i+1)
143 else:
144 nonzero_blocks.append(i)
145 nonzero_blocks.append(i+1)
146
Tao Bao42206c32015-09-08 13:39:40 -0700147 assert zero_blocks or nonzero_blocks or clobbered_blocks
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700148
Tao Bao42206c32015-09-08 13:39:40 -0700149 self.file_map = dict()
150 if zero_blocks:
151 self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
152 if nonzero_blocks:
153 self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
154 if clobbered_blocks:
155 self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)
Tao Bao7589e962015-09-05 20:35:32 -0700156
Tao Bao183e56e2017-03-05 17:05:09 -0800157 def _GetRangeData(self, ranges):
158 for s, e in ranges:
159 yield self.data[s*self.blocksize:e*self.blocksize]
160
161 def RangeSha1(self, ranges):
162 h = sha1()
Tao Bao76def242017-11-21 09:25:31 -0800163 for data in self._GetRangeData(ranges): # pylint: disable=not-an-iterable
Tao Bao183e56e2017-03-05 17:05:09 -0800164 h.update(data)
165 return h.hexdigest()
166
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700167 def ReadRangeSet(self, ranges):
Tao Bao183e56e2017-03-05 17:05:09 -0800168 return [self._GetRangeData(ranges)]
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700169
Tao Bao68658c02015-06-01 13:40:49 -0700170 def TotalSha1(self, include_clobbered_blocks=False):
Tao Bao7589e962015-09-05 20:35:32 -0700171 if not include_clobbered_blocks:
Tao Bao183e56e2017-03-05 17:05:09 -0800172 return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
Tao Bao7589e962015-09-05 20:35:32 -0700173 else:
174 return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700175
Tao Bao183e56e2017-03-05 17:05:09 -0800176 def WriteRangeDataToFd(self, ranges, fd):
Tao Bao76def242017-11-21 09:25:31 -0800177 for data in self._GetRangeData(ranges): # pylint: disable=not-an-iterable
Tao Bao183e56e2017-03-05 17:05:09 -0800178 fd.write(data)
179
Doug Zongkerfc44a512014-08-26 13:10:25 -0700180
181class Transfer(object):
Tao Bao183e56e2017-03-05 17:05:09 -0800182 def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
183 src_sha1, style, by_id):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700184 self.tgt_name = tgt_name
185 self.src_name = src_name
186 self.tgt_ranges = tgt_ranges
187 self.src_ranges = src_ranges
Tao Bao183e56e2017-03-05 17:05:09 -0800188 self.tgt_sha1 = tgt_sha1
189 self.src_sha1 = src_sha1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700190 self.style = style
Tao Baob8c87172015-03-19 19:42:12 -0700191
192 # We use OrderedDict rather than dict so that the output is repeatable;
193 # otherwise it would depend on the hash values of the Transfer objects.
194 self.goes_before = OrderedDict()
195 self.goes_after = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700196
Doug Zongker62338182014-09-08 08:29:55 -0700197 self.stash_before = []
198 self.use_stash = []
199
Doug Zongkerfc44a512014-08-26 13:10:25 -0700200 self.id = len(by_id)
201 by_id.append(self)
202
Tianjie Xu25366072017-09-08 17:19:02 -0700203 self._patch = None
204
205 @property
206 def patch(self):
207 return self._patch
208
209 @patch.setter
210 def patch(self, patch):
211 if patch:
212 assert self.style == "diff"
213 self._patch = patch
214
Doug Zongker62338182014-09-08 08:29:55 -0700215 def NetStashChange(self):
216 return (sum(sr.size() for (_, sr) in self.stash_before) -
217 sum(sr.size() for (_, sr) in self.use_stash))
218
Tao Bao82c47982015-08-17 09:45:13 -0700219 def ConvertToNew(self):
220 assert self.style != "new"
221 self.use_stash = []
222 self.style = "new"
223 self.src_ranges = RangeSet()
Tianjie Xu25366072017-09-08 17:19:02 -0700224 self.patch = None
Tao Bao82c47982015-08-17 09:45:13 -0700225
Doug Zongkerfc44a512014-08-26 13:10:25 -0700226 def __str__(self):
227 return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
228 " to " + str(self.tgt_ranges) + ">")
229
230
Doug Zongker6ab2a502016-02-09 08:28:09 -0800231@functools.total_ordering
232class HeapItem(object):
233 def __init__(self, item):
234 self.item = item
Tao Bao186ec992017-12-23 11:50:52 -0800235 # Negate the score since python's heap is a min-heap and we want the
236 # maximum score.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800237 self.score = -item.score
Tao Bao186ec992017-12-23 11:50:52 -0800238
Doug Zongker6ab2a502016-02-09 08:28:09 -0800239 def clear(self):
240 self.item = None
Tao Bao186ec992017-12-23 11:50:52 -0800241
Doug Zongker6ab2a502016-02-09 08:28:09 -0800242 def __bool__(self):
Tao Bao186ec992017-12-23 11:50:52 -0800243 return self.item is not None
244
245 # Python 2 uses __nonzero__, while Python 3 uses __bool__.
246 __nonzero__ = __bool__
247
248 # The rest operations are generated by functools.total_ordering decorator.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800249 def __eq__(self, other):
250 return self.score == other.score
Tao Bao186ec992017-12-23 11:50:52 -0800251
Doug Zongker6ab2a502016-02-09 08:28:09 -0800252 def __le__(self, other):
253 return self.score <= other.score
254
255
Tao Bao294651d2018-02-08 23:21:52 -0800256class ImgdiffStats(object):
257 """A class that collects imgdiff stats.
258
259 It keeps track of the files that will be applied imgdiff while generating
260 BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
261 reasons. The stats is only meaningful when imgdiff not being disabled by the
262 caller of BlockImageDiff. In addition, only files with supported types
263 (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
Tao Bao294651d2018-02-08 23:21:52 -0800264 """
265
266 USED_IMGDIFF = "APK files diff'd with imgdiff"
267 USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"
268
269 # Reasons for not applying imgdiff on APKs.
Tao Bao294651d2018-02-08 23:21:52 -0800270 SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
Tao Baoe709b092018-02-07 12:40:00 -0800271 SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
Tao Bao4ccea852018-02-06 15:16:41 -0800272 SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"
Tao Bao294651d2018-02-08 23:21:52 -0800273
274 # The list of valid reasons, which will also be the dumped order in a report.
275 REASONS = (
276 USED_IMGDIFF,
277 USED_IMGDIFF_LARGE_APK,
Tao Bao294651d2018-02-08 23:21:52 -0800278 SKIPPED_NONMONOTONIC,
Tao Baoe709b092018-02-07 12:40:00 -0800279 SKIPPED_SHARED_BLOCKS,
Tao Bao4ccea852018-02-06 15:16:41 -0800280 SKIPPED_INCOMPLETE,
Tao Bao294651d2018-02-08 23:21:52 -0800281 )
282
283 def __init__(self):
284 self.stats = {}
285
286 def Log(self, filename, reason):
287 """Logs why imgdiff can or cannot be applied to the given filename.
288
289 Args:
290 filename: The filename string.
291 reason: One of the reason constants listed in REASONS.
292
293 Raises:
294 AssertionError: On unsupported filetypes or invalid reason.
295 """
296 assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
297 assert reason in self.REASONS
298
299 if reason not in self.stats:
300 self.stats[reason] = set()
301 self.stats[reason].add(filename)
302
303 def Report(self):
304 """Prints a report of the collected imgdiff stats."""
305
306 def print_header(header, separator):
307 print(header)
308 print(separator * len(header) + '\n')
309
310 print_header(' Imgdiff Stats Report ', '=')
311 for key in self.REASONS:
312 if key not in self.stats:
313 continue
314 values = self.stats[key]
315 section_header = ' {} (count: {}) '.format(key, len(values))
316 print_header(section_header, '-')
317 print(''.join([' {}\n'.format(name) for name in values]))
318
319
Doug Zongkerfc44a512014-08-26 13:10:25 -0700320class BlockImageDiff(object):
Tao Bao76def242017-11-21 09:25:31 -0800321 """Generates the diff of two block image objects.
322
323 BlockImageDiff works on two image objects. An image object is anything that
324 provides the following attributes:
325
326 blocksize: the size in bytes of a block, currently must be 4096.
327
328 total_blocks: the total size of the partition/image, in blocks.
329
330 care_map: a RangeSet containing which blocks (in the range [0,
331 total_blocks) we actually care about; i.e. which blocks contain data.
332
333 file_map: a dict that partitions the blocks contained in care_map into
334 smaller domains that are useful for doing diffs on. (Typically a domain
335 is a file, and the key in file_map is the pathname.)
336
337 clobbered_blocks: a RangeSet containing which blocks contain data but may
338 be altered by the FS. They need to be excluded when verifying the
339 partition integrity.
340
341 ReadRangeSet(): a function that takes a RangeSet and returns the data
342 contained in the image blocks of that RangeSet. The data is returned as
343 a list or tuple of strings; concatenating the elements together should
344 produce the requested data. Implementations are free to break up the
345 data into list/tuple elements in any way that is convenient.
346
347 RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
348 all the data in the specified range.
349
350 TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
351 all the data in the image (ie, all the blocks in the care_map minus
352 clobbered_blocks, or including the clobbered blocks if
353 include_clobbered_blocks is True).
354
355 When creating a BlockImageDiff, the src image may be None, in which case the
356 list of transfers produced will never read from the original image.
357 """
358
Tao Bao293fd132016-06-11 12:19:23 -0700359 def __init__(self, tgt, src=None, threads=None, version=4,
360 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700361 if threads is None:
362 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700363 if threads == 0:
364 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700365 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700366 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700367 self.transfers = []
368 self.src_basenames = {}
369 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800370 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700371 self.touched_src_ranges = RangeSet()
372 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700373 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800374 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700375
Tao Bao8fad03e2017-03-01 14:36:26 -0800376 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700377
378 self.tgt = tgt
379 if src is None:
380 src = EmptyImage()
381 self.src = src
382
383 # The updater code that installs the patch always uses 4k blocks.
384 assert tgt.blocksize == 4096
385 assert src.blocksize == 4096
386
387 # The range sets in each filemap should comprise a partition of
388 # the care map.
389 self.AssertPartition(src.care_map, src.file_map.values())
390 self.AssertPartition(tgt.care_map, tgt.file_map.values())
391
Tao Baob4cfca52016-02-04 14:26:02 -0800392 @property
393 def max_stashed_size(self):
394 return self._max_stashed_size
395
Tao Baocb73aed2018-01-31 17:32:40 -0800396 @staticmethod
397 def FileTypeSupportedByImgdiff(filename):
398 """Returns whether the file type is supported by imgdiff."""
399 return filename.lower().endswith(('.apk', '.jar', '.zip'))
400
Tao Bao294651d2018-02-08 23:21:52 -0800401 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800402 """Checks whether we can apply imgdiff for the given RangeSets.
403
404 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
405 'imgdiff -z' if possible. Because it usually produces significantly smaller
406 patches than bsdiff.
407
408 This is permissible if all of the following conditions hold.
409 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
410 - The file type is supported by imgdiff;
411 - The source and target blocks are monotonic (i.e. the data is stored with
412 blocks in increasing order);
Tao Baoe709b092018-02-07 12:40:00 -0800413 - Both files don't contain shared blocks;
Tao Bao4ccea852018-02-06 15:16:41 -0800414 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800415 - We haven't removed any blocks from the source set.
416
417 If all these conditions are satisfied, concatenating all the blocks in the
418 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
419 in the last block). imgdiff is fine with extra zeros at the end of the file.
420
421 Args:
422 name: The filename to be diff'd.
423 tgt_ranges: The target RangeSet.
424 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800425 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800426
427 Returns:
428 A boolean result.
429 """
Tao Bao508b0872018-02-09 13:44:43 -0800430 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800431 return False
432
433 if not tgt_ranges.monotonic or not src_ranges.monotonic:
434 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
435 return False
436
Tao Baoe709b092018-02-07 12:40:00 -0800437 if (tgt_ranges.extra.get('uses_shared_blocks') or
438 src_ranges.extra.get('uses_shared_blocks')):
439 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
440 return False
441
Tao Bao4ccea852018-02-06 15:16:41 -0800442 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
443 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
444 return False
445
Tao Bao294651d2018-02-08 23:21:52 -0800446 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
447 else ImgdiffStats.USED_IMGDIFF)
448 self.imgdiff_stats.Log(name, reason)
449 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800450
Doug Zongkerfc44a512014-08-26 13:10:25 -0700451 def Compute(self, prefix):
452 # When looking for a source file to use as the diff input for a
453 # target file, we try:
454 # 1) an exact path match if available, otherwise
455 # 2) a exact basename match if available, otherwise
456 # 3) a basename match after all runs of digits are replaced by
457 # "#" if available, otherwise
458 # 4) we have no source for this target.
459 self.AbbreviateSourceNames()
460 self.FindTransfers()
461
462 # Find the ordering dependencies among transfers (this is O(n^2)
463 # in the number of transfers).
464 self.GenerateDigraph()
465 # Find a sequence of transfers that satisfies as many ordering
466 # dependencies as possible (heuristically).
467 self.FindVertexSequence()
468 # Fix up the ordering dependencies that the sequence didn't
469 # satisfy.
Tao Bao8fad03e2017-03-01 14:36:26 -0800470 self.ReverseBackwardEdges()
471 self.ImproveVertexSequence()
Doug Zongker62338182014-09-08 08:29:55 -0700472
Tao Bao82c47982015-08-17 09:45:13 -0700473 # Ensure the runtime stash size is under the limit.
Tao Bao8fad03e2017-03-01 14:36:26 -0800474 if common.OPTIONS.cache_size is not None:
Tao Bao82c47982015-08-17 09:45:13 -0700475 self.ReviseStashSize()
476
Doug Zongkerfc44a512014-08-26 13:10:25 -0700477 # Double-check our work.
478 self.AssertSequenceGood()
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800479 self.AssertSha1Good()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700480
481 self.ComputePatches(prefix)
482 self.WriteTransfers(prefix)
483
Tao Bao294651d2018-02-08 23:21:52 -0800484 # Report the imgdiff stats.
485 if common.OPTIONS.verbose and not self.disable_imgdiff:
486 self.imgdiff_stats.Report()
487
Doug Zongkerfc44a512014-08-26 13:10:25 -0700488 def WriteTransfers(self, prefix):
Tianjie Xu37e29302016-06-23 16:10:35 -0700489 def WriteSplitTransfers(out, style, target_blocks):
490 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
Tianjie Xubb848c52016-06-21 15:54:09 -0700491
492 This prevents the target size of one command from being too large; and
493 might help to avoid fsync errors on some devices."""
494
Tao Bao3a2e3502016-12-28 09:14:39 -0800495 assert style == "new" or style == "zero"
Tianjie Xu37e29302016-06-23 16:10:35 -0700496 blocks_limit = 1024
Tianjie Xubb848c52016-06-21 15:54:09 -0700497 total = 0
Tianjie Xu37e29302016-06-23 16:10:35 -0700498 while target_blocks:
499 blocks_to_write = target_blocks.first(blocks_limit)
500 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
501 total += blocks_to_write.size()
502 target_blocks = target_blocks.subtract(blocks_to_write)
Tianjie Xubb848c52016-06-21 15:54:09 -0700503 return total
504
Doug Zongkerfc44a512014-08-26 13:10:25 -0700505 out = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700506 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700507
Tao Bao3a2e3502016-12-28 09:14:39 -0800508 # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
509 # id. 'stashes' records the map from 'hash' to the ref count. The stash
510 # will be freed only if the count decrements to zero.
Doug Zongker62338182014-09-08 08:29:55 -0700511 stashes = {}
512 stashed_blocks = 0
513 max_stashed_blocks = 0
514
Doug Zongkerfc44a512014-08-26 13:10:25 -0700515 for xf in self.transfers:
516
Tao Bao8fad03e2017-03-01 14:36:26 -0800517 for _, sr in xf.stash_before:
518 sh = self.src.RangeSha1(sr)
519 if sh in stashes:
520 stashes[sh] += 1
Sami Tolvanendd67a292014-12-09 16:40:34 +0000521 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800522 stashes[sh] = 1
523 stashed_blocks += sr.size()
524 self.touched_src_ranges = self.touched_src_ranges.union(sr)
525 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700526
527 if stashed_blocks > max_stashed_blocks:
528 max_stashed_blocks = stashed_blocks
529
Jesse Zhao7b985f62015-03-02 16:53:08 -0800530 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800531 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800532
Tao Bao8fad03e2017-03-01 14:36:26 -0800533 # <# blocks> <src ranges>
534 # OR
535 # <# blocks> <src ranges> <src locs> <stash refs...>
536 # OR
537 # <# blocks> - <stash refs...>
Doug Zongker62338182014-09-08 08:29:55 -0700538
Tao Bao8fad03e2017-03-01 14:36:26 -0800539 size = xf.src_ranges.size()
Tao Bao508b0872018-02-09 13:44:43 -0800540 src_str_buffer = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700541
Tao Bao8fad03e2017-03-01 14:36:26 -0800542 unstashed_src_ranges = xf.src_ranges
543 mapped_stashes = []
544 for _, sr in xf.use_stash:
545 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
546 sh = self.src.RangeSha1(sr)
547 sr = xf.src_ranges.map_within(sr)
548 mapped_stashes.append(sr)
549 assert sh in stashes
Tao Bao508b0872018-02-09 13:44:43 -0800550 src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
Tao Bao8fad03e2017-03-01 14:36:26 -0800551 stashes[sh] -= 1
552 if stashes[sh] == 0:
553 free_string.append("free %s\n" % (sh,))
554 free_size += sr.size()
555 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700556
Tao Bao8fad03e2017-03-01 14:36:26 -0800557 if unstashed_src_ranges:
Tao Bao508b0872018-02-09 13:44:43 -0800558 src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800559 if xf.use_stash:
560 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Tao Bao508b0872018-02-09 13:44:43 -0800561 src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800562 mapped_stashes.append(mapped_unstashed)
Doug Zongker62338182014-09-08 08:29:55 -0700563 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Tao Bao8fad03e2017-03-01 14:36:26 -0800564 else:
Tao Bao508b0872018-02-09 13:44:43 -0800565 src_str_buffer.insert(1, "-")
Tao Bao8fad03e2017-03-01 14:36:26 -0800566 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Doug Zongker62338182014-09-08 08:29:55 -0700567
Tao Bao508b0872018-02-09 13:44:43 -0800568 src_str = " ".join(src_str_buffer)
Doug Zongker62338182014-09-08 08:29:55 -0700569
Tao Bao8fad03e2017-03-01 14:36:26 -0800570 # version 3+:
Doug Zongker62338182014-09-08 08:29:55 -0700571 # zero <rangeset>
572 # new <rangeset>
573 # erase <rangeset>
Dan Albert8b72aef2015-03-23 19:13:21 -0700574 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
575 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
576 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700577
578 tgt_size = xf.tgt_ranges.size()
579
580 if xf.style == "new":
581 assert xf.tgt_ranges
Tianjie Xu37e29302016-06-23 16:10:35 -0700582 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700583 total += tgt_size
584 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700585 assert xf.tgt_ranges
586 assert xf.src_ranges.size() == tgt_size
587 if xf.src_ranges != xf.tgt_ranges:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100588 # take into account automatic stashing of overlapping blocks
589 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700590 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100591 if temp_stash_usage > max_stashed_blocks:
592 max_stashed_blocks = temp_stash_usage
593
Tao Baod522bdc2016-04-12 15:53:16 -0700594 self.touched_src_ranges = self.touched_src_ranges.union(
595 xf.src_ranges)
596
Tao Bao8fad03e2017-03-01 14:36:26 -0800597 out.append("%s %s %s %s\n" % (
Sami Tolvanendd67a292014-12-09 16:40:34 +0000598 xf.style,
Tao Bao183e56e2017-03-05 17:05:09 -0800599 xf.tgt_sha1,
Dan Albert8b72aef2015-03-23 19:13:21 -0700600 xf.tgt_ranges.to_string_raw(), src_str))
Tao Bao8fad03e2017-03-01 14:36:26 -0800601 total += tgt_size
602 elif xf.style in ("bsdiff", "imgdiff"):
603 assert xf.tgt_ranges
604 assert xf.src_ranges
605 # take into account automatic stashing of overlapping blocks
606 if xf.src_ranges.overlaps(xf.tgt_ranges):
607 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
608 if temp_stash_usage > max_stashed_blocks:
609 max_stashed_blocks = temp_stash_usage
610
611 self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
612
613 out.append("%s %d %d %s %s %s %s\n" % (
614 xf.style,
615 xf.patch_start, xf.patch_len,
616 xf.src_sha1,
617 xf.tgt_sha1,
618 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700619 total += tgt_size
620 elif xf.style == "zero":
621 assert xf.tgt_ranges
622 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
Tianjie Xu37e29302016-06-23 16:10:35 -0700623 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
Tianjie Xubb848c52016-06-21 15:54:09 -0700624 total += to_zero.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700625 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700626 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700627
Sami Tolvanendd67a292014-12-09 16:40:34 +0000628 if free_string:
629 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800630 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000631
Tao Bao8fad03e2017-03-01 14:36:26 -0800632 if common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700633 # Sanity check: abort if we're going to need more stash space than
634 # the allowed size (cache_size * threshold). There are two purposes
635 # of having a threshold here. a) Part of the cache may have been
636 # occupied by some recovery logs. b) It will buy us some time to deal
637 # with the oversize issue.
638 cache_size = common.OPTIONS.cache_size
639 stash_threshold = common.OPTIONS.stash_threshold
640 max_allowed = cache_size * stash_threshold
Tao Baoe8c68a02017-02-26 10:48:11 -0800641 assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
Tao Bao8dcf7382015-05-21 14:09:49 -0700642 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
643 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
644 self.tgt.blocksize, max_allowed, cache_size,
645 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700646
Tao Bao8fad03e2017-03-01 14:36:26 -0800647 self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
Tao Baod522bdc2016-04-12 15:53:16 -0700648
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700649 if self.tgt.hashtree_info:
650 out.append("compute_hash_tree {} {} {} {} {}\n".format(
651 self.tgt.hashtree_info.hashtree_range.to_string_raw(),
652 self.tgt.hashtree_info.filesystem_range.to_string_raw(),
653 self.tgt.hashtree_info.hash_algorithm,
654 self.tgt.hashtree_info.salt,
655 self.tgt.hashtree_info.root_hash))
656
Tao Baoe9b61912015-07-09 17:37:49 -0700657 # Zero out extended blocks as a workaround for bug 20881595.
658 if self.tgt.extended:
Tianjie Xu37e29302016-06-23 16:10:35 -0700659 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
Tianjie Xubb848c52016-06-21 15:54:09 -0700660 self.tgt.extended.size())
Tao Baob32d56e2015-09-09 11:55:01 -0700661 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700662
663 # We erase all the blocks on the partition that a) don't contain useful
Tao Bao66f1fa62016-05-03 10:02:01 -0700664 # data in the new image; b) will not be touched by dm-verity. Out of those
665 # blocks, we erase the ones that won't be used in this update at the
666 # beginning of an update. The rest would be erased at the end. This is to
667 # work around the eMMC issue observed on some devices, which may otherwise
668 # get starving for clean blocks and thus fail the update. (b/28347095)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700669 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700670 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
671 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
Tao Bao66f1fa62016-05-03 10:02:01 -0700672
673 erase_first = new_dontcare.subtract(self.touched_src_ranges)
674 if erase_first:
675 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
676
677 erase_last = new_dontcare.subtract(erase_first)
678 if erase_last:
679 out.append("erase %s\n" % (erase_last.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700680
681 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700682 out.insert(1, "%d\n" % (total,))
Tao Bao8fad03e2017-03-01 14:36:26 -0800683 # v3+: the number of stash slots is unused.
684 out.insert(2, "0\n")
685 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700686
687 with open(prefix + ".transfer.list", "wb") as f:
688 for i in out:
689 f.write(i)
690
Tao Bao8fad03e2017-03-01 14:36:26 -0800691 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
692 OPTIONS = common.OPTIONS
693 if OPTIONS.cache_size is not None:
694 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
695 print("max stashed blocks: %d (%d bytes), "
696 "limit: %d bytes (%.2f%%)\n" % (
Tao Bao508b0872018-02-09 13:44:43 -0800697 max_stashed_blocks, self._max_stashed_size, max_allowed,
698 self._max_stashed_size * 100.0 / max_allowed))
Tao Bao8fad03e2017-03-01 14:36:26 -0800699 else:
700 print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
Tao Bao508b0872018-02-09 13:44:43 -0800701 max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700702
Tao Bao82c47982015-08-17 09:45:13 -0700703 def ReviseStashSize(self):
704 print("Revising stash size...")
Tao Baoe27acfd2016-12-16 11:13:55 -0800705 stash_map = {}
Tao Bao82c47982015-08-17 09:45:13 -0700706
707 # Create the map between a stash and its def/use points. For example, for a
Tao Bao3c5a16d2017-02-13 11:42:50 -0800708 # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
Tao Bao82c47982015-08-17 09:45:13 -0700709 for xf in self.transfers:
710 # Command xf defines (stores) all the stashes in stash_before.
Tao Bao3a2e3502016-12-28 09:14:39 -0800711 for stash_raw_id, sr in xf.stash_before:
712 stash_map[stash_raw_id] = (sr, xf)
Tao Bao82c47982015-08-17 09:45:13 -0700713
714 # Record all the stashes command xf uses.
Tao Bao3a2e3502016-12-28 09:14:39 -0800715 for stash_raw_id, _ in xf.use_stash:
716 stash_map[stash_raw_id] += (xf,)
Tao Bao82c47982015-08-17 09:45:13 -0700717
718 # Compute the maximum blocks available for stash based on /cache size and
719 # the threshold.
720 cache_size = common.OPTIONS.cache_size
721 stash_threshold = common.OPTIONS.stash_threshold
722 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
723
Tao Bao3a2e3502016-12-28 09:14:39 -0800724 # See the comments for 'stashes' in WriteTransfers().
Tao Baoe27acfd2016-12-16 11:13:55 -0800725 stashes = {}
Tao Bao82c47982015-08-17 09:45:13 -0700726 stashed_blocks = 0
Tao Bao9a5caf22015-08-25 15:10:10 -0700727 new_blocks = 0
Tao Bao82c47982015-08-17 09:45:13 -0700728
729 # Now go through all the commands. Compute the required stash size on the
730 # fly. If a command requires excess stash than available, it deletes the
731 # stash by replacing the command that uses the stash with a "new" command
732 # instead.
733 for xf in self.transfers:
734 replaced_cmds = []
735
736 # xf.stash_before generates explicit stash commands.
Tao Bao3a2e3502016-12-28 09:14:39 -0800737 for stash_raw_id, sr in xf.stash_before:
Tao Baoe27acfd2016-12-16 11:13:55 -0800738 # Check the post-command stashed_blocks.
739 stashed_blocks_after = stashed_blocks
Tao Bao8fad03e2017-03-01 14:36:26 -0800740 sh = self.src.RangeSha1(sr)
741 if sh not in stashes:
Tao Baoe27acfd2016-12-16 11:13:55 -0800742 stashed_blocks_after += sr.size()
Tao Baoe27acfd2016-12-16 11:13:55 -0800743
744 if stashed_blocks_after > max_allowed:
Tao Bao82c47982015-08-17 09:45:13 -0700745 # We cannot stash this one for a later command. Find out the command
746 # that will use this stash and replace the command with "new".
Tao Bao3a2e3502016-12-28 09:14:39 -0800747 use_cmd = stash_map[stash_raw_id][2]
Tao Bao82c47982015-08-17 09:45:13 -0700748 replaced_cmds.append(use_cmd)
Tao Bao9a5caf22015-08-25 15:10:10 -0700749 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao82c47982015-08-17 09:45:13 -0700750 else:
Tao Bao3c5a16d2017-02-13 11:42:50 -0800751 # Update the stashes map.
Tao Bao8fad03e2017-03-01 14:36:26 -0800752 if sh in stashes:
753 stashes[sh] += 1
Tao Bao3c5a16d2017-02-13 11:42:50 -0800754 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800755 stashes[sh] = 1
Tao Baoe27acfd2016-12-16 11:13:55 -0800756 stashed_blocks = stashed_blocks_after
Tao Bao82c47982015-08-17 09:45:13 -0700757
758 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
759 # ComputePatches(), they both have the style of "diff".
Tao Bao8fad03e2017-03-01 14:36:26 -0800760 if xf.style == "diff":
Tao Bao82c47982015-08-17 09:45:13 -0700761 assert xf.tgt_ranges and xf.src_ranges
762 if xf.src_ranges.overlaps(xf.tgt_ranges):
763 if stashed_blocks + xf.src_ranges.size() > max_allowed:
764 replaced_cmds.append(xf)
Tao Bao9a5caf22015-08-25 15:10:10 -0700765 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao82c47982015-08-17 09:45:13 -0700766
767 # Replace the commands in replaced_cmds with "new"s.
768 for cmd in replaced_cmds:
769 # It no longer uses any commands in "use_stash". Remove the def points
770 # for all those stashes.
Tao Bao3a2e3502016-12-28 09:14:39 -0800771 for stash_raw_id, sr in cmd.use_stash:
772 def_cmd = stash_map[stash_raw_id][1]
773 assert (stash_raw_id, sr) in def_cmd.stash_before
774 def_cmd.stash_before.remove((stash_raw_id, sr))
Tao Bao82c47982015-08-17 09:45:13 -0700775
Tianjie Xuebe39a02016-01-14 14:12:26 -0800776 # Add up blocks that violates space limit and print total number to
777 # screen later.
778 new_blocks += cmd.tgt_ranges.size()
Tao Bao82c47982015-08-17 09:45:13 -0700779 cmd.ConvertToNew()
780
Tao Bao3a2e3502016-12-28 09:14:39 -0800781 # xf.use_stash may generate free commands.
Tao Bao8fad03e2017-03-01 14:36:26 -0800782 for _, sr in xf.use_stash:
783 sh = self.src.RangeSha1(sr)
784 assert sh in stashes
785 stashes[sh] -= 1
786 if stashes[sh] == 0:
Tao Baoe27acfd2016-12-16 11:13:55 -0800787 stashed_blocks -= sr.size()
Tao Bao8fad03e2017-03-01 14:36:26 -0800788 stashes.pop(sh)
Tao Baoe27acfd2016-12-16 11:13:55 -0800789
Tianjie Xuebe39a02016-01-14 14:12:26 -0800790 num_of_bytes = new_blocks * self.tgt.blocksize
791 print(" Total %d blocks (%d bytes) are packed as new blocks due to "
792 "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao304ee272016-12-19 11:01:38 -0800793 return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700794
Doug Zongkerfc44a512014-08-26 13:10:25 -0700795 def ComputePatches(self, prefix):
796 print("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800797 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700798 patch_num = 0
799 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800800 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700801 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700802 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
803 print("%10d %10d (%6.2f%%) %7s %s %s" % (
804 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
805 str(xf.tgt_ranges)))
806
Doug Zongkerfc44a512014-08-26 13:10:25 -0700807 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800808 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700809 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
810 print("%10d %10d (%6.2f%%) %7s %s %s" % (
811 tgt_size, tgt_size, 100.0, xf.style,
812 xf.tgt_name, str(xf.tgt_ranges)))
813
Doug Zongkerfc44a512014-08-26 13:10:25 -0700814 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700815 # We can't compare src and tgt directly because they may have
816 # the same content but be broken up into blocks differently, eg:
817 #
818 # ["he", "llo"] vs ["h", "ello"]
819 #
820 # We want those to compare equal, ideally without having to
821 # actually concatenate the strings (these may be tens of
822 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800823 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700824 # These are identical; we don't need to generate a patch,
825 # just issue copy commands on the device.
826 xf.style = "move"
Tianjie Xu25366072017-09-08 17:19:02 -0700827 xf.patch = None
Tao Bao183e56e2017-03-05 17:05:09 -0800828 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700829 if xf.src_ranges != xf.tgt_ranges:
830 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
831 tgt_size, tgt_size, 100.0, xf.style,
832 xf.tgt_name if xf.tgt_name == xf.src_name else (
833 xf.tgt_name + " (from " + xf.src_name + ")"),
834 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700835 else:
Tianjie Xu25366072017-09-08 17:19:02 -0700836 if xf.patch:
Tao Bao5bab0dd2018-07-10 17:44:52 -0700837 # We have already generated the patch with imgdiff, while
838 # splitting large APKs (i.e. in FindTransfers()).
Tianjie Xu25366072017-09-08 17:19:02 -0700839 assert not self.disable_imgdiff
840 imgdiff = True
Tianjie Xu25366072017-09-08 17:19:02 -0700841 else:
Tao Baocb73aed2018-01-31 17:32:40 -0800842 imgdiff = self.CanUseImgdiff(
843 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700844 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800845 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700846 patch_num += 1
847
848 else:
849 assert False, "unknown style " + xf.style
850
Tao Bao183e56e2017-03-05 17:05:09 -0800851 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700852 if self.threads > 1:
853 print("Computing patches (using %d threads)..." % (self.threads,))
854 else:
855 print("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700856
Tao Bao183e56e2017-03-05 17:05:09 -0800857 diff_total = len(diff_queue)
858 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700859 error_messages = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700860
Tao Bao183e56e2017-03-05 17:05:09 -0800861 # Using multiprocessing doesn't give additional benefits, due to the
862 # pattern of the code. The diffing work is done by subprocess.call, which
863 # already runs in a separate process (not affected much by the GIL -
864 # Global Interpreter Lock). Using multiprocess also requires either a)
865 # writing the diff input files in the main process before forking, or b)
866 # reopening the image file (SparseImage) in the worker processes. Doing
867 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700868 lock = threading.Lock()
869 def diff_worker():
870 while True:
871 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800872 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700873 return
Tao Bao183e56e2017-03-05 17:05:09 -0800874 xf_index, imgdiff, patch_index = diff_queue.pop()
Tao Bao97395142018-02-12 12:08:05 -0800875 xf = self.transfers[xf_index]
Tao Bao183e56e2017-03-05 17:05:09 -0800876
Tao Bao97395142018-02-12 12:08:05 -0800877 if sys.stdout.isatty():
878 diff_left = len(diff_queue)
879 progress = (diff_total - diff_left) * 100 / diff_total
880 # '\033[K' is to clear to EOL.
881 print(' [%3d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
882 sys.stdout.flush()
883
Tianjie Xu25366072017-09-08 17:19:02 -0700884 patch = xf.patch
885 if not patch:
886 src_ranges = xf.src_ranges
887 tgt_ranges = xf.tgt_ranges
Tao Bao183e56e2017-03-05 17:05:09 -0800888
Tianjie Xudf1166e2018-01-27 17:35:41 -0800889 src_file = common.MakeTempFile(prefix="src-")
890 with open(src_file, "wb") as fd:
891 self.src.WriteRangeDataToFd(src_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700892
Tianjie Xudf1166e2018-01-27 17:35:41 -0800893 tgt_file = common.MakeTempFile(prefix="tgt-")
894 with open(tgt_file, "wb") as fd:
895 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700896
897 message = []
898 try:
899 patch = compute_patch(src_file, tgt_file, imgdiff)
900 except ValueError as e:
901 message.append(
902 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
Tao Bao508b0872018-02-09 13:44:43 -0800903 "imgdiff" if imgdiff else "bsdiff",
904 xf.tgt_name if xf.tgt_name == xf.src_name else
Tianjie Xu25366072017-09-08 17:19:02 -0700905 xf.tgt_name + " (from " + xf.src_name + ")",
Tao Bao508b0872018-02-09 13:44:43 -0800906 xf.tgt_ranges, xf.src_ranges, e.message))
Tianjie Xu25366072017-09-08 17:19:02 -0700907 if message:
908 with lock:
909 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800910
911 with lock:
912 patches[patch_index] = (xf_index, patch)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700913
914 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700915 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700916 for th in threads:
917 th.start()
918 while threads:
919 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800920
921 if sys.stdout.isatty():
922 print('\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700923
924 if error_messages:
Tao Baob937ead2017-10-19 16:51:53 -0700925 print('ERROR:')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700926 print('\n'.join(error_messages))
Tao Baob937ead2017-10-19 16:51:53 -0700927 print('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700928 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700929 else:
930 patches = []
931
Tao Bao183e56e2017-03-05 17:05:09 -0800932 offset = 0
933 with open(prefix + ".patch.dat", "wb") as patch_fd:
934 for index, patch in patches:
935 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700936 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800937 xf.patch_start = offset
938 offset += xf.patch_len
939 patch_fd.write(patch)
940
941 if common.OPTIONS.verbose:
942 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
943 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
Tao Bao508b0872018-02-09 13:44:43 -0800944 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
945 xf.style,
946 xf.tgt_name if xf.tgt_name == xf.src_name else (
947 xf.tgt_name + " (from " + xf.src_name + ")"),
948 xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700949
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800950 def AssertSha1Good(self):
951 """Check the SHA-1 of the src & tgt blocks in the transfer list.
952
953 Double check the SHA-1 value to avoid the issue in b/71908713, where
954 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
955 environment. That specific problem has been fixed by protecting the
956 underlying generator function 'SparseImage._GetRangeData()' with lock.
957 """
958 for xf in self.transfers:
959 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
960 assert xf.tgt_sha1 == tgt_sha1
961 if xf.style == "diff":
962 src_sha1 = self.src.RangeSha1(xf.src_ranges)
963 assert xf.src_sha1 == src_sha1
964
Doug Zongkerfc44a512014-08-26 13:10:25 -0700965 def AssertSequenceGood(self):
966 # Simulate the sequences of transfers we will output, and check that:
967 # - we never read a block after writing it, and
968 # - we write every block we care about exactly once.
969
970 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800971 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700972
973 # Imagine processing the transfers in order.
974 for xf in self.transfers:
975 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700976
977 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800978 for _, sr in xf.use_stash:
979 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700980
Doug Zongker6ab2a502016-02-09 08:28:09 -0800981 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800982 # Source image could be larger. Don't check the blocks that are in the
983 # source image only. Since they are not in 'touched', and won't ever
984 # be touched.
985 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800986 assert touched[i] == 0
987
988 # Check that the output blocks for this transfer haven't yet
989 # been touched, and touch all the blocks written by this
990 # transfer.
991 for s, e in xf.tgt_ranges:
992 for i in range(s, e):
993 assert touched[i] == 0
994 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700995
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700996 if self.tgt.hashtree_info:
997 for s, e in self.tgt.hashtree_info.hashtree_range:
998 for i in range(s, e):
999 assert touched[i] == 0
1000 touched[i] = 1
1001
Doug Zongkerfc44a512014-08-26 13:10:25 -07001002 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001003 for s, e in self.tgt.care_map:
1004 for i in range(s, e):
1005 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001006
Doug Zongker62338182014-09-08 08:29:55 -07001007 def ImproveVertexSequence(self):
1008 print("Improving vertex order...")
1009
1010 # At this point our digraph is acyclic; we reversed any edges that
1011 # were backwards in the heuristically-generated sequence. The
1012 # previously-generated order is still acceptable, but we hope to
1013 # find a better order that needs less memory for stashed data.
1014 # Now we do a topological sort to generate a new vertex order,
1015 # using a greedy algorithm to choose which vertex goes next
1016 # whenever we have a choice.
1017
1018 # Make a copy of the edge set; this copy will get destroyed by the
1019 # algorithm.
1020 for xf in self.transfers:
1021 xf.incoming = xf.goes_after.copy()
1022 xf.outgoing = xf.goes_before.copy()
1023
1024 L = [] # the new vertex order
1025
1026 # S is the set of sources in the remaining graph; we always choose
1027 # the one that leaves the least amount of stashed data after it's
1028 # executed.
1029 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1030 if not u.incoming]
1031 heapq.heapify(S)
1032
1033 while S:
1034 _, _, xf = heapq.heappop(S)
1035 L.append(xf)
1036 for u in xf.outgoing:
1037 del u.incoming[xf]
1038 if not u.incoming:
1039 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1040
1041 # if this fails then our graph had a cycle.
1042 assert len(L) == len(self.transfers)
1043
1044 self.transfers = L
1045 for i, xf in enumerate(L):
1046 xf.order = i
1047
Doug Zongker62338182014-09-08 08:29:55 -07001048 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001049 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1050
1051 For each transfer, make sure it properly stashes the blocks it touches and
1052 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1053 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1054 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1055 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1056 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1057 blocks will be written to the same stash slot in WriteTransfers().
1058 """
1059
Doug Zongker62338182014-09-08 08:29:55 -07001060 print("Reversing backward edges...")
1061 in_order = 0
1062 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001063 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001064 stash_size = 0
1065
1066 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001067 for u in xf.goes_before.copy():
1068 # xf should go before u
1069 if xf.order < u.order:
1070 # it does, hurray!
1071 in_order += 1
1072 else:
1073 # it doesn't, boo. modify u to stash the blocks that it
1074 # writes that xf wants to read, and then require u to go
1075 # before xf.
1076 out_of_order += 1
1077
1078 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1079 assert overlap
1080
Tao Bao3a2e3502016-12-28 09:14:39 -08001081 u.stash_before.append((stash_raw_id, overlap))
1082 xf.use_stash.append((stash_raw_id, overlap))
1083 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001084 stash_size += overlap.size()
1085
1086 # reverse the edge direction; now xf must go after u
1087 del xf.goes_before[u]
1088 del u.goes_after[xf]
1089 xf.goes_after[u] = None # value doesn't matter
1090 u.goes_before[xf] = None
1091
1092 print((" %d/%d dependencies (%.2f%%) were violated; "
1093 "%d source blocks stashed.") %
1094 (out_of_order, in_order + out_of_order,
1095 (out_of_order * 100.0 / (in_order + out_of_order))
1096 if (in_order + out_of_order) else 0.0,
1097 stash_size))
1098
Doug Zongkerfc44a512014-08-26 13:10:25 -07001099 def FindVertexSequence(self):
1100 print("Finding vertex sequence...")
1101
1102 # This is based on "A Fast & Effective Heuristic for the Feedback
1103 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1104 # it as starting with the digraph G and moving all the vertices to
1105 # be on a horizontal line in some order, trying to minimize the
1106 # number of edges that end up pointing to the left. Left-pointing
1107 # edges will get removed to turn the digraph into a DAG. In this
1108 # case each edge has a weight which is the number of source blocks
1109 # we'll lose if that edge is removed; we try to minimize the total
1110 # weight rather than just the number of edges.
1111
1112 # Make a copy of the edge set; this copy will get destroyed by the
1113 # algorithm.
1114 for xf in self.transfers:
1115 xf.incoming = xf.goes_after.copy()
1116 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001117 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001118
1119 # We use an OrderedDict instead of just a set so that the output
1120 # is repeatable; otherwise it would depend on the hash values of
1121 # the transfer objects.
1122 G = OrderedDict()
1123 for xf in self.transfers:
1124 G[xf] = None
1125 s1 = deque() # the left side of the sequence, built from left to right
1126 s2 = deque() # the right side of the sequence, built from right to left
1127
Doug Zongker6ab2a502016-02-09 08:28:09 -08001128 heap = []
1129 for xf in self.transfers:
1130 xf.heap_item = HeapItem(xf)
1131 heap.append(xf.heap_item)
1132 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001133
Tao Bao33482282016-10-24 16:49:08 -07001134 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1135 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1136 # { key1: None, key2: None, ... }.
1137 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1138 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001139
1140 def adjust_score(iu, delta):
1141 iu.score += delta
1142 iu.heap_item.clear()
1143 iu.heap_item = HeapItem(iu)
1144 heapq.heappush(heap, iu.heap_item)
1145
1146 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001147 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001148 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001149 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001150 for u in sinks:
Tao Bao508b0872018-02-09 13:44:43 -08001151 if u not in G:
1152 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001153 s2.appendleft(u)
1154 del G[u]
1155 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001156 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001157 if not iu.outgoing:
1158 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001159 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001160
1161 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001162 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001163 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001164 for u in sources:
Tao Bao508b0872018-02-09 13:44:43 -08001165 if u not in G:
1166 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001167 s1.append(u)
1168 del G[u]
1169 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001170 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001171 if not iu.incoming:
1172 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001173 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001174
Tao Bao508b0872018-02-09 13:44:43 -08001175 if not G:
1176 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001177
1178 # Find the "best" vertex to put next. "Best" is the one that
1179 # maximizes the net difference in source blocks saved we get by
1180 # pretending it's a source rather than a sink.
1181
Doug Zongker6ab2a502016-02-09 08:28:09 -08001182 while True:
1183 u = heapq.heappop(heap)
1184 if u and u.item in G:
1185 u = u.item
1186 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001187
Doug Zongkerfc44a512014-08-26 13:10:25 -07001188 s1.append(u)
1189 del G[u]
1190 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001191 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001192 if not iu.incoming:
1193 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001194
Doug Zongkerfc44a512014-08-26 13:10:25 -07001195 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001196 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001197 if not iu.outgoing:
1198 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001199
1200 # Now record the sequence in the 'order' field of each transfer,
1201 # and by rearranging self.transfers to be in the chosen sequence.
1202
1203 new_transfers = []
1204 for x in itertools.chain(s1, s2):
1205 x.order = len(new_transfers)
1206 new_transfers.append(x)
1207 del x.incoming
1208 del x.outgoing
1209
1210 self.transfers = new_transfers
1211
1212 def GenerateDigraph(self):
1213 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001214
1215 # Each item of source_ranges will be:
1216 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001217 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001218 source_ranges = []
1219 for b in self.transfers:
1220 for s, e in b.src_ranges:
1221 if e > len(source_ranges):
1222 source_ranges.extend([None] * (e-len(source_ranges)))
1223 for i in range(s, e):
1224 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001225 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001226 else:
Tao Bao33482282016-10-24 16:49:08 -07001227 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001228
Doug Zongkerfc44a512014-08-26 13:10:25 -07001229 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001230 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001231 for s, e in a.tgt_ranges:
1232 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001233 if i >= len(source_ranges):
1234 break
Tao Bao33482282016-10-24 16:49:08 -07001235 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1236 if source_ranges[i] is not None:
1237 for j in source_ranges[i]:
1238 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001239
1240 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001241 if a is b:
1242 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001243
1244 # If the blocks written by A are read by B, then B needs to go before A.
1245 i = a.tgt_ranges.intersect(b.src_ranges)
1246 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001247 if b.src_name == "__ZERO":
1248 # the cost of removing source blocks for the __ZERO domain
1249 # is (nearly) zero.
1250 size = 0
1251 else:
1252 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001253 b.goes_before[a] = size
1254 a.goes_after[b] = size
1255
1256 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001257 """Parse the file_map to generate all the transfers."""
1258
Tianjie Xu41390d42017-11-22 11:35:18 -08001259 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1260 src_ranges, style, by_id):
1261 """Add one or multiple Transfer()s by splitting large files.
1262
1263 For BBOTA v3, we need to stash source blocks for resumable feature.
1264 However, with the growth of file size and the shrink of the cache
1265 partition source blocks are too large to be stashed. If a file occupies
1266 too many blocks, we split it into smaller pieces by getting multiple
1267 Transfer()s.
1268
1269 The downside is that after splitting, we may increase the package size
1270 since the split pieces don't align well. According to our experiments,
1271 1/8 of the cache size as the per-piece limit appears to be optimal.
1272 Compared to the fixed 1024-block limit, it reduces the overall package
1273 size by 30% for volantis, and 20% for angler and bullhead."""
1274
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001275 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001276 while (tgt_ranges.size() > max_blocks_per_transfer and
1277 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001278 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1279 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001280 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1281 src_first = src_ranges.first(max_blocks_per_transfer)
1282
Tao Bao183e56e2017-03-05 17:05:09 -08001283 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1284 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1285 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001286
1287 tgt_ranges = tgt_ranges.subtract(tgt_first)
1288 src_ranges = src_ranges.subtract(src_first)
1289 pieces += 1
1290
1291 # Handle remaining blocks.
1292 if tgt_ranges.size() or src_ranges.size():
1293 # Must be both non-empty.
1294 assert tgt_ranges.size() and src_ranges.size()
1295 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1296 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001297 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1298 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1299 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001300
Tianjie Xu41390d42017-11-22 11:35:18 -08001301 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1302 by_id):
1303 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001304
Tianjie Xu41390d42017-11-22 11:35:18 -08001305 This function will construct a list of zip archives, which will later be
1306 split by imgdiff to reduce the final patch size. For the other files,
1307 we will plainly split them based on a fixed chunk size with the potential
1308 patch size penalty.
1309 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001310
1311 assert style == "diff"
1312
1313 # Change nothing for small files.
1314 if (tgt_ranges.size() <= max_blocks_per_transfer and
1315 src_ranges.size() <= max_blocks_per_transfer):
1316 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1317 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1318 style, by_id)
1319 return
1320
Tao Baocb73aed2018-01-31 17:32:40 -08001321 # Split large APKs with imgdiff, if possible. We're intentionally checking
1322 # file types one more time (CanUseImgdiff() checks that as well), before
1323 # calling the costly RangeSha1()s.
1324 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1325 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001326 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001327 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1328 return
1329
Tianjie Xu41390d42017-11-22 11:35:18 -08001330 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1331 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001332
Tao Bao08c85832016-09-19 22:26:30 -07001333 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1334 split=False):
1335 """Wrapper function for adding a Transfer()."""
1336
1337 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1338 # otherwise add the Transfer() as is.
1339 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001340 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1341 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1342 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001343 return
1344
1345 # Handle .odex files specially to analyze the block-wise difference. If
1346 # most of the blocks are identical with only few changes (e.g. header),
1347 # we will patch the changed blocks only. This avoids stashing unchanged
1348 # blocks while patching. We limit the analysis to files without size
1349 # changes only. This is to avoid sacrificing the OTA generation cost too
1350 # much.
1351 if (tgt_name.split(".")[-1].lower() == 'odex' and
1352 tgt_ranges.size() == src_ranges.size()):
1353
1354 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1355 # few blocks remain identical, we lose the opportunity to use imgdiff
1356 # that may have better compression ratio than bsdiff.
1357 crop_threshold = 0.5
1358
1359 tgt_skipped = RangeSet()
1360 src_skipped = RangeSet()
1361 tgt_size = tgt_ranges.size()
1362 tgt_changed = 0
1363 for src_block, tgt_block in zip(src_ranges.next_item(),
1364 tgt_ranges.next_item()):
1365 src_rs = RangeSet(str(src_block))
1366 tgt_rs = RangeSet(str(tgt_block))
1367 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1368 tgt_skipped = tgt_skipped.union(tgt_rs)
1369 src_skipped = src_skipped.union(src_rs)
1370 else:
1371 tgt_changed += tgt_rs.size()
1372
1373 # Terminate early if no clear sign of benefits.
1374 if tgt_changed > tgt_size * crop_threshold:
1375 break
1376
1377 if tgt_changed < tgt_size * crop_threshold:
1378 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao508b0872018-02-09 13:44:43 -08001379 print('%10d %10d (%6.2f%%) %s' % (
1380 tgt_skipped.size(), tgt_size,
1381 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001382 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001383 "%s-skipped" % (tgt_name,),
1384 "%s-skipped" % (src_name,),
1385 tgt_skipped, src_skipped, style, by_id)
1386
1387 # Intentionally change the file extension to avoid being imgdiff'd as
1388 # the files are no longer in their original format.
1389 tgt_name = "%s-cropped" % (tgt_name,)
1390 src_name = "%s-cropped" % (src_name,)
1391 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1392 src_ranges = src_ranges.subtract(src_skipped)
1393
1394 # Possibly having no changed blocks.
1395 if not tgt_ranges:
1396 return
1397
1398 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001399 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001400 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1401
Tianjie Xu25366072017-09-08 17:19:02 -07001402 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1403 split_info):
1404 """Parse the split_info and return a list of info tuples.
1405
1406 Args:
1407 patch_size: total size of the patch file.
1408 tgt_ranges: Ranges of the target file within the original image.
1409 src_ranges: Ranges of the source file within the original image.
1410 split_info format:
1411 imgdiff version#
1412 count of pieces
1413 <patch_size_1> <tgt_size_1> <src_ranges_1>
1414 ...
1415 <patch_size_n> <tgt_size_n> <src_ranges_n>
1416
1417 Returns:
1418 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1419 """
1420
1421 version = int(split_info[0])
1422 assert version == 2
1423 count = int(split_info[1])
1424 assert len(split_info) - 2 == count
1425
1426 split_info_list = []
1427 patch_start = 0
1428 tgt_remain = copy.deepcopy(tgt_ranges)
1429 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1430 for line in split_info[2:]:
1431 info = line.split()
1432 assert len(info) == 3
1433 patch_length = int(info[0])
1434
1435 split_tgt_size = int(info[1])
1436 assert split_tgt_size % 4096 == 0
1437 assert split_tgt_size / 4096 <= tgt_remain.size()
1438 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1439 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1440
1441 # Find the split_src_ranges within the image file from its relative
1442 # position in file.
1443 split_src_indices = RangeSet.parse_raw(info[2])
1444 split_src_ranges = RangeSet()
1445 for r in split_src_indices:
1446 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1447 assert not split_src_ranges.overlaps(curr_range)
1448 split_src_ranges = split_src_ranges.union(curr_range)
1449
1450 split_info_list.append((patch_start, patch_length,
1451 split_tgt_ranges, split_src_ranges))
1452 patch_start += patch_length
1453
1454 # Check that the sizes of all the split pieces add up to the final file
1455 # size for patch and target.
1456 assert tgt_remain.size() == 0
1457 assert patch_start == patch_size
1458 return split_info_list
1459
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001460 def SplitLargeApks():
1461 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001462
1463 Example: Chrome.apk will be split into
1464 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1465 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1466 ...
1467
1468 After the split, the target pieces are continuous and block aligned; and
1469 the source pieces are mutually exclusive. During the split, we also
1470 generate and save the image patch between src-X & tgt-X. This patch will
1471 be valid because the block ranges of src-X & tgt-X will always stay the
1472 same afterwards; but there's a chance we don't use the patch if we
1473 convert the "diff" command into "new" or "move" later.
1474 """
1475
1476 while True:
1477 with transfer_lock:
1478 if not large_apks:
1479 return
1480 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1481
1482 src_file = common.MakeTempFile(prefix="src-")
1483 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001484 with open(src_file, "wb") as src_fd:
1485 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1486 with open(tgt_file, "wb") as tgt_fd:
1487 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001488
1489 patch_file = common.MakeTempFile(prefix="patch-")
1490 patch_info_file = common.MakeTempFile(prefix="split_info-")
1491 cmd = ["imgdiff", "-z",
1492 "--block-limit={}".format(max_blocks_per_transfer),
1493 "--split-info=" + patch_info_file,
1494 src_file, tgt_file, patch_file]
Tao Bao73dd4f42018-10-04 16:25:33 -07001495 proc = common.Run(cmd)
1496 imgdiff_output, _ = proc.communicate()
1497 assert proc.returncode == 0, \
Tao Bao4ccea852018-02-06 15:16:41 -08001498 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1499 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001500
1501 with open(patch_info_file) as patch_info:
1502 lines = patch_info.readlines()
1503
1504 patch_size_total = os.path.getsize(patch_file)
1505 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1506 tgt_ranges, src_ranges,
1507 lines)
1508 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001509 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001510 with open(patch_file) as f:
1511 f.seek(patch_start)
1512 patch_content = f.read(patch_length)
1513
1514 split_src_name = "{}-{}".format(src_name, index)
1515 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001516 split_large_apks.append((split_tgt_name,
1517 split_src_name,
1518 split_tgt_ranges,
1519 split_src_ranges,
1520 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001521
Tao Bao08c85832016-09-19 22:26:30 -07001522 print("Finding transfers...")
1523
Tianjie Xu25366072017-09-08 17:19:02 -07001524 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001525 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001526 cache_size = common.OPTIONS.cache_size
1527 split_threshold = 0.125
1528 max_blocks_per_transfer = int(cache_size * split_threshold /
1529 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001530 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001531 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001532 if tgt_fn == "__ZERO":
1533 # the special "__ZERO" domain is all the blocks not contained
1534 # in any file and that are filled with zeros. We have a
1535 # special transfer style for zero blocks.
1536 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001537 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1538 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001539 continue
1540
Tao Baoff777812015-05-12 11:42:31 -07001541 elif tgt_fn == "__COPY":
1542 # "__COPY" domain includes all the blocks not contained in any
1543 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001544 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001545 continue
1546
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001547 elif tgt_fn == "__HASHTREE":
1548 continue
1549
Doug Zongkerfc44a512014-08-26 13:10:25 -07001550 elif tgt_fn in self.src.file_map:
1551 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001552 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001553 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001554 continue
1555
1556 b = os.path.basename(tgt_fn)
1557 if b in self.src_basenames:
1558 # Look for an exact basename match in the source.
1559 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001560 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001561 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001562 continue
1563
1564 b = re.sub("[0-9]+", "#", b)
1565 if b in self.src_numpatterns:
1566 # Look for a 'number pattern' match (a basename match after
1567 # all runs of digits are replaced by "#"). (This is useful
1568 # for .so files that contain version numbers in the filename
1569 # that get bumped.)
1570 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001571 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001572 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001573 continue
1574
Tao Bao9a5caf22015-08-25 15:10:10 -07001575 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001576
Tianjie Xu25366072017-09-08 17:19:02 -07001577 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001578 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001579 for _ in range(self.threads)]
1580 for th in threads:
1581 th.start()
1582 while threads:
1583 threads.pop().join()
1584
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001585 # Sort the split transfers for large apks to generate a determinate package.
1586 split_large_apks.sort()
1587 for (tgt_name, src_name, tgt_ranges, src_ranges,
1588 patch) in split_large_apks:
1589 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1590 self.tgt.RangeSha1(tgt_ranges),
1591 self.src.RangeSha1(src_ranges),
1592 "diff", self.transfers)
1593 transfer_split.patch = patch
1594
Doug Zongkerfc44a512014-08-26 13:10:25 -07001595 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001596 for k in self.src.file_map.keys():
1597 b = os.path.basename(k)
1598 self.src_basenames[b] = k
1599 b = re.sub("[0-9]+", "#", b)
1600 self.src_numpatterns[b] = k
1601
1602 @staticmethod
1603 def AssertPartition(total, seq):
1604 """Assert that all the RangeSets in 'seq' form a partition of the
1605 'total' RangeSet (ie, they are nonintersecting and their union
1606 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001607
Doug Zongkerfc44a512014-08-26 13:10:25 -07001608 so_far = RangeSet()
1609 for i in seq:
1610 assert not so_far.overlaps(i)
1611 so_far = so_far.union(i)
1612 assert so_far == total