blob: 24c5b2de7f31c3c46f807d20688a0a2d5b363aee [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tianjie Xu25366072017-09-08 17:19:02 -070018import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Tao Bao3a2e3502016-12-28 09:14:39 -080024import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070025import re
26import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Tao Bao3a2e3502016-12-28 09:14:39 -080029from collections import deque, OrderedDict
30from hashlib import sha1
Tao Bao508b0872018-02-09 13:44:43 -080031
32import common
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070036__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Returns the patch bytes that transform srcfile into tgtfile.

  Runs 'imgdiff -z' when imgdiff is True, otherwise 'bsdiff', writing the
  patch into a temp file and returning its contents.

  Args:
    srcfile: Path to the source file.
    tgtfile: Path to the target file.
    imgdiff: Whether to use imgdiff instead of bsdiff.

  Raises:
    ValueError: If the diff tool exits with a non-zero status; the message
        carries the tool's combined stdout/stderr.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z']
  else:
    cmd = ['bsdiff']
  cmd += [srcfile, tgtfile, patchfile]

  # Run with verbose=False: the command lines only mention temp filenames,
  # which are not useful in the logs.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
class Image(object):
  """Abstract interface for the image objects consumed by BlockImageDiff.

  Concrete implementations (e.g. EmptyImage, DataImage) must provide the
  methods below; each raises NotImplementedError here.
  """

  def RangeSha1(self, ranges):
    # Returns (as a hex string) the SHA-1 hash of the data in 'ranges'.
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    # Returns the data in 'ranges' as an iterable of strings; concatenating
    # the elements yields the requested data.
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    # Returns (as a hex string) the SHA-1 hash of all the data in the image,
    # optionally including the clobbered blocks.
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    # Writes the data in 'ranges' to the file object 'fd'.
    raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """An Image implementation containing no blocks at all."""

  def __init__(self):
    # Mirror the attributes of a real image, but with nothing in them.
    self.blocksize = 4096
    self.total_blocks = 0
    self.file_map = {}
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

  def RangeSha1(self, ranges):
    # There is no data, so any range hashes to the digest of empty input.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # clobbered_blocks is always empty here, so include_clobbered_blocks
    # makes no difference.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wraps 'data' as an image.

    Args:
      data: The raw image contents.
      trim: If True, drop any partial block at the end of 'data'.
      pad: If True, zero-fill any partial block at the end of 'data'.

    Raises:
      ValueError: If 'data' is not a whole number of blocks and neither
          trim nor pad is requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division ('//'): plain '/' yields a float under Python 3
    # (or "from __future__ import division"), which would break the
    # RangeSet construction and block indexing below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block as all-zero or not. A padded final block is
    # excluded here; it is already covered by clobbered_blocks.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yields the data chunk for each (start, end) pair in 'ranges'."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data in 'ranges'."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the image, optionally with clobbered blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """A single edge in the transfer graph: produce tgt_ranges from src_ranges."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # OrderedDict rather than dict keeps the output repeatable; a plain dict
    # would make iteration depend on the hash values of Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register this transfer in the caller's list; its index becomes the id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, value):
    # Only "diff"-style transfers may carry patch data.
    if value:
      assert self.style == "diff"
    self._patch = value

  def NetStashChange(self):
    """Returns the net change in stashed blocks caused by this transfer."""
    stashed = sum(sr.size() for _, sr in self.stash_before)
    freed = sum(sr.size() for _, sr in self.use_stash)
    return stashed - freed

  def ConvertToNew(self):
    """Rewrites this transfer as a full 'new' write, dropping its source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
232
233
@functools.total_ordering
class HeapItem(object):
  """Adapts an object with a 'score' attribute for use with heapq.

  heapq implements a min-heap while callers want to pop the maximum score,
  so the stored score is negated. functools.total_ordering derives the
  remaining comparison operators from __eq__ and __le__.
  """

  def __init__(self, item):
    self.item = item
    # Negated: the highest-scoring item becomes the smallest heap entry.
    self.score = -item.score

  def clear(self):
    # Drop the payload; the wrapper then evaluates as falsy.
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 consults __nonzero__ where Python 3 consults __bool__.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
257
258
class ImgdiffStats(object):
  """Collects statistics about imgdiff usage.

  Tracks the files that get imgdiff applied while generating a
  BlockImageDiff, and records the specific reason for each file that cannot
  use imgdiff. The stats are only meaningful when the caller of
  BlockImageDiff has not disabled imgdiff. Only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) may be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_TRIMMED = "Not used imgdiff due to trimmed RangeSet"
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The valid reasons, in the order they are dumped in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_TRIMMED,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason constant to the set of filenames logged with it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS
    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      print(header)
      print(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for reason in self.REASONS:
      if reason not in self.stats:
        continue
      names = self.stats[reason]
      print_header(' {} (count: {}) '.format(reason, len(names)), '-')
      print(''.join('  {}\n'.format(name) for name in names))
323
324
Doug Zongkerfc44a512014-08-26 13:10:25 -0700325# BlockImageDiff works on two image objects. An image object is
326# anything that provides the following attributes:
327#
328# blocksize: the size in bytes of a block, currently must be 4096.
329#
330# total_blocks: the total size of the partition/image, in blocks.
331#
332# care_map: a RangeSet containing which blocks (in the range [0,
333# total_blocks) we actually care about; i.e. which blocks contain
334# data.
335#
336# file_map: a dict that partitions the blocks contained in care_map
337# into smaller domains that are useful for doing diffs on.
338# (Typically a domain is a file, and the key in file_map is the
339# pathname.)
340#
Tao Baoff777812015-05-12 11:42:31 -0700341# clobbered_blocks: a RangeSet containing which blocks contain data
342# but may be altered by the FS. They need to be excluded when
343# verifying the partition integrity.
344#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700345# ReadRangeSet(): a function that takes a RangeSet and returns the
346# data contained in the image blocks of that RangeSet. The data
347# is returned as a list or tuple of strings; concatenating the
348# elements together should produce the requested data.
349# Implementations are free to break up the data into list/tuple
350# elements in any way that is convenient.
351#
Tao Bao183e56e2017-03-05 17:05:09 -0800352# RangeSha1(): a function that returns (as a hex string) the SHA-1
353# hash of all the data in the specified range.
354#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700355# TotalSha1(): a function that returns (as a hex string) the SHA-1
356# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700357# care_map minus clobbered_blocks, or including the clobbered
358# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700359#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700360# When creating a BlockImageDiff, the src image may be None, in which
361# case the list of transfers produced will never read from the
362# original image.
363
364class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700365 def __init__(self, tgt, src=None, threads=None, version=4,
366 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700367 if threads is None:
368 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700369 if threads == 0:
370 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700371 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700372 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700373 self.transfers = []
374 self.src_basenames = {}
375 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800376 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700377 self.touched_src_ranges = RangeSet()
378 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700379 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800380 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700381
Tao Bao8fad03e2017-03-01 14:36:26 -0800382 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700383
384 self.tgt = tgt
385 if src is None:
386 src = EmptyImage()
387 self.src = src
388
389 # The updater code that installs the patch always uses 4k blocks.
390 assert tgt.blocksize == 4096
391 assert src.blocksize == 4096
392
393 # The range sets in each filemap should comprise a partition of
394 # the care map.
395 self.AssertPartition(src.care_map, src.file_map.values())
396 self.AssertPartition(tgt.care_map, tgt.file_map.values())
397
  @property
  def max_stashed_size(self):
    """Returns the maximum stashed size in bytes (computed by WriteTransfers;
    0 until then)."""
    return self._max_stashed_size
401
Tao Baocb73aed2018-01-31 17:32:40 -0800402 @staticmethod
403 def FileTypeSupportedByImgdiff(filename):
404 """Returns whether the file type is supported by imgdiff."""
405 return filename.lower().endswith(('.apk', '.jar', '.zip'))
406
Tao Bao294651d2018-02-08 23:21:52 -0800407 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800408 """Checks whether we can apply imgdiff for the given RangeSets.
409
410 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
411 'imgdiff -z' if possible. Because it usually produces significantly smaller
412 patches than bsdiff.
413
414 This is permissible if all of the following conditions hold.
415 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
416 - The file type is supported by imgdiff;
417 - The source and target blocks are monotonic (i.e. the data is stored with
418 blocks in increasing order);
Tao Baoe709b092018-02-07 12:40:00 -0800419 - Both files don't contain shared blocks;
Tao Bao4ccea852018-02-06 15:16:41 -0800420 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800421 - We haven't removed any blocks from the source set.
422
423 If all these conditions are satisfied, concatenating all the blocks in the
424 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
425 in the last block). imgdiff is fine with extra zeros at the end of the file.
426
427 Args:
428 name: The filename to be diff'd.
429 tgt_ranges: The target RangeSet.
430 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800431 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800432
433 Returns:
434 A boolean result.
435 """
Tao Bao508b0872018-02-09 13:44:43 -0800436 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800437 return False
438
439 if not tgt_ranges.monotonic or not src_ranges.monotonic:
440 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
441 return False
442
Tao Baoe709b092018-02-07 12:40:00 -0800443 if (tgt_ranges.extra.get('uses_shared_blocks') or
444 src_ranges.extra.get('uses_shared_blocks')):
445 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
446 return False
447
Tao Bao4ccea852018-02-06 15:16:41 -0800448 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
449 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
450 return False
451
Tao Bao294651d2018-02-08 23:21:52 -0800452 if tgt_ranges.extra.get('trimmed') or src_ranges.extra.get('trimmed'):
453 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_TRIMMED)
454 return False
455
456 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
457 else ImgdiffStats.USED_IMGDIFF)
458 self.imgdiff_stats.Log(name, reason)
459 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800460
  def Compute(self, prefix):
    """Computes the diff and writes the output files named '<prefix>.*'.

    Orchestrates the whole pipeline: find transfers, order them, bound the
    stash size, verify, then emit patches and the transfer list.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if common.OPTIONS.verbose and not self.disable_imgdiff:
      self.imgdiff_stats.Report()
497
  def WriteTransfers(self, prefix):
    """Writes the computed transfer list to '<prefix>.transfer.list'.

    Walks self.transfers in order, emitting stash/free commands plus one
    command per transfer ("new", "move", "bsdiff"/"imgdiff" or "zero"),
    while tracking the peak number of simultaneously stashed blocks.

    Side effects: updates self.touched_src_ranges, self.touched_src_sha1
    and self._max_stashed_size.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          # Already stashed (same content); just bump the ref count -- no
          # extra space or command is needed.
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      # Free commands (and their block count) accumulated for this transfer;
      # they are appended after the transfer's own command below.
      free_string = []
      free_size = 0

      # The src_str field for this transfer takes one of these forms:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        # Remap the stashed range into the coordinate space of this
        # transfer's source (positions within xf.src_ranges).
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference: free the stash slot after this command.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        # Everything comes from stashes; '-' marks the absent src ranges.
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op: emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    # Transfer-list header: version, total target blocks written, then the
    # (unused in v3+) stash slot count and the peak stash size in blocks.
    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    # NOTE(review): opened in binary mode while 'out' holds str entries --
    # a Python 2 idiom; would need encoding under Python 3.
    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700704
  def ReviseStashSize(self):
    """Revises the transfer list so stash usage stays within the /cache limit.

    Simulates the stash usage of the transfer list in execution order. When a
    stash (explicit via stash_before, or implicit via a "diff" command whose
    source overlaps its target) would push the stashed-block total over the
    allowed budget (cache_size * stash_threshold), the command that consumes
    that stash is converted to a "new" command instead, trading package size
    for stash space.

    Returns:
      The total number of target blocks converted to "new" commands.
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        # Only a hash not already stashed consumes new space; identical
        # blocks share one slot (keyed by SHA-1).
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map (reference count per SHA-1).
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up the blocks that violate the space limit; the total is
        # printed to screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        # Drop one reference; reclaim the space once no command uses this
        # hash any more.
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700796
  def ComputePatches(self, prefix):
    """Computes the patch/new data for every transfer and writes the files.

    First pass classifies each transfer: "new" data is streamed into
    <prefix>.new.dat; "diff" transfers whose source and target hashes match
    are downgraded to "move" (no patch needed); the remaining "diff"
    transfers are queued for patch generation with bsdiff or imgdiff. The
    queue is then drained by self.threads worker threads, and the patches
    are concatenated into <prefix>.patch.dat, recording each transfer's
    patch_start/patch_len. Exits the process on any patch failure.

    Args:
      prefix: Path prefix for the ".new.dat" and ".patch.dat" output files.
    """
    print("Reticulating splines...")
    # Entries are (transfer_index, use_imgdiff, patch_slot) tuples.
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
              str(xf.tgt_ranges)))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style,
              xf.tgt_name, str(xf.tgt_ranges)))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          # ["he", "llo"] vs ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
                  tgt_size, tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges)))
          else:
            if xf.patch:
              # We have already generated the patch with imgdiff. Check if the
              # transfer is intact.
              assert not self.disable_imgdiff
              imgdiff = True
              # A transfer trimmed by RemoveBackwardEdges() no longer matches
              # the pre-generated patch; regenerate with bsdiff instead.
              if (xf.src_ranges.extra.get('trimmed') or
                  xf.tgt_ranges.extra.get('trimmed')):
                imgdiff = False
                xf.patch = None
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_queue:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")

      diff_total = len(diff_queue)
      patches = [None] * diff_total
      error_messages = []

      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call, which
      # already runs in a separate process (not affected much by the GIL -
      # Global Interpreter Lock). Using multiprocess also requires either a)
      # writing the diff input files in the main process before forking, or b)
      # reopening the image file (SparseImage) in the worker processes. Doing
      # neither of them further improves the performance.
      lock = threading.Lock()
      def diff_worker():
        # Worker thread: pops queue entries under 'lock', diffs outside the
        # lock, and stores results into the shared 'patches' list.
        while True:
          with lock:
            if not diff_queue:
              return
            xf_index, imgdiff, patch_index = diff_queue.pop()
            xf = self.transfers[xf_index]

            if sys.stdout.isatty():
              diff_left = len(diff_queue)
              progress = (diff_total - diff_left) * 100 / diff_total
              # '\033[K' is to clear to EOL.
              print(' [%3d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
              sys.stdout.flush()

          patch = xf.patch
          if not patch:
            src_ranges = xf.src_ranges
            tgt_ranges = xf.tgt_ranges

            src_file = common.MakeTempFile(prefix="src-")
            with open(src_file, "wb") as fd:
              self.src.WriteRangeDataToFd(src_ranges, fd)

            tgt_file = common.MakeTempFile(prefix="tgt-")
            with open(tgt_file, "wb") as fd:
              self.tgt.WriteRangeDataToFd(tgt_ranges, fd)

            message = []
            try:
              patch = compute_patch(src_file, tgt_file, imgdiff)
            except ValueError as e:
              # NOTE(review): e.message is Python 2 only; str(e) would be
              # needed if this ever moves to Python 3.
              message.append(
                  "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                      "imgdiff" if imgdiff else "bsdiff",
                      xf.tgt_name if xf.tgt_name == xf.src_name else
                      xf.tgt_name + " (from " + xf.src_name + ")",
                      xf.tgt_ranges, xf.src_ranges, e.message))
            if message:
              with lock:
                error_messages.extend(message)

          with lock:
            patches[patch_index] = (xf_index, patch)

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()

      # Move past the '\r'-style progress line.
      if sys.stdout.isatty():
        print('\n')

      if error_messages:
        print('ERROR:')
        print('\n'.join(error_messages))
        print('\n\n\n')
        sys.exit(1)
    else:
      patches = []

    # Concatenate all patches, recording each transfer's offset/length into
    # the combined patch stream.
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch)

        if common.OPTIONS.verbose:
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
              xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
              xf.style,
              xf.tgt_name if xf.tgt_name == xf.src_name else (
                  xf.tgt_name + " (from " + xf.src_name + ")"),
              xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700955
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800956 def AssertSha1Good(self):
957 """Check the SHA-1 of the src & tgt blocks in the transfer list.
958
959 Double check the SHA-1 value to avoid the issue in b/71908713, where
960 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
961 environment. That specific problem has been fixed by protecting the
962 underlying generator function 'SparseImage._GetRangeData()' with lock.
963 """
964 for xf in self.transfers:
965 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
966 assert xf.tgt_sha1 == tgt_sha1
967 if xf.style == "diff":
968 src_sha1 = self.src.RangeSha1(xf.src_ranges)
969 assert xf.src_sha1 == src_sha1
970
Doug Zongkerfc44a512014-08-26 13:10:25 -0700971 def AssertSequenceGood(self):
972 # Simulate the sequences of transfers we will output, and check that:
973 # - we never read a block after writing it, and
974 # - we write every block we care about exactly once.
975
976 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800977 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700978
979 # Imagine processing the transfers in order.
980 for xf in self.transfers:
981 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700982
983 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800984 for _, sr in xf.use_stash:
985 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700986
Doug Zongker6ab2a502016-02-09 08:28:09 -0800987 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800988 # Source image could be larger. Don't check the blocks that are in the
989 # source image only. Since they are not in 'touched', and won't ever
990 # be touched.
991 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800992 assert touched[i] == 0
993
994 # Check that the output blocks for this transfer haven't yet
995 # been touched, and touch all the blocks written by this
996 # transfer.
997 for s, e in xf.tgt_ranges:
998 for i in range(s, e):
999 assert touched[i] == 0
1000 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001001
1002 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001003 for s, e in self.tgt.care_map:
1004 for i in range(s, e):
1005 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001006
Doug Zongker62338182014-09-08 08:29:55 -07001007 def ImproveVertexSequence(self):
1008 print("Improving vertex order...")
1009
1010 # At this point our digraph is acyclic; we reversed any edges that
1011 # were backwards in the heuristically-generated sequence. The
1012 # previously-generated order is still acceptable, but we hope to
1013 # find a better order that needs less memory for stashed data.
1014 # Now we do a topological sort to generate a new vertex order,
1015 # using a greedy algorithm to choose which vertex goes next
1016 # whenever we have a choice.
1017
1018 # Make a copy of the edge set; this copy will get destroyed by the
1019 # algorithm.
1020 for xf in self.transfers:
1021 xf.incoming = xf.goes_after.copy()
1022 xf.outgoing = xf.goes_before.copy()
1023
1024 L = [] # the new vertex order
1025
1026 # S is the set of sources in the remaining graph; we always choose
1027 # the one that leaves the least amount of stashed data after it's
1028 # executed.
1029 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1030 if not u.incoming]
1031 heapq.heapify(S)
1032
1033 while S:
1034 _, _, xf = heapq.heappop(S)
1035 L.append(xf)
1036 for u in xf.outgoing:
1037 del u.incoming[xf]
1038 if not u.incoming:
1039 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1040
1041 # if this fails then our graph had a cycle.
1042 assert len(L) == len(self.transfers)
1043
1044 self.transfers = L
1045 for i, xf in enumerate(L):
1046 xf.order = i
1047
Doug Zongkerfc44a512014-08-26 13:10:25 -07001048 def RemoveBackwardEdges(self):
1049 print("Removing backward edges...")
1050 in_order = 0
1051 out_of_order = 0
1052 lost_source = 0
1053
1054 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001055 lost = 0
1056 size = xf.src_ranges.size()
1057 for u in xf.goes_before:
1058 # xf should go before u
1059 if xf.order < u.order:
1060 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -07001061 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001062 else:
1063 # it doesn't, boo. trim the blocks that u writes from xf's
1064 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -07001065 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001066 assert xf.src_ranges.overlaps(u.tgt_ranges)
1067 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
Tao Baocb73aed2018-01-31 17:32:40 -08001068 xf.src_ranges.extra['trimmed'] = True
Doug Zongkerfc44a512014-08-26 13:10:25 -07001069
1070 if xf.style == "diff" and not xf.src_ranges:
1071 # nothing left to diff from; treat as new data
1072 xf.style = "new"
1073
1074 lost = size - xf.src_ranges.size()
1075 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -07001076
1077 print((" %d/%d dependencies (%.2f%%) were violated; "
1078 "%d source blocks removed.") %
1079 (out_of_order, in_order + out_of_order,
1080 (out_of_order * 100.0 / (in_order + out_of_order))
1081 if (in_order + out_of_order) else 0.0,
1082 lost_source))
1083
Doug Zongker62338182014-09-08 08:29:55 -07001084 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001085 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1086
1087 For each transfer, make sure it properly stashes the blocks it touches and
1088 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1089 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1090 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1091 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1092 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1093 blocks will be written to the same stash slot in WriteTransfers().
1094 """
1095
Doug Zongker62338182014-09-08 08:29:55 -07001096 print("Reversing backward edges...")
1097 in_order = 0
1098 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001099 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001100 stash_size = 0
1101
1102 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001103 for u in xf.goes_before.copy():
1104 # xf should go before u
1105 if xf.order < u.order:
1106 # it does, hurray!
1107 in_order += 1
1108 else:
1109 # it doesn't, boo. modify u to stash the blocks that it
1110 # writes that xf wants to read, and then require u to go
1111 # before xf.
1112 out_of_order += 1
1113
1114 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1115 assert overlap
1116
Tao Bao3a2e3502016-12-28 09:14:39 -08001117 u.stash_before.append((stash_raw_id, overlap))
1118 xf.use_stash.append((stash_raw_id, overlap))
1119 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001120 stash_size += overlap.size()
1121
1122 # reverse the edge direction; now xf must go after u
1123 del xf.goes_before[u]
1124 del u.goes_after[xf]
1125 xf.goes_after[u] = None # value doesn't matter
1126 u.goes_before[xf] = None
1127
1128 print((" %d/%d dependencies (%.2f%%) were violated; "
1129 "%d source blocks stashed.") %
1130 (out_of_order, in_order + out_of_order,
1131 (out_of_order * 100.0 / (in_order + out_of_order))
1132 if (in_order + out_of_order) else 0.0,
1133 stash_size))
1134
  def FindVertexSequence(self):
    """Orders the transfers to minimize the weight of left-pointing edges.

    Produces a linear order of the transfer digraph's vertices. Edges that
    end up pointing backwards under this order are the candidates for
    removal (RemoveBackwardEdges) or reversal-with-stash
    (ReverseBackwardEdges), so the heuristic tries to keep their total
    weight small. Rewrites self.transfers and each transfer's 'order' field.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = outgoing weight minus incoming weight; a high score means
      # the vertex behaves more like a source (better placed early).
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Lazy-deletion heap update: clear() invalidates the stale wrapper
      # (the pop loop below skips falsy entries — see HeapItem elsewhere in
      # this file) and a fresh wrapper with the new score is pushed.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      while True:
        u = heapq.heappop(heap)
        # Skip stale heap entries (cleared wrappers) and vertices that have
        # already been removed from G.
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1247
1248 def GenerateDigraph(self):
1249 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001250
1251 # Each item of source_ranges will be:
1252 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001253 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001254 source_ranges = []
1255 for b in self.transfers:
1256 for s, e in b.src_ranges:
1257 if e > len(source_ranges):
1258 source_ranges.extend([None] * (e-len(source_ranges)))
1259 for i in range(s, e):
1260 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001261 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001262 else:
Tao Bao33482282016-10-24 16:49:08 -07001263 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001264
Doug Zongkerfc44a512014-08-26 13:10:25 -07001265 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001266 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001267 for s, e in a.tgt_ranges:
1268 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001269 if i >= len(source_ranges):
1270 break
Tao Bao33482282016-10-24 16:49:08 -07001271 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1272 if source_ranges[i] is not None:
1273 for j in source_ranges[i]:
1274 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001275
1276 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001277 if a is b:
1278 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001279
1280 # If the blocks written by A are read by B, then B needs to go before A.
1281 i = a.tgt_ranges.intersect(b.src_ranges)
1282 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001283 if b.src_name == "__ZERO":
1284 # the cost of removing source blocks for the __ZERO domain
1285 # is (nearly) zero.
1286 size = 0
1287 else:
1288 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001289 b.goes_before[a] = size
1290 a.goes_after[b] = size
1291
1292 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001293 """Parse the file_map to generate all the transfers."""
1294
Tianjie Xu41390d42017-11-22 11:35:18 -08001295 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1296 src_ranges, style, by_id):
1297 """Add one or multiple Transfer()s by splitting large files.
1298
1299 For BBOTA v3, we need to stash source blocks for resumable feature.
1300 However, with the growth of file size and the shrink of the cache
1301 partition source blocks are too large to be stashed. If a file occupies
1302 too many blocks, we split it into smaller pieces by getting multiple
1303 Transfer()s.
1304
1305 The downside is that after splitting, we may increase the package size
1306 since the split pieces don't align well. According to our experiments,
1307 1/8 of the cache size as the per-piece limit appears to be optimal.
1308 Compared to the fixed 1024-block limit, it reduces the overall package
1309 size by 30% for volantis, and 20% for angler and bullhead."""
1310
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001311 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001312 while (tgt_ranges.size() > max_blocks_per_transfer and
1313 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001314 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1315 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001316 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1317 src_first = src_ranges.first(max_blocks_per_transfer)
1318
Tao Bao183e56e2017-03-05 17:05:09 -08001319 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1320 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1321 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001322
1323 tgt_ranges = tgt_ranges.subtract(tgt_first)
1324 src_ranges = src_ranges.subtract(src_first)
1325 pieces += 1
1326
1327 # Handle remaining blocks.
1328 if tgt_ranges.size() or src_ranges.size():
1329 # Must be both non-empty.
1330 assert tgt_ranges.size() and src_ranges.size()
1331 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1332 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001333 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1334 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1335 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001336
Tianjie Xu41390d42017-11-22 11:35:18 -08001337 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1338 by_id):
1339 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001340
Tianjie Xu41390d42017-11-22 11:35:18 -08001341 This function will construct a list of zip archives, which will later be
1342 split by imgdiff to reduce the final patch size. For the other files,
1343 we will plainly split them based on a fixed chunk size with the potential
1344 patch size penalty.
1345 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001346
1347 assert style == "diff"
1348
1349 # Change nothing for small files.
1350 if (tgt_ranges.size() <= max_blocks_per_transfer and
1351 src_ranges.size() <= max_blocks_per_transfer):
1352 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1353 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1354 style, by_id)
1355 return
1356
Tao Baocb73aed2018-01-31 17:32:40 -08001357 # Split large APKs with imgdiff, if possible. We're intentionally checking
1358 # file types one more time (CanUseImgdiff() checks that as well), before
1359 # calling the costly RangeSha1()s.
1360 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1361 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001362 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001363 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1364 return
1365
Tianjie Xu41390d42017-11-22 11:35:18 -08001366 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1367 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001368
Tao Bao08c85832016-09-19 22:26:30 -07001369 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1370 split=False):
1371 """Wrapper function for adding a Transfer()."""
1372
1373 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1374 # otherwise add the Transfer() as is.
1375 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001376 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1377 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1378 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001379 return
1380
1381 # Handle .odex files specially to analyze the block-wise difference. If
1382 # most of the blocks are identical with only few changes (e.g. header),
1383 # we will patch the changed blocks only. This avoids stashing unchanged
1384 # blocks while patching. We limit the analysis to files without size
1385 # changes only. This is to avoid sacrificing the OTA generation cost too
1386 # much.
1387 if (tgt_name.split(".")[-1].lower() == 'odex' and
1388 tgt_ranges.size() == src_ranges.size()):
1389
1390 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1391 # few blocks remain identical, we lose the opportunity to use imgdiff
1392 # that may have better compression ratio than bsdiff.
1393 crop_threshold = 0.5
1394
1395 tgt_skipped = RangeSet()
1396 src_skipped = RangeSet()
1397 tgt_size = tgt_ranges.size()
1398 tgt_changed = 0
1399 for src_block, tgt_block in zip(src_ranges.next_item(),
1400 tgt_ranges.next_item()):
1401 src_rs = RangeSet(str(src_block))
1402 tgt_rs = RangeSet(str(tgt_block))
1403 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1404 tgt_skipped = tgt_skipped.union(tgt_rs)
1405 src_skipped = src_skipped.union(src_rs)
1406 else:
1407 tgt_changed += tgt_rs.size()
1408
1409 # Terminate early if no clear sign of benefits.
1410 if tgt_changed > tgt_size * crop_threshold:
1411 break
1412
1413 if tgt_changed < tgt_size * crop_threshold:
1414 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao508b0872018-02-09 13:44:43 -08001415 print('%10d %10d (%6.2f%%) %s' % (
1416 tgt_skipped.size(), tgt_size,
1417 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001418 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001419 "%s-skipped" % (tgt_name,),
1420 "%s-skipped" % (src_name,),
1421 tgt_skipped, src_skipped, style, by_id)
1422
1423 # Intentionally change the file extension to avoid being imgdiff'd as
1424 # the files are no longer in their original format.
1425 tgt_name = "%s-cropped" % (tgt_name,)
1426 src_name = "%s-cropped" % (src_name,)
1427 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1428 src_ranges = src_ranges.subtract(src_skipped)
1429
1430 # Possibly having no changed blocks.
1431 if not tgt_ranges:
1432 return
1433
1434 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001435 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001436 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1437
Tianjie Xu25366072017-09-08 17:19:02 -07001438 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1439 split_info):
1440 """Parse the split_info and return a list of info tuples.
1441
1442 Args:
1443 patch_size: total size of the patch file.
1444 tgt_ranges: Ranges of the target file within the original image.
1445 src_ranges: Ranges of the source file within the original image.
1446 split_info format:
1447 imgdiff version#
1448 count of pieces
1449 <patch_size_1> <tgt_size_1> <src_ranges_1>
1450 ...
1451 <patch_size_n> <tgt_size_n> <src_ranges_n>
1452
1453 Returns:
1454 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1455 """
1456
1457 version = int(split_info[0])
1458 assert version == 2
1459 count = int(split_info[1])
1460 assert len(split_info) - 2 == count
1461
1462 split_info_list = []
1463 patch_start = 0
1464 tgt_remain = copy.deepcopy(tgt_ranges)
1465 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1466 for line in split_info[2:]:
1467 info = line.split()
1468 assert len(info) == 3
1469 patch_length = int(info[0])
1470
1471 split_tgt_size = int(info[1])
1472 assert split_tgt_size % 4096 == 0
1473 assert split_tgt_size / 4096 <= tgt_remain.size()
1474 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1475 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1476
1477 # Find the split_src_ranges within the image file from its relative
1478 # position in file.
1479 split_src_indices = RangeSet.parse_raw(info[2])
1480 split_src_ranges = RangeSet()
1481 for r in split_src_indices:
1482 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1483 assert not split_src_ranges.overlaps(curr_range)
1484 split_src_ranges = split_src_ranges.union(curr_range)
1485
1486 split_info_list.append((patch_start, patch_length,
1487 split_tgt_ranges, split_src_ranges))
1488 patch_start += patch_length
1489
1490 # Check that the sizes of all the split pieces add up to the final file
1491 # size for patch and target.
1492 assert tgt_remain.size() == 0
1493 assert patch_start == patch_size
1494 return split_info_list
1495
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001496 def SplitLargeApks():
1497 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001498
1499 Example: Chrome.apk will be split into
1500 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1501 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1502 ...
1503
1504 After the split, the target pieces are continuous and block aligned; and
1505 the source pieces are mutually exclusive. During the split, we also
1506 generate and save the image patch between src-X & tgt-X. This patch will
1507 be valid because the block ranges of src-X & tgt-X will always stay the
1508 same afterwards; but there's a chance we don't use the patch if we
1509 convert the "diff" command into "new" or "move" later.
1510 """
1511
1512 while True:
1513 with transfer_lock:
1514 if not large_apks:
1515 return
1516 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1517
1518 src_file = common.MakeTempFile(prefix="src-")
1519 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001520 with open(src_file, "wb") as src_fd:
1521 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1522 with open(tgt_file, "wb") as tgt_fd:
1523 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001524
1525 patch_file = common.MakeTempFile(prefix="patch-")
1526 patch_info_file = common.MakeTempFile(prefix="split_info-")
1527 cmd = ["imgdiff", "-z",
1528 "--block-limit={}".format(max_blocks_per_transfer),
1529 "--split-info=" + patch_info_file,
1530 src_file, tgt_file, patch_file]
Tao Bao4ccea852018-02-06 15:16:41 -08001531 p = common.Run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
1532 imgdiff_output, _ = p.communicate()
1533 assert p.returncode == 0, \
1534 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1535 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001536
1537 with open(patch_info_file) as patch_info:
1538 lines = patch_info.readlines()
1539
1540 patch_size_total = os.path.getsize(patch_file)
1541 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1542 tgt_ranges, src_ranges,
1543 lines)
1544 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001545 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001546 with open(patch_file) as f:
1547 f.seek(patch_start)
1548 patch_content = f.read(patch_length)
1549
1550 split_src_name = "{}-{}".format(src_name, index)
1551 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001552 split_large_apks.append((split_tgt_name,
1553 split_src_name,
1554 split_tgt_ranges,
1555 split_src_ranges,
1556 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001557
Tao Bao08c85832016-09-19 22:26:30 -07001558 print("Finding transfers...")
1559
Tianjie Xu25366072017-09-08 17:19:02 -07001560 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001561 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001562 cache_size = common.OPTIONS.cache_size
1563 split_threshold = 0.125
1564 max_blocks_per_transfer = int(cache_size * split_threshold /
1565 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001566 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001567 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001568 if tgt_fn == "__ZERO":
1569 # the special "__ZERO" domain is all the blocks not contained
1570 # in any file and that are filled with zeros. We have a
1571 # special transfer style for zero blocks.
1572 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001573 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1574 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001575 continue
1576
Tao Baoff777812015-05-12 11:42:31 -07001577 elif tgt_fn == "__COPY":
1578 # "__COPY" domain includes all the blocks not contained in any
1579 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001580 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001581 continue
1582
Doug Zongkerfc44a512014-08-26 13:10:25 -07001583 elif tgt_fn in self.src.file_map:
1584 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001585 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001586 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001587 continue
1588
1589 b = os.path.basename(tgt_fn)
1590 if b in self.src_basenames:
1591 # Look for an exact basename match in the source.
1592 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001593 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001594 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001595 continue
1596
1597 b = re.sub("[0-9]+", "#", b)
1598 if b in self.src_numpatterns:
1599 # Look for a 'number pattern' match (a basename match after
1600 # all runs of digits are replaced by "#"). (This is useful
1601 # for .so files that contain version numbers in the filename
1602 # that get bumped.)
1603 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001604 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001605 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001606 continue
1607
Tao Bao9a5caf22015-08-25 15:10:10 -07001608 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001609
Tianjie Xu25366072017-09-08 17:19:02 -07001610 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001611 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001612 for _ in range(self.threads)]
1613 for th in threads:
1614 th.start()
1615 while threads:
1616 threads.pop().join()
1617
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001618 # Sort the split transfers for large apks to generate a determinate package.
1619 split_large_apks.sort()
1620 for (tgt_name, src_name, tgt_ranges, src_ranges,
1621 patch) in split_large_apks:
1622 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1623 self.tgt.RangeSha1(tgt_ranges),
1624 self.src.RangeSha1(src_ranges),
1625 "diff", self.transfers)
1626 transfer_split.patch = patch
1627
Doug Zongkerfc44a512014-08-26 13:10:25 -07001628 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001629 for k in self.src.file_map.keys():
1630 b = os.path.basename(k)
1631 self.src_basenames[b] = k
1632 b = re.sub("[0-9]+", "#", b)
1633 self.src_numpatterns[b] = k
1634
1635 @staticmethod
1636 def AssertPartition(total, seq):
1637 """Assert that all the RangeSets in 'seq' form a partition of the
1638 'total' RangeSet (ie, they are nonintersecting and their union
1639 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001640
Doug Zongkerfc44a512014-08-26 13:10:25 -07001641 so_far = RangeSet()
1642 for i in seq:
1643 assert not so_far.overlaps(i)
1644 so_far = so_far.union(i)
1645 assert so_far == total