blob: fe37b391da5f16c8d279a2e22c8711e8d26e0c98 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Tianjie Xu25366072017-09-08 17:19:02 -070019import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080020import functools
Doug Zongker62338182014-09-08 08:29:55 -070021import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070022import itertools
23import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080028import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070029import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070030
Tao Bao3a2e3502016-12-28 09:14:39 -080031from collections import deque, OrderedDict
32from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070036__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Generates a binary patch that transforms srcfile into tgtfile.

  Invokes the external 'imgdiff -z' (for zip-based files) or 'bsdiff' tool
  and returns the produced patch contents.

  Args:
    srcfile: Path to the source file.
    tgtfile: Path to the target file.
    imgdiff: Whether to diff with imgdiff instead of bsdiff.

  Returns:
    The patch data as a string of bytes.

  Raises:
    ValueError: If the diff tool exits with a non-zero status; carries the
        tool's combined stdout/stderr output.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
class Image(object):
  """Abstract interface that tgt/src image objects must implement."""

  def RangeSha1(self, ranges):
    """Returns (as a hex string) the SHA-1 of the data in the given ranges."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns the data in the given ranges as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns (as a hex string) the SHA-1 of the whole image."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in the given ranges to the file object fd."""
    raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image: no blocks, no data, empty file map."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data anywhere, so every range hashes like empty input.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Initializes a DataImage.

    Args:
      data: A string holding the raw image contents.
      trim: Whether to drop a trailing partial block.
      pad: Whether to zero-pad a trailing partial block up to blocksize.

    Raises:
      ValueError: If data is not block-aligned and neither trim nor pad is
          requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division ('//') so total_blocks stays an int under Python 3
    # as well; plain '/' would produce a float there, which then breaks the
    # range() call below. On Python 2 both spellings are identical for ints.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify every full (non-padded) block as all-zero or not; each list
    # accumulates [start, end) pairs consumed by RangeSet(data=...).
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yields the data chunk for each [start, end) block range in ranges."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns (as a hex string) the SHA-1 of the data in the given ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    """Returns the range data as a one-element list holding a generator."""
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the SHA-1 of the image, optionally including clobbered blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in the given ranges to the file object fd."""
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """One transfer command: produce a target range, possibly from a source.

  Instances register themselves in the by_id list passed to the constructor;
  their position in that list is their id.
  """

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    # (id, RangeSet) pairs stashed before / consumed by this command.
    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    # Only diff-style transfers may carry patch data; clearing (None/empty)
    # is always allowed.
    if patch:
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Returns the net block count this command adds to the stash."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Rewrites this transfer as a full write of the target ranges."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "{}: <{} {} to {}>".format(
        self.id, self.src_ranges, self.style, self.tgt_ranges)
232
233
@functools.total_ordering
class HeapItem(object):
  """Adapts a scored item for heapq, which only provides a min-heap.

  The item's score is stored negated, so popping the heap yields the
  highest-scoring item first. A cleared entry stays in the heap but tests
  falsy, letting consumers skip it.
  """

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    """Detaches the wrapped item; the entry then evaluates to False."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__, while Python 3 uses __bool__.
  __nonzero__ = __bool__

  # functools.total_ordering fills in the remaining comparison operators
  # from __eq__ plus __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
257
258
class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files that will be applied imgdiff while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats is only meaningful when imgdiff not being disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.

  TODO: The info could be inaccurate due to the unconditional fallback from
  imgdiff to bsdiff on errors. The fallbacks will be removed.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_TRIMMED = "Not used imgdiff due to trimmed RangeSet"
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_TRIMMED,
      SKIPPED_NONMONOTONIC,
  )

  def __init__(self):
    # Maps a reason string (one of REASONS) to the set of filenames logged
    # under it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS
    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      print(header)
      print(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      values = self.stats.get(key)
      if not values:
        continue
      section_header = ' {} (count: {}) '.format(key, len(values))
      print_header(section_header, '-')
      print(''.join(['  {}\n'.format(name) for name in values]))
322
323
Doug Zongkerfc44a512014-08-26 13:10:25 -0700324# BlockImageDiff works on two image objects. An image object is
325# anything that provides the following attributes:
326#
327# blocksize: the size in bytes of a block, currently must be 4096.
328#
329# total_blocks: the total size of the partition/image, in blocks.
330#
331# care_map: a RangeSet containing which blocks (in the range [0,
332# total_blocks) we actually care about; i.e. which blocks contain
333# data.
334#
335# file_map: a dict that partitions the blocks contained in care_map
336# into smaller domains that are useful for doing diffs on.
337# (Typically a domain is a file, and the key in file_map is the
338# pathname.)
339#
Tao Baoff777812015-05-12 11:42:31 -0700340# clobbered_blocks: a RangeSet containing which blocks contain data
341# but may be altered by the FS. They need to be excluded when
342# verifying the partition integrity.
343#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700344# ReadRangeSet(): a function that takes a RangeSet and returns the
345# data contained in the image blocks of that RangeSet. The data
346# is returned as a list or tuple of strings; concatenating the
347# elements together should produce the requested data.
348# Implementations are free to break up the data into list/tuple
349# elements in any way that is convenient.
350#
Tao Bao183e56e2017-03-05 17:05:09 -0800351# RangeSha1(): a function that returns (as a hex string) the SHA-1
352# hash of all the data in the specified range.
353#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700354# TotalSha1(): a function that returns (as a hex string) the SHA-1
355# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700356# care_map minus clobbered_blocks, or including the clobbered
357# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700358#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700359# When creating a BlockImageDiff, the src image may be None, in which
360# case the list of transfers produced will never read from the
361# original image.
362
363class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700364 def __init__(self, tgt, src=None, threads=None, version=4,
365 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700366 if threads is None:
367 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700368 if threads == 0:
369 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700370 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700371 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700372 self.transfers = []
373 self.src_basenames = {}
374 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800375 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700376 self.touched_src_ranges = RangeSet()
377 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700378 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800379 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700380
Tao Bao8fad03e2017-03-01 14:36:26 -0800381 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700382
383 self.tgt = tgt
384 if src is None:
385 src = EmptyImage()
386 self.src = src
387
388 # The updater code that installs the patch always uses 4k blocks.
389 assert tgt.blocksize == 4096
390 assert src.blocksize == 4096
391
392 # The range sets in each filemap should comprise a partition of
393 # the care map.
394 self.AssertPartition(src.care_map, src.file_map.values())
395 self.AssertPartition(tgt.care_map, tgt.file_map.values())
396
  @property
  def max_stashed_size(self):
    # Read-only peak stash usage in bytes (max stashed blocks * blocksize),
    # as computed by the last WriteTransfers() run; 0 before that.
    return self._max_stashed_size
400
Tao Baocb73aed2018-01-31 17:32:40 -0800401 @staticmethod
402 def FileTypeSupportedByImgdiff(filename):
403 """Returns whether the file type is supported by imgdiff."""
404 return filename.lower().endswith(('.apk', '.jar', '.zip'))
405
Tao Bao294651d2018-02-08 23:21:52 -0800406 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800407 """Checks whether we can apply imgdiff for the given RangeSets.
408
409 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
410 'imgdiff -z' if possible. Because it usually produces significantly smaller
411 patches than bsdiff.
412
413 This is permissible if all of the following conditions hold.
414 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
415 - The file type is supported by imgdiff;
416 - The source and target blocks are monotonic (i.e. the data is stored with
417 blocks in increasing order);
418 - We haven't removed any blocks from the source set.
419
420 If all these conditions are satisfied, concatenating all the blocks in the
421 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
422 in the last block). imgdiff is fine with extra zeros at the end of the file.
423
424 Args:
425 name: The filename to be diff'd.
426 tgt_ranges: The target RangeSet.
427 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800428 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800429
430 Returns:
431 A boolean result.
432 """
Tao Bao294651d2018-02-08 23:21:52 -0800433 if (self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name)):
434 return False
435
436 if not tgt_ranges.monotonic or not src_ranges.monotonic:
437 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
438 return False
439
440 if tgt_ranges.extra.get('trimmed') or src_ranges.extra.get('trimmed'):
441 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_TRIMMED)
442 return False
443
444 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
445 else ImgdiffStats.USED_IMGDIFF)
446 self.imgdiff_stats.Log(name, reason)
447 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800448
  def Compute(self, prefix):
    """Computes the block diff and writes the output files named <prefix>.*.

    Runs the full pipeline: find the transfers, order them, bound the stash
    usage, verify the result, then emit the patch data and transfer list.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if common.OPTIONS.verbose and not self.disable_imgdiff:
      self.imgdiff_stats.Report()
  def WriteTransfers(self, prefix):
    """Emits the computed transfer list to the file <prefix>.transfer.list.

    Walks self.transfers in order, translating each Transfer into the text
    commands understood by the updater (stash/free/move/bsdiff/imgdiff/new/
    zero/erase), while tracking simultaneous stash usage so the result can be
    checked against the /cache size limit. Also records the peak stash size
    in self._max_stashed_size and the touched source SHA-1 in
    self.touched_src_sha1.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit a "stash" command for each new stash this transfer requires;
      # an already-present hash only gets its refcount bumped.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      # "free" commands for stashes whose last reference this transfer
      # consumes; they are appended after this transfer's own command.
      free_string = []
      free_size = 0

      # Build the source descriptor (src_str), which takes one of the forms:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op: nothing is
        # emitted and nothing is counted.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    # Header lines; inserted last so they end up first in the file (before
    # any leading erase command as well).
    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700692
Tao Bao82c47982015-08-17 09:45:13 -0700693 def ReviseStashSize(self):
694 print("Revising stash size...")
Tao Baoe27acfd2016-12-16 11:13:55 -0800695 stash_map = {}
Tao Bao82c47982015-08-17 09:45:13 -0700696
697 # Create the map between a stash and its def/use points. For example, for a
Tao Bao3c5a16d2017-02-13 11:42:50 -0800698 # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
Tao Bao82c47982015-08-17 09:45:13 -0700699 for xf in self.transfers:
700 # Command xf defines (stores) all the stashes in stash_before.
Tao Bao3a2e3502016-12-28 09:14:39 -0800701 for stash_raw_id, sr in xf.stash_before:
702 stash_map[stash_raw_id] = (sr, xf)
Tao Bao82c47982015-08-17 09:45:13 -0700703
704 # Record all the stashes command xf uses.
Tao Bao3a2e3502016-12-28 09:14:39 -0800705 for stash_raw_id, _ in xf.use_stash:
706 stash_map[stash_raw_id] += (xf,)
Tao Bao82c47982015-08-17 09:45:13 -0700707
708 # Compute the maximum blocks available for stash based on /cache size and
709 # the threshold.
710 cache_size = common.OPTIONS.cache_size
711 stash_threshold = common.OPTIONS.stash_threshold
712 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
713
Tao Bao3a2e3502016-12-28 09:14:39 -0800714 # See the comments for 'stashes' in WriteTransfers().
Tao Baoe27acfd2016-12-16 11:13:55 -0800715 stashes = {}
Tao Bao82c47982015-08-17 09:45:13 -0700716 stashed_blocks = 0
Tao Bao9a5caf22015-08-25 15:10:10 -0700717 new_blocks = 0
Tao Bao82c47982015-08-17 09:45:13 -0700718
719 # Now go through all the commands. Compute the required stash size on the
720 # fly. If a command requires excess stash than available, it deletes the
721 # stash by replacing the command that uses the stash with a "new" command
722 # instead.
723 for xf in self.transfers:
724 replaced_cmds = []
725
726 # xf.stash_before generates explicit stash commands.
Tao Bao3a2e3502016-12-28 09:14:39 -0800727 for stash_raw_id, sr in xf.stash_before:
Tao Baoe27acfd2016-12-16 11:13:55 -0800728 # Check the post-command stashed_blocks.
729 stashed_blocks_after = stashed_blocks
Tao Bao8fad03e2017-03-01 14:36:26 -0800730 sh = self.src.RangeSha1(sr)
731 if sh not in stashes:
Tao Baoe27acfd2016-12-16 11:13:55 -0800732 stashed_blocks_after += sr.size()
Tao Baoe27acfd2016-12-16 11:13:55 -0800733
734 if stashed_blocks_after > max_allowed:
Tao Bao82c47982015-08-17 09:45:13 -0700735 # We cannot stash this one for a later command. Find out the command
736 # that will use this stash and replace the command with "new".
Tao Bao3a2e3502016-12-28 09:14:39 -0800737 use_cmd = stash_map[stash_raw_id][2]
Tao Bao82c47982015-08-17 09:45:13 -0700738 replaced_cmds.append(use_cmd)
Tao Bao9a5caf22015-08-25 15:10:10 -0700739 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao82c47982015-08-17 09:45:13 -0700740 else:
Tao Bao3c5a16d2017-02-13 11:42:50 -0800741 # Update the stashes map.
Tao Bao8fad03e2017-03-01 14:36:26 -0800742 if sh in stashes:
743 stashes[sh] += 1
Tao Bao3c5a16d2017-02-13 11:42:50 -0800744 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800745 stashes[sh] = 1
Tao Baoe27acfd2016-12-16 11:13:55 -0800746 stashed_blocks = stashed_blocks_after
Tao Bao82c47982015-08-17 09:45:13 -0700747
748 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
749 # ComputePatches(), they both have the style of "diff".
Tao Bao8fad03e2017-03-01 14:36:26 -0800750 if xf.style == "diff":
Tao Bao82c47982015-08-17 09:45:13 -0700751 assert xf.tgt_ranges and xf.src_ranges
752 if xf.src_ranges.overlaps(xf.tgt_ranges):
753 if stashed_blocks + xf.src_ranges.size() > max_allowed:
754 replaced_cmds.append(xf)
Tao Bao9a5caf22015-08-25 15:10:10 -0700755 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao82c47982015-08-17 09:45:13 -0700756
757 # Replace the commands in replaced_cmds with "new"s.
758 for cmd in replaced_cmds:
759 # It no longer uses any commands in "use_stash". Remove the def points
760 # for all those stashes.
Tao Bao3a2e3502016-12-28 09:14:39 -0800761 for stash_raw_id, sr in cmd.use_stash:
762 def_cmd = stash_map[stash_raw_id][1]
763 assert (stash_raw_id, sr) in def_cmd.stash_before
764 def_cmd.stash_before.remove((stash_raw_id, sr))
Tao Bao82c47982015-08-17 09:45:13 -0700765
Tianjie Xuebe39a02016-01-14 14:12:26 -0800766 # Add up blocks that violates space limit and print total number to
767 # screen later.
768 new_blocks += cmd.tgt_ranges.size()
Tao Bao82c47982015-08-17 09:45:13 -0700769 cmd.ConvertToNew()
770
Tao Bao3a2e3502016-12-28 09:14:39 -0800771 # xf.use_stash may generate free commands.
Tao Bao8fad03e2017-03-01 14:36:26 -0800772 for _, sr in xf.use_stash:
773 sh = self.src.RangeSha1(sr)
774 assert sh in stashes
775 stashes[sh] -= 1
776 if stashes[sh] == 0:
Tao Baoe27acfd2016-12-16 11:13:55 -0800777 stashed_blocks -= sr.size()
Tao Bao8fad03e2017-03-01 14:36:26 -0800778 stashes.pop(sh)
Tao Baoe27acfd2016-12-16 11:13:55 -0800779
Tianjie Xuebe39a02016-01-14 14:12:26 -0800780 num_of_bytes = new_blocks * self.tgt.blocksize
781 print(" Total %d blocks (%d bytes) are packed as new blocks due to "
782 "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao304ee272016-12-19 11:01:38 -0800783 return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700784
Doug Zongkerfc44a512014-08-26 13:10:25 -0700785 def ComputePatches(self, prefix):
786 print("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800787 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700788 patch_num = 0
789 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800790 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700791 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700792 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
793 print("%10d %10d (%6.2f%%) %7s %s %s" % (
794 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
795 str(xf.tgt_ranges)))
796
Doug Zongkerfc44a512014-08-26 13:10:25 -0700797 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800798 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700799 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
800 print("%10d %10d (%6.2f%%) %7s %s %s" % (
801 tgt_size, tgt_size, 100.0, xf.style,
802 xf.tgt_name, str(xf.tgt_ranges)))
803
Doug Zongkerfc44a512014-08-26 13:10:25 -0700804 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700805 # We can't compare src and tgt directly because they may have
806 # the same content but be broken up into blocks differently, eg:
807 #
808 # ["he", "llo"] vs ["h", "ello"]
809 #
810 # We want those to compare equal, ideally without having to
811 # actually concatenate the strings (these may be tens of
812 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800813 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700814 # These are identical; we don't need to generate a patch,
815 # just issue copy commands on the device.
816 xf.style = "move"
Tianjie Xu25366072017-09-08 17:19:02 -0700817 xf.patch = None
Tao Bao183e56e2017-03-05 17:05:09 -0800818 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700819 if xf.src_ranges != xf.tgt_ranges:
820 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
821 tgt_size, tgt_size, 100.0, xf.style,
822 xf.tgt_name if xf.tgt_name == xf.src_name else (
823 xf.tgt_name + " (from " + xf.src_name + ")"),
824 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700825 else:
Tianjie Xu25366072017-09-08 17:19:02 -0700826 if xf.patch:
827 # We have already generated the patch with imgdiff. Check if the
828 # transfer is intact.
829 assert not self.disable_imgdiff
830 imgdiff = True
Tao Baocb73aed2018-01-31 17:32:40 -0800831 if (xf.src_ranges.extra.get('trimmed') or
832 xf.tgt_ranges.extra.get('trimmed')):
Tianjie Xu25366072017-09-08 17:19:02 -0700833 imgdiff = False
834 xf.patch = None
835 else:
Tao Baocb73aed2018-01-31 17:32:40 -0800836 imgdiff = self.CanUseImgdiff(
837 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700838 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800839 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700840 patch_num += 1
841
842 else:
843 assert False, "unknown style " + xf.style
844
Tao Bao183e56e2017-03-05 17:05:09 -0800845 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700846 if self.threads > 1:
847 print("Computing patches (using %d threads)..." % (self.threads,))
848 else:
849 print("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700850
Tao Bao183e56e2017-03-05 17:05:09 -0800851 diff_total = len(diff_queue)
852 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700853 error_messages = []
Tao Baob937ead2017-10-19 16:51:53 -0700854 warning_messages = []
Tao Bao33635b12017-03-12 13:02:51 -0700855 if sys.stdout.isatty():
856 global diff_done
857 diff_done = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700858
Tao Bao183e56e2017-03-05 17:05:09 -0800859 # Using multiprocessing doesn't give additional benefits, due to the
860 # pattern of the code. The diffing work is done by subprocess.call, which
861 # already runs in a separate process (not affected much by the GIL -
862 # Global Interpreter Lock). Using multiprocess also requires either a)
863 # writing the diff input files in the main process before forking, or b)
864 # reopening the image file (SparseImage) in the worker processes. Doing
865 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700866 lock = threading.Lock()
867 def diff_worker():
868 while True:
869 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800870 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700871 return
Tao Bao183e56e2017-03-05 17:05:09 -0800872 xf_index, imgdiff, patch_index = diff_queue.pop()
873
874 xf = self.transfers[xf_index]
Tianjie Xu25366072017-09-08 17:19:02 -0700875 patch = xf.patch
876 if not patch:
877 src_ranges = xf.src_ranges
878 tgt_ranges = xf.tgt_ranges
Tao Bao183e56e2017-03-05 17:05:09 -0800879
Tianjie Xudf1166e2018-01-27 17:35:41 -0800880 src_file = common.MakeTempFile(prefix="src-")
881 with open(src_file, "wb") as fd:
882 self.src.WriteRangeDataToFd(src_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700883
Tianjie Xudf1166e2018-01-27 17:35:41 -0800884 tgt_file = common.MakeTempFile(prefix="tgt-")
885 with open(tgt_file, "wb") as fd:
886 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700887
888 message = []
889 try:
890 patch = compute_patch(src_file, tgt_file, imgdiff)
891 except ValueError as e:
892 message.append(
893 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
894 "imgdiff" if imgdiff else "bsdiff",
895 xf.tgt_name if xf.tgt_name == xf.src_name else
896 xf.tgt_name + " (from " + xf.src_name + ")",
897 xf.tgt_ranges, xf.src_ranges, e.message))
898 # TODO(b/68016761): Better handle the holes in mke2fs created
899 # images.
900 if imgdiff:
901 try:
902 patch = compute_patch(src_file, tgt_file, imgdiff=False)
903 message.append(
904 "Fell back and generated with bsdiff instead for %s" % (
905 xf.tgt_name,))
906 xf.style = "bsdiff"
907 with lock:
908 warning_messages.extend(message)
909 del message[:]
910 except ValueError as e:
911 message.append(
912 "Also failed to generate with bsdiff for %s:\n%s" % (
913 xf.tgt_name, e.message))
914
915 if message:
916 with lock:
917 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800918
919 with lock:
920 patches[patch_index] = (xf_index, patch)
921 if sys.stdout.isatty():
Tao Bao33635b12017-03-12 13:02:51 -0700922 global diff_done
923 diff_done += 1
924 progress = diff_done * 100 / diff_total
Tao Bao183e56e2017-03-05 17:05:09 -0800925 # '\033[K' is to clear to EOL.
926 print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
927 sys.stdout.flush()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700928
929 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700930 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700931 for th in threads:
932 th.start()
933 while threads:
934 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800935
936 if sys.stdout.isatty():
937 print('\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700938
Tao Baob937ead2017-10-19 16:51:53 -0700939 if warning_messages:
940 print('WARNING:')
941 print('\n'.join(warning_messages))
942 print('\n\n\n')
943
Tianjie Xub59c17f2016-10-28 17:55:53 -0700944 if error_messages:
Tao Baob937ead2017-10-19 16:51:53 -0700945 print('ERROR:')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700946 print('\n'.join(error_messages))
Tao Baob937ead2017-10-19 16:51:53 -0700947 print('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700948 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700949 else:
950 patches = []
951
Tao Bao183e56e2017-03-05 17:05:09 -0800952 offset = 0
953 with open(prefix + ".patch.dat", "wb") as patch_fd:
954 for index, patch in patches:
955 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700956 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800957 xf.patch_start = offset
958 offset += xf.patch_len
959 patch_fd.write(patch)
960
961 if common.OPTIONS.verbose:
962 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
963 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
964 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
965 xf.style,
966 xf.tgt_name if xf.tgt_name == xf.src_name else (
967 xf.tgt_name + " (from " + xf.src_name + ")"),
968 xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700969
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800970 def AssertSha1Good(self):
971 """Check the SHA-1 of the src & tgt blocks in the transfer list.
972
973 Double check the SHA-1 value to avoid the issue in b/71908713, where
974 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
975 environment. That specific problem has been fixed by protecting the
976 underlying generator function 'SparseImage._GetRangeData()' with lock.
977 """
978 for xf in self.transfers:
979 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
980 assert xf.tgt_sha1 == tgt_sha1
981 if xf.style == "diff":
982 src_sha1 = self.src.RangeSha1(xf.src_ranges)
983 assert xf.src_sha1 == src_sha1
984
Doug Zongkerfc44a512014-08-26 13:10:25 -0700985 def AssertSequenceGood(self):
986 # Simulate the sequences of transfers we will output, and check that:
987 # - we never read a block after writing it, and
988 # - we write every block we care about exactly once.
989
990 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800991 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700992
993 # Imagine processing the transfers in order.
994 for xf in self.transfers:
995 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700996
997 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800998 for _, sr in xf.use_stash:
999 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -07001000
Doug Zongker6ab2a502016-02-09 08:28:09 -08001001 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -08001002 # Source image could be larger. Don't check the blocks that are in the
1003 # source image only. Since they are not in 'touched', and won't ever
1004 # be touched.
1005 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -08001006 assert touched[i] == 0
1007
1008 # Check that the output blocks for this transfer haven't yet
1009 # been touched, and touch all the blocks written by this
1010 # transfer.
1011 for s, e in xf.tgt_ranges:
1012 for i in range(s, e):
1013 assert touched[i] == 0
1014 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001015
1016 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001017 for s, e in self.tgt.care_map:
1018 for i in range(s, e):
1019 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001020
Doug Zongker62338182014-09-08 08:29:55 -07001021 def ImproveVertexSequence(self):
1022 print("Improving vertex order...")
1023
1024 # At this point our digraph is acyclic; we reversed any edges that
1025 # were backwards in the heuristically-generated sequence. The
1026 # previously-generated order is still acceptable, but we hope to
1027 # find a better order that needs less memory for stashed data.
1028 # Now we do a topological sort to generate a new vertex order,
1029 # using a greedy algorithm to choose which vertex goes next
1030 # whenever we have a choice.
1031
1032 # Make a copy of the edge set; this copy will get destroyed by the
1033 # algorithm.
1034 for xf in self.transfers:
1035 xf.incoming = xf.goes_after.copy()
1036 xf.outgoing = xf.goes_before.copy()
1037
1038 L = [] # the new vertex order
1039
1040 # S is the set of sources in the remaining graph; we always choose
1041 # the one that leaves the least amount of stashed data after it's
1042 # executed.
1043 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1044 if not u.incoming]
1045 heapq.heapify(S)
1046
1047 while S:
1048 _, _, xf = heapq.heappop(S)
1049 L.append(xf)
1050 for u in xf.outgoing:
1051 del u.incoming[xf]
1052 if not u.incoming:
1053 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1054
1055 # if this fails then our graph had a cycle.
1056 assert len(L) == len(self.transfers)
1057
1058 self.transfers = L
1059 for i, xf in enumerate(L):
1060 xf.order = i
1061
Doug Zongkerfc44a512014-08-26 13:10:25 -07001062 def RemoveBackwardEdges(self):
1063 print("Removing backward edges...")
1064 in_order = 0
1065 out_of_order = 0
1066 lost_source = 0
1067
1068 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001069 lost = 0
1070 size = xf.src_ranges.size()
1071 for u in xf.goes_before:
1072 # xf should go before u
1073 if xf.order < u.order:
1074 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -07001075 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001076 else:
1077 # it doesn't, boo. trim the blocks that u writes from xf's
1078 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -07001079 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001080 assert xf.src_ranges.overlaps(u.tgt_ranges)
1081 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
Tao Baocb73aed2018-01-31 17:32:40 -08001082 xf.src_ranges.extra['trimmed'] = True
Doug Zongkerfc44a512014-08-26 13:10:25 -07001083
1084 if xf.style == "diff" and not xf.src_ranges:
1085 # nothing left to diff from; treat as new data
1086 xf.style = "new"
1087
1088 lost = size - xf.src_ranges.size()
1089 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -07001090
1091 print((" %d/%d dependencies (%.2f%%) were violated; "
1092 "%d source blocks removed.") %
1093 (out_of_order, in_order + out_of_order,
1094 (out_of_order * 100.0 / (in_order + out_of_order))
1095 if (in_order + out_of_order) else 0.0,
1096 lost_source))
1097
Doug Zongker62338182014-09-08 08:29:55 -07001098 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001099 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1100
1101 For each transfer, make sure it properly stashes the blocks it touches and
1102 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1103 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1104 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1105 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1106 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1107 blocks will be written to the same stash slot in WriteTransfers().
1108 """
1109
Doug Zongker62338182014-09-08 08:29:55 -07001110 print("Reversing backward edges...")
1111 in_order = 0
1112 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001113 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001114 stash_size = 0
1115
1116 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001117 for u in xf.goes_before.copy():
1118 # xf should go before u
1119 if xf.order < u.order:
1120 # it does, hurray!
1121 in_order += 1
1122 else:
1123 # it doesn't, boo. modify u to stash the blocks that it
1124 # writes that xf wants to read, and then require u to go
1125 # before xf.
1126 out_of_order += 1
1127
1128 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1129 assert overlap
1130
Tao Bao3a2e3502016-12-28 09:14:39 -08001131 u.stash_before.append((stash_raw_id, overlap))
1132 xf.use_stash.append((stash_raw_id, overlap))
1133 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001134 stash_size += overlap.size()
1135
1136 # reverse the edge direction; now xf must go after u
1137 del xf.goes_before[u]
1138 del u.goes_after[xf]
1139 xf.goes_after[u] = None # value doesn't matter
1140 u.goes_before[xf] = None
1141
1142 print((" %d/%d dependencies (%.2f%%) were violated; "
1143 "%d source blocks stashed.") %
1144 (out_of_order, in_order + out_of_order,
1145 (out_of_order * 100.0 / (in_order + out_of_order))
1146 if (in_order + out_of_order) else 0.0,
1147 stash_size))
1148
Doug Zongkerfc44a512014-08-26 13:10:25 -07001149 def FindVertexSequence(self):
1150 print("Finding vertex sequence...")
1151
1152 # This is based on "A Fast & Effective Heuristic for the Feedback
1153 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1154 # it as starting with the digraph G and moving all the vertices to
1155 # be on a horizontal line in some order, trying to minimize the
1156 # number of edges that end up pointing to the left. Left-pointing
1157 # edges will get removed to turn the digraph into a DAG. In this
1158 # case each edge has a weight which is the number of source blocks
1159 # we'll lose if that edge is removed; we try to minimize the total
1160 # weight rather than just the number of edges.
1161
1162 # Make a copy of the edge set; this copy will get destroyed by the
1163 # algorithm.
1164 for xf in self.transfers:
1165 xf.incoming = xf.goes_after.copy()
1166 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001167 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001168
1169 # We use an OrderedDict instead of just a set so that the output
1170 # is repeatable; otherwise it would depend on the hash values of
1171 # the transfer objects.
1172 G = OrderedDict()
1173 for xf in self.transfers:
1174 G[xf] = None
1175 s1 = deque() # the left side of the sequence, built from left to right
1176 s2 = deque() # the right side of the sequence, built from right to left
1177
Doug Zongker6ab2a502016-02-09 08:28:09 -08001178 heap = []
1179 for xf in self.transfers:
1180 xf.heap_item = HeapItem(xf)
1181 heap.append(xf.heap_item)
1182 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001183
Tao Bao33482282016-10-24 16:49:08 -07001184 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1185 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1186 # { key1: None, key2: None, ... }.
1187 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1188 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001189
1190 def adjust_score(iu, delta):
1191 iu.score += delta
1192 iu.heap_item.clear()
1193 iu.heap_item = HeapItem(iu)
1194 heapq.heappush(heap, iu.heap_item)
1195
1196 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001197 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001198 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001199 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001200 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001201 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001202 s2.appendleft(u)
1203 del G[u]
1204 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001205 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001206 if not iu.outgoing:
1207 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001208 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001209
1210 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001211 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001212 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001213 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001214 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001215 s1.append(u)
1216 del G[u]
1217 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001218 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001219 if not iu.incoming:
1220 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001221 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001222
Doug Zongker6ab2a502016-02-09 08:28:09 -08001223 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001224
1225 # Find the "best" vertex to put next. "Best" is the one that
1226 # maximizes the net difference in source blocks saved we get by
1227 # pretending it's a source rather than a sink.
1228
Doug Zongker6ab2a502016-02-09 08:28:09 -08001229 while True:
1230 u = heapq.heappop(heap)
1231 if u and u.item in G:
1232 u = u.item
1233 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001234
Doug Zongkerfc44a512014-08-26 13:10:25 -07001235 s1.append(u)
1236 del G[u]
1237 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001238 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001239 if not iu.incoming:
1240 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001241
Doug Zongkerfc44a512014-08-26 13:10:25 -07001242 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001243 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001244 if not iu.outgoing:
1245 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001246
1247 # Now record the sequence in the 'order' field of each transfer,
1248 # and by rearranging self.transfers to be in the chosen sequence.
1249
1250 new_transfers = []
1251 for x in itertools.chain(s1, s2):
1252 x.order = len(new_transfers)
1253 new_transfers.append(x)
1254 del x.incoming
1255 del x.outgoing
1256
1257 self.transfers = new_transfers
1258
1259 def GenerateDigraph(self):
1260 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001261
1262 # Each item of source_ranges will be:
1263 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001264 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001265 source_ranges = []
1266 for b in self.transfers:
1267 for s, e in b.src_ranges:
1268 if e > len(source_ranges):
1269 source_ranges.extend([None] * (e-len(source_ranges)))
1270 for i in range(s, e):
1271 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001272 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001273 else:
Tao Bao33482282016-10-24 16:49:08 -07001274 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001275
Doug Zongkerfc44a512014-08-26 13:10:25 -07001276 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001277 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001278 for s, e in a.tgt_ranges:
1279 for i in range(s, e):
1280 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001281 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1282 if source_ranges[i] is not None:
1283 for j in source_ranges[i]:
1284 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001285
1286 for b in intersections:
1287 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001288
1289 # If the blocks written by A are read by B, then B needs to go before A.
1290 i = a.tgt_ranges.intersect(b.src_ranges)
1291 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001292 if b.src_name == "__ZERO":
1293 # the cost of removing source blocks for the __ZERO domain
1294 # is (nearly) zero.
1295 size = 0
1296 else:
1297 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001298 b.goes_before[a] = size
1299 a.goes_after[b] = size
1300
1301 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001302 """Parse the file_map to generate all the transfers."""
1303
Tianjie Xu41390d42017-11-22 11:35:18 -08001304 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1305 src_ranges, style, by_id):
1306 """Add one or multiple Transfer()s by splitting large files.
1307
1308 For BBOTA v3, we need to stash source blocks for resumable feature.
1309 However, with the growth of file size and the shrink of the cache
1310 partition source blocks are too large to be stashed. If a file occupies
1311 too many blocks, we split it into smaller pieces by getting multiple
1312 Transfer()s.
1313
1314 The downside is that after splitting, we may increase the package size
1315 since the split pieces don't align well. According to our experiments,
1316 1/8 of the cache size as the per-piece limit appears to be optimal.
1317 Compared to the fixed 1024-block limit, it reduces the overall package
1318 size by 30% for volantis, and 20% for angler and bullhead."""
1319
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001320 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001321 while (tgt_ranges.size() > max_blocks_per_transfer and
1322 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001323 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1324 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001325 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1326 src_first = src_ranges.first(max_blocks_per_transfer)
1327
Tao Bao183e56e2017-03-05 17:05:09 -08001328 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1329 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1330 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001331
1332 tgt_ranges = tgt_ranges.subtract(tgt_first)
1333 src_ranges = src_ranges.subtract(src_first)
1334 pieces += 1
1335
1336 # Handle remaining blocks.
1337 if tgt_ranges.size() or src_ranges.size():
1338 # Must be both non-empty.
1339 assert tgt_ranges.size() and src_ranges.size()
1340 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1341 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001342 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1343 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1344 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001345
Tianjie Xu41390d42017-11-22 11:35:18 -08001346 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1347 by_id):
1348 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001349
Tianjie Xu41390d42017-11-22 11:35:18 -08001350 This function will construct a list of zip archives, which will later be
1351 split by imgdiff to reduce the final patch size. For the other files,
1352 we will plainly split them based on a fixed chunk size with the potential
1353 patch size penalty.
1354 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001355
1356 assert style == "diff"
1357
1358 # Change nothing for small files.
1359 if (tgt_ranges.size() <= max_blocks_per_transfer and
1360 src_ranges.size() <= max_blocks_per_transfer):
1361 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1362 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1363 style, by_id)
1364 return
1365
Tao Baocb73aed2018-01-31 17:32:40 -08001366 # Split large APKs with imgdiff, if possible. We're intentionally checking
1367 # file types one more time (CanUseImgdiff() checks that as well), before
1368 # calling the costly RangeSha1()s.
1369 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1370 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001371 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001372 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1373 return
1374
Tianjie Xu41390d42017-11-22 11:35:18 -08001375 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1376 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001377
Tao Bao08c85832016-09-19 22:26:30 -07001378 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1379 split=False):
1380 """Wrapper function for adding a Transfer()."""
1381
1382 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1383 # otherwise add the Transfer() as is.
1384 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001385 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1386 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1387 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001388 return
1389
1390 # Handle .odex files specially to analyze the block-wise difference. If
1391 # most of the blocks are identical with only few changes (e.g. header),
1392 # we will patch the changed blocks only. This avoids stashing unchanged
1393 # blocks while patching. We limit the analysis to files without size
1394 # changes only. This is to avoid sacrificing the OTA generation cost too
1395 # much.
1396 if (tgt_name.split(".")[-1].lower() == 'odex' and
1397 tgt_ranges.size() == src_ranges.size()):
1398
1399 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1400 # few blocks remain identical, we lose the opportunity to use imgdiff
1401 # that may have better compression ratio than bsdiff.
1402 crop_threshold = 0.5
1403
1404 tgt_skipped = RangeSet()
1405 src_skipped = RangeSet()
1406 tgt_size = tgt_ranges.size()
1407 tgt_changed = 0
1408 for src_block, tgt_block in zip(src_ranges.next_item(),
1409 tgt_ranges.next_item()):
1410 src_rs = RangeSet(str(src_block))
1411 tgt_rs = RangeSet(str(tgt_block))
1412 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1413 tgt_skipped = tgt_skipped.union(tgt_rs)
1414 src_skipped = src_skipped.union(src_rs)
1415 else:
1416 tgt_changed += tgt_rs.size()
1417
1418 # Terminate early if no clear sign of benefits.
1419 if tgt_changed > tgt_size * crop_threshold:
1420 break
1421
1422 if tgt_changed < tgt_size * crop_threshold:
1423 assert tgt_changed + tgt_skipped.size() == tgt_size
1424 print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
1425 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001426 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001427 "%s-skipped" % (tgt_name,),
1428 "%s-skipped" % (src_name,),
1429 tgt_skipped, src_skipped, style, by_id)
1430
1431 # Intentionally change the file extension to avoid being imgdiff'd as
1432 # the files are no longer in their original format.
1433 tgt_name = "%s-cropped" % (tgt_name,)
1434 src_name = "%s-cropped" % (src_name,)
1435 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1436 src_ranges = src_ranges.subtract(src_skipped)
1437
1438 # Possibly having no changed blocks.
1439 if not tgt_ranges:
1440 return
1441
1442 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001443 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001444 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1445
Tianjie Xu25366072017-09-08 17:19:02 -07001446 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1447 split_info):
1448 """Parse the split_info and return a list of info tuples.
1449
1450 Args:
1451 patch_size: total size of the patch file.
1452 tgt_ranges: Ranges of the target file within the original image.
1453 src_ranges: Ranges of the source file within the original image.
1454 split_info format:
1455 imgdiff version#
1456 count of pieces
1457 <patch_size_1> <tgt_size_1> <src_ranges_1>
1458 ...
1459 <patch_size_n> <tgt_size_n> <src_ranges_n>
1460
1461 Returns:
1462 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1463 """
1464
1465 version = int(split_info[0])
1466 assert version == 2
1467 count = int(split_info[1])
1468 assert len(split_info) - 2 == count
1469
1470 split_info_list = []
1471 patch_start = 0
1472 tgt_remain = copy.deepcopy(tgt_ranges)
1473 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1474 for line in split_info[2:]:
1475 info = line.split()
1476 assert len(info) == 3
1477 patch_length = int(info[0])
1478
1479 split_tgt_size = int(info[1])
1480 assert split_tgt_size % 4096 == 0
1481 assert split_tgt_size / 4096 <= tgt_remain.size()
1482 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1483 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1484
1485 # Find the split_src_ranges within the image file from its relative
1486 # position in file.
1487 split_src_indices = RangeSet.parse_raw(info[2])
1488 split_src_ranges = RangeSet()
1489 for r in split_src_indices:
1490 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1491 assert not split_src_ranges.overlaps(curr_range)
1492 split_src_ranges = split_src_ranges.union(curr_range)
1493
1494 split_info_list.append((patch_start, patch_length,
1495 split_tgt_ranges, split_src_ranges))
1496 patch_start += patch_length
1497
1498 # Check that the sizes of all the split pieces add up to the final file
1499 # size for patch and target.
1500 assert tgt_remain.size() == 0
1501 assert patch_start == patch_size
1502 return split_info_list
1503
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001504 def SplitLargeApks():
1505 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001506
1507 Example: Chrome.apk will be split into
1508 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1509 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1510 ...
1511
1512 After the split, the target pieces are continuous and block aligned; and
1513 the source pieces are mutually exclusive. During the split, we also
1514 generate and save the image patch between src-X & tgt-X. This patch will
1515 be valid because the block ranges of src-X & tgt-X will always stay the
1516 same afterwards; but there's a chance we don't use the patch if we
1517 convert the "diff" command into "new" or "move" later.
Tianjie Xu41390d42017-11-22 11:35:18 -08001518
1519 The split will be attempted by calling imgdiff, which expects the input
1520 files to be valid zip archives. If imgdiff fails for some reason (i.e.
1521 holes in the APK file), we will fall back to split the failed APKs into
1522 fixed size chunks.
Tianjie Xu25366072017-09-08 17:19:02 -07001523 """
1524
1525 while True:
1526 with transfer_lock:
1527 if not large_apks:
1528 return
1529 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1530
1531 src_file = common.MakeTempFile(prefix="src-")
1532 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001533 with open(src_file, "wb") as src_fd:
1534 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1535 with open(tgt_file, "wb") as tgt_fd:
1536 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001537
1538 patch_file = common.MakeTempFile(prefix="patch-")
1539 patch_info_file = common.MakeTempFile(prefix="split_info-")
1540 cmd = ["imgdiff", "-z",
1541 "--block-limit={}".format(max_blocks_per_transfer),
1542 "--split-info=" + patch_info_file,
1543 src_file, tgt_file, patch_file]
1544 p = common.Run(cmd, stdout=subprocess.PIPE)
1545 p.communicate()
Tianjie Xu25366072017-09-08 17:19:02 -07001546 if p.returncode != 0:
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001547 print("Failed to create patch between {} and {},"
1548 " falling back to bsdiff".format(src_name, tgt_name))
1549 with transfer_lock:
Tianjie Xu41390d42017-11-22 11:35:18 -08001550 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name,
1551 tgt_ranges, src_ranges,
1552 "diff", self.transfers)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001553 continue
Tianjie Xu25366072017-09-08 17:19:02 -07001554
1555 with open(patch_info_file) as patch_info:
1556 lines = patch_info.readlines()
1557
1558 patch_size_total = os.path.getsize(patch_file)
1559 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1560 tgt_ranges, src_ranges,
1561 lines)
1562 for index, (patch_start, patch_length, split_tgt_ranges,
1563 split_src_ranges) in enumerate(split_info_list):
1564 with open(patch_file) as f:
1565 f.seek(patch_start)
1566 patch_content = f.read(patch_length)
1567
1568 split_src_name = "{}-{}".format(src_name, index)
1569 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001570 split_large_apks.append((split_tgt_name,
1571 split_src_name,
1572 split_tgt_ranges,
1573 split_src_ranges,
1574 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001575
Tao Bao08c85832016-09-19 22:26:30 -07001576 print("Finding transfers...")
1577
Tianjie Xu25366072017-09-08 17:19:02 -07001578 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001579 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001580 cache_size = common.OPTIONS.cache_size
1581 split_threshold = 0.125
1582 max_blocks_per_transfer = int(cache_size * split_threshold /
1583 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001584 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001585 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001586 if tgt_fn == "__ZERO":
1587 # the special "__ZERO" domain is all the blocks not contained
1588 # in any file and that are filled with zeros. We have a
1589 # special transfer style for zero blocks.
1590 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001591 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1592 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001593 continue
1594
Tao Baoff777812015-05-12 11:42:31 -07001595 elif tgt_fn == "__COPY":
1596 # "__COPY" domain includes all the blocks not contained in any
1597 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001598 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001599 continue
1600
Doug Zongkerfc44a512014-08-26 13:10:25 -07001601 elif tgt_fn in self.src.file_map:
1602 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001603 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001604 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001605 continue
1606
1607 b = os.path.basename(tgt_fn)
1608 if b in self.src_basenames:
1609 # Look for an exact basename match in the source.
1610 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001611 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001612 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001613 continue
1614
1615 b = re.sub("[0-9]+", "#", b)
1616 if b in self.src_numpatterns:
1617 # Look for a 'number pattern' match (a basename match after
1618 # all runs of digits are replaced by "#"). (This is useful
1619 # for .so files that contain version numbers in the filename
1620 # that get bumped.)
1621 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001622 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001623 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001624 continue
1625
Tao Bao9a5caf22015-08-25 15:10:10 -07001626 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001627
Tianjie Xu25366072017-09-08 17:19:02 -07001628 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001629 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001630 for _ in range(self.threads)]
1631 for th in threads:
1632 th.start()
1633 while threads:
1634 threads.pop().join()
1635
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001636 # Sort the split transfers for large apks to generate a determinate package.
1637 split_large_apks.sort()
1638 for (tgt_name, src_name, tgt_ranges, src_ranges,
1639 patch) in split_large_apks:
1640 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1641 self.tgt.RangeSha1(tgt_ranges),
1642 self.src.RangeSha1(src_ranges),
1643 "diff", self.transfers)
1644 transfer_split.patch = patch
1645
Doug Zongkerfc44a512014-08-26 13:10:25 -07001646 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001647 for k in self.src.file_map.keys():
1648 b = os.path.basename(k)
1649 self.src_basenames[b] = k
1650 b = re.sub("[0-9]+", "#", b)
1651 self.src_numpatterns[b] = k
1652
1653 @staticmethod
1654 def AssertPartition(total, seq):
1655 """Assert that all the RangeSets in 'seq' form a partition of the
1656 'total' RangeSet (ie, they are nonintersecting and their union
1657 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001658
Doug Zongkerfc44a512014-08-26 13:10:25 -07001659 so_far = RangeSet()
1660 for i in seq:
1661 assert not so_far.overlaps(i)
1662 so_far = so_far.union(i)
1663 assert so_far == total