blob: e82e66ab068389a47d35db2b4537489085d9ba0c [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tianjie Xu25366072017-09-08 17:19:02 -070018import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Tao Bao3a2e3502016-12-28 09:14:39 -080024import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070025import re
26import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Tao Bao3a2e3502016-12-28 09:14:39 -080029from collections import deque, OrderedDict
30from hashlib import sha1
Tao Bao508b0872018-02-09 13:44:43 -080031
32import common
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070036__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Computes and returns the patch bytes that turn srcfile into tgtfile.

  Runs 'imgdiff -z' when imgdiff is True, otherwise 'bsdiff'. Raises
  ValueError (with the tool's combined stdout/stderr) on failure.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Suppress dumping the command line: the arguments are throwaway temp
  # filenames, which aren't useful for debugging.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  stdoutdata, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(stdoutdata)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
class Image(object):
  """Abstract interface for the image objects used by BlockImageDiff.

  Concrete implementations (e.g. EmptyImage, DataImage) must override every
  method below; the base versions only raise NotImplementedError.
  """

  def RangeSha1(self, ranges):
    """Returns (as a hex string) the SHA-1 of the data in 'ranges'."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns an iterable of strings containing the data in 'ranges'."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns (as a hex string) the SHA-1 of the whole image's data."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data covered by 'ranges' into the file object 'fd'."""
    raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image: no blocks, no data."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is never any data, so this is the hash of the empty string.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # clobbered_blocks is always empty for an EmptyImage, so the
    # include_clobbered_blocks flag makes no difference here.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Initializes a DataImage over an in-memory blob.

    Args:
      data: The raw image contents as a string.
      trim: If True, drop a trailing partial block instead of failing.
      pad: If True, zero-fill a trailing partial block instead of failing.

    Raises:
      ValueError: If data is not block-aligned and neither trim nor pad is
          specified.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int on Python 3 as well;
    # plain '/' would produce a float there and break RangeSet() and range().
    # Identical behavior on Python 2 for the int operands used here.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    # Classify each (whole, unpadded) block as all-zero or not, to build the
    # __ZERO/__NONZERO domains of the file map.
    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yields the raw data for each [start, end) block range in 'ranges'."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns (as a hex string) the SHA-1 of the data in 'ranges'."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    # Callers concatenate the returned elements; a single generator that
    # yields one chunk per range satisfies that contract.
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the SHA-1 of the whole image, optionally with clobbered blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data covered by 'ranges' into the file object 'fd'."""
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """One transfer command: produce a target range, possibly from a source."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # OrderedDict keeps the generated output repeatable; with a plain dict
    # the iteration order would depend on the Transfer objects' hashes.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register in the shared list; the index becomes this transfer's id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    if patch:
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Returns blocks stashed by this transfer minus blocks it releases."""
    stashed = sum(sr.size() for (_, sr) in self.stash_before)
    released = sum(sr.size() for (_, sr) in self.use_stash)
    return stashed - released

  def ConvertToNew(self):
    """Rewrites this transfer as a sourceless full-write ('new') command."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "{}: <{} {} to {}>".format(
        self.id, self.src_ranges, self.style, self.tgt_ranges)
232
233
@functools.total_ordering
class HeapItem(object):
  """Adapts a scored item for use with Python's min-heap (heapq)."""

  def __init__(self, item):
    self.item = item
    # heapq pops the smallest key first, but callers want the item with the
    # *largest* score, so the stored key is the negated score.
    self.score = -item.score

  def clear(self):
    """Drops the wrapped item, marking this heap entry as dead."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 looks up __nonzero__ where Python 3 looks up __bool__.
  __nonzero__ = __bool__

  # functools.total_ordering fills in the remaining rich comparisons from
  # __eq__ plus __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
257
258
class ImgdiffStats(object):
  """Collects stats about imgdiff usage while generating a BlockImageDiff.

  Tracks the files that get diff'd with imgdiff, as well as the ones that
  cannot, keyed by the specific reason. The numbers are only meaningful when
  the BlockImageDiff caller hasn't disabled imgdiff, and only files whose
  types pass BlockImageDiff.FileTypeSupportedByImgdiff() may be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # Every valid reason, in the order their sections appear in the report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason string to the set of filenames logged under it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS
    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      print(header)
      print(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for reason in self.REASONS:
      if reason in self.stats:
        filenames = self.stats[reason]
        print_header(' {} (count: {}) '.format(reason, len(filenames)), '-')
        print(''.join(['  {}\n'.format(name) for name in filenames]))
321
322
Doug Zongkerfc44a512014-08-26 13:10:25 -0700323# BlockImageDiff works on two image objects. An image object is
324# anything that provides the following attributes:
325#
326# blocksize: the size in bytes of a block, currently must be 4096.
327#
328# total_blocks: the total size of the partition/image, in blocks.
329#
330# care_map: a RangeSet containing which blocks (in the range [0,
331# total_blocks) we actually care about; i.e. which blocks contain
332# data.
333#
334# file_map: a dict that partitions the blocks contained in care_map
335# into smaller domains that are useful for doing diffs on.
336# (Typically a domain is a file, and the key in file_map is the
337# pathname.)
338#
Tao Baoff777812015-05-12 11:42:31 -0700339# clobbered_blocks: a RangeSet containing which blocks contain data
340# but may be altered by the FS. They need to be excluded when
341# verifying the partition integrity.
342#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700343# ReadRangeSet(): a function that takes a RangeSet and returns the
344# data contained in the image blocks of that RangeSet. The data
345# is returned as a list or tuple of strings; concatenating the
346# elements together should produce the requested data.
347# Implementations are free to break up the data into list/tuple
348# elements in any way that is convenient.
349#
Tao Bao183e56e2017-03-05 17:05:09 -0800350# RangeSha1(): a function that returns (as a hex string) the SHA-1
351# hash of all the data in the specified range.
352#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700353# TotalSha1(): a function that returns (as a hex string) the SHA-1
354# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700355# care_map minus clobbered_blocks, or including the clobbered
356# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700357#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700358# When creating a BlockImageDiff, the src image may be None, in which
359# case the list of transfers produced will never read from the
360# original image.
361
362class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700363 def __init__(self, tgt, src=None, threads=None, version=4,
364 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700365 if threads is None:
366 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700367 if threads == 0:
368 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700369 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700370 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700371 self.transfers = []
372 self.src_basenames = {}
373 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800374 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700375 self.touched_src_ranges = RangeSet()
376 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700377 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800378 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700379
Tao Bao8fad03e2017-03-01 14:36:26 -0800380 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700381
382 self.tgt = tgt
383 if src is None:
384 src = EmptyImage()
385 self.src = src
386
387 # The updater code that installs the patch always uses 4k blocks.
388 assert tgt.blocksize == 4096
389 assert src.blocksize == 4096
390
391 # The range sets in each filemap should comprise a partition of
392 # the care map.
393 self.AssertPartition(src.care_map, src.file_map.values())
394 self.AssertPartition(tgt.care_map, tgt.file_map.values())
395
  @property
  def max_stashed_size(self):
    # Peak stash usage in bytes (max stashed blocks * target block size);
    # computed by WriteTransfers(), and 0 until that has run.
    return self._max_stashed_size
399
Tao Baocb73aed2018-01-31 17:32:40 -0800400 @staticmethod
401 def FileTypeSupportedByImgdiff(filename):
402 """Returns whether the file type is supported by imgdiff."""
403 return filename.lower().endswith(('.apk', '.jar', '.zip'))
404
Tao Bao294651d2018-02-08 23:21:52 -0800405 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800406 """Checks whether we can apply imgdiff for the given RangeSets.
407
408 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
409 'imgdiff -z' if possible. Because it usually produces significantly smaller
410 patches than bsdiff.
411
412 This is permissible if all of the following conditions hold.
413 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
414 - The file type is supported by imgdiff;
415 - The source and target blocks are monotonic (i.e. the data is stored with
416 blocks in increasing order);
Tao Baoe709b092018-02-07 12:40:00 -0800417 - Both files don't contain shared blocks;
Tao Bao4ccea852018-02-06 15:16:41 -0800418 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800419 - We haven't removed any blocks from the source set.
420
421 If all these conditions are satisfied, concatenating all the blocks in the
422 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
423 in the last block). imgdiff is fine with extra zeros at the end of the file.
424
425 Args:
426 name: The filename to be diff'd.
427 tgt_ranges: The target RangeSet.
428 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800429 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800430
431 Returns:
432 A boolean result.
433 """
Tao Bao508b0872018-02-09 13:44:43 -0800434 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800435 return False
436
437 if not tgt_ranges.monotonic or not src_ranges.monotonic:
438 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
439 return False
440
Tao Baoe709b092018-02-07 12:40:00 -0800441 if (tgt_ranges.extra.get('uses_shared_blocks') or
442 src_ranges.extra.get('uses_shared_blocks')):
443 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
444 return False
445
Tao Bao4ccea852018-02-06 15:16:41 -0800446 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
447 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
448 return False
449
Tao Bao294651d2018-02-08 23:21:52 -0800450 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
451 else ImgdiffStats.USED_IMGDIFF)
452 self.imgdiff_stats.Log(name, reason)
453 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800454
  def Compute(self, prefix):
    """Computes the whole diff: transfers, ordering, patches and output.

    Runs the full pipeline and writes the results via ComputePatches() and
    WriteTransfers(); 'prefix' is the output path prefix (WriteTransfers()
    writes <prefix>.transfer.list).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if common.OPTIONS.verbose and not self.disable_imgdiff:
      self.imgdiff_stats.Report()
  def WriteTransfers(self, prefix):
    """Serializes the computed transfers into <prefix>.transfer.list.

    Walks self.transfers in order, emitting stash/free commands and one
    command per transfer, while tracking stash usage so the header can
    record the maximum number of simultaneously stashed blocks. Also sets
    self.touched_src_ranges / self.touched_src_sha1 and
    self._max_stashed_size as side effects.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # Source string formats (v3+):
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        # Map the stashed range into the transfer's own coordinate space.
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference: emit a free command after this transfer.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # Command formats (v3+):
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op; emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    # Header lines precede all commands (including the first erase).
    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700698
  def ReviseStashSize(self):
    """Revise the transfer list so stash usage fits within the cache limit.

    Simulates the stash space needed while executing the transfers in order.
    Whenever stashing a range would push usage over the allowance
    (cache_size * stash_threshold, expressed in blocks), the command that
    would later consume that stash is converted into a "new" command instead,
    so the data is carried in the package rather than stashed on the device.

    Returns:
      The number of target blocks converted to "new" commands.
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    # Maps the SHA-1 of a stashed range to its reference count.
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        # Ranges with identical contents (same SHA-1) share one stash slot,
        # so a duplicate adds no extra space.
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700790
Doug Zongkerfc44a512014-08-26 13:10:25 -0700791 def ComputePatches(self, prefix):
792 print("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800793 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700794 patch_num = 0
795 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800796 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700797 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700798 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
799 print("%10d %10d (%6.2f%%) %7s %s %s" % (
800 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
801 str(xf.tgt_ranges)))
802
Doug Zongkerfc44a512014-08-26 13:10:25 -0700803 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800804 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700805 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
806 print("%10d %10d (%6.2f%%) %7s %s %s" % (
807 tgt_size, tgt_size, 100.0, xf.style,
808 xf.tgt_name, str(xf.tgt_ranges)))
809
Doug Zongkerfc44a512014-08-26 13:10:25 -0700810 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700811 # We can't compare src and tgt directly because they may have
812 # the same content but be broken up into blocks differently, eg:
813 #
814 # ["he", "llo"] vs ["h", "ello"]
815 #
816 # We want those to compare equal, ideally without having to
817 # actually concatenate the strings (these may be tens of
818 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800819 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700820 # These are identical; we don't need to generate a patch,
821 # just issue copy commands on the device.
822 xf.style = "move"
Tianjie Xu25366072017-09-08 17:19:02 -0700823 xf.patch = None
Tao Bao183e56e2017-03-05 17:05:09 -0800824 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700825 if xf.src_ranges != xf.tgt_ranges:
826 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
827 tgt_size, tgt_size, 100.0, xf.style,
828 xf.tgt_name if xf.tgt_name == xf.src_name else (
829 xf.tgt_name + " (from " + xf.src_name + ")"),
830 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700831 else:
Tianjie Xu25366072017-09-08 17:19:02 -0700832 if xf.patch:
Tao Bao5bab0dd2018-07-10 17:44:52 -0700833 # We have already generated the patch with imgdiff, while
834 # splitting large APKs (i.e. in FindTransfers()).
Tianjie Xu25366072017-09-08 17:19:02 -0700835 assert not self.disable_imgdiff
836 imgdiff = True
Tianjie Xu25366072017-09-08 17:19:02 -0700837 else:
Tao Baocb73aed2018-01-31 17:32:40 -0800838 imgdiff = self.CanUseImgdiff(
839 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700840 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800841 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700842 patch_num += 1
843
844 else:
845 assert False, "unknown style " + xf.style
846
Tao Bao183e56e2017-03-05 17:05:09 -0800847 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700848 if self.threads > 1:
849 print("Computing patches (using %d threads)..." % (self.threads,))
850 else:
851 print("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700852
Tao Bao183e56e2017-03-05 17:05:09 -0800853 diff_total = len(diff_queue)
854 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700855 error_messages = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700856
Tao Bao183e56e2017-03-05 17:05:09 -0800857 # Using multiprocessing doesn't give additional benefits, due to the
858 # pattern of the code. The diffing work is done by subprocess.call, which
859 # already runs in a separate process (not affected much by the GIL -
860 # Global Interpreter Lock). Using multiprocess also requires either a)
861 # writing the diff input files in the main process before forking, or b)
862 # reopening the image file (SparseImage) in the worker processes. Doing
863 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700864 lock = threading.Lock()
865 def diff_worker():
866 while True:
867 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800868 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700869 return
Tao Bao183e56e2017-03-05 17:05:09 -0800870 xf_index, imgdiff, patch_index = diff_queue.pop()
Tao Bao97395142018-02-12 12:08:05 -0800871 xf = self.transfers[xf_index]
Tao Bao183e56e2017-03-05 17:05:09 -0800872
Tao Bao97395142018-02-12 12:08:05 -0800873 if sys.stdout.isatty():
874 diff_left = len(diff_queue)
875 progress = (diff_total - diff_left) * 100 / diff_total
876 # '\033[K' is to clear to EOL.
877 print(' [%3d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
878 sys.stdout.flush()
879
Tianjie Xu25366072017-09-08 17:19:02 -0700880 patch = xf.patch
881 if not patch:
882 src_ranges = xf.src_ranges
883 tgt_ranges = xf.tgt_ranges
Tao Bao183e56e2017-03-05 17:05:09 -0800884
Tianjie Xudf1166e2018-01-27 17:35:41 -0800885 src_file = common.MakeTempFile(prefix="src-")
886 with open(src_file, "wb") as fd:
887 self.src.WriteRangeDataToFd(src_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700888
Tianjie Xudf1166e2018-01-27 17:35:41 -0800889 tgt_file = common.MakeTempFile(prefix="tgt-")
890 with open(tgt_file, "wb") as fd:
891 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700892
893 message = []
894 try:
895 patch = compute_patch(src_file, tgt_file, imgdiff)
896 except ValueError as e:
897 message.append(
898 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
Tao Bao508b0872018-02-09 13:44:43 -0800899 "imgdiff" if imgdiff else "bsdiff",
900 xf.tgt_name if xf.tgt_name == xf.src_name else
Tianjie Xu25366072017-09-08 17:19:02 -0700901 xf.tgt_name + " (from " + xf.src_name + ")",
Tao Bao508b0872018-02-09 13:44:43 -0800902 xf.tgt_ranges, xf.src_ranges, e.message))
Tianjie Xu25366072017-09-08 17:19:02 -0700903 if message:
904 with lock:
905 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800906
907 with lock:
908 patches[patch_index] = (xf_index, patch)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700909
910 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700911 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700912 for th in threads:
913 th.start()
914 while threads:
915 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800916
917 if sys.stdout.isatty():
918 print('\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700919
920 if error_messages:
Tao Baob937ead2017-10-19 16:51:53 -0700921 print('ERROR:')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700922 print('\n'.join(error_messages))
Tao Baob937ead2017-10-19 16:51:53 -0700923 print('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700924 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700925 else:
926 patches = []
927
Tao Bao183e56e2017-03-05 17:05:09 -0800928 offset = 0
929 with open(prefix + ".patch.dat", "wb") as patch_fd:
930 for index, patch in patches:
931 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700932 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800933 xf.patch_start = offset
934 offset += xf.patch_len
935 patch_fd.write(patch)
936
937 if common.OPTIONS.verbose:
938 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
939 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
Tao Bao508b0872018-02-09 13:44:43 -0800940 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
941 xf.style,
942 xf.tgt_name if xf.tgt_name == xf.src_name else (
943 xf.tgt_name + " (from " + xf.src_name + ")"),
944 xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700945
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800946 def AssertSha1Good(self):
947 """Check the SHA-1 of the src & tgt blocks in the transfer list.
948
949 Double check the SHA-1 value to avoid the issue in b/71908713, where
950 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
951 environment. That specific problem has been fixed by protecting the
952 underlying generator function 'SparseImage._GetRangeData()' with lock.
953 """
954 for xf in self.transfers:
955 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
956 assert xf.tgt_sha1 == tgt_sha1
957 if xf.style == "diff":
958 src_sha1 = self.src.RangeSha1(xf.src_ranges)
959 assert xf.src_sha1 == src_sha1
960
Doug Zongkerfc44a512014-08-26 13:10:25 -0700961 def AssertSequenceGood(self):
962 # Simulate the sequences of transfers we will output, and check that:
963 # - we never read a block after writing it, and
964 # - we write every block we care about exactly once.
965
966 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800967 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700968
969 # Imagine processing the transfers in order.
970 for xf in self.transfers:
971 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700972
973 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800974 for _, sr in xf.use_stash:
975 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700976
Doug Zongker6ab2a502016-02-09 08:28:09 -0800977 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800978 # Source image could be larger. Don't check the blocks that are in the
979 # source image only. Since they are not in 'touched', and won't ever
980 # be touched.
981 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800982 assert touched[i] == 0
983
984 # Check that the output blocks for this transfer haven't yet
985 # been touched, and touch all the blocks written by this
986 # transfer.
987 for s, e in xf.tgt_ranges:
988 for i in range(s, e):
989 assert touched[i] == 0
990 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700991
992 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800993 for s, e in self.tgt.care_map:
994 for i in range(s, e):
995 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700996
Doug Zongker62338182014-09-08 08:29:55 -0700997 def ImproveVertexSequence(self):
998 print("Improving vertex order...")
999
1000 # At this point our digraph is acyclic; we reversed any edges that
1001 # were backwards in the heuristically-generated sequence. The
1002 # previously-generated order is still acceptable, but we hope to
1003 # find a better order that needs less memory for stashed data.
1004 # Now we do a topological sort to generate a new vertex order,
1005 # using a greedy algorithm to choose which vertex goes next
1006 # whenever we have a choice.
1007
1008 # Make a copy of the edge set; this copy will get destroyed by the
1009 # algorithm.
1010 for xf in self.transfers:
1011 xf.incoming = xf.goes_after.copy()
1012 xf.outgoing = xf.goes_before.copy()
1013
1014 L = [] # the new vertex order
1015
1016 # S is the set of sources in the remaining graph; we always choose
1017 # the one that leaves the least amount of stashed data after it's
1018 # executed.
1019 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1020 if not u.incoming]
1021 heapq.heapify(S)
1022
1023 while S:
1024 _, _, xf = heapq.heappop(S)
1025 L.append(xf)
1026 for u in xf.outgoing:
1027 del u.incoming[xf]
1028 if not u.incoming:
1029 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1030
1031 # if this fails then our graph had a cycle.
1032 assert len(L) == len(self.transfers)
1033
1034 self.transfers = L
1035 for i, xf in enumerate(L):
1036 xf.order = i
1037
  def ReverseBackwardEdges(self):
    """Reverse unsatisfying edges and compute pairs of stashed blocks.

    For each transfer, make sure it properly stashes the blocks it touches and
    will be used by later transfers. It uses pairs of (stash_raw_id, range) to
    record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
    identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
    it is possible to have multiple pairs with different 'stash_raw_id's. Each
    'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
    blocks will be written to the same stash slot in WriteTransfers().
    """

    print("Reversing backward edges...")
    in_order = 0        # edges already satisfied by the current order
    out_of_order = 0    # edges violated and therefore reversed below
    stash_raw_id = 0    # next unique id for a (stash, range) pair
    stash_size = 0      # total number of source blocks that get stashed

    for xf in self.transfers:
      # Iterate over a copy because the loop body mutates xf.goes_before.
      for u in xf.goes_before.copy():
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo. modify u to stash the blocks that it
          # writes that xf wants to read, and then require u to go
          # before xf.
          out_of_order += 1

          overlap = xf.src_ranges.intersect(u.tgt_ranges)
          assert overlap

          # u saves the overlap before overwriting it; xf later reads the
          # saved copy. Both sides reference the same stash_raw_id.
          u.stash_before.append((stash_raw_id, overlap))
          xf.use_stash.append((stash_raw_id, overlap))
          stash_raw_id += 1
          stash_size += overlap.size()

          # reverse the edge direction; now xf must go after u
          del xf.goes_before[u]
          del u.goes_after[xf]
          xf.goes_after[u] = None # value doesn't matter
          u.goes_before[xf] = None

    print((" %d/%d dependencies (%.2f%%) were violated; "
           "%d source blocks stashed.") %
          (out_of_order, in_order + out_of_order,
           (out_of_order * 100.0 / (in_order + out_of_order))
           if (in_order + out_of_order) else 0.0,
           stash_size))
1088
  def FindVertexSequence(self):
    """Order the transfers to heuristically minimize lost source blocks.

    Stores the chosen sequence back into self.transfers and each transfer's
    'order' field. Edges that still point "backwards" after this pass are
    dealt with later (they get reversed and turned into stashes by
    ReverseBackwardEdges()).
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = (weight of edges out) - (weight of edges in); high scores
      # favor placing the vertex early (on the left).
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Heap entries are immutable once pushed, so re-scoring a vertex means
      # invalidating its old entry and pushing a fresh one.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until we find a live entry: stale (cleared) items are falsy, and
      # an item's transfer may already have been removed from G.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1201
  def GenerateDigraph(self):
    """Build the dependency digraph over all transfers.

    Adds an edge b -> a (recorded as b.goes_before[a] and a.goes_after[b])
    whenever transfer a writes blocks that transfer b reads, i.e. b must run
    before a overwrites its source data. The edge weight is the number of
    overlapping blocks (the cost of breaking the edge); overlaps with the
    synthetic "__ZERO" source are weighted 0 because zero blocks are free to
    regenerate. OrderedDicts are used throughout so the output is repeatable
    regardless of object hash values.
    """
    print("Generating digraph...")

    # Each item of source_ranges will be:
    # - None, if that block is not used as a source,
    # - an ordered set of transfers.
    source_ranges = []
    for b in self.transfers:
      for s, e in b.src_ranges:
        if e > len(source_ranges):
          source_ranges.extend([None] * (e-len(source_ranges)))
        for i in range(s, e):
          if source_ranges[i] is None:
            source_ranges[i] = OrderedDict.fromkeys([b])
          else:
            source_ranges[i][b] = None

    for a in self.transfers:
      intersections = OrderedDict()
      for s, e in a.tgt_ranges:
        for i in range(s, e):
          # Target blocks beyond the source map cannot be read by anyone.
          if i >= len(source_ranges):
            break
          # Add all the Transfers in source_ranges[i] to the (ordered) set.
          if source_ranges[i] is not None:
            for j in source_ranges[i]:
              intersections[j] = None

      for b in intersections:
        if a is b:
          continue

        # If the blocks written by A are read by B, then B needs to go before A.
        i = a.tgt_ranges.intersect(b.src_ranges)
        if i:
          if b.src_name == "__ZERO":
            # the cost of removing source blocks for the __ZERO domain
            # is (nearly) zero.
            size = 0
          else:
            size = i.size()
          b.goes_before[a] = size
          a.goes_after[b] = size
1245
1246 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001247 """Parse the file_map to generate all the transfers."""
1248
    def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                             src_ranges, style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""

      # NOTE: 'max_blocks_per_transfer', 'self' and 'Transfer' come from the
      # enclosing FindTransfers() scope.
      pieces = 0
      # Carve off fixed-size chunks while both sides still exceed the limit;
      # each chunk becomes its own Transfer named "<name>-<piece>".
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
                 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
                 style, by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001290
    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
                          by_id):
      """Find all the zip files and split the others with a fixed chunk size.

      This function will construct a list of zip archives, which will later be
      split by imgdiff to reduce the final patch size. For the other files,
      we will plainly split them based on a fixed chunk size with the potential
      patch size penalty.
      """

      assert style == "diff"

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Split large APKs with imgdiff, if possible. We're intentionally checking
      # file types one more time (CanUseImgdiff() checks that as well), before
      # calling the costly RangeSha1()s.
      if (self.FileTypeSupportedByImgdiff(tgt_name) and
          self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
        if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
          # Defer the split: imgdiff-capable archives are collected in the
          # enclosing scope's 'large_apks' list and handled separately.
          large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
          return

      # Everything else falls back to plain fixed-size chunking.
      AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                           src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001322
Tao Bao08c85832016-09-19 22:26:30 -07001323 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1324 split=False):
1325 """Wrapper function for adding a Transfer()."""
1326
1327 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1328 # otherwise add the Transfer() as is.
1329 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001330 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1331 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1332 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001333 return
1334
1335 # Handle .odex files specially to analyze the block-wise difference. If
1336 # most of the blocks are identical with only few changes (e.g. header),
1337 # we will patch the changed blocks only. This avoids stashing unchanged
1338 # blocks while patching. We limit the analysis to files without size
1339 # changes only. This is to avoid sacrificing the OTA generation cost too
1340 # much.
1341 if (tgt_name.split(".")[-1].lower() == 'odex' and
1342 tgt_ranges.size() == src_ranges.size()):
1343
1344 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1345 # few blocks remain identical, we lose the opportunity to use imgdiff
1346 # that may have better compression ratio than bsdiff.
1347 crop_threshold = 0.5
1348
1349 tgt_skipped = RangeSet()
1350 src_skipped = RangeSet()
1351 tgt_size = tgt_ranges.size()
1352 tgt_changed = 0
1353 for src_block, tgt_block in zip(src_ranges.next_item(),
1354 tgt_ranges.next_item()):
1355 src_rs = RangeSet(str(src_block))
1356 tgt_rs = RangeSet(str(tgt_block))
1357 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1358 tgt_skipped = tgt_skipped.union(tgt_rs)
1359 src_skipped = src_skipped.union(src_rs)
1360 else:
1361 tgt_changed += tgt_rs.size()
1362
1363 # Terminate early if no clear sign of benefits.
1364 if tgt_changed > tgt_size * crop_threshold:
1365 break
1366
1367 if tgt_changed < tgt_size * crop_threshold:
1368 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao508b0872018-02-09 13:44:43 -08001369 print('%10d %10d (%6.2f%%) %s' % (
1370 tgt_skipped.size(), tgt_size,
1371 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001372 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001373 "%s-skipped" % (tgt_name,),
1374 "%s-skipped" % (src_name,),
1375 tgt_skipped, src_skipped, style, by_id)
1376
1377 # Intentionally change the file extension to avoid being imgdiff'd as
1378 # the files are no longer in their original format.
1379 tgt_name = "%s-cropped" % (tgt_name,)
1380 src_name = "%s-cropped" % (src_name,)
1381 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1382 src_ranges = src_ranges.subtract(src_skipped)
1383
1384 # Possibly having no changed blocks.
1385 if not tgt_ranges:
1386 return
1387
1388 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001389 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001390 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1391
Tianjie Xu25366072017-09-08 17:19:02 -07001392 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1393 split_info):
1394 """Parse the split_info and return a list of info tuples.
1395
1396 Args:
1397 patch_size: total size of the patch file.
1398 tgt_ranges: Ranges of the target file within the original image.
1399 src_ranges: Ranges of the source file within the original image.
1400 split_info format:
1401 imgdiff version#
1402 count of pieces
1403 <patch_size_1> <tgt_size_1> <src_ranges_1>
1404 ...
1405 <patch_size_n> <tgt_size_n> <src_ranges_n>
1406
1407 Returns:
1408 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1409 """
1410
1411 version = int(split_info[0])
1412 assert version == 2
1413 count = int(split_info[1])
1414 assert len(split_info) - 2 == count
1415
1416 split_info_list = []
1417 patch_start = 0
1418 tgt_remain = copy.deepcopy(tgt_ranges)
1419 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1420 for line in split_info[2:]:
1421 info = line.split()
1422 assert len(info) == 3
1423 patch_length = int(info[0])
1424
1425 split_tgt_size = int(info[1])
1426 assert split_tgt_size % 4096 == 0
1427 assert split_tgt_size / 4096 <= tgt_remain.size()
1428 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1429 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1430
1431 # Find the split_src_ranges within the image file from its relative
1432 # position in file.
1433 split_src_indices = RangeSet.parse_raw(info[2])
1434 split_src_ranges = RangeSet()
1435 for r in split_src_indices:
1436 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1437 assert not split_src_ranges.overlaps(curr_range)
1438 split_src_ranges = split_src_ranges.union(curr_range)
1439
1440 split_info_list.append((patch_start, patch_length,
1441 split_tgt_ranges, split_src_ranges))
1442 patch_start += patch_length
1443
1444 # Check that the sizes of all the split pieces add up to the final file
1445 # size for patch and target.
1446 assert tgt_remain.size() == 0
1447 assert patch_start == patch_size
1448 return split_info_list
1449
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001450 def SplitLargeApks():
1451 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001452
1453 Example: Chrome.apk will be split into
1454 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1455 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1456 ...
1457
1458 After the split, the target pieces are continuous and block aligned; and
1459 the source pieces are mutually exclusive. During the split, we also
1460 generate and save the image patch between src-X & tgt-X. This patch will
1461 be valid because the block ranges of src-X & tgt-X will always stay the
1462 same afterwards; but there's a chance we don't use the patch if we
1463 convert the "diff" command into "new" or "move" later.
1464 """
1465
1466 while True:
1467 with transfer_lock:
1468 if not large_apks:
1469 return
1470 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1471
1472 src_file = common.MakeTempFile(prefix="src-")
1473 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001474 with open(src_file, "wb") as src_fd:
1475 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1476 with open(tgt_file, "wb") as tgt_fd:
1477 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001478
1479 patch_file = common.MakeTempFile(prefix="patch-")
1480 patch_info_file = common.MakeTempFile(prefix="split_info-")
1481 cmd = ["imgdiff", "-z",
1482 "--block-limit={}".format(max_blocks_per_transfer),
1483 "--split-info=" + patch_info_file,
1484 src_file, tgt_file, patch_file]
Tao Bao4ccea852018-02-06 15:16:41 -08001485 p = common.Run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
1486 imgdiff_output, _ = p.communicate()
1487 assert p.returncode == 0, \
1488 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1489 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001490
1491 with open(patch_info_file) as patch_info:
1492 lines = patch_info.readlines()
1493
1494 patch_size_total = os.path.getsize(patch_file)
1495 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1496 tgt_ranges, src_ranges,
1497 lines)
1498 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001499 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001500 with open(patch_file) as f:
1501 f.seek(patch_start)
1502 patch_content = f.read(patch_length)
1503
1504 split_src_name = "{}-{}".format(src_name, index)
1505 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001506 split_large_apks.append((split_tgt_name,
1507 split_src_name,
1508 split_tgt_ranges,
1509 split_src_ranges,
1510 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001511
Tao Bao08c85832016-09-19 22:26:30 -07001512 print("Finding transfers...")
1513
Tianjie Xu25366072017-09-08 17:19:02 -07001514 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001515 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001516 cache_size = common.OPTIONS.cache_size
1517 split_threshold = 0.125
1518 max_blocks_per_transfer = int(cache_size * split_threshold /
1519 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001520 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001521 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001522 if tgt_fn == "__ZERO":
1523 # the special "__ZERO" domain is all the blocks not contained
1524 # in any file and that are filled with zeros. We have a
1525 # special transfer style for zero blocks.
1526 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001527 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1528 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001529 continue
1530
Tao Baoff777812015-05-12 11:42:31 -07001531 elif tgt_fn == "__COPY":
1532 # "__COPY" domain includes all the blocks not contained in any
1533 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001534 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001535 continue
1536
Doug Zongkerfc44a512014-08-26 13:10:25 -07001537 elif tgt_fn in self.src.file_map:
1538 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001539 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001540 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001541 continue
1542
1543 b = os.path.basename(tgt_fn)
1544 if b in self.src_basenames:
1545 # Look for an exact basename match in the source.
1546 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001547 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001548 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001549 continue
1550
1551 b = re.sub("[0-9]+", "#", b)
1552 if b in self.src_numpatterns:
1553 # Look for a 'number pattern' match (a basename match after
1554 # all runs of digits are replaced by "#"). (This is useful
1555 # for .so files that contain version numbers in the filename
1556 # that get bumped.)
1557 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001558 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001559 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001560 continue
1561
Tao Bao9a5caf22015-08-25 15:10:10 -07001562 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001563
Tianjie Xu25366072017-09-08 17:19:02 -07001564 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001565 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001566 for _ in range(self.threads)]
1567 for th in threads:
1568 th.start()
1569 while threads:
1570 threads.pop().join()
1571
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001572 # Sort the split transfers for large apks to generate a determinate package.
1573 split_large_apks.sort()
1574 for (tgt_name, src_name, tgt_ranges, src_ranges,
1575 patch) in split_large_apks:
1576 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1577 self.tgt.RangeSha1(tgt_ranges),
1578 self.src.RangeSha1(src_ranges),
1579 "diff", self.transfers)
1580 transfer_split.patch = patch
1581
Doug Zongkerfc44a512014-08-26 13:10:25 -07001582 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001583 for k in self.src.file_map.keys():
1584 b = os.path.basename(k)
1585 self.src_basenames[b] = k
1586 b = re.sub("[0-9]+", "#", b)
1587 self.src_numpatterns[b] = k
1588
1589 @staticmethod
1590 def AssertPartition(total, seq):
1591 """Assert that all the RangeSets in 'seq' form a partition of the
1592 'total' RangeSet (ie, they are nonintersecting and their union
1593 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001594
Doug Zongkerfc44a512014-08-26 13:10:25 -07001595 so_far = RangeSet()
1596 for i in seq:
1597 assert not so_far.overlaps(i)
1598 so_far = so_far.union(i)
1599 assert so_far == total