blob: be5b32a5916c9e7de81c447a4fdecba91ba8b166 [file] [log] [blame]
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tianjie Xu25366072017-09-08 17:19:02 -070018import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Tao Bao3a2e3502016-12-28 09:14:39 -080024import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070025import re
26import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Tao Bao3a2e3502016-12-28 09:14:39 -080029from collections import deque, OrderedDict
30from hashlib import sha1
Tao Bao508b0872018-02-09 13:44:43 -080031
32import common
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070036__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Returns the patch data that transforms srcfile into tgtfile.

  Runs 'imgdiff -z' when imgdiff is True, otherwise 'bsdiff'. The tool
  writes the patch into a temp file, whose contents are returned.

  Args:
    srcfile: Path to the source file.
    tgtfile: Path to the target file.
    imgdiff: Whether to diff with imgdiff instead of bsdiff.

  Raises:
    ValueError: If the diff tool exits with a non-zero status; the tool's
        combined stdout/stderr is carried in the exception.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  diff_output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(diff_output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
class Image(object):
  """Abstract interface for the image objects consumed by BlockImageDiff."""

  def RangeSha1(self, ranges):
    """Returns the SHA-1 hex digest of the data in the given ranges."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns the data contained in the image blocks of the given ranges."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the SHA-1 hex digest over the whole image."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in the given ranges into the given file object."""
    raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data, so any range hashes to the digest of zero bytes.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Initializes a DataImage.

    Args:
      data: The string holding the image contents.
      trim: If True, drop a trailing partial block.
      pad: If True, zero-fill a trailing partial block.

    Raises:
      ValueError: If data is not a whole number of blocks and neither trim
          nor pad is requested.
    """
    self.data = data
    self.blocksize = 4096

    # trim and pad are mutually exclusive ways of handling a partial block.
    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int on Python 3 as well;
    # plain '/' would produce a float there and break RangeSet construction.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Store a RangeSet (not the raw list), consistent with EmptyImage and
    # with TotalSha1() below, which passes this to RangeSet.subtract().
    self.clobbered_blocks = (
        RangeSet(data=clobbered_blocks) if clobbered_blocks else RangeSet())
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify every block (except a padded last block) as all-zero or not.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Generates the data chunk for each (start, end) pair in ranges."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the SHA-1 hex digest of the data in the given ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the SHA-1 of the image, optionally including clobbered blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in the given ranges into the given file object."""
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """A single update command producing tgt_ranges from src_ranges."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register this transfer; its position in by_id doubles as its id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    # Only diff-style transfers may carry patch data; clearing is always ok.
    if patch:
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Returns the net number of blocks this transfer adds to the stash."""
    stored = sum(sr.size() for _, sr in self.stash_before)
    freed = sum(sr.size() for _, sr in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Rewrites this transfer as a full 'new' write with no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
232
233
@functools.total_ordering
class HeapItem(object):
  """Wraps a scored item so heapq's min-heap pops the highest score first."""

  def __init__(self, item):
    self.item = item
    # heapq pops the smallest element, so keep the negated score to get
    # max-score-first ordering.
    self.score = -item.score

  def clear(self):
    """Drops the wrapped item, marking this heap entry as dead."""
    self.item = None

  def __bool__(self):
    # An entry stays truthy as long as it still wraps an item.
    return self.item is not None

  # Python 2 consults __nonzero__ where Python 3 consults __bool__.
  __nonzero__ = __bool__

  # functools.total_ordering derives the remaining comparison operators.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
257
258
class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files that will be applied imgdiff while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats is only meaningful when imgdiff not being disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_TRIMMED = "Not used imgdiff due to trimmed RangeSet"
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_TRIMMED,
      SKIPPED_NONMONOTONIC,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason constant to the set of filenames logged under it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      print(header)
      print(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      if key not in self.stats:
        continue
      values = self.stats[key]
      section_header = ' {} (count: {}) '.format(key, len(values))
      print_header(section_header, '-')
      print(''.join([' {}\n'.format(name) for name in values]))
321
322
# BlockImageDiff works on two image objects. An image object is
# anything that provides the following attributes:
#
#    blocksize: the size in bytes of a block, currently must be 4096.
#
#    total_blocks: the total size of the partition/image, in blocks.
#
#    care_map: a RangeSet containing which blocks (in the range [0,
#      total_blocks) we actually care about; i.e. which blocks contain
#      data.
#
#    file_map: a dict that partitions the blocks contained in care_map
#      into smaller domains that are useful for doing diffs on.
#      (Typically a domain is a file, and the key in file_map is the
#      pathname.)
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS. They need to be excluded when
#      verifying the partition integrity.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet. The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    RangeSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the specified range.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (i.e., all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.

362class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700363 def __init__(self, tgt, src=None, threads=None, version=4,
364 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700365 if threads is None:
366 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700367 if threads == 0:
368 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700369 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700370 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700371 self.transfers = []
372 self.src_basenames = {}
373 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800374 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700375 self.touched_src_ranges = RangeSet()
376 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700377 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800378 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700379
Tao Bao8fad03e2017-03-01 14:36:26 -0800380 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700381
382 self.tgt = tgt
383 if src is None:
384 src = EmptyImage()
385 self.src = src
386
387 # The updater code that installs the patch always uses 4k blocks.
388 assert tgt.blocksize == 4096
389 assert src.blocksize == 4096
390
391 # The range sets in each filemap should comprise a partition of
392 # the care map.
393 self.AssertPartition(src.care_map, src.file_map.values())
394 self.AssertPartition(tgt.care_map, tgt.file_map.values())
395
Tao Baob4cfca52016-02-04 14:26:02 -0800396 @property
397 def max_stashed_size(self):
398 return self._max_stashed_size
399
Tao Baocb73aed2018-01-31 17:32:40 -0800400 @staticmethod
401 def FileTypeSupportedByImgdiff(filename):
402 """Returns whether the file type is supported by imgdiff."""
403 return filename.lower().endswith(('.apk', '.jar', '.zip'))
404
Tao Bao294651d2018-02-08 23:21:52 -0800405 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800406 """Checks whether we can apply imgdiff for the given RangeSets.
407
408 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
409 'imgdiff -z' if possible. Because it usually produces significantly smaller
410 patches than bsdiff.
411
412 This is permissible if all of the following conditions hold.
413 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
414 - The file type is supported by imgdiff;
415 - The source and target blocks are monotonic (i.e. the data is stored with
416 blocks in increasing order);
Tao Bao4ccea852018-02-06 15:16:41 -0800417 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800418 - We haven't removed any blocks from the source set.
419
420 If all these conditions are satisfied, concatenating all the blocks in the
421 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
422 in the last block). imgdiff is fine with extra zeros at the end of the file.
423
424 Args:
425 name: The filename to be diff'd.
426 tgt_ranges: The target RangeSet.
427 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800428 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800429
430 Returns:
431 A boolean result.
432 """
Tao Bao508b0872018-02-09 13:44:43 -0800433 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800434 return False
435
436 if not tgt_ranges.monotonic or not src_ranges.monotonic:
437 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
438 return False
439
Tao Bao4ccea852018-02-06 15:16:41 -0800440 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
441 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
442 return False
443
Tao Bao294651d2018-02-08 23:21:52 -0800444 if tgt_ranges.extra.get('trimmed') or src_ranges.extra.get('trimmed'):
445 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_TRIMMED)
446 return False
447
448 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
449 else ImgdiffStats.USED_IMGDIFF)
450 self.imgdiff_stats.Log(name, reason)
451 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800452
  def Compute(self, prefix):
    """Runs the full diff pipeline and writes the output files.

    Builds the transfer list, orders it, bounds the stash usage, verifies
    the result, then computes the patches and writes the transfer list to
    '<prefix>.transfer.list'.

    Args:
      prefix: The path prefix for the output files.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if common.OPTIONS.verbose and not self.disable_imgdiff:
      self.imgdiff_stats.Report()
489
  def WriteTransfers(self, prefix):
    """Serializes self.transfers into the '<prefix>.transfer.list' file.

    Emits the BBOTA v3/v4 command stream (stash/free/move/bsdiff/imgdiff/
    new/zero/erase), tracking stash usage as it goes. Also records the peak
    stash usage in self._max_stashed_size and the SHA-1 of all touched
    source blocks in self.touched_src_sha1.

    Args:
      prefix: The path prefix for the output transfer list file.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit a stash command for each new stash this transfer requires;
      # identical contents (same hash) just bump the ref count.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # The source string of a command takes one of these forms:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        # Free the stash slot once its last reference has been consumed.
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op and emits nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    # NOTE(review): opened in binary mode while writing str entries; this is
    # fine on Python 2 but would need encoding on Python 3 — confirm target
    # interpreter before changing.
    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700696
  def ReviseStashSize(self):
    """Rewrites commands so the simulated stash never exceeds the cache budget.

    Walks the transfer list in execution order, tracking on the fly how many
    blocks would be stashed: explicit stashes from xf.stash_before, plus the
    implicit ones introduced by self-overlapping "diff" commands. Whenever
    stashing a range would push the total past the /cache-derived limit, the
    command that would consume that stash is converted to a "new" command
    instead (and the corresponding stash definition is dropped).

    Returns:
      The total number of target blocks that were converted to "new" commands.
    """
    print("Revising stash size...")
    # stash_map: stash_raw_id -> (sr, def_cmd) initially, extended below to
    # (sr, def_cmd, use_cmd) once the consumer is seen.
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    # Maps the SHA-1 of a stashed range to its reference count; identical
    # ranges share one stash slot, so only the first occurrence costs blocks.
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          # A new (not deduplicated) stash actually consumes space.
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference gone: the slot is freed and its blocks reclaimed.
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700788
  def ComputePatches(self, prefix):
    """Computes the patch data for all "diff" transfers.

    Writes the raw data for "new" transfers to '<prefix>.new.dat', and the
    concatenated bsdiff/imgdiff patches to '<prefix>.patch.dat'. "diff"
    transfers with identical src/tgt SHA-1s become "move" commands (no patch
    needed); the others are diffed on a pool of worker threads and get their
    xf.style ("bsdiff"/"imgdiff"), xf.patch_start and xf.patch_len filled in.
    Exits the process (sys.exit(1)) if any patch generation fails.
    """
    print("Reticulating splines...")
    # Work items of (transfer index, use imgdiff?, patch slot index).
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
              str(xf.tgt_ranges)))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style,
              xf.tgt_name, str(xf.tgt_ranges)))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          # ["he", "llo"] vs ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
                  tgt_size, tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges)))
          else:
            if xf.patch:
              # We have already generated the patch with imgdiff. Check if the
              # transfer is intact.
              assert not self.disable_imgdiff
              imgdiff = True
              # A trimmed range no longer matches the pre-generated patch;
              # fall back to bsdiff and discard the stale patch.
              if (xf.src_ranges.extra.get('trimmed') or
                  xf.tgt_ranges.extra.get('trimmed')):
                imgdiff = False
                xf.patch = None
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_queue:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")

      diff_total = len(diff_queue)
      # Fixed-size result slots so workers can fill them out of order.
      patches = [None] * diff_total
      error_messages = []

      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call, which
      # already runs in a separate process (not affected much by the GIL -
      # Global Interpreter Lock). Using multiprocess also requires either a)
      # writing the diff input files in the main process before forking, or b)
      # reopening the image file (SparseImage) in the worker processes. Doing
      # neither of them further improves the performance.
      lock = threading.Lock()
      def diff_worker():
        # Each worker repeatedly pulls one item off diff_queue (under the
        # lock) and diffs it until the queue is empty.
        while True:
          with lock:
            if not diff_queue:
              return
            xf_index, imgdiff, patch_index = diff_queue.pop()
            xf = self.transfers[xf_index]

            if sys.stdout.isatty():
              diff_left = len(diff_queue)
              progress = (diff_total - diff_left) * 100 / diff_total
              # '\033[K' is to clear to EOL.
              print(' [%3d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
              sys.stdout.flush()

          patch = xf.patch
          if not patch:
            # No pre-generated patch; dump src/tgt ranges to temp files and
            # run the external differ.
            src_ranges = xf.src_ranges
            tgt_ranges = xf.tgt_ranges

            src_file = common.MakeTempFile(prefix="src-")
            with open(src_file, "wb") as fd:
              self.src.WriteRangeDataToFd(src_ranges, fd)

            tgt_file = common.MakeTempFile(prefix="tgt-")
            with open(tgt_file, "wb") as fd:
              self.tgt.WriteRangeDataToFd(tgt_ranges, fd)

            message = []
            try:
              patch = compute_patch(src_file, tgt_file, imgdiff)
            except ValueError as e:
              message.append(
                  "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                      "imgdiff" if imgdiff else "bsdiff",
                      xf.tgt_name if xf.tgt_name == xf.src_name else
                      xf.tgt_name + " (from " + xf.src_name + ")",
                      xf.tgt_ranges, xf.src_ranges, e.message))
            if message:
              with lock:
                error_messages.extend(message)

          with lock:
            patches[patch_index] = (xf_index, patch)

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()

      if sys.stdout.isatty():
        print('\n')

      if error_messages:
        print('ERROR:')
        print('\n'.join(error_messages))
        print('\n\n\n')
        sys.exit(1)
    else:
      patches = []

    # Concatenate all patches into one file and record each transfer's
    # offset/length into that file.
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch)

        if common.OPTIONS.verbose:
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
              xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
              xf.style,
              xf.tgt_name if xf.tgt_name == xf.src_name else (
                  xf.tgt_name + " (from " + xf.src_name + ")"),
              xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700947
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800948 def AssertSha1Good(self):
949 """Check the SHA-1 of the src & tgt blocks in the transfer list.
950
951 Double check the SHA-1 value to avoid the issue in b/71908713, where
952 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
953 environment. That specific problem has been fixed by protecting the
954 underlying generator function 'SparseImage._GetRangeData()' with lock.
955 """
956 for xf in self.transfers:
957 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
958 assert xf.tgt_sha1 == tgt_sha1
959 if xf.style == "diff":
960 src_sha1 = self.src.RangeSha1(xf.src_ranges)
961 assert xf.src_sha1 == src_sha1
962
Doug Zongkerfc44a512014-08-26 13:10:25 -0700963 def AssertSequenceGood(self):
964 # Simulate the sequences of transfers we will output, and check that:
965 # - we never read a block after writing it, and
966 # - we write every block we care about exactly once.
967
968 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800969 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700970
971 # Imagine processing the transfers in order.
972 for xf in self.transfers:
973 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700974
975 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800976 for _, sr in xf.use_stash:
977 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700978
Doug Zongker6ab2a502016-02-09 08:28:09 -0800979 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800980 # Source image could be larger. Don't check the blocks that are in the
981 # source image only. Since they are not in 'touched', and won't ever
982 # be touched.
983 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800984 assert touched[i] == 0
985
986 # Check that the output blocks for this transfer haven't yet
987 # been touched, and touch all the blocks written by this
988 # transfer.
989 for s, e in xf.tgt_ranges:
990 for i in range(s, e):
991 assert touched[i] == 0
992 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700993
994 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800995 for s, e in self.tgt.care_map:
996 for i in range(s, e):
997 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700998
Doug Zongker62338182014-09-08 08:29:55 -0700999 def ImproveVertexSequence(self):
1000 print("Improving vertex order...")
1001
1002 # At this point our digraph is acyclic; we reversed any edges that
1003 # were backwards in the heuristically-generated sequence. The
1004 # previously-generated order is still acceptable, but we hope to
1005 # find a better order that needs less memory for stashed data.
1006 # Now we do a topological sort to generate a new vertex order,
1007 # using a greedy algorithm to choose which vertex goes next
1008 # whenever we have a choice.
1009
1010 # Make a copy of the edge set; this copy will get destroyed by the
1011 # algorithm.
1012 for xf in self.transfers:
1013 xf.incoming = xf.goes_after.copy()
1014 xf.outgoing = xf.goes_before.copy()
1015
1016 L = [] # the new vertex order
1017
1018 # S is the set of sources in the remaining graph; we always choose
1019 # the one that leaves the least amount of stashed data after it's
1020 # executed.
1021 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1022 if not u.incoming]
1023 heapq.heapify(S)
1024
1025 while S:
1026 _, _, xf = heapq.heappop(S)
1027 L.append(xf)
1028 for u in xf.outgoing:
1029 del u.incoming[xf]
1030 if not u.incoming:
1031 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1032
1033 # if this fails then our graph had a cycle.
1034 assert len(L) == len(self.transfers)
1035
1036 self.transfers = L
1037 for i, xf in enumerate(L):
1038 xf.order = i
1039
Doug Zongkerfc44a512014-08-26 13:10:25 -07001040 def RemoveBackwardEdges(self):
1041 print("Removing backward edges...")
1042 in_order = 0
1043 out_of_order = 0
1044 lost_source = 0
1045
1046 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001047 lost = 0
1048 size = xf.src_ranges.size()
1049 for u in xf.goes_before:
1050 # xf should go before u
1051 if xf.order < u.order:
1052 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -07001053 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001054 else:
1055 # it doesn't, boo. trim the blocks that u writes from xf's
1056 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -07001057 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001058 assert xf.src_ranges.overlaps(u.tgt_ranges)
1059 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
Tao Baocb73aed2018-01-31 17:32:40 -08001060 xf.src_ranges.extra['trimmed'] = True
Doug Zongkerfc44a512014-08-26 13:10:25 -07001061
1062 if xf.style == "diff" and not xf.src_ranges:
1063 # nothing left to diff from; treat as new data
1064 xf.style = "new"
1065
1066 lost = size - xf.src_ranges.size()
1067 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -07001068
1069 print((" %d/%d dependencies (%.2f%%) were violated; "
1070 "%d source blocks removed.") %
1071 (out_of_order, in_order + out_of_order,
1072 (out_of_order * 100.0 / (in_order + out_of_order))
1073 if (in_order + out_of_order) else 0.0,
1074 lost_source))
1075
Doug Zongker62338182014-09-08 08:29:55 -07001076 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001077 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1078
1079 For each transfer, make sure it properly stashes the blocks it touches and
1080 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1081 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1082 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1083 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1084 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1085 blocks will be written to the same stash slot in WriteTransfers().
1086 """
1087
Doug Zongker62338182014-09-08 08:29:55 -07001088 print("Reversing backward edges...")
1089 in_order = 0
1090 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001091 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001092 stash_size = 0
1093
1094 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001095 for u in xf.goes_before.copy():
1096 # xf should go before u
1097 if xf.order < u.order:
1098 # it does, hurray!
1099 in_order += 1
1100 else:
1101 # it doesn't, boo. modify u to stash the blocks that it
1102 # writes that xf wants to read, and then require u to go
1103 # before xf.
1104 out_of_order += 1
1105
1106 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1107 assert overlap
1108
Tao Bao3a2e3502016-12-28 09:14:39 -08001109 u.stash_before.append((stash_raw_id, overlap))
1110 xf.use_stash.append((stash_raw_id, overlap))
1111 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001112 stash_size += overlap.size()
1113
1114 # reverse the edge direction; now xf must go after u
1115 del xf.goes_before[u]
1116 del u.goes_after[xf]
1117 xf.goes_after[u] = None # value doesn't matter
1118 u.goes_before[xf] = None
1119
1120 print((" %d/%d dependencies (%.2f%%) were violated; "
1121 "%d source blocks stashed.") %
1122 (out_of_order, in_order + out_of_order,
1123 (out_of_order * 100.0 / (in_order + out_of_order))
1124 if (in_order + out_of_order) else 0.0,
1125 stash_size))
1126
  def FindVertexSequence(self):
    """Chooses a transfer order that minimizes lost source blocks.

    Implements a weighted variant of the Eades/Lin/Smyth feedback-arc-set
    heuristic (see the comment below) to lay the transfers on a line so the
    total weight of left-pointing edges — i.e. source blocks that would later
    have to be dropped or stashed — is small. On return, self.transfers is
    reordered and each transfer's 'order' field is set. Remaining violated
    edges are handled separately (RemoveBackwardEdges/ReverseBackwardEdges).
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = (weight saved by treating xf as a source) minus (weight
      # saved by treating it as a sink); higher is better for s1.
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    # Max-heap of HeapItem wrappers (HeapItem is defined elsewhere in this
    # file); stale entries are invalidated with clear() and skipped on pop
    # rather than removed, which keeps pushes cheap.
    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Scores are updated by invalidating the old heap entry and pushing a
      # fresh one (lazy deletion), since heapq can't re-key in place.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      while True:
        u = heapq.heappop(heap)
        # Cleared HeapItems evaluate falsy; skip them and any vertex that
        # has already been removed from G.
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1239
1240 def GenerateDigraph(self):
1241 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001242
1243 # Each item of source_ranges will be:
1244 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001245 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001246 source_ranges = []
1247 for b in self.transfers:
1248 for s, e in b.src_ranges:
1249 if e > len(source_ranges):
1250 source_ranges.extend([None] * (e-len(source_ranges)))
1251 for i in range(s, e):
1252 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001253 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001254 else:
Tao Bao33482282016-10-24 16:49:08 -07001255 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001256
Doug Zongkerfc44a512014-08-26 13:10:25 -07001257 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001258 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001259 for s, e in a.tgt_ranges:
1260 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001261 if i >= len(source_ranges):
1262 break
Tao Bao33482282016-10-24 16:49:08 -07001263 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1264 if source_ranges[i] is not None:
1265 for j in source_ranges[i]:
1266 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001267
1268 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001269 if a is b:
1270 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001271
1272 # If the blocks written by A are read by B, then B needs to go before A.
1273 i = a.tgt_ranges.intersect(b.src_ranges)
1274 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001275 if b.src_name == "__ZERO":
1276 # the cost of removing source blocks for the __ZERO domain
1277 # is (nearly) zero.
1278 size = 0
1279 else:
1280 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001281 b.goes_before[a] = size
1282 a.goes_after[b] = size
1283
1284 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001285 """Parse the file_map to generate all the transfers."""
1286
Tianjie Xu41390d42017-11-22 11:35:18 -08001287 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1288 src_ranges, style, by_id):
1289 """Add one or multiple Transfer()s by splitting large files.
1290
1291 For BBOTA v3, we need to stash source blocks for resumable feature.
1292 However, with the growth of file size and the shrink of the cache
1293 partition source blocks are too large to be stashed. If a file occupies
1294 too many blocks, we split it into smaller pieces by getting multiple
1295 Transfer()s.
1296
1297 The downside is that after splitting, we may increase the package size
1298 since the split pieces don't align well. According to our experiments,
1299 1/8 of the cache size as the per-piece limit appears to be optimal.
1300 Compared to the fixed 1024-block limit, it reduces the overall package
1301 size by 30% for volantis, and 20% for angler and bullhead."""
1302
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001303 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001304 while (tgt_ranges.size() > max_blocks_per_transfer and
1305 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001306 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1307 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001308 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1309 src_first = src_ranges.first(max_blocks_per_transfer)
1310
Tao Bao183e56e2017-03-05 17:05:09 -08001311 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1312 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1313 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001314
1315 tgt_ranges = tgt_ranges.subtract(tgt_first)
1316 src_ranges = src_ranges.subtract(src_first)
1317 pieces += 1
1318
1319 # Handle remaining blocks.
1320 if tgt_ranges.size() or src_ranges.size():
1321 # Must be both non-empty.
1322 assert tgt_ranges.size() and src_ranges.size()
1323 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1324 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001325 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1326 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1327 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001328
Tianjie Xu41390d42017-11-22 11:35:18 -08001329 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1330 by_id):
1331 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001332
Tianjie Xu41390d42017-11-22 11:35:18 -08001333 This function will construct a list of zip archives, which will later be
1334 split by imgdiff to reduce the final patch size. For the other files,
1335 we will plainly split them based on a fixed chunk size with the potential
1336 patch size penalty.
1337 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001338
1339 assert style == "diff"
1340
1341 # Change nothing for small files.
1342 if (tgt_ranges.size() <= max_blocks_per_transfer and
1343 src_ranges.size() <= max_blocks_per_transfer):
1344 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1345 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1346 style, by_id)
1347 return
1348
Tao Baocb73aed2018-01-31 17:32:40 -08001349 # Split large APKs with imgdiff, if possible. We're intentionally checking
1350 # file types one more time (CanUseImgdiff() checks that as well), before
1351 # calling the costly RangeSha1()s.
1352 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1353 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001354 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001355 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1356 return
1357
Tianjie Xu41390d42017-11-22 11:35:18 -08001358 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1359 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001360
Tao Bao08c85832016-09-19 22:26:30 -07001361 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1362 split=False):
1363 """Wrapper function for adding a Transfer()."""
1364
1365 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1366 # otherwise add the Transfer() as is.
1367 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001368 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1369 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1370 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001371 return
1372
1373 # Handle .odex files specially to analyze the block-wise difference. If
1374 # most of the blocks are identical with only few changes (e.g. header),
1375 # we will patch the changed blocks only. This avoids stashing unchanged
1376 # blocks while patching. We limit the analysis to files without size
1377 # changes only. This is to avoid sacrificing the OTA generation cost too
1378 # much.
1379 if (tgt_name.split(".")[-1].lower() == 'odex' and
1380 tgt_ranges.size() == src_ranges.size()):
1381
1382 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1383 # few blocks remain identical, we lose the opportunity to use imgdiff
1384 # that may have better compression ratio than bsdiff.
1385 crop_threshold = 0.5
1386
1387 tgt_skipped = RangeSet()
1388 src_skipped = RangeSet()
1389 tgt_size = tgt_ranges.size()
1390 tgt_changed = 0
1391 for src_block, tgt_block in zip(src_ranges.next_item(),
1392 tgt_ranges.next_item()):
1393 src_rs = RangeSet(str(src_block))
1394 tgt_rs = RangeSet(str(tgt_block))
1395 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1396 tgt_skipped = tgt_skipped.union(tgt_rs)
1397 src_skipped = src_skipped.union(src_rs)
1398 else:
1399 tgt_changed += tgt_rs.size()
1400
1401 # Terminate early if no clear sign of benefits.
1402 if tgt_changed > tgt_size * crop_threshold:
1403 break
1404
1405 if tgt_changed < tgt_size * crop_threshold:
1406 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao508b0872018-02-09 13:44:43 -08001407 print('%10d %10d (%6.2f%%) %s' % (
1408 tgt_skipped.size(), tgt_size,
1409 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001410 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001411 "%s-skipped" % (tgt_name,),
1412 "%s-skipped" % (src_name,),
1413 tgt_skipped, src_skipped, style, by_id)
1414
1415 # Intentionally change the file extension to avoid being imgdiff'd as
1416 # the files are no longer in their original format.
1417 tgt_name = "%s-cropped" % (tgt_name,)
1418 src_name = "%s-cropped" % (src_name,)
1419 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1420 src_ranges = src_ranges.subtract(src_skipped)
1421
1422 # Possibly having no changed blocks.
1423 if not tgt_ranges:
1424 return
1425
1426 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001427 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001428 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1429
Tianjie Xu25366072017-09-08 17:19:02 -07001430 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1431 split_info):
1432 """Parse the split_info and return a list of info tuples.
1433
1434 Args:
1435 patch_size: total size of the patch file.
1436 tgt_ranges: Ranges of the target file within the original image.
1437 src_ranges: Ranges of the source file within the original image.
1438 split_info format:
1439 imgdiff version#
1440 count of pieces
1441 <patch_size_1> <tgt_size_1> <src_ranges_1>
1442 ...
1443 <patch_size_n> <tgt_size_n> <src_ranges_n>
1444
1445 Returns:
1446 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1447 """
1448
1449 version = int(split_info[0])
1450 assert version == 2
1451 count = int(split_info[1])
1452 assert len(split_info) - 2 == count
1453
1454 split_info_list = []
1455 patch_start = 0
1456 tgt_remain = copy.deepcopy(tgt_ranges)
1457 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1458 for line in split_info[2:]:
1459 info = line.split()
1460 assert len(info) == 3
1461 patch_length = int(info[0])
1462
1463 split_tgt_size = int(info[1])
1464 assert split_tgt_size % 4096 == 0
1465 assert split_tgt_size / 4096 <= tgt_remain.size()
1466 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1467 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1468
1469 # Find the split_src_ranges within the image file from its relative
1470 # position in file.
1471 split_src_indices = RangeSet.parse_raw(info[2])
1472 split_src_ranges = RangeSet()
1473 for r in split_src_indices:
1474 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1475 assert not split_src_ranges.overlaps(curr_range)
1476 split_src_ranges = split_src_ranges.union(curr_range)
1477
1478 split_info_list.append((patch_start, patch_length,
1479 split_tgt_ranges, split_src_ranges))
1480 patch_start += patch_length
1481
1482 # Check that the sizes of all the split pieces add up to the final file
1483 # size for patch and target.
1484 assert tgt_remain.size() == 0
1485 assert patch_start == patch_size
1486 return split_info_list
1487
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001488 def SplitLargeApks():
1489 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001490
1491 Example: Chrome.apk will be split into
1492 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1493 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1494 ...
1495
1496 After the split, the target pieces are continuous and block aligned; and
1497 the source pieces are mutually exclusive. During the split, we also
1498 generate and save the image patch between src-X & tgt-X. This patch will
1499 be valid because the block ranges of src-X & tgt-X will always stay the
1500 same afterwards; but there's a chance we don't use the patch if we
1501 convert the "diff" command into "new" or "move" later.
1502 """
1503
1504 while True:
1505 with transfer_lock:
1506 if not large_apks:
1507 return
1508 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1509
1510 src_file = common.MakeTempFile(prefix="src-")
1511 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001512 with open(src_file, "wb") as src_fd:
1513 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1514 with open(tgt_file, "wb") as tgt_fd:
1515 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001516
1517 patch_file = common.MakeTempFile(prefix="patch-")
1518 patch_info_file = common.MakeTempFile(prefix="split_info-")
1519 cmd = ["imgdiff", "-z",
1520 "--block-limit={}".format(max_blocks_per_transfer),
1521 "--split-info=" + patch_info_file,
1522 src_file, tgt_file, patch_file]
Tao Bao4ccea852018-02-06 15:16:41 -08001523 p = common.Run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
1524 imgdiff_output, _ = p.communicate()
1525 assert p.returncode == 0, \
1526 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1527 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001528
1529 with open(patch_info_file) as patch_info:
1530 lines = patch_info.readlines()
1531
1532 patch_size_total = os.path.getsize(patch_file)
1533 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1534 tgt_ranges, src_ranges,
1535 lines)
1536 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001537 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001538 with open(patch_file) as f:
1539 f.seek(patch_start)
1540 patch_content = f.read(patch_length)
1541
1542 split_src_name = "{}-{}".format(src_name, index)
1543 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001544 split_large_apks.append((split_tgt_name,
1545 split_src_name,
1546 split_tgt_ranges,
1547 split_src_ranges,
1548 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001549
Tao Bao08c85832016-09-19 22:26:30 -07001550 print("Finding transfers...")
1551
Tianjie Xu25366072017-09-08 17:19:02 -07001552 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001553 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001554 cache_size = common.OPTIONS.cache_size
1555 split_threshold = 0.125
1556 max_blocks_per_transfer = int(cache_size * split_threshold /
1557 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001558 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001559 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001560 if tgt_fn == "__ZERO":
1561 # the special "__ZERO" domain is all the blocks not contained
1562 # in any file and that are filled with zeros. We have a
1563 # special transfer style for zero blocks.
1564 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001565 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1566 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001567 continue
1568
Tao Baoff777812015-05-12 11:42:31 -07001569 elif tgt_fn == "__COPY":
1570 # "__COPY" domain includes all the blocks not contained in any
1571 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001572 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001573 continue
1574
Doug Zongkerfc44a512014-08-26 13:10:25 -07001575 elif tgt_fn in self.src.file_map:
1576 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001577 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001578 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001579 continue
1580
1581 b = os.path.basename(tgt_fn)
1582 if b in self.src_basenames:
1583 # Look for an exact basename match in the source.
1584 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001585 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001586 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001587 continue
1588
1589 b = re.sub("[0-9]+", "#", b)
1590 if b in self.src_numpatterns:
1591 # Look for a 'number pattern' match (a basename match after
1592 # all runs of digits are replaced by "#"). (This is useful
1593 # for .so files that contain version numbers in the filename
1594 # that get bumped.)
1595 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001596 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001597 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001598 continue
1599
Tao Bao9a5caf22015-08-25 15:10:10 -07001600 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001601
Tianjie Xu25366072017-09-08 17:19:02 -07001602 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001603 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001604 for _ in range(self.threads)]
1605 for th in threads:
1606 th.start()
1607 while threads:
1608 threads.pop().join()
1609
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001610 # Sort the split transfers for large apks to generate a determinate package.
1611 split_large_apks.sort()
1612 for (tgt_name, src_name, tgt_ranges, src_ranges,
1613 patch) in split_large_apks:
1614 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1615 self.tgt.RangeSha1(tgt_ranges),
1616 self.src.RangeSha1(src_ranges),
1617 "diff", self.transfers)
1618 transfer_split.patch = patch
1619
Doug Zongkerfc44a512014-08-26 13:10:25 -07001620 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001621 for k in self.src.file_map.keys():
1622 b = os.path.basename(k)
1623 self.src_basenames[b] = k
1624 b = re.sub("[0-9]+", "#", b)
1625 self.src_numpatterns[b] = k
1626
1627 @staticmethod
1628 def AssertPartition(total, seq):
1629 """Assert that all the RangeSets in 'seq' form a partition of the
1630 'total' RangeSet (ie, they are nonintersecting and their union
1631 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001632
Doug Zongkerfc44a512014-08-26 13:10:25 -07001633 so_far = RangeSet()
1634 for i in seq:
1635 assert not so_far.overlaps(i)
1636 so_far = so_far.union(i)
1637 assert so_far == total