blob: f1cb6dba88833fdeff0fa7bf481b94dac8e9499f [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Tianjie Xu25366072017-09-08 17:19:02 -070019import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080020import functools
Doug Zongker62338182014-09-08 08:29:55 -070021import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070022import itertools
23import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080028import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070029import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070030
Tao Bao3a2e3502016-12-28 09:14:39 -080031from collections import deque, OrderedDict
32from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070036__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Generates a binary patch that transforms srcfile into tgtfile.

  Invokes 'imgdiff -z' when imgdiff is True, otherwise 'bsdiff', writing
  the patch to a temp file and returning its raw contents.

  Raises:
    ValueError: with the tool's combined stdout/stderr if it exits non-zero.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z']
  else:
    cmd = ['bsdiff']
  cmd += [srcfile, tgtfile, patchfile]

  # Suppress command logging here: the arguments are only throwaway temp
  # filenames, so dumping them adds nothing useful to the output.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
58class Image(object):
Tao Bao183e56e2017-03-05 17:05:09 -080059 def RangeSha1(self, ranges):
60 raise NotImplementedError
61
Dan Albert8b72aef2015-03-23 19:13:21 -070062 def ReadRangeSet(self, ranges):
63 raise NotImplementedError
64
Tao Bao68658c02015-06-01 13:40:49 -070065 def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert8b72aef2015-03-23 19:13:21 -070066 raise NotImplementedError
67
Tao Bao183e56e2017-03-05 17:05:09 -080068 def WriteRangeDataToFd(self, ranges, fd):
69 raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """An Image implementation representing a zero-length partition."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data anywhere, so every range hashes like empty input.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # clobbered_blocks is always empty for EmptyImage, so the flag cannot
    # change the result either way.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wraps 'data' as an image of 4096-byte blocks.

    Args:
      data: the raw partition contents.
      trim: drop a trailing partial block, if any.
      pad: zero-fill a trailing partial block, if any.

    Raises:
      ValueError: if len(data) is not block-aligned and neither trim nor
          pad was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Fix: use floor division. The original "/" is floor division on
    # Python 2 but true (float) division on Python 3, which would make
    # total_blocks a float and break RangeSet/range() below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block (excluding a padded last block, handled above) as
    # all-zero or not; the lists hold [start, end) pairs for RangeSet.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    # Yields the raw data for each [start, end) block pair in 'ranges'.
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data in the given ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    # A single-element list holding a generator; callers concatenate the
    # yielded chunks to reconstruct the data.
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 over the image, minus clobbered blocks unless
    include_clobbered_blocks is True."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in the given ranges to the file object fd."""
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """One update command: produce tgt_ranges from src_ranges via 'style'."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # OrderedDict keeps the generated output repeatable; with a plain dict
    # the ordering would depend on the hash values of Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register in the shared id list; our id is our index in it.
    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    # Only "diff"-style transfers may carry patch data; clearing (falsy
    # values) is always permitted.
    if patch:
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Returns the net number of blocks this transfer adds to the stash."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    consumed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - consumed

  def ConvertToNew(self):
    """Degrades this transfer to a plain "new" command using no source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
232
233
@functools.total_ordering
class HeapItem(object):
  """Comparison wrapper so heapq's min-heap behaves as a max-heap."""

  def __init__(self, item):
    self.item = item
    # heapq pops the smallest element, so store the negated score to make
    # the highest-scoring item come out first.
    self.score = -item.score

  def clear(self):
    """Drops the payload; a cleared item evaluates as falsy."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 truthiness hook; Python 3 uses __bool__ above.
  __nonzero__ = __bool__

  # functools.total_ordering derives the remaining comparisons from
  # __eq__ plus __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
257
258
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# BlockImageDiff works on two image objects. An image object is
260# anything that provides the following attributes:
261#
262# blocksize: the size in bytes of a block, currently must be 4096.
263#
264# total_blocks: the total size of the partition/image, in blocks.
265#
266# care_map: a RangeSet containing which blocks (in the range [0,
267# total_blocks) we actually care about; i.e. which blocks contain
268# data.
269#
270# file_map: a dict that partitions the blocks contained in care_map
271# into smaller domains that are useful for doing diffs on.
272# (Typically a domain is a file, and the key in file_map is the
273# pathname.)
274#
Tao Baoff777812015-05-12 11:42:31 -0700275# clobbered_blocks: a RangeSet containing which blocks contain data
276# but may be altered by the FS. They need to be excluded when
277# verifying the partition integrity.
278#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700279# ReadRangeSet(): a function that takes a RangeSet and returns the
280# data contained in the image blocks of that RangeSet. The data
281# is returned as a list or tuple of strings; concatenating the
282# elements together should produce the requested data.
283# Implementations are free to break up the data into list/tuple
284# elements in any way that is convenient.
285#
Tao Bao183e56e2017-03-05 17:05:09 -0800286# RangeSha1(): a function that returns (as a hex string) the SHA-1
287# hash of all the data in the specified range.
288#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700289# TotalSha1(): a function that returns (as a hex string) the SHA-1
290# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700291# care_map minus clobbered_blocks, or including the clobbered
292# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700293#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700294# When creating a BlockImageDiff, the src image may be None, in which
295# case the list of transfers produced will never read from the
296# original image.
297
298class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700299 def __init__(self, tgt, src=None, threads=None, version=4,
300 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700301 if threads is None:
302 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700303 if threads == 0:
304 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700305 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700306 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700307 self.transfers = []
308 self.src_basenames = {}
309 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800310 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700311 self.touched_src_ranges = RangeSet()
312 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700313 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700314
Tao Bao8fad03e2017-03-01 14:36:26 -0800315 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700316
317 self.tgt = tgt
318 if src is None:
319 src = EmptyImage()
320 self.src = src
321
322 # The updater code that installs the patch always uses 4k blocks.
323 assert tgt.blocksize == 4096
324 assert src.blocksize == 4096
325
326 # The range sets in each filemap should comprise a partition of
327 # the care map.
328 self.AssertPartition(src.care_map, src.file_map.values())
329 self.AssertPartition(tgt.care_map, tgt.file_map.values())
330
  @property
  def max_stashed_size(self):
    # Read-only peak stash usage in bytes; set by WriteTransfers(), and 0
    # until that has run.
    return self._max_stashed_size
334
Tao Baocb73aed2018-01-31 17:32:40 -0800335 @staticmethod
336 def FileTypeSupportedByImgdiff(filename):
337 """Returns whether the file type is supported by imgdiff."""
338 return filename.lower().endswith(('.apk', '.jar', '.zip'))
339
340 def CanUseImgdiff(self, name, tgt_ranges, src_ranges):
341 """Checks whether we can apply imgdiff for the given RangeSets.
342
343 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
344 'imgdiff -z' if possible. Because it usually produces significantly smaller
345 patches than bsdiff.
346
347 This is permissible if all of the following conditions hold.
348 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
349 - The file type is supported by imgdiff;
350 - The source and target blocks are monotonic (i.e. the data is stored with
351 blocks in increasing order);
352 - We haven't removed any blocks from the source set.
353
354 If all these conditions are satisfied, concatenating all the blocks in the
355 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
356 in the last block). imgdiff is fine with extra zeros at the end of the file.
357
358 Args:
359 name: The filename to be diff'd.
360 tgt_ranges: The target RangeSet.
361 src_ranges: The source RangeSet.
362
363 Returns:
364 A boolean result.
365 """
366 return (
367 not self.disable_imgdiff and
368 self.FileTypeSupportedByImgdiff(name) and
369 tgt_ranges.monotonic and
370 src_ranges.monotonic and
371 not tgt_ranges.extra.get('trimmed') and
372 not src_ranges.extra.get('trimmed'))
373
  def Compute(self, prefix):
    """Runs the whole diff pipeline, writing its outputs under 'prefix'.

    Pure orchestration: each step depends on the results of the one before
    it, so the order below must not change.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
  def WriteTransfers(self, prefix):
    """Serializes the computed transfers to '<prefix>.transfer.list'.

    Walks self.transfers in their final order, emitting the BBOTA v3/v4
    command stream (stash/free/move/bsdiff/imgdiff/new/zero/erase) while
    tracking stash usage as the updater would see it. Side effects: updates
    self.touched_src_ranges, self.touched_src_sha1 and
    self._max_stashed_size.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit the explicit stash commands this transfer needs. Identical
      # contents (same hash) share one slot and are only ref-counted.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # Build src_str, which takes one of the following forms:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        # Remap the stashed range into the transfer's own coordinate space.
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference: free the slot right after this command.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op: emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    # Prepend the 4-line header expected by the updater.
    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700613
Tao Bao82c47982015-08-17 09:45:13 -0700614 def ReviseStashSize(self):
615 print("Revising stash size...")
Tao Baoe27acfd2016-12-16 11:13:55 -0800616 stash_map = {}
Tao Bao82c47982015-08-17 09:45:13 -0700617
618 # Create the map between a stash and its def/use points. For example, for a
Tao Bao3c5a16d2017-02-13 11:42:50 -0800619 # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
Tao Bao82c47982015-08-17 09:45:13 -0700620 for xf in self.transfers:
621 # Command xf defines (stores) all the stashes in stash_before.
Tao Bao3a2e3502016-12-28 09:14:39 -0800622 for stash_raw_id, sr in xf.stash_before:
623 stash_map[stash_raw_id] = (sr, xf)
Tao Bao82c47982015-08-17 09:45:13 -0700624
625 # Record all the stashes command xf uses.
Tao Bao3a2e3502016-12-28 09:14:39 -0800626 for stash_raw_id, _ in xf.use_stash:
627 stash_map[stash_raw_id] += (xf,)
Tao Bao82c47982015-08-17 09:45:13 -0700628
629 # Compute the maximum blocks available for stash based on /cache size and
630 # the threshold.
631 cache_size = common.OPTIONS.cache_size
632 stash_threshold = common.OPTIONS.stash_threshold
633 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
634
Tao Bao3a2e3502016-12-28 09:14:39 -0800635 # See the comments for 'stashes' in WriteTransfers().
Tao Baoe27acfd2016-12-16 11:13:55 -0800636 stashes = {}
Tao Bao82c47982015-08-17 09:45:13 -0700637 stashed_blocks = 0
Tao Bao9a5caf22015-08-25 15:10:10 -0700638 new_blocks = 0
Tao Bao82c47982015-08-17 09:45:13 -0700639
640 # Now go through all the commands. Compute the required stash size on the
641 # fly. If a command requires excess stash than available, it deletes the
642 # stash by replacing the command that uses the stash with a "new" command
643 # instead.
644 for xf in self.transfers:
645 replaced_cmds = []
646
647 # xf.stash_before generates explicit stash commands.
Tao Bao3a2e3502016-12-28 09:14:39 -0800648 for stash_raw_id, sr in xf.stash_before:
Tao Baoe27acfd2016-12-16 11:13:55 -0800649 # Check the post-command stashed_blocks.
650 stashed_blocks_after = stashed_blocks
Tao Bao8fad03e2017-03-01 14:36:26 -0800651 sh = self.src.RangeSha1(sr)
652 if sh not in stashes:
Tao Baoe27acfd2016-12-16 11:13:55 -0800653 stashed_blocks_after += sr.size()
Tao Baoe27acfd2016-12-16 11:13:55 -0800654
655 if stashed_blocks_after > max_allowed:
Tao Bao82c47982015-08-17 09:45:13 -0700656 # We cannot stash this one for a later command. Find out the command
657 # that will use this stash and replace the command with "new".
Tao Bao3a2e3502016-12-28 09:14:39 -0800658 use_cmd = stash_map[stash_raw_id][2]
Tao Bao82c47982015-08-17 09:45:13 -0700659 replaced_cmds.append(use_cmd)
Tao Bao9a5caf22015-08-25 15:10:10 -0700660 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao82c47982015-08-17 09:45:13 -0700661 else:
Tao Bao3c5a16d2017-02-13 11:42:50 -0800662 # Update the stashes map.
Tao Bao8fad03e2017-03-01 14:36:26 -0800663 if sh in stashes:
664 stashes[sh] += 1
Tao Bao3c5a16d2017-02-13 11:42:50 -0800665 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800666 stashes[sh] = 1
Tao Baoe27acfd2016-12-16 11:13:55 -0800667 stashed_blocks = stashed_blocks_after
Tao Bao82c47982015-08-17 09:45:13 -0700668
669 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
670 # ComputePatches(), they both have the style of "diff".
Tao Bao8fad03e2017-03-01 14:36:26 -0800671 if xf.style == "diff":
Tao Bao82c47982015-08-17 09:45:13 -0700672 assert xf.tgt_ranges and xf.src_ranges
673 if xf.src_ranges.overlaps(xf.tgt_ranges):
674 if stashed_blocks + xf.src_ranges.size() > max_allowed:
675 replaced_cmds.append(xf)
Tao Bao9a5caf22015-08-25 15:10:10 -0700676 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao82c47982015-08-17 09:45:13 -0700677
678 # Replace the commands in replaced_cmds with "new"s.
679 for cmd in replaced_cmds:
680 # It no longer uses any commands in "use_stash". Remove the def points
681 # for all those stashes.
Tao Bao3a2e3502016-12-28 09:14:39 -0800682 for stash_raw_id, sr in cmd.use_stash:
683 def_cmd = stash_map[stash_raw_id][1]
684 assert (stash_raw_id, sr) in def_cmd.stash_before
685 def_cmd.stash_before.remove((stash_raw_id, sr))
Tao Bao82c47982015-08-17 09:45:13 -0700686
Tianjie Xuebe39a02016-01-14 14:12:26 -0800687 # Add up blocks that violates space limit and print total number to
688 # screen later.
689 new_blocks += cmd.tgt_ranges.size()
Tao Bao82c47982015-08-17 09:45:13 -0700690 cmd.ConvertToNew()
691
Tao Bao3a2e3502016-12-28 09:14:39 -0800692 # xf.use_stash may generate free commands.
Tao Bao8fad03e2017-03-01 14:36:26 -0800693 for _, sr in xf.use_stash:
694 sh = self.src.RangeSha1(sr)
695 assert sh in stashes
696 stashes[sh] -= 1
697 if stashes[sh] == 0:
Tao Baoe27acfd2016-12-16 11:13:55 -0800698 stashed_blocks -= sr.size()
Tao Bao8fad03e2017-03-01 14:36:26 -0800699 stashes.pop(sh)
Tao Baoe27acfd2016-12-16 11:13:55 -0800700
Tianjie Xuebe39a02016-01-14 14:12:26 -0800701 num_of_bytes = new_blocks * self.tgt.blocksize
702 print(" Total %d blocks (%d bytes) are packed as new blocks due to "
703 "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao304ee272016-12-19 11:01:38 -0800704 return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700705
Doug Zongkerfc44a512014-08-26 13:10:25 -0700706 def ComputePatches(self, prefix):
707 print("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800708 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700709 patch_num = 0
710 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800711 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700712 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700713 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
714 print("%10d %10d (%6.2f%%) %7s %s %s" % (
715 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
716 str(xf.tgt_ranges)))
717
Doug Zongkerfc44a512014-08-26 13:10:25 -0700718 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800719 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700720 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
721 print("%10d %10d (%6.2f%%) %7s %s %s" % (
722 tgt_size, tgt_size, 100.0, xf.style,
723 xf.tgt_name, str(xf.tgt_ranges)))
724
Doug Zongkerfc44a512014-08-26 13:10:25 -0700725 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700726 # We can't compare src and tgt directly because they may have
727 # the same content but be broken up into blocks differently, eg:
728 #
729 # ["he", "llo"] vs ["h", "ello"]
730 #
731 # We want those to compare equal, ideally without having to
732 # actually concatenate the strings (these may be tens of
733 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800734 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700735 # These are identical; we don't need to generate a patch,
736 # just issue copy commands on the device.
737 xf.style = "move"
Tianjie Xu25366072017-09-08 17:19:02 -0700738 xf.patch = None
Tao Bao183e56e2017-03-05 17:05:09 -0800739 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700740 if xf.src_ranges != xf.tgt_ranges:
741 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
742 tgt_size, tgt_size, 100.0, xf.style,
743 xf.tgt_name if xf.tgt_name == xf.src_name else (
744 xf.tgt_name + " (from " + xf.src_name + ")"),
745 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700746 else:
Tianjie Xu25366072017-09-08 17:19:02 -0700747 if xf.patch:
748 # We have already generated the patch with imgdiff. Check if the
749 # transfer is intact.
750 assert not self.disable_imgdiff
751 imgdiff = True
Tao Baocb73aed2018-01-31 17:32:40 -0800752 if (xf.src_ranges.extra.get('trimmed') or
753 xf.tgt_ranges.extra.get('trimmed')):
Tianjie Xu25366072017-09-08 17:19:02 -0700754 imgdiff = False
755 xf.patch = None
756 else:
Tao Baocb73aed2018-01-31 17:32:40 -0800757 imgdiff = self.CanUseImgdiff(
758 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700759 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800760 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700761 patch_num += 1
762
763 else:
764 assert False, "unknown style " + xf.style
765
Tao Bao183e56e2017-03-05 17:05:09 -0800766 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700767 if self.threads > 1:
768 print("Computing patches (using %d threads)..." % (self.threads,))
769 else:
770 print("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700771
Tao Bao183e56e2017-03-05 17:05:09 -0800772 diff_total = len(diff_queue)
773 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700774 error_messages = []
Tao Baob937ead2017-10-19 16:51:53 -0700775 warning_messages = []
Tao Bao33635b12017-03-12 13:02:51 -0700776 if sys.stdout.isatty():
777 global diff_done
778 diff_done = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700779
Tao Bao183e56e2017-03-05 17:05:09 -0800780 # Using multiprocessing doesn't give additional benefits, due to the
781 # pattern of the code. The diffing work is done by subprocess.call, which
782 # already runs in a separate process (not affected much by the GIL -
783 # Global Interpreter Lock). Using multiprocess also requires either a)
784 # writing the diff input files in the main process before forking, or b)
785 # reopening the image file (SparseImage) in the worker processes. Doing
786 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700787 lock = threading.Lock()
788 def diff_worker():
789 while True:
790 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800791 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700792 return
Tao Bao183e56e2017-03-05 17:05:09 -0800793 xf_index, imgdiff, patch_index = diff_queue.pop()
794
795 xf = self.transfers[xf_index]
Tianjie Xu25366072017-09-08 17:19:02 -0700796 patch = xf.patch
797 if not patch:
798 src_ranges = xf.src_ranges
799 tgt_ranges = xf.tgt_ranges
Tao Bao183e56e2017-03-05 17:05:09 -0800800
Tianjie Xudf1166e2018-01-27 17:35:41 -0800801 src_file = common.MakeTempFile(prefix="src-")
802 with open(src_file, "wb") as fd:
803 self.src.WriteRangeDataToFd(src_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700804
Tianjie Xudf1166e2018-01-27 17:35:41 -0800805 tgt_file = common.MakeTempFile(prefix="tgt-")
806 with open(tgt_file, "wb") as fd:
807 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700808
809 message = []
810 try:
811 patch = compute_patch(src_file, tgt_file, imgdiff)
812 except ValueError as e:
813 message.append(
814 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
815 "imgdiff" if imgdiff else "bsdiff",
816 xf.tgt_name if xf.tgt_name == xf.src_name else
817 xf.tgt_name + " (from " + xf.src_name + ")",
818 xf.tgt_ranges, xf.src_ranges, e.message))
819 # TODO(b/68016761): Better handle the holes in mke2fs created
820 # images.
821 if imgdiff:
822 try:
823 patch = compute_patch(src_file, tgt_file, imgdiff=False)
824 message.append(
825 "Fell back and generated with bsdiff instead for %s" % (
826 xf.tgt_name,))
827 xf.style = "bsdiff"
828 with lock:
829 warning_messages.extend(message)
830 del message[:]
831 except ValueError as e:
832 message.append(
833 "Also failed to generate with bsdiff for %s:\n%s" % (
834 xf.tgt_name, e.message))
835
836 if message:
837 with lock:
838 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800839
840 with lock:
841 patches[patch_index] = (xf_index, patch)
842 if sys.stdout.isatty():
Tao Bao33635b12017-03-12 13:02:51 -0700843 global diff_done
844 diff_done += 1
845 progress = diff_done * 100 / diff_total
Tao Bao183e56e2017-03-05 17:05:09 -0800846 # '\033[K' is to clear to EOL.
847 print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
848 sys.stdout.flush()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700849
850 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700851 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700852 for th in threads:
853 th.start()
854 while threads:
855 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800856
857 if sys.stdout.isatty():
858 print('\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700859
Tao Baob937ead2017-10-19 16:51:53 -0700860 if warning_messages:
861 print('WARNING:')
862 print('\n'.join(warning_messages))
863 print('\n\n\n')
864
Tianjie Xub59c17f2016-10-28 17:55:53 -0700865 if error_messages:
Tao Baob937ead2017-10-19 16:51:53 -0700866 print('ERROR:')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700867 print('\n'.join(error_messages))
Tao Baob937ead2017-10-19 16:51:53 -0700868 print('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700869 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700870 else:
871 patches = []
872
Tao Bao183e56e2017-03-05 17:05:09 -0800873 offset = 0
874 with open(prefix + ".patch.dat", "wb") as patch_fd:
875 for index, patch in patches:
876 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700877 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800878 xf.patch_start = offset
879 offset += xf.patch_len
880 patch_fd.write(patch)
881
882 if common.OPTIONS.verbose:
883 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
884 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
885 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
886 xf.style,
887 xf.tgt_name if xf.tgt_name == xf.src_name else (
888 xf.tgt_name + " (from " + xf.src_name + ")"),
889 xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700890
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800891 def AssertSha1Good(self):
892 """Check the SHA-1 of the src & tgt blocks in the transfer list.
893
894 Double check the SHA-1 value to avoid the issue in b/71908713, where
895 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
896 environment. That specific problem has been fixed by protecting the
897 underlying generator function 'SparseImage._GetRangeData()' with lock.
898 """
899 for xf in self.transfers:
900 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
901 assert xf.tgt_sha1 == tgt_sha1
902 if xf.style == "diff":
903 src_sha1 = self.src.RangeSha1(xf.src_ranges)
904 assert xf.src_sha1 == src_sha1
905
Doug Zongkerfc44a512014-08-26 13:10:25 -0700906 def AssertSequenceGood(self):
907 # Simulate the sequences of transfers we will output, and check that:
908 # - we never read a block after writing it, and
909 # - we write every block we care about exactly once.
910
911 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800912 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700913
914 # Imagine processing the transfers in order.
915 for xf in self.transfers:
916 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700917
918 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800919 for _, sr in xf.use_stash:
920 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700921
Doug Zongker6ab2a502016-02-09 08:28:09 -0800922 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800923 # Source image could be larger. Don't check the blocks that are in the
924 # source image only. Since they are not in 'touched', and won't ever
925 # be touched.
926 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800927 assert touched[i] == 0
928
929 # Check that the output blocks for this transfer haven't yet
930 # been touched, and touch all the blocks written by this
931 # transfer.
932 for s, e in xf.tgt_ranges:
933 for i in range(s, e):
934 assert touched[i] == 0
935 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700936
937 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800938 for s, e in self.tgt.care_map:
939 for i in range(s, e):
940 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700941
Doug Zongker62338182014-09-08 08:29:55 -0700942 def ImproveVertexSequence(self):
943 print("Improving vertex order...")
944
945 # At this point our digraph is acyclic; we reversed any edges that
946 # were backwards in the heuristically-generated sequence. The
947 # previously-generated order is still acceptable, but we hope to
948 # find a better order that needs less memory for stashed data.
949 # Now we do a topological sort to generate a new vertex order,
950 # using a greedy algorithm to choose which vertex goes next
951 # whenever we have a choice.
952
953 # Make a copy of the edge set; this copy will get destroyed by the
954 # algorithm.
955 for xf in self.transfers:
956 xf.incoming = xf.goes_after.copy()
957 xf.outgoing = xf.goes_before.copy()
958
959 L = [] # the new vertex order
960
961 # S is the set of sources in the remaining graph; we always choose
962 # the one that leaves the least amount of stashed data after it's
963 # executed.
964 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
965 if not u.incoming]
966 heapq.heapify(S)
967
968 while S:
969 _, _, xf = heapq.heappop(S)
970 L.append(xf)
971 for u in xf.outgoing:
972 del u.incoming[xf]
973 if not u.incoming:
974 heapq.heappush(S, (u.NetStashChange(), u.order, u))
975
976 # if this fails then our graph had a cycle.
977 assert len(L) == len(self.transfers)
978
979 self.transfers = L
980 for i, xf in enumerate(L):
981 xf.order = i
982
Doug Zongkerfc44a512014-08-26 13:10:25 -0700983 def RemoveBackwardEdges(self):
984 print("Removing backward edges...")
985 in_order = 0
986 out_of_order = 0
987 lost_source = 0
988
989 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700990 lost = 0
991 size = xf.src_ranges.size()
992 for u in xf.goes_before:
993 # xf should go before u
994 if xf.order < u.order:
995 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700996 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700997 else:
998 # it doesn't, boo. trim the blocks that u writes from xf's
999 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -07001000 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001001 assert xf.src_ranges.overlaps(u.tgt_ranges)
1002 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
Tao Baocb73aed2018-01-31 17:32:40 -08001003 xf.src_ranges.extra['trimmed'] = True
Doug Zongkerfc44a512014-08-26 13:10:25 -07001004
1005 if xf.style == "diff" and not xf.src_ranges:
1006 # nothing left to diff from; treat as new data
1007 xf.style = "new"
1008
1009 lost = size - xf.src_ranges.size()
1010 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -07001011
1012 print((" %d/%d dependencies (%.2f%%) were violated; "
1013 "%d source blocks removed.") %
1014 (out_of_order, in_order + out_of_order,
1015 (out_of_order * 100.0 / (in_order + out_of_order))
1016 if (in_order + out_of_order) else 0.0,
1017 lost_source))
1018
Doug Zongker62338182014-09-08 08:29:55 -07001019 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001020 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1021
1022 For each transfer, make sure it properly stashes the blocks it touches and
1023 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1024 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1025 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1026 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1027 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1028 blocks will be written to the same stash slot in WriteTransfers().
1029 """
1030
Doug Zongker62338182014-09-08 08:29:55 -07001031 print("Reversing backward edges...")
1032 in_order = 0
1033 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001034 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001035 stash_size = 0
1036
1037 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001038 for u in xf.goes_before.copy():
1039 # xf should go before u
1040 if xf.order < u.order:
1041 # it does, hurray!
1042 in_order += 1
1043 else:
1044 # it doesn't, boo. modify u to stash the blocks that it
1045 # writes that xf wants to read, and then require u to go
1046 # before xf.
1047 out_of_order += 1
1048
1049 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1050 assert overlap
1051
Tao Bao3a2e3502016-12-28 09:14:39 -08001052 u.stash_before.append((stash_raw_id, overlap))
1053 xf.use_stash.append((stash_raw_id, overlap))
1054 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001055 stash_size += overlap.size()
1056
1057 # reverse the edge direction; now xf must go after u
1058 del xf.goes_before[u]
1059 del u.goes_after[xf]
1060 xf.goes_after[u] = None # value doesn't matter
1061 u.goes_before[xf] = None
1062
1063 print((" %d/%d dependencies (%.2f%%) were violated; "
1064 "%d source blocks stashed.") %
1065 (out_of_order, in_order + out_of_order,
1066 (out_of_order * 100.0 / (in_order + out_of_order))
1067 if (in_order + out_of_order) else 0.0,
1068 stash_size))
1069
Doug Zongkerfc44a512014-08-26 13:10:25 -07001070 def FindVertexSequence(self):
1071 print("Finding vertex sequence...")
1072
1073 # This is based on "A Fast & Effective Heuristic for the Feedback
1074 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1075 # it as starting with the digraph G and moving all the vertices to
1076 # be on a horizontal line in some order, trying to minimize the
1077 # number of edges that end up pointing to the left. Left-pointing
1078 # edges will get removed to turn the digraph into a DAG. In this
1079 # case each edge has a weight which is the number of source blocks
1080 # we'll lose if that edge is removed; we try to minimize the total
1081 # weight rather than just the number of edges.
1082
1083 # Make a copy of the edge set; this copy will get destroyed by the
1084 # algorithm.
1085 for xf in self.transfers:
1086 xf.incoming = xf.goes_after.copy()
1087 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001088 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001089
1090 # We use an OrderedDict instead of just a set so that the output
1091 # is repeatable; otherwise it would depend on the hash values of
1092 # the transfer objects.
1093 G = OrderedDict()
1094 for xf in self.transfers:
1095 G[xf] = None
1096 s1 = deque() # the left side of the sequence, built from left to right
1097 s2 = deque() # the right side of the sequence, built from right to left
1098
Doug Zongker6ab2a502016-02-09 08:28:09 -08001099 heap = []
1100 for xf in self.transfers:
1101 xf.heap_item = HeapItem(xf)
1102 heap.append(xf.heap_item)
1103 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001104
Tao Bao33482282016-10-24 16:49:08 -07001105 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1106 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1107 # { key1: None, key2: None, ... }.
1108 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1109 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001110
1111 def adjust_score(iu, delta):
1112 iu.score += delta
1113 iu.heap_item.clear()
1114 iu.heap_item = HeapItem(iu)
1115 heapq.heappush(heap, iu.heap_item)
1116
1117 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001118 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001119 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001120 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001121 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001122 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001123 s2.appendleft(u)
1124 del G[u]
1125 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001126 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001127 if not iu.outgoing:
1128 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001129 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001130
1131 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001132 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001133 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001134 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001135 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001136 s1.append(u)
1137 del G[u]
1138 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001139 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001140 if not iu.incoming:
1141 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001142 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001143
Doug Zongker6ab2a502016-02-09 08:28:09 -08001144 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001145
1146 # Find the "best" vertex to put next. "Best" is the one that
1147 # maximizes the net difference in source blocks saved we get by
1148 # pretending it's a source rather than a sink.
1149
Doug Zongker6ab2a502016-02-09 08:28:09 -08001150 while True:
1151 u = heapq.heappop(heap)
1152 if u and u.item in G:
1153 u = u.item
1154 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001155
Doug Zongkerfc44a512014-08-26 13:10:25 -07001156 s1.append(u)
1157 del G[u]
1158 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001159 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001160 if not iu.incoming:
1161 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001162
Doug Zongkerfc44a512014-08-26 13:10:25 -07001163 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001164 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001165 if not iu.outgoing:
1166 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001167
1168 # Now record the sequence in the 'order' field of each transfer,
1169 # and by rearranging self.transfers to be in the chosen sequence.
1170
1171 new_transfers = []
1172 for x in itertools.chain(s1, s2):
1173 x.order = len(new_transfers)
1174 new_transfers.append(x)
1175 del x.incoming
1176 del x.outgoing
1177
1178 self.transfers = new_transfers
1179
1180 def GenerateDigraph(self):
1181 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001182
1183 # Each item of source_ranges will be:
1184 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001185 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001186 source_ranges = []
1187 for b in self.transfers:
1188 for s, e in b.src_ranges:
1189 if e > len(source_ranges):
1190 source_ranges.extend([None] * (e-len(source_ranges)))
1191 for i in range(s, e):
1192 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001193 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001194 else:
Tao Bao33482282016-10-24 16:49:08 -07001195 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001196
Doug Zongkerfc44a512014-08-26 13:10:25 -07001197 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001198 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001199 for s, e in a.tgt_ranges:
1200 for i in range(s, e):
1201 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001202 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1203 if source_ranges[i] is not None:
1204 for j in source_ranges[i]:
1205 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001206
1207 for b in intersections:
1208 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001209
1210 # If the blocks written by A are read by B, then B needs to go before A.
1211 i = a.tgt_ranges.intersect(b.src_ranges)
1212 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001213 if b.src_name == "__ZERO":
1214 # the cost of removing source blocks for the __ZERO domain
1215 # is (nearly) zero.
1216 size = 0
1217 else:
1218 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001219 b.goes_before[a] = size
1220 a.goes_after[b] = size
1221
1222 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001223 """Parse the file_map to generate all the transfers."""
1224
Tianjie Xu41390d42017-11-22 11:35:18 -08001225 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1226 src_ranges, style, by_id):
1227 """Add one or multiple Transfer()s by splitting large files.
1228
1229 For BBOTA v3, we need to stash source blocks for resumable feature.
1230 However, with the growth of file size and the shrink of the cache
1231 partition source blocks are too large to be stashed. If a file occupies
1232 too many blocks, we split it into smaller pieces by getting multiple
1233 Transfer()s.
1234
1235 The downside is that after splitting, we may increase the package size
1236 since the split pieces don't align well. According to our experiments,
1237 1/8 of the cache size as the per-piece limit appears to be optimal.
1238 Compared to the fixed 1024-block limit, it reduces the overall package
1239 size by 30% for volantis, and 20% for angler and bullhead."""
1240
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001241 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001242 while (tgt_ranges.size() > max_blocks_per_transfer and
1243 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001244 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1245 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001246 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1247 src_first = src_ranges.first(max_blocks_per_transfer)
1248
Tao Bao183e56e2017-03-05 17:05:09 -08001249 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1250 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1251 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001252
1253 tgt_ranges = tgt_ranges.subtract(tgt_first)
1254 src_ranges = src_ranges.subtract(src_first)
1255 pieces += 1
1256
1257 # Handle remaining blocks.
1258 if tgt_ranges.size() or src_ranges.size():
1259 # Must be both non-empty.
1260 assert tgt_ranges.size() and src_ranges.size()
1261 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1262 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001263 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1264 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1265 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001266
Tianjie Xu41390d42017-11-22 11:35:18 -08001267 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1268 by_id):
1269 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001270
Tianjie Xu41390d42017-11-22 11:35:18 -08001271 This function will construct a list of zip archives, which will later be
1272 split by imgdiff to reduce the final patch size. For the other files,
1273 we will plainly split them based on a fixed chunk size with the potential
1274 patch size penalty.
1275 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001276
1277 assert style == "diff"
1278
1279 # Change nothing for small files.
1280 if (tgt_ranges.size() <= max_blocks_per_transfer and
1281 src_ranges.size() <= max_blocks_per_transfer):
1282 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1283 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1284 style, by_id)
1285 return
1286
Tao Baocb73aed2018-01-31 17:32:40 -08001287 # Split large APKs with imgdiff, if possible. We're intentionally checking
1288 # file types one more time (CanUseImgdiff() checks that as well), before
1289 # calling the costly RangeSha1()s.
1290 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1291 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
1292 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001293 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1294 return
1295
Tianjie Xu41390d42017-11-22 11:35:18 -08001296 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1297 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001298
Tao Bao08c85832016-09-19 22:26:30 -07001299 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1300 split=False):
1301 """Wrapper function for adding a Transfer()."""
1302
1303 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1304 # otherwise add the Transfer() as is.
1305 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001306 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1307 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1308 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001309 return
1310
1311 # Handle .odex files specially to analyze the block-wise difference. If
1312 # most of the blocks are identical with only few changes (e.g. header),
1313 # we will patch the changed blocks only. This avoids stashing unchanged
1314 # blocks while patching. We limit the analysis to files without size
1315 # changes only. This is to avoid sacrificing the OTA generation cost too
1316 # much.
1317 if (tgt_name.split(".")[-1].lower() == 'odex' and
1318 tgt_ranges.size() == src_ranges.size()):
1319
1320 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1321 # few blocks remain identical, we lose the opportunity to use imgdiff
1322 # that may have better compression ratio than bsdiff.
1323 crop_threshold = 0.5
1324
1325 tgt_skipped = RangeSet()
1326 src_skipped = RangeSet()
1327 tgt_size = tgt_ranges.size()
1328 tgt_changed = 0
1329 for src_block, tgt_block in zip(src_ranges.next_item(),
1330 tgt_ranges.next_item()):
1331 src_rs = RangeSet(str(src_block))
1332 tgt_rs = RangeSet(str(tgt_block))
1333 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1334 tgt_skipped = tgt_skipped.union(tgt_rs)
1335 src_skipped = src_skipped.union(src_rs)
1336 else:
1337 tgt_changed += tgt_rs.size()
1338
1339 # Terminate early if no clear sign of benefits.
1340 if tgt_changed > tgt_size * crop_threshold:
1341 break
1342
1343 if tgt_changed < tgt_size * crop_threshold:
1344 assert tgt_changed + tgt_skipped.size() == tgt_size
1345 print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
1346 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001347 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001348 "%s-skipped" % (tgt_name,),
1349 "%s-skipped" % (src_name,),
1350 tgt_skipped, src_skipped, style, by_id)
1351
1352 # Intentionally change the file extension to avoid being imgdiff'd as
1353 # the files are no longer in their original format.
1354 tgt_name = "%s-cropped" % (tgt_name,)
1355 src_name = "%s-cropped" % (src_name,)
1356 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1357 src_ranges = src_ranges.subtract(src_skipped)
1358
1359 # Possibly having no changed blocks.
1360 if not tgt_ranges:
1361 return
1362
1363 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001364 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001365 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1366
Tianjie Xu25366072017-09-08 17:19:02 -07001367 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1368 split_info):
1369 """Parse the split_info and return a list of info tuples.
1370
1371 Args:
1372 patch_size: total size of the patch file.
1373 tgt_ranges: Ranges of the target file within the original image.
1374 src_ranges: Ranges of the source file within the original image.
1375 split_info format:
1376 imgdiff version#
1377 count of pieces
1378 <patch_size_1> <tgt_size_1> <src_ranges_1>
1379 ...
1380 <patch_size_n> <tgt_size_n> <src_ranges_n>
1381
1382 Returns:
1383 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1384 """
1385
1386 version = int(split_info[0])
1387 assert version == 2
1388 count = int(split_info[1])
1389 assert len(split_info) - 2 == count
1390
1391 split_info_list = []
1392 patch_start = 0
1393 tgt_remain = copy.deepcopy(tgt_ranges)
1394 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1395 for line in split_info[2:]:
1396 info = line.split()
1397 assert len(info) == 3
1398 patch_length = int(info[0])
1399
1400 split_tgt_size = int(info[1])
1401 assert split_tgt_size % 4096 == 0
1402 assert split_tgt_size / 4096 <= tgt_remain.size()
1403 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1404 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1405
1406 # Find the split_src_ranges within the image file from its relative
1407 # position in file.
1408 split_src_indices = RangeSet.parse_raw(info[2])
1409 split_src_ranges = RangeSet()
1410 for r in split_src_indices:
1411 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1412 assert not split_src_ranges.overlaps(curr_range)
1413 split_src_ranges = split_src_ranges.union(curr_range)
1414
1415 split_info_list.append((patch_start, patch_length,
1416 split_tgt_ranges, split_src_ranges))
1417 patch_start += patch_length
1418
1419 # Check that the sizes of all the split pieces add up to the final file
1420 # size for patch and target.
1421 assert tgt_remain.size() == 0
1422 assert patch_start == patch_size
1423 return split_info_list
1424
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001425 def SplitLargeApks():
1426 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001427
1428 Example: Chrome.apk will be split into
1429 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1430 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1431 ...
1432
1433 After the split, the target pieces are continuous and block aligned; and
1434 the source pieces are mutually exclusive. During the split, we also
1435 generate and save the image patch between src-X & tgt-X. This patch will
1436 be valid because the block ranges of src-X & tgt-X will always stay the
1437 same afterwards; but there's a chance we don't use the patch if we
1438 convert the "diff" command into "new" or "move" later.
Tianjie Xu41390d42017-11-22 11:35:18 -08001439
1440 The split will be attempted by calling imgdiff, which expects the input
1441 files to be valid zip archives. If imgdiff fails for some reason (i.e.
1442 holes in the APK file), we will fall back to split the failed APKs into
1443 fixed size chunks.
Tianjie Xu25366072017-09-08 17:19:02 -07001444 """
1445
1446 while True:
1447 with transfer_lock:
1448 if not large_apks:
1449 return
1450 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1451
1452 src_file = common.MakeTempFile(prefix="src-")
1453 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001454 with open(src_file, "wb") as src_fd:
1455 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1456 with open(tgt_file, "wb") as tgt_fd:
1457 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001458
1459 patch_file = common.MakeTempFile(prefix="patch-")
1460 patch_info_file = common.MakeTempFile(prefix="split_info-")
1461 cmd = ["imgdiff", "-z",
1462 "--block-limit={}".format(max_blocks_per_transfer),
1463 "--split-info=" + patch_info_file,
1464 src_file, tgt_file, patch_file]
1465 p = common.Run(cmd, stdout=subprocess.PIPE)
1466 p.communicate()
Tianjie Xu25366072017-09-08 17:19:02 -07001467 if p.returncode != 0:
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001468 print("Failed to create patch between {} and {},"
1469 " falling back to bsdiff".format(src_name, tgt_name))
1470 with transfer_lock:
Tianjie Xu41390d42017-11-22 11:35:18 -08001471 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name,
1472 tgt_ranges, src_ranges,
1473 "diff", self.transfers)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001474 continue
Tianjie Xu25366072017-09-08 17:19:02 -07001475
1476 with open(patch_info_file) as patch_info:
1477 lines = patch_info.readlines()
1478
1479 patch_size_total = os.path.getsize(patch_file)
1480 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1481 tgt_ranges, src_ranges,
1482 lines)
1483 for index, (patch_start, patch_length, split_tgt_ranges,
1484 split_src_ranges) in enumerate(split_info_list):
1485 with open(patch_file) as f:
1486 f.seek(patch_start)
1487 patch_content = f.read(patch_length)
1488
1489 split_src_name = "{}-{}".format(src_name, index)
1490 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001491 split_large_apks.append((split_tgt_name,
1492 split_src_name,
1493 split_tgt_ranges,
1494 split_src_ranges,
1495 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001496
Tao Bao08c85832016-09-19 22:26:30 -07001497 print("Finding transfers...")
1498
Tianjie Xu25366072017-09-08 17:19:02 -07001499 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001500 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001501 cache_size = common.OPTIONS.cache_size
1502 split_threshold = 0.125
1503 max_blocks_per_transfer = int(cache_size * split_threshold /
1504 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001505 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001506 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001507 if tgt_fn == "__ZERO":
1508 # the special "__ZERO" domain is all the blocks not contained
1509 # in any file and that are filled with zeros. We have a
1510 # special transfer style for zero blocks.
1511 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001512 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1513 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001514 continue
1515
Tao Baoff777812015-05-12 11:42:31 -07001516 elif tgt_fn == "__COPY":
1517 # "__COPY" domain includes all the blocks not contained in any
1518 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001519 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001520 continue
1521
Doug Zongkerfc44a512014-08-26 13:10:25 -07001522 elif tgt_fn in self.src.file_map:
1523 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001524 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001525 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001526 continue
1527
1528 b = os.path.basename(tgt_fn)
1529 if b in self.src_basenames:
1530 # Look for an exact basename match in the source.
1531 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001532 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001533 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001534 continue
1535
1536 b = re.sub("[0-9]+", "#", b)
1537 if b in self.src_numpatterns:
1538 # Look for a 'number pattern' match (a basename match after
1539 # all runs of digits are replaced by "#"). (This is useful
1540 # for .so files that contain version numbers in the filename
1541 # that get bumped.)
1542 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001543 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001544 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001545 continue
1546
Tao Bao9a5caf22015-08-25 15:10:10 -07001547 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001548
Tianjie Xu25366072017-09-08 17:19:02 -07001549 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001550 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001551 for _ in range(self.threads)]
1552 for th in threads:
1553 th.start()
1554 while threads:
1555 threads.pop().join()
1556
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001557 # Sort the split transfers for large apks to generate a determinate package.
1558 split_large_apks.sort()
1559 for (tgt_name, src_name, tgt_ranges, src_ranges,
1560 patch) in split_large_apks:
1561 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1562 self.tgt.RangeSha1(tgt_ranges),
1563 self.src.RangeSha1(src_ranges),
1564 "diff", self.transfers)
1565 transfer_split.patch = patch
1566
Doug Zongkerfc44a512014-08-26 13:10:25 -07001567 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001568 for k in self.src.file_map.keys():
1569 b = os.path.basename(k)
1570 self.src_basenames[b] = k
1571 b = re.sub("[0-9]+", "#", b)
1572 self.src_numpatterns[b] = k
1573
1574 @staticmethod
1575 def AssertPartition(total, seq):
1576 """Assert that all the RangeSets in 'seq' form a partition of the
1577 'total' RangeSet (ie, they are nonintersecting and their union
1578 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001579
Doug Zongkerfc44a512014-08-26 13:10:25 -07001580 so_far = RangeSet()
1581 for i in seq:
1582 assert not so_far.overlaps(i)
1583 so_far = so_far.union(i)
1584 assert so_far == total