blob: 456f791ea18776a959d0ac27ace696d93ce92f7a [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tianjie Xu25366072017-09-08 17:19:02 -070018import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
Tao Bao32fcdab2018-10-12 10:30:39 -070022import logging
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
xunchang21531832018-12-06 14:20:05 -080029import zlib
30from collections import deque, namedtuple, OrderedDict
Tao Bao3a2e3502016-12-28 09:14:39 -080031from hashlib import sha1
Tao Bao508b0872018-02-09 13:44:43 -080032
33import common
Dan Albert8b72aef2015-03-23 19:13:21 -070034from rangelib import RangeSet
35
# Public API of this module; everything else is implementation detail.
__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

# Module-level logger; the caller configures handlers and levels.
logger = logging.getLogger(__name__)

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])
42
Dan Albert8b72aef2015-03-23 19:13:21 -070043
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Runs bsdiff (or imgdiff when requested) on the two given files.

  Args:
    srcfile: Path to the source file.
    tgtfile: Path to the target file.
    imgdiff: If True, invoke 'imgdiff -z' instead of 'bsdiff'.

  Returns:
    A PatchInfo tuple holding the imgdiff flag and the raw patch bytes.

  Raises:
    ValueError: If the external diff tool exits with a non-zero status.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())
Doug Zongkerfc44a512014-08-26 13:10:25 -070061
Dan Albert8b72aef2015-03-23 19:13:21 -070062
class Image(object):
  """Abstract interface that images handed to BlockImageDiff must provide."""

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data in the given block ranges."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns an iterable of data chunks for the given block ranges."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 over the whole image."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data of the given block ranges to the file object fd."""
    raise NotImplementedError
75
Dan Albert8b72aef2015-03-23 19:13:21 -070076
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    # Nothing to care about, clobber, or extend in an empty image.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data, so every range hashes to the empty digest.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
102
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700103
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wraps a flat data blob as an Image.

    Args:
      data: The raw image contents.
      trim: If True, drop a trailing partial block.
      pad: If True, zero-fill a trailing partial block up to blocksize.

    Raises:
      ValueError: If the data size is not block-aligned and neither trim
          nor pad was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int under Python 3; true
    # division would produce a float and break the RangeSet constructions
    # below. (Identical result under Python 2 for non-negative ints.)
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each (non-padded) block as all-zero or not. Ranges are built
    # as flat [start, end) pairs appended to the respective lists.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yields the data chunk for each (start, end) block pair in ranges."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 over the data in the given block ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the image, optionally skipping clobbered
    blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data of the given block ranges to the file object fd."""
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)
187
Doug Zongkerfc44a512014-08-26 13:10:25 -0700188
class Transfer(object):
  """One transfer command mapping a source block range to a target range."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register in the shared list; the index doubles as a stable id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    # Only 'diff' style transfers may carry a patch payload.
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
    """Returns blocks stashed by this transfer minus blocks it unstashes."""
    stashed = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stashed - freed

  def ConvertToNew(self):
    """Turns this transfer into a full 'new' write, dropping its source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch_info = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
237
238
@functools.total_ordering
class HeapItem(object):
  """Wraps a scored item so python's min-heap behaves as a max-heap."""

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    # Drop the payload; a cleared entry is falsy and gets skipped.
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__, while Python 3 uses __bool__.
  __nonzero__ = __bool__

  # functools.total_ordering fills in the remaining comparison operators.
  def __eq__(self, other):
    return other.score == self.score

  def __le__(self, other):
    return not self.score > other.score
262
263
class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files that will be applied imgdiff while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats is only meaningful when imgdiff not being disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason constant to the set of filenames recorded under it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for reason in self.REASONS:
      if reason not in self.stats:
        continue
      values = self.stats[reason]
      print_header(' {} (count: {}) '.format(reason, len(values)), '-')
      logger.info(''.join([' {}\n'.format(name) for name in values]))
Tao Bao294651d2018-02-08 23:21:52 -0800326
327
Doug Zongkerfc44a512014-08-26 13:10:25 -0700328class BlockImageDiff(object):
Tao Bao76def242017-11-21 09:25:31 -0800329 """Generates the diff of two block image objects.
330
331 BlockImageDiff works on two image objects. An image object is anything that
332 provides the following attributes:
333
334 blocksize: the size in bytes of a block, currently must be 4096.
335
336 total_blocks: the total size of the partition/image, in blocks.
337
338 care_map: a RangeSet containing which blocks (in the range [0,
339 total_blocks) we actually care about; i.e. which blocks contain data.
340
341 file_map: a dict that partitions the blocks contained in care_map into
342 smaller domains that are useful for doing diffs on. (Typically a domain
343 is a file, and the key in file_map is the pathname.)
344
345 clobbered_blocks: a RangeSet containing which blocks contain data but may
346 be altered by the FS. They need to be excluded when verifying the
347 partition integrity.
348
349 ReadRangeSet(): a function that takes a RangeSet and returns the data
350 contained in the image blocks of that RangeSet. The data is returned as
351 a list or tuple of strings; concatenating the elements together should
352 produce the requested data. Implementations are free to break up the
353 data into list/tuple elements in any way that is convenient.
354
355 RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
356 all the data in the specified range.
357
358 TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
359 all the data in the image (ie, all the blocks in the care_map minus
360 clobbered_blocks, or including the clobbered blocks if
361 include_clobbered_blocks is True).
362
363 When creating a BlockImageDiff, the src image may be None, in which case the
364 list of transfers produced will never read from the original image.
365 """
366
Tao Bao293fd132016-06-11 12:19:23 -0700367 def __init__(self, tgt, src=None, threads=None, version=4,
368 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700369 if threads is None:
370 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700371 if threads == 0:
372 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700373 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700374 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700375 self.transfers = []
376 self.src_basenames = {}
377 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800378 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700379 self.touched_src_ranges = RangeSet()
380 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700381 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800382 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700383
Tao Bao8fad03e2017-03-01 14:36:26 -0800384 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700385
386 self.tgt = tgt
387 if src is None:
388 src = EmptyImage()
389 self.src = src
390
391 # The updater code that installs the patch always uses 4k blocks.
392 assert tgt.blocksize == 4096
393 assert src.blocksize == 4096
394
395 # The range sets in each filemap should comprise a partition of
396 # the care map.
397 self.AssertPartition(src.care_map, src.file_map.values())
398 self.AssertPartition(tgt.care_map, tgt.file_map.values())
399
  @property
  def max_stashed_size(self):
    # Read-only view of the peak stash size (in bytes) recorded so far.
    return self._max_stashed_size
403
Tao Baocb73aed2018-01-31 17:32:40 -0800404 @staticmethod
405 def FileTypeSupportedByImgdiff(filename):
406 """Returns whether the file type is supported by imgdiff."""
407 return filename.lower().endswith(('.apk', '.jar', '.zip'))
408
Tao Bao294651d2018-02-08 23:21:52 -0800409 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800410 """Checks whether we can apply imgdiff for the given RangeSets.
411
412 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
413 'imgdiff -z' if possible. Because it usually produces significantly smaller
414 patches than bsdiff.
415
416 This is permissible if all of the following conditions hold.
417 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
418 - The file type is supported by imgdiff;
419 - The source and target blocks are monotonic (i.e. the data is stored with
420 blocks in increasing order);
Tao Baoe709b092018-02-07 12:40:00 -0800421 - Both files don't contain shared blocks;
Tao Bao4ccea852018-02-06 15:16:41 -0800422 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800423 - We haven't removed any blocks from the source set.
424
425 If all these conditions are satisfied, concatenating all the blocks in the
426 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
427 in the last block). imgdiff is fine with extra zeros at the end of the file.
428
429 Args:
430 name: The filename to be diff'd.
431 tgt_ranges: The target RangeSet.
432 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800433 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800434
435 Returns:
436 A boolean result.
437 """
Tao Bao508b0872018-02-09 13:44:43 -0800438 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800439 return False
440
441 if not tgt_ranges.monotonic or not src_ranges.monotonic:
442 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
443 return False
444
Tao Baoe709b092018-02-07 12:40:00 -0800445 if (tgt_ranges.extra.get('uses_shared_blocks') or
446 src_ranges.extra.get('uses_shared_blocks')):
447 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
448 return False
449
Tao Bao4ccea852018-02-06 15:16:41 -0800450 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
451 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
452 return False
453
Tao Bao294651d2018-02-08 23:21:52 -0800454 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
455 else ImgdiffStats.USED_IMGDIFF)
456 self.imgdiff_stats.Log(name, reason)
457 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800458
  def Compute(self, prefix):
    """Runs the whole diff pipeline and writes out the transfer list.

    Orchestrates: find transfers, order them, keep stash usage within the
    cache limit, verify the result, then emit patches and transfers. The
    steps are order-dependent.

    Args:
      prefix: Path prefix used for the emitted patch/transfer output files.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    self.FindSequenceForTransfers()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      stash_limit = (common.OPTIONS.cache_size *
                     common.OPTIONS.stash_threshold / self.tgt.blocksize)
      # Ignore the stash limit and calculate the maximum simultaneously stashed
      # blocks needed.
      _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

      # We cannot stash more blocks than the stash limit simultaneously. As a
      # result, some 'diff' commands will be converted to new; leading to an
      # unintended large package. To mitigate this issue, we can carefully
      # choose the transfers for conversion. The number '1024' can be further
      # tweaked here to balance the package size and build time.
      if max_stashed_blocks > stash_limit + 1024:
        self.SelectAndConvertDiffTransfersToNew(
            max_stashed_blocks - stash_limit)
        # Regenerate the sequence as the graph has changed.
        self.FindSequenceForTransfers()

      # Revise the stash size again to keep the size under limit.
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()
504
Doug Zongkerfc44a512014-08-26 13:10:25 -0700505 def WriteTransfers(self, prefix):
Tianjie Xu37e29302016-06-23 16:10:35 -0700506 def WriteSplitTransfers(out, style, target_blocks):
507 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
Tianjie Xubb848c52016-06-21 15:54:09 -0700508
509 This prevents the target size of one command from being too large; and
510 might help to avoid fsync errors on some devices."""
511
Tao Bao3a2e3502016-12-28 09:14:39 -0800512 assert style == "new" or style == "zero"
Tianjie Xu37e29302016-06-23 16:10:35 -0700513 blocks_limit = 1024
Tianjie Xubb848c52016-06-21 15:54:09 -0700514 total = 0
Tianjie Xu37e29302016-06-23 16:10:35 -0700515 while target_blocks:
516 blocks_to_write = target_blocks.first(blocks_limit)
517 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
518 total += blocks_to_write.size()
519 target_blocks = target_blocks.subtract(blocks_to_write)
Tianjie Xubb848c52016-06-21 15:54:09 -0700520 return total
521
Doug Zongkerfc44a512014-08-26 13:10:25 -0700522 out = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700523 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700524
Tao Bao3a2e3502016-12-28 09:14:39 -0800525 # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
526 # id. 'stashes' records the map from 'hash' to the ref count. The stash
527 # will be freed only if the count decrements to zero.
Doug Zongker62338182014-09-08 08:29:55 -0700528 stashes = {}
529 stashed_blocks = 0
530 max_stashed_blocks = 0
531
Doug Zongkerfc44a512014-08-26 13:10:25 -0700532 for xf in self.transfers:
533
Tao Bao8fad03e2017-03-01 14:36:26 -0800534 for _, sr in xf.stash_before:
535 sh = self.src.RangeSha1(sr)
536 if sh in stashes:
537 stashes[sh] += 1
Sami Tolvanendd67a292014-12-09 16:40:34 +0000538 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800539 stashes[sh] = 1
540 stashed_blocks += sr.size()
541 self.touched_src_ranges = self.touched_src_ranges.union(sr)
542 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700543
544 if stashed_blocks > max_stashed_blocks:
545 max_stashed_blocks = stashed_blocks
546
Jesse Zhao7b985f62015-03-02 16:53:08 -0800547 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800548 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800549
Tao Bao8fad03e2017-03-01 14:36:26 -0800550 # <# blocks> <src ranges>
551 # OR
552 # <# blocks> <src ranges> <src locs> <stash refs...>
553 # OR
554 # <# blocks> - <stash refs...>
Doug Zongker62338182014-09-08 08:29:55 -0700555
Tao Bao8fad03e2017-03-01 14:36:26 -0800556 size = xf.src_ranges.size()
Tao Bao508b0872018-02-09 13:44:43 -0800557 src_str_buffer = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700558
Tao Bao8fad03e2017-03-01 14:36:26 -0800559 unstashed_src_ranges = xf.src_ranges
560 mapped_stashes = []
561 for _, sr in xf.use_stash:
562 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
563 sh = self.src.RangeSha1(sr)
564 sr = xf.src_ranges.map_within(sr)
565 mapped_stashes.append(sr)
566 assert sh in stashes
Tao Bao508b0872018-02-09 13:44:43 -0800567 src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
Tao Bao8fad03e2017-03-01 14:36:26 -0800568 stashes[sh] -= 1
569 if stashes[sh] == 0:
570 free_string.append("free %s\n" % (sh,))
571 free_size += sr.size()
572 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700573
Tao Bao8fad03e2017-03-01 14:36:26 -0800574 if unstashed_src_ranges:
Tao Bao508b0872018-02-09 13:44:43 -0800575 src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800576 if xf.use_stash:
577 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Tao Bao508b0872018-02-09 13:44:43 -0800578 src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800579 mapped_stashes.append(mapped_unstashed)
Doug Zongker62338182014-09-08 08:29:55 -0700580 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Tao Bao8fad03e2017-03-01 14:36:26 -0800581 else:
Tao Bao508b0872018-02-09 13:44:43 -0800582 src_str_buffer.insert(1, "-")
Tao Bao8fad03e2017-03-01 14:36:26 -0800583 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Doug Zongker62338182014-09-08 08:29:55 -0700584
Tao Bao508b0872018-02-09 13:44:43 -0800585 src_str = " ".join(src_str_buffer)
Doug Zongker62338182014-09-08 08:29:55 -0700586
Tao Bao8fad03e2017-03-01 14:36:26 -0800587 # version 3+:
Doug Zongker62338182014-09-08 08:29:55 -0700588 # zero <rangeset>
589 # new <rangeset>
590 # erase <rangeset>
Dan Albert8b72aef2015-03-23 19:13:21 -0700591 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
592 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
593 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700594
595 tgt_size = xf.tgt_ranges.size()
596
597 if xf.style == "new":
598 assert xf.tgt_ranges
Tianjie Xu37e29302016-06-23 16:10:35 -0700599 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700600 total += tgt_size
601 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700602 assert xf.tgt_ranges
603 assert xf.src_ranges.size() == tgt_size
604 if xf.src_ranges != xf.tgt_ranges:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100605 # take into account automatic stashing of overlapping blocks
606 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700607 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100608 if temp_stash_usage > max_stashed_blocks:
609 max_stashed_blocks = temp_stash_usage
610
Tao Baod522bdc2016-04-12 15:53:16 -0700611 self.touched_src_ranges = self.touched_src_ranges.union(
612 xf.src_ranges)
613
Tao Bao8fad03e2017-03-01 14:36:26 -0800614 out.append("%s %s %s %s\n" % (
Sami Tolvanendd67a292014-12-09 16:40:34 +0000615 xf.style,
Tao Bao183e56e2017-03-05 17:05:09 -0800616 xf.tgt_sha1,
Dan Albert8b72aef2015-03-23 19:13:21 -0700617 xf.tgt_ranges.to_string_raw(), src_str))
Tao Bao8fad03e2017-03-01 14:36:26 -0800618 total += tgt_size
619 elif xf.style in ("bsdiff", "imgdiff"):
620 assert xf.tgt_ranges
621 assert xf.src_ranges
622 # take into account automatic stashing of overlapping blocks
623 if xf.src_ranges.overlaps(xf.tgt_ranges):
624 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
625 if temp_stash_usage > max_stashed_blocks:
626 max_stashed_blocks = temp_stash_usage
627
628 self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
629
630 out.append("%s %d %d %s %s %s %s\n" % (
631 xf.style,
632 xf.patch_start, xf.patch_len,
633 xf.src_sha1,
634 xf.tgt_sha1,
635 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700636 total += tgt_size
637 elif xf.style == "zero":
638 assert xf.tgt_ranges
639 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
Tianjie Xu37e29302016-06-23 16:10:35 -0700640 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
Tianjie Xubb848c52016-06-21 15:54:09 -0700641 total += to_zero.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700642 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700643 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700644
Sami Tolvanendd67a292014-12-09 16:40:34 +0000645 if free_string:
646 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800647 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000648
Tao Bao8fad03e2017-03-01 14:36:26 -0800649 if common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700650 # Sanity check: abort if we're going to need more stash space than
651 # the allowed size (cache_size * threshold). There are two purposes
652 # of having a threshold here. a) Part of the cache may have been
653 # occupied by some recovery logs. b) It will buy us some time to deal
654 # with the oversize issue.
655 cache_size = common.OPTIONS.cache_size
656 stash_threshold = common.OPTIONS.stash_threshold
657 max_allowed = cache_size * stash_threshold
Tao Baoe8c68a02017-02-26 10:48:11 -0800658 assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
Tao Bao8dcf7382015-05-21 14:09:49 -0700659 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
660 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
661 self.tgt.blocksize, max_allowed, cache_size,
662 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700663
Tao Bao8fad03e2017-03-01 14:36:26 -0800664 self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
Tao Baod522bdc2016-04-12 15:53:16 -0700665
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700666 if self.tgt.hashtree_info:
667 out.append("compute_hash_tree {} {} {} {} {}\n".format(
668 self.tgt.hashtree_info.hashtree_range.to_string_raw(),
669 self.tgt.hashtree_info.filesystem_range.to_string_raw(),
670 self.tgt.hashtree_info.hash_algorithm,
671 self.tgt.hashtree_info.salt,
672 self.tgt.hashtree_info.root_hash))
673
Tao Baoe9b61912015-07-09 17:37:49 -0700674 # Zero out extended blocks as a workaround for bug 20881595.
675 if self.tgt.extended:
Tianjie Xu37e29302016-06-23 16:10:35 -0700676 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
Tianjie Xubb848c52016-06-21 15:54:09 -0700677 self.tgt.extended.size())
Tao Baob32d56e2015-09-09 11:55:01 -0700678 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700679
680 # We erase all the blocks on the partition that a) don't contain useful
Tao Bao66f1fa62016-05-03 10:02:01 -0700681 # data in the new image; b) will not be touched by dm-verity. Out of those
682 # blocks, we erase the ones that won't be used in this update at the
683 # beginning of an update. The rest would be erased at the end. This is to
684 # work around the eMMC issue observed on some devices, which may otherwise
685 # get starving for clean blocks and thus fail the update. (b/28347095)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700686 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700687 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
688 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
Tao Bao66f1fa62016-05-03 10:02:01 -0700689
690 erase_first = new_dontcare.subtract(self.touched_src_ranges)
691 if erase_first:
692 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
693
694 erase_last = new_dontcare.subtract(erase_first)
695 if erase_last:
696 out.append("erase %s\n" % (erase_last.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700697
698 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700699 out.insert(1, "%d\n" % (total,))
Tao Bao8fad03e2017-03-01 14:36:26 -0800700 # v3+: the number of stash slots is unused.
701 out.insert(2, "0\n")
702 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700703
704 with open(prefix + ".transfer.list", "wb") as f:
705 for i in out:
706 f.write(i)
707
Tao Bao8fad03e2017-03-01 14:36:26 -0800708 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
709 OPTIONS = common.OPTIONS
710 if OPTIONS.cache_size is not None:
711 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
Tao Bao32fcdab2018-10-12 10:30:39 -0700712 logger.info(
713 "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
714 max_stashed_blocks, self._max_stashed_size, max_allowed,
715 self._max_stashed_size * 100.0 / max_allowed)
Tao Bao8fad03e2017-03-01 14:36:26 -0800716 else:
Tao Bao32fcdab2018-10-12 10:30:39 -0700717 logger.info(
718 "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
719 max_stashed_blocks, self._max_stashed_size)
Doug Zongker62338182014-09-08 08:29:55 -0700720
  def ReviseStashSize(self, ignore_stash_limit=False):
    """Revises the transfers to keep the stash size within the size limit.

    Iterates through the transfer list and calculates the stash size each
    transfer generates. Converts the affected transfers to new if we reach the
    stash limit.

    Args:
      ignore_stash_limit: Ignores the stash limit and calculates the max
        simultaneous stashed blocks instead. No change will be made to the
        transfer list with this flag.

    Returns:
      A tuple of (tgt blocks converted to new, max stashed blocks)
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    max_allowed_blocks = None
    if not ignore_stash_limit:
      # Compute the maximum blocks available for stash based on /cache size and
      # the threshold.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    # Maps the SHA-1 of a stashed range to its reference count; blocks with
    # identical content share one stash slot, so they are only counted once.
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0
    max_stashed_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          # Only new content grows the stash; a duplicate hash reuses a slot.
          stashed_blocks_after += sr.size()

        if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after
          max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if (max_allowed_blocks and
              stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
            replaced_cmds.append(xf)
            logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)
          else:
            # The whole source ranges will be stashed for implicit stashes.
            max_stashed_blocks = max(max_stashed_blocks,
                                     stashed_blocks + xf.src_ranges.size())

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference gone: the slot is actually freed on device.
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        "  Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size. Maximum blocks stashed simultaneously: %d",
        new_blocks, num_of_bytes, max_stashed_blocks)
    return new_blocks, max_stashed_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700837
  def ComputePatches(self, prefix):
    """Computes and writes out the patch/new data for all transfers.

    Writes the raw data of "new" transfers to <prefix>.new.dat, and the
    bsdiff/imgdiff patches of "diff" transfers to <prefix>.patch.dat,
    recording each patch's offset and length on the transfer. "diff"
    transfers whose source and target hashes match are downgraded to "move"
    commands that need no patch.

    Args:
      prefix: The path prefix for the output .new.dat / .patch.dat files.
    """
    logger.info("Reticulating splines...")
    # Transfers that genuinely need a patch, as (transfer index, use-imgdiff,
    # patch slot) tuples, consumed by ComputePatchesForInputList() below.
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #    ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch_info = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              logger.info(
                  "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size, tgt_size,
                  100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges))
          else:
            if xf.patch_info:
              # We have already generated the patch (e.g. during split of large
              # APKs or reduction of stash size)
              imgdiff = xf.patch_info.imgdiff
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    patches = self.ComputePatchesForInputList(diff_queue, False)

    # Concatenate all patches into .patch.dat, recording each transfer's
    # (patch_start, patch_len) so the transfer list can reference them.
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch_info, _ in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch_info.content)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch_info.content)

        tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
        logger.info(
            "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
            xf.patch_len * 100.0 / tgt_size, xf.style,
            xf.tgt_name if xf.tgt_name == xf.src_name else (
                xf.tgt_name + " (from " + xf.src_name + ")"),
            xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700912
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800913 def AssertSha1Good(self):
914 """Check the SHA-1 of the src & tgt blocks in the transfer list.
915
916 Double check the SHA-1 value to avoid the issue in b/71908713, where
917 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
918 environment. That specific problem has been fixed by protecting the
919 underlying generator function 'SparseImage._GetRangeData()' with lock.
920 """
921 for xf in self.transfers:
922 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
923 assert xf.tgt_sha1 == tgt_sha1
924 if xf.style == "diff":
925 src_sha1 = self.src.RangeSha1(xf.src_ranges)
926 assert xf.src_sha1 == src_sha1
927
Doug Zongkerfc44a512014-08-26 13:10:25 -0700928 def AssertSequenceGood(self):
929 # Simulate the sequences of transfers we will output, and check that:
930 # - we never read a block after writing it, and
931 # - we write every block we care about exactly once.
932
933 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800934 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700935
936 # Imagine processing the transfers in order.
937 for xf in self.transfers:
938 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700939
940 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800941 for _, sr in xf.use_stash:
942 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700943
Doug Zongker6ab2a502016-02-09 08:28:09 -0800944 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800945 # Source image could be larger. Don't check the blocks that are in the
946 # source image only. Since they are not in 'touched', and won't ever
947 # be touched.
948 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800949 assert touched[i] == 0
950
951 # Check that the output blocks for this transfer haven't yet
952 # been touched, and touch all the blocks written by this
953 # transfer.
954 for s, e in xf.tgt_ranges:
955 for i in range(s, e):
956 assert touched[i] == 0
957 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700958
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700959 if self.tgt.hashtree_info:
960 for s, e in self.tgt.hashtree_info.hashtree_range:
961 for i in range(s, e):
962 assert touched[i] == 0
963 touched[i] = 1
964
Doug Zongkerfc44a512014-08-26 13:10:25 -0700965 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800966 for s, e in self.tgt.care_map:
967 for i in range(s, e):
968 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700969
xunchang21531832018-12-06 14:20:05 -0800970 def FindSequenceForTransfers(self):
971 """Finds a sequence for the given transfers.
972
973 The goal is to minimize the violation of order dependencies between these
974 transfers, so that fewer blocks are stashed when applying the update.
975 """
976
977 # Clear the existing dependency between transfers
978 for xf in self.transfers:
979 xf.goes_before = OrderedDict()
980 xf.goes_after = OrderedDict()
981
982 xf.stash_before = []
983 xf.use_stash = []
984
985 # Find the ordering dependencies among transfers (this is O(n^2)
986 # in the number of transfers).
987 self.GenerateDigraph()
988 # Find a sequence of transfers that satisfies as many ordering
989 # dependencies as possible (heuristically).
990 self.FindVertexSequence()
991 # Fix up the ordering dependencies that the sequence didn't
992 # satisfy.
993 self.ReverseBackwardEdges()
994 self.ImproveVertexSequence()
995
Doug Zongker62338182014-09-08 08:29:55 -0700996 def ImproveVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -0700997 logger.info("Improving vertex order...")
Doug Zongker62338182014-09-08 08:29:55 -0700998
999 # At this point our digraph is acyclic; we reversed any edges that
1000 # were backwards in the heuristically-generated sequence. The
1001 # previously-generated order is still acceptable, but we hope to
1002 # find a better order that needs less memory for stashed data.
1003 # Now we do a topological sort to generate a new vertex order,
1004 # using a greedy algorithm to choose which vertex goes next
1005 # whenever we have a choice.
1006
1007 # Make a copy of the edge set; this copy will get destroyed by the
1008 # algorithm.
1009 for xf in self.transfers:
1010 xf.incoming = xf.goes_after.copy()
1011 xf.outgoing = xf.goes_before.copy()
1012
1013 L = [] # the new vertex order
1014
1015 # S is the set of sources in the remaining graph; we always choose
1016 # the one that leaves the least amount of stashed data after it's
1017 # executed.
1018 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1019 if not u.incoming]
1020 heapq.heapify(S)
1021
1022 while S:
1023 _, _, xf = heapq.heappop(S)
1024 L.append(xf)
1025 for u in xf.outgoing:
1026 del u.incoming[xf]
1027 if not u.incoming:
1028 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1029
1030 # if this fails then our graph had a cycle.
1031 assert len(L) == len(self.transfers)
1032
1033 self.transfers = L
1034 for i, xf in enumerate(L):
1035 xf.order = i
1036
Doug Zongker62338182014-09-08 08:29:55 -07001037 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001038 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1039
1040 For each transfer, make sure it properly stashes the blocks it touches and
1041 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1042 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1043 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1044 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1045 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1046 blocks will be written to the same stash slot in WriteTransfers().
1047 """
1048
Tao Bao32fcdab2018-10-12 10:30:39 -07001049 logger.info("Reversing backward edges...")
Doug Zongker62338182014-09-08 08:29:55 -07001050 in_order = 0
1051 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001052 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001053 stash_size = 0
1054
1055 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001056 for u in xf.goes_before.copy():
1057 # xf should go before u
1058 if xf.order < u.order:
1059 # it does, hurray!
1060 in_order += 1
1061 else:
1062 # it doesn't, boo. modify u to stash the blocks that it
1063 # writes that xf wants to read, and then require u to go
1064 # before xf.
1065 out_of_order += 1
1066
1067 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1068 assert overlap
1069
Tao Bao3a2e3502016-12-28 09:14:39 -08001070 u.stash_before.append((stash_raw_id, overlap))
1071 xf.use_stash.append((stash_raw_id, overlap))
1072 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001073 stash_size += overlap.size()
1074
1075 # reverse the edge direction; now xf must go after u
1076 del xf.goes_before[u]
1077 del u.goes_after[xf]
1078 xf.goes_after[u] = None # value doesn't matter
1079 u.goes_before[xf] = None
1080
Tao Bao32fcdab2018-10-12 10:30:39 -07001081 logger.info(
1082 " %d/%d dependencies (%.2f%%) were violated; %d source blocks "
1083 "stashed.", out_of_order, in_order + out_of_order,
1084 (out_of_order * 100.0 / (in_order + out_of_order)) if (
1085 in_order + out_of_order) else 0.0,
1086 stash_size)
Doug Zongker62338182014-09-08 08:29:55 -07001087
  def FindVertexSequence(self):
    """Heuristically orders the transfers to minimize violated edge weight.

    On return, self.transfers is reordered, each transfer's 'order' field
    holds its position in the new sequence, and the scratch 'incoming' /
    'outgoing' edge copies are removed. Edges that still point backwards in
    the result are handled later by ReverseBackwardEdges().
    """
    logger.info("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left.  Left-pointing
    # edges will get removed to turn the digraph into a DAG.  In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Heap entries can't be updated in place: invalidate the old item via
      # clear() (the pop loop below skips falsy items) and push a fresh one
      # carrying the new score -- lazy deletion.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next.  "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until we hit a live entry (not clear()-ed) whose transfer is
      # still in G; stale entries from adjust_score() are discarded here.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1200
1201 def GenerateDigraph(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001202 logger.info("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001203
1204 # Each item of source_ranges will be:
1205 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001206 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001207 source_ranges = []
1208 for b in self.transfers:
1209 for s, e in b.src_ranges:
1210 if e > len(source_ranges):
1211 source_ranges.extend([None] * (e-len(source_ranges)))
1212 for i in range(s, e):
1213 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001214 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001215 else:
Tao Bao33482282016-10-24 16:49:08 -07001216 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001217
Doug Zongkerfc44a512014-08-26 13:10:25 -07001218 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001219 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001220 for s, e in a.tgt_ranges:
1221 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001222 if i >= len(source_ranges):
1223 break
Tao Bao33482282016-10-24 16:49:08 -07001224 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1225 if source_ranges[i] is not None:
1226 for j in source_ranges[i]:
1227 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001228
1229 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001230 if a is b:
1231 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001232
1233 # If the blocks written by A are read by B, then B needs to go before A.
1234 i = a.tgt_ranges.intersect(b.src_ranges)
1235 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001236 if b.src_name == "__ZERO":
1237 # the cost of removing source blocks for the __ZERO domain
1238 # is (nearly) zero.
1239 size = 0
1240 else:
1241 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001242 b.goes_before[a] = size
1243 a.goes_after[b] = size
1244
  def ComputePatchesForInputList(self, diff_queue, compress_target):
    """Returns a list of patch information for the input list of transfers.

    Spawns self.threads worker threads that drain diff_queue (the list is
    consumed in place), generating a bsdiff/imgdiff patch per entry unless
    the transfer already carries one. Exits the process on any diff or
    compression failure.

    Args:
      diff_queue: a list of transfers with style 'diff'
      compress_target: If True, compresses the target ranges of each
        transfers; and save the size.

    Returns:
      A list of (transfer order, patch_info, compressed_size) tuples.
    """

    if not diff_queue:
      return []

    if self.threads > 1:
      logger.info("Computing patches (using %d threads)...", self.threads)
    else:
      logger.info("Computing patches...")

    diff_total = len(diff_queue)
    # Pre-sized so each worker can write its slot without reordering races.
    patches = [None] * diff_total
    error_messages = []

    # Using multiprocessing doesn't give additional benefits, due to the
    # pattern of the code. The diffing work is done by subprocess.call, which
    # already runs in a separate process (not affected much by the GIL -
    # Global Interpreter Lock). Using multiprocess also requires either a)
    # writing the diff input files in the main process before forking, or b)
    # reopening the image file (SparseImage) in the worker processes. Doing
    # neither of them further improves the performance.
    lock = threading.Lock()

    def diff_worker():
      # Worker loop: pop one queue entry under the lock, do the (slow) diff
      # work outside the lock, then publish results under the lock again.
      while True:
        with lock:
          if not diff_queue:
            return
          xf_index, imgdiff, patch_index = diff_queue.pop()
          xf = self.transfers[xf_index]

        message = []
        compressed_size = None

        patch_info = xf.patch_info
        if not patch_info:
          # No precomputed patch: dump src/tgt ranges to temp files and diff.
          src_file = common.MakeTempFile(prefix="src-")
          with open(src_file, "wb") as fd:
            self.src.WriteRangeDataToFd(xf.src_ranges, fd)

          tgt_file = common.MakeTempFile(prefix="tgt-")
          with open(tgt_file, "wb") as fd:
            self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)

          try:
            patch_info = compute_patch(src_file, tgt_file, imgdiff)
          except ValueError as e:
            # NOTE(review): 'e.message' is Python 2 only; under Python 3 this
            # would raise AttributeError -- confirm the target interpreter.
            message.append(
                "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                    "imgdiff" if imgdiff else "bsdiff",
                    xf.tgt_name if xf.tgt_name == xf.src_name else
                    xf.tgt_name + " (from " + xf.src_name + ")",
                    xf.tgt_ranges, xf.src_ranges, e.message))

        if compress_target:
          tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
          try:
            # Compresses with the default level
            compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
            compressed_data = (compress_obj.compress("".join(tgt_data))
                               + compress_obj.flush())
            compressed_size = len(compressed_data)
          except zlib.error as e:
            message.append(
                "Failed to compress the data in target range {} for {}:\n"
                "{}".format(xf.tgt_ranges, xf.tgt_name, e.message))

        if message:
          with lock:
            error_messages.extend(message)

        with lock:
          patches[patch_index] = (xf_index, patch_info, compressed_size)

    threads = [threading.Thread(target=diff_worker)
               for _ in range(self.threads)]
    for th in threads:
      th.start()
    while threads:
      threads.pop().join()

    if error_messages:
      logger.error('ERROR:')
      logger.error('\n'.join(error_messages))
      logger.error('\n\n\n')
      sys.exit(1)

    return patches
1343
  def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):
    """Converts the diff transfers to reduce the max simultaneous stash.

    Since the 'new' data is compressed with deflate, we can select the 'diff'
    transfers for conversion by comparing its patch size with the size of the
    compressed data. Ideally, we want to convert the transfers with a small
    size increase, but using a large number of stashed blocks.

    Args:
      violated_stash_blocks: the number of stashed blocks we need to free;
          conversion stops once at least this many stashed blocks have been
          removed.
    """
    # (transfer, number of stashed blocks the transfer uses, conversion cost).
    TransferSizeScore = namedtuple("TransferSizeScore",
                                   "xf, used_stash_blocks, score")

    logger.info("Selecting diff commands to convert to new.")
    # Entries are (transfer order, imgdiff flag, patch index), in the shape
    # consumed by ComputePatchesForInputList().
    diff_queue = []
    for xf in self.transfers:
      if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
        use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
                                         xf.src_ranges)
        diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))

    # Remove the 'move' transfers, and compute the patch & compressed size
    # for the remaining.
    result = self.ComputePatchesForInputList(diff_queue, True)

    conversion_candidates = []
    for xf_index, patch_info, compressed_size in result:
      xf = self.transfers[xf_index]
      # Cache the computed patch on the transfer, so that later passes don't
      # have to recompute it.
      if not xf.patch_info:
        xf.patch_info = patch_info

      # NOTE(review): assumes compressed_size > 0; deflate output for a
      # non-empty target should never be empty - confirm.
      size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
      diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
      logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"
                  " compression_size: %d, ratio %.2f%%", xf.tgt_name,
                  xf.tgt_ranges.size(), diff_style,
                  len(xf.patch_info.content), compressed_size, size_ratio)

      used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)
      # Convert the transfer to new if the compressed size is smaller or equal.
      # We don't need to maintain the stash_before lists here because the
      # graph will be regenerated later.
      if len(xf.patch_info.content) >= compressed_size:
        # Add the transfer to the candidate list with negative score. And it
        # will be converted later.
        conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
                                                       -1))
      elif used_stash_blocks > 0:
        # This heuristic represents the size increase in the final package to
        # remove per unit of stashed data.
        score = ((compressed_size - len(xf.patch_info.content)) * 100.0
                 / used_stash_blocks)
        conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
                                                       score))
    # Transfers with lower score (i.e. less expensive to convert) will be
    # converted first.
    conversion_candidates.sort(key=lambda x: x.score)

    # TODO(xunchang), improve the logic to find the transfers to convert, e.g.
    # convert the ones that contribute to the max stash, run ReviseStashSize
    # multiple times etc.
    removed_stashed_blocks = 0
    for xf, used_stash_blocks, _ in conversion_candidates:
      logger.info("Converting %s to new", xf.tgt_name)
      xf.ConvertToNew()
      removed_stashed_blocks += used_stash_blocks
      # Experiments show that we will get a smaller package size if we remove
      # slightly more stashed blocks than the violated stash blocks.
      if removed_stashed_blocks >= violated_stash_blocks:
        break

    logger.info("Removed %d stashed blocks", removed_stashed_blocks)
1414
  def FindTransfers(self):
    """Parse the file_map to generate all the transfers.

    Walks every file in the target file_map, matches it against the source
    (by exact pathname, then basename, then digit-collapsed basename) and
    emits the appropriate Transfer()s into self.transfers. Large files are
    split into cache-bounded chunks; large APKs are pre-diffed with imgdiff
    in parallel worker threads.
    """

    def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                             src_ranges, style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""

      pieces = 0
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
                 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
                 style, by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
                          by_id):
      """Find all the zip files and split the others with a fixed chunk size.

      This function will construct a list of zip archives, which will later be
      split by imgdiff to reduce the final patch size. For the other files,
      we will plainly split them based on a fixed chunk size with the potential
      patch size penalty.
      """

      assert style == "diff"

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Split large APKs with imgdiff, if possible. We're intentionally checking
      # file types one more time (CanUseImgdiff() checks that as well), before
      # calling the costly RangeSha1()s.
      if (self.FileTypeSupportedByImgdiff(tgt_name) and
          self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
        if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
          # Defer to the parallel SplitLargeApks() workers below.
          large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
          return

      AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                           src_ranges, style, by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer()."""

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only few changes (e.g. header),
      # we will patch the changed blocks only. This avoids stashing unchanged
      # blocks while patching. We limit the analysis to files without size
      # changes only. This is to avoid sacrificing the OTA generation cost too
      # much.
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff
        # that may have better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        # Compare source and target block by block, accumulating the
        # identical blocks into the "skipped" sets.
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if no clear sign of benefits.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          logger.info(
              '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
              tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd as
          # the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
                                  split_info):
      """Parse the split_info and return a list of info tuples.

      Args:
        patch_size: total size of the patch file.
        tgt_ranges: Ranges of the target file within the original image.
        src_ranges: Ranges of the source file within the original image.
        split_info format:
          imgdiff version#
          count of pieces
          <patch_size_1> <tgt_size_1> <src_ranges_1>
          ...
          <patch_size_n> <tgt_size_n> <src_ranges_n>

      Returns:
        [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
      """

      version = int(split_info[0])
      assert version == 2
      count = int(split_info[1])
      assert len(split_info) - 2 == count

      split_info_list = []
      patch_start = 0
      tgt_remain = copy.deepcopy(tgt_ranges)
      # each line has the format <patch_size>, <tgt_size>, <src_ranges>
      for line in split_info[2:]:
        info = line.split()
        assert len(info) == 3
        patch_length = int(info[0])

        split_tgt_size = int(info[1])
        assert split_tgt_size % 4096 == 0
        # NOTE(review): '/' relies on Python 2 integer division here; use '//'
        # when porting to Python 3.
        assert split_tgt_size / 4096 <= tgt_remain.size()
        split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
        tgt_remain = tgt_remain.subtract(split_tgt_ranges)

        # Find the split_src_ranges within the image file from its relative
        # position in file.
        split_src_indices = RangeSet.parse_raw(info[2])
        split_src_ranges = RangeSet()
        for r in split_src_indices:
          curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
          assert not split_src_ranges.overlaps(curr_range)
          split_src_ranges = split_src_ranges.union(curr_range)

        split_info_list.append((patch_start, patch_length,
                                split_tgt_ranges, split_src_ranges))
        patch_start += patch_length

      # Check that the sizes of all the split pieces add up to the final file
      # size for patch and target.
      assert tgt_remain.size() == 0
      assert patch_start == patch_size
      return split_info_list

    def SplitLargeApks():
      """Split the large apks files.

      Example: Chrome.apk will be split into
        src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
        src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
        ...

      After the split, the target pieces are continuous and block aligned; and
      the source pieces are mutually exclusive. During the split, we also
      generate and save the image patch between src-X & tgt-X. This patch will
      be valid because the block ranges of src-X & tgt-X will always stay the
      same afterwards; but there's a chance we don't use the patch if we
      convert the "diff" command into "new" or "move" later.
      """

      while True:
        with transfer_lock:
          if not large_apks:
            return
          tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)

        src_file = common.MakeTempFile(prefix="src-")
        tgt_file = common.MakeTempFile(prefix="tgt-")
        with open(src_file, "wb") as src_fd:
          self.src.WriteRangeDataToFd(src_ranges, src_fd)
        with open(tgt_file, "wb") as tgt_fd:
          self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)

        patch_file = common.MakeTempFile(prefix="patch-")
        patch_info_file = common.MakeTempFile(prefix="split_info-")
        cmd = ["imgdiff", "-z",
               "--block-limit={}".format(max_blocks_per_transfer),
               "--split-info=" + patch_info_file,
               src_file, tgt_file, patch_file]
        proc = common.Run(cmd)
        imgdiff_output, _ = proc.communicate()
        assert proc.returncode == 0, \
            "Failed to create imgdiff patch between {} and {}:\n{}".format(
                src_name, tgt_name, imgdiff_output)

        with open(patch_info_file) as patch_info:
          lines = patch_info.readlines()

        patch_size_total = os.path.getsize(patch_file)
        split_info_list = ParseAndValidateSplitInfo(patch_size_total,
                                                    tgt_ranges, src_ranges,
                                                    lines)
        for index, (patch_start, patch_length, split_tgt_ranges,
                    split_src_ranges) in enumerate(split_info_list):
          # NOTE(review): the patch is binary data; reading it in text mode
          # relies on Python 2 str semantics - use "rb" when porting to
          # Python 3.
          with open(patch_file) as f:
            f.seek(patch_start)
            patch_content = f.read(patch_length)

          split_src_name = "{}-{}".format(src_name, index)
          split_tgt_name = "{}-{}".format(tgt_name, index)
          split_large_apks.append((split_tgt_name,
                                   split_src_name,
                                   split_tgt_ranges,
                                   split_src_ranges,
                                   patch_content))

    logger.info("Finding transfers...")

    large_apks = []
    split_large_apks = []
    cache_size = common.OPTIONS.cache_size
    # Cap each transfer at 1/8 of the cache size; see the docstring of
    # AddSplitTransfersWithFixedSizeChunks for the rationale.
    split_threshold = 0.125
    max_blocks_per_transfer = int(cache_size * split_threshold /
                                  self.tgt.blocksize)
    empty = RangeSet()
    for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros. We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn == "__HASHTREE":
        # No transfer is emitted for the "__HASHTREE" domain; presumably the
        # hashtree is (re)generated elsewhere - confirm with callers.
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, True)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#"). (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

    # Split the collected large APKs in parallel: the SplitLargeApks workers
    # pop from large_apks and append results to split_large_apks, both under
    # transfer_lock.
    transfer_lock = threading.Lock()
    threads = [threading.Thread(target=SplitLargeApks)
               for _ in range(self.threads)]
    for th in threads:
      th.start()
    while threads:
      threads.pop().join()

    # Sort the split transfers for large apks to generate a determinate package.
    split_large_apks.sort()
    for (tgt_name, src_name, tgt_ranges, src_ranges,
         patch) in split_large_apks:
      transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                                self.tgt.RangeSha1(tgt_ranges),
                                self.src.RangeSha1(src_ranges),
                                "diff", self.transfers)
      # Save the imgdiff patch computed during the split so it doesn't need
      # to be recomputed later.
      transfer_split.patch_info = PatchInfo(True, patch)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001753
Doug Zongkerfc44a512014-08-26 13:10:25 -07001754 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001755 for k in self.src.file_map.keys():
1756 b = os.path.basename(k)
1757 self.src_basenames[b] = k
1758 b = re.sub("[0-9]+", "#", b)
1759 self.src_numpatterns[b] = k
1760
1761 @staticmethod
1762 def AssertPartition(total, seq):
1763 """Assert that all the RangeSets in 'seq' form a partition of the
1764 'total' RangeSet (ie, they are nonintersecting and their union
1765 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001766
Doug Zongkerfc44a512014-08-26 13:10:25 -07001767 so_far = RangeSet()
1768 for i in seq:
1769 assert not so_far.overlaps(i)
1770 so_far = so_far.union(i)
1771 assert so_far == total