blob: e5a9050b10e07a7ff0755e1df3432bbe5afe8c98 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tianjie Xu25366072017-09-08 17:19:02 -070018import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
Tao Bao32fcdab2018-10-12 10:30:39 -070022import logging
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
xunchang21531832018-12-06 14:20:05 -080029import zlib
30from collections import deque, namedtuple, OrderedDict
Tao Bao3a2e3502016-12-28 09:14:39 -080031from hashlib import sha1
Tao Bao508b0872018-02-09 13:44:43 -080032
33import common
Dan Albert8b72aef2015-03-23 19:13:21 -070034from rangelib import RangeSet
35
__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

# Module-level logger; handlers/levels are configured by the caller.
logger = logging.getLogger(__name__)

# PatchInfo describes one computed patch: 'imgdiff' is True when the patch
# was produced by imgdiff (False for bsdiff), and 'content' holds the raw
# patch bytes.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])
42
Dan Albert8b72aef2015-03-23 19:13:21 -070043
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Computes a binary patch turning srcfile into tgtfile.

  Invokes imgdiff (zip-aware mode) when imgdiff is True, bsdiff otherwise.

  Returns:
    A PatchInfo carrying the patch style flag and the raw patch bytes.

  Raises:
    ValueError: If the external diff tool exits with a non-zero status.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Suppress command echoing: the arguments are throwaway temp filenames
  # and add nothing useful to the logs.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()
  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())
Doug Zongkerfc44a512014-08-26 13:10:25 -070061
Dan Albert8b72aef2015-03-23 19:13:21 -070062
class Image(object):
  """Abstract interface for partition image objects used by BlockImageDiff."""

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data in the given block ranges."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns the data for the given ranges as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the whole image's cared-for data."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data for the given ranges to the file object fd."""
    raise NotImplementedError
Dan Albert8b72aef2015-03-23 19:13:21 -070076
class EmptyImage(Image):
  """An image of size zero, used when no source image is available."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    # A zero-length image has nothing to care about, clobber or extend.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}
    self.hashtree_info = None

  def RangeSha1(self, ranges):
    # There is no data, so any range hashes to the SHA-1 of the empty string.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # clobbered_blocks is always empty here, so the flag makes no difference.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700104
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Initializes a DataImage.

    Args:
      data: The string holding the image contents.
      trim: Whether to drop a trailing partial block.
      pad: Whether to zero-fill a trailing partial block. trim and pad are
          mutually exclusive.

    Raises:
      ValueError: If data is not a multiple of the block size and neither
          trim nor pad is requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division: plain '/' is true division under Python 3 and
    # would make total_blocks a float, breaking the RangeSet construction
    # and the range() iteration below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    # Classify each (non-padded) block as all-zero or not, so zero blocks
    # can be emitted as cheap 'zero' commands.
    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yields the data chunk for each (start, end) block range, in order."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 over the data in the given block ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    """Returns the range data wrapped in a single-element list."""
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the image, optionally skipping clobbered
    blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data for the given ranges to the file object fd."""
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)
188
Doug Zongkerfc44a512014-08-26 13:10:25 -0700189
class Transfer(object):
  """One transfer command: produce a target region from a source region."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register in the shared list; the id is this transfer's position there.
    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    # Only 'diff' style transfers may carry a (non-empty) patch.
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
    """Returns blocks stashed minus blocks un-stashed by this transfer."""
    stashed = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stashed - freed

  def ConvertToNew(self):
    """Rewrites this transfer as a full 'new' write, dropping its sources."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch_info = None

  def __str__(self):
    return "{}: <{} {} to {}>".format(
        self.id, self.src_ranges, self.style, self.tgt_ranges)
238
239
@functools.total_ordering
class HeapItem(object):
  """Wraps a scored item so heapq's min-heap pops the max-score item first."""

  def __init__(self, item):
    self.item = item
    # heapq implements a min-heap, so store the negated score: the largest
    # original score becomes the smallest heap key.
    self.score = -item.score

  def clear(self):
    """Drops the payload; a cleared wrapper is falsy and can be skipped."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 spells the truth-value hook __nonzero__.
  __nonzero__ = __bool__

  # functools.total_ordering derives the remaining comparison operators
  # from __eq__ plus __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
263
264
class ImgdiffStats(object):
  """Collects statistics about imgdiff usage while generating a block diff.

  Tracks, per reason, which files get diff'd with imgdiff and which get
  rejected. The stats are only meaningful when imgdiff hasn't been disabled
  by the caller of BlockImageDiff, and only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason constant to the set of filenames logged under it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS
    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for reason in self.REASONS:
      if reason not in self.stats:
        continue
      names = self.stats[reason]
      print_header(' {} (count: {}) '.format(reason, len(names)), '-')
      logger.info(''.join(' {}\n'.format(name) for name in names))
Tao Bao294651d2018-02-08 23:21:52 -0800327
328
Doug Zongkerfc44a512014-08-26 13:10:25 -0700329class BlockImageDiff(object):
Tao Bao76def242017-11-21 09:25:31 -0800330 """Generates the diff of two block image objects.
331
332 BlockImageDiff works on two image objects. An image object is anything that
333 provides the following attributes:
334
335 blocksize: the size in bytes of a block, currently must be 4096.
336
337 total_blocks: the total size of the partition/image, in blocks.
338
339 care_map: a RangeSet containing which blocks (in the range [0,
340 total_blocks) we actually care about; i.e. which blocks contain data.
341
342 file_map: a dict that partitions the blocks contained in care_map into
343 smaller domains that are useful for doing diffs on. (Typically a domain
344 is a file, and the key in file_map is the pathname.)
345
346 clobbered_blocks: a RangeSet containing which blocks contain data but may
347 be altered by the FS. They need to be excluded when verifying the
348 partition integrity.
349
350 ReadRangeSet(): a function that takes a RangeSet and returns the data
351 contained in the image blocks of that RangeSet. The data is returned as
352 a list or tuple of strings; concatenating the elements together should
353 produce the requested data. Implementations are free to break up the
354 data into list/tuple elements in any way that is convenient.
355
356 RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
357 all the data in the specified range.
358
359 TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
360 all the data in the image (ie, all the blocks in the care_map minus
361 clobbered_blocks, or including the clobbered blocks if
362 include_clobbered_blocks is True).
363
364 When creating a BlockImageDiff, the src image may be None, in which case the
365 list of transfers produced will never read from the original image.
366 """
367
Tao Bao293fd132016-06-11 12:19:23 -0700368 def __init__(self, tgt, src=None, threads=None, version=4,
369 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700370 if threads is None:
371 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700372 if threads == 0:
373 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700374 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700375 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700376 self.transfers = []
377 self.src_basenames = {}
378 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800379 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700380 self.touched_src_ranges = RangeSet()
381 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700382 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800383 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700384
Tao Bao8fad03e2017-03-01 14:36:26 -0800385 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700386
387 self.tgt = tgt
388 if src is None:
389 src = EmptyImage()
390 self.src = src
391
392 # The updater code that installs the patch always uses 4k blocks.
393 assert tgt.blocksize == 4096
394 assert src.blocksize == 4096
395
396 # The range sets in each filemap should comprise a partition of
397 # the care map.
398 self.AssertPartition(src.care_map, src.file_map.values())
399 self.AssertPartition(tgt.care_map, tgt.file_map.values())
400
  @property
  def max_stashed_size(self):
    # Read-only accessor for the peak stash size in bytes;
    # _max_stashed_size starts at 0 and is maintained internally while the
    # transfer sequence is computed.
    return self._max_stashed_size
404
Tao Baocb73aed2018-01-31 17:32:40 -0800405 @staticmethod
406 def FileTypeSupportedByImgdiff(filename):
407 """Returns whether the file type is supported by imgdiff."""
408 return filename.lower().endswith(('.apk', '.jar', '.zip'))
409
Tao Bao294651d2018-02-08 23:21:52 -0800410 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800411 """Checks whether we can apply imgdiff for the given RangeSets.
412
413 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
414 'imgdiff -z' if possible. Because it usually produces significantly smaller
415 patches than bsdiff.
416
417 This is permissible if all of the following conditions hold.
418 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
419 - The file type is supported by imgdiff;
420 - The source and target blocks are monotonic (i.e. the data is stored with
421 blocks in increasing order);
Tao Baoe709b092018-02-07 12:40:00 -0800422 - Both files don't contain shared blocks;
Tao Bao4ccea852018-02-06 15:16:41 -0800423 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800424 - We haven't removed any blocks from the source set.
425
426 If all these conditions are satisfied, concatenating all the blocks in the
427 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
428 in the last block). imgdiff is fine with extra zeros at the end of the file.
429
430 Args:
431 name: The filename to be diff'd.
432 tgt_ranges: The target RangeSet.
433 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800434 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800435
436 Returns:
437 A boolean result.
438 """
Tao Bao508b0872018-02-09 13:44:43 -0800439 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800440 return False
441
442 if not tgt_ranges.monotonic or not src_ranges.monotonic:
443 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
444 return False
445
Tao Baoe709b092018-02-07 12:40:00 -0800446 if (tgt_ranges.extra.get('uses_shared_blocks') or
447 src_ranges.extra.get('uses_shared_blocks')):
448 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
449 return False
450
Tao Bao4ccea852018-02-06 15:16:41 -0800451 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
452 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
453 return False
454
Tao Bao294651d2018-02-08 23:21:52 -0800455 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
456 else ImgdiffStats.USED_IMGDIFF)
457 self.imgdiff_stats.Log(name, reason)
458 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800459
  def Compute(self, prefix):
    """Computes the full transfer sequence and writes out the diff artifacts.

    Builds the transfers, orders them, revises stash usage to fit under the
    configured cache limit, verifies the result, and emits the patches and
    transfer list via ComputePatches/WriteTransfers using the given output
    prefix.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Order the transfers so stash usage stays low and dependencies hold.
    self.FindSequenceForTransfers()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      stash_limit = (common.OPTIONS.cache_size *
                     common.OPTIONS.stash_threshold / self.tgt.blocksize)
      # Ignore the stash limit and calculate the maximum simultaneously stashed
      # blocks needed.
      _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

      # We cannot stash more blocks than the stash limit simultaneously. As a
      # result, some 'diff' commands will be converted to new; leading to an
      # unintended large package. To mitigate this issue, we can carefully
      # choose the transfers for conversion. The number '1024' can be further
      # tweaked here to balance the package size and build time.
      if max_stashed_blocks > stash_limit + 1024:
        self.SelectAndConvertDiffTransfersToNew(
            max_stashed_blocks - stash_limit)
        # Regenerate the sequence as the graph has changed.
        self.FindSequenceForTransfers()

      # Revise the stash size again to keep the size under limit.
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()
505
Doug Zongkerfc44a512014-08-26 13:10:25 -0700506 def WriteTransfers(self, prefix):
Tianjie Xu37e29302016-06-23 16:10:35 -0700507 def WriteSplitTransfers(out, style, target_blocks):
508 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
Tianjie Xubb848c52016-06-21 15:54:09 -0700509
510 This prevents the target size of one command from being too large; and
511 might help to avoid fsync errors on some devices."""
512
Tao Bao3a2e3502016-12-28 09:14:39 -0800513 assert style == "new" or style == "zero"
Tianjie Xu37e29302016-06-23 16:10:35 -0700514 blocks_limit = 1024
Tianjie Xubb848c52016-06-21 15:54:09 -0700515 total = 0
Tianjie Xu37e29302016-06-23 16:10:35 -0700516 while target_blocks:
517 blocks_to_write = target_blocks.first(blocks_limit)
518 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
519 total += blocks_to_write.size()
520 target_blocks = target_blocks.subtract(blocks_to_write)
Tianjie Xubb848c52016-06-21 15:54:09 -0700521 return total
522
Doug Zongkerfc44a512014-08-26 13:10:25 -0700523 out = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700524 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700525
Tao Bao3a2e3502016-12-28 09:14:39 -0800526 # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
527 # id. 'stashes' records the map from 'hash' to the ref count. The stash
528 # will be freed only if the count decrements to zero.
Doug Zongker62338182014-09-08 08:29:55 -0700529 stashes = {}
530 stashed_blocks = 0
531 max_stashed_blocks = 0
532
Doug Zongkerfc44a512014-08-26 13:10:25 -0700533 for xf in self.transfers:
534
Tao Bao8fad03e2017-03-01 14:36:26 -0800535 for _, sr in xf.stash_before:
536 sh = self.src.RangeSha1(sr)
537 if sh in stashes:
538 stashes[sh] += 1
Sami Tolvanendd67a292014-12-09 16:40:34 +0000539 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800540 stashes[sh] = 1
541 stashed_blocks += sr.size()
542 self.touched_src_ranges = self.touched_src_ranges.union(sr)
543 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700544
545 if stashed_blocks > max_stashed_blocks:
546 max_stashed_blocks = stashed_blocks
547
Jesse Zhao7b985f62015-03-02 16:53:08 -0800548 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800549 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800550
Tao Bao8fad03e2017-03-01 14:36:26 -0800551 # <# blocks> <src ranges>
552 # OR
553 # <# blocks> <src ranges> <src locs> <stash refs...>
554 # OR
555 # <# blocks> - <stash refs...>
Doug Zongker62338182014-09-08 08:29:55 -0700556
Tao Bao8fad03e2017-03-01 14:36:26 -0800557 size = xf.src_ranges.size()
Tao Bao508b0872018-02-09 13:44:43 -0800558 src_str_buffer = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700559
Tao Bao8fad03e2017-03-01 14:36:26 -0800560 unstashed_src_ranges = xf.src_ranges
561 mapped_stashes = []
562 for _, sr in xf.use_stash:
563 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
564 sh = self.src.RangeSha1(sr)
565 sr = xf.src_ranges.map_within(sr)
566 mapped_stashes.append(sr)
567 assert sh in stashes
Tao Bao508b0872018-02-09 13:44:43 -0800568 src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
Tao Bao8fad03e2017-03-01 14:36:26 -0800569 stashes[sh] -= 1
570 if stashes[sh] == 0:
571 free_string.append("free %s\n" % (sh,))
572 free_size += sr.size()
573 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700574
Tao Bao8fad03e2017-03-01 14:36:26 -0800575 if unstashed_src_ranges:
Tao Bao508b0872018-02-09 13:44:43 -0800576 src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800577 if xf.use_stash:
578 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Tao Bao508b0872018-02-09 13:44:43 -0800579 src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800580 mapped_stashes.append(mapped_unstashed)
Doug Zongker62338182014-09-08 08:29:55 -0700581 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Tao Bao8fad03e2017-03-01 14:36:26 -0800582 else:
Tao Bao508b0872018-02-09 13:44:43 -0800583 src_str_buffer.insert(1, "-")
Tao Bao8fad03e2017-03-01 14:36:26 -0800584 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Doug Zongker62338182014-09-08 08:29:55 -0700585
Tao Bao508b0872018-02-09 13:44:43 -0800586 src_str = " ".join(src_str_buffer)
Doug Zongker62338182014-09-08 08:29:55 -0700587
Tao Bao8fad03e2017-03-01 14:36:26 -0800588 # version 3+:
Doug Zongker62338182014-09-08 08:29:55 -0700589 # zero <rangeset>
590 # new <rangeset>
591 # erase <rangeset>
Dan Albert8b72aef2015-03-23 19:13:21 -0700592 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
593 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
594 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700595
596 tgt_size = xf.tgt_ranges.size()
597
598 if xf.style == "new":
599 assert xf.tgt_ranges
Tianjie Xu37e29302016-06-23 16:10:35 -0700600 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700601 total += tgt_size
602 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700603 assert xf.tgt_ranges
604 assert xf.src_ranges.size() == tgt_size
605 if xf.src_ranges != xf.tgt_ranges:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100606 # take into account automatic stashing of overlapping blocks
607 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700608 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100609 if temp_stash_usage > max_stashed_blocks:
610 max_stashed_blocks = temp_stash_usage
611
Tao Baod522bdc2016-04-12 15:53:16 -0700612 self.touched_src_ranges = self.touched_src_ranges.union(
613 xf.src_ranges)
614
Tao Bao8fad03e2017-03-01 14:36:26 -0800615 out.append("%s %s %s %s\n" % (
Sami Tolvanendd67a292014-12-09 16:40:34 +0000616 xf.style,
Tao Bao183e56e2017-03-05 17:05:09 -0800617 xf.tgt_sha1,
Dan Albert8b72aef2015-03-23 19:13:21 -0700618 xf.tgt_ranges.to_string_raw(), src_str))
Tao Bao8fad03e2017-03-01 14:36:26 -0800619 total += tgt_size
620 elif xf.style in ("bsdiff", "imgdiff"):
621 assert xf.tgt_ranges
622 assert xf.src_ranges
623 # take into account automatic stashing of overlapping blocks
624 if xf.src_ranges.overlaps(xf.tgt_ranges):
625 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
626 if temp_stash_usage > max_stashed_blocks:
627 max_stashed_blocks = temp_stash_usage
628
629 self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
630
631 out.append("%s %d %d %s %s %s %s\n" % (
632 xf.style,
633 xf.patch_start, xf.patch_len,
634 xf.src_sha1,
635 xf.tgt_sha1,
636 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700637 total += tgt_size
638 elif xf.style == "zero":
639 assert xf.tgt_ranges
640 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
Tianjie Xu37e29302016-06-23 16:10:35 -0700641 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
Tianjie Xubb848c52016-06-21 15:54:09 -0700642 total += to_zero.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700643 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700644 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700645
Sami Tolvanendd67a292014-12-09 16:40:34 +0000646 if free_string:
647 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800648 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000649
Tao Bao8fad03e2017-03-01 14:36:26 -0800650 if common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700651 # Sanity check: abort if we're going to need more stash space than
652 # the allowed size (cache_size * threshold). There are two purposes
653 # of having a threshold here. a) Part of the cache may have been
654 # occupied by some recovery logs. b) It will buy us some time to deal
655 # with the oversize issue.
656 cache_size = common.OPTIONS.cache_size
657 stash_threshold = common.OPTIONS.stash_threshold
658 max_allowed = cache_size * stash_threshold
Tao Baoe8c68a02017-02-26 10:48:11 -0800659 assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
Tao Bao8dcf7382015-05-21 14:09:49 -0700660 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
661 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
662 self.tgt.blocksize, max_allowed, cache_size,
663 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700664
Tao Bao8fad03e2017-03-01 14:36:26 -0800665 self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
Tao Baod522bdc2016-04-12 15:53:16 -0700666
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700667 if self.tgt.hashtree_info:
668 out.append("compute_hash_tree {} {} {} {} {}\n".format(
669 self.tgt.hashtree_info.hashtree_range.to_string_raw(),
670 self.tgt.hashtree_info.filesystem_range.to_string_raw(),
671 self.tgt.hashtree_info.hash_algorithm,
672 self.tgt.hashtree_info.salt,
673 self.tgt.hashtree_info.root_hash))
674
Tao Baoe9b61912015-07-09 17:37:49 -0700675 # Zero out extended blocks as a workaround for bug 20881595.
676 if self.tgt.extended:
Tianjie Xu37e29302016-06-23 16:10:35 -0700677 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
Tianjie Xubb848c52016-06-21 15:54:09 -0700678 self.tgt.extended.size())
Tao Baob32d56e2015-09-09 11:55:01 -0700679 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700680
681 # We erase all the blocks on the partition that a) don't contain useful
Tao Bao66f1fa62016-05-03 10:02:01 -0700682 # data in the new image; b) will not be touched by dm-verity. Out of those
683 # blocks, we erase the ones that won't be used in this update at the
684 # beginning of an update. The rest would be erased at the end. This is to
685 # work around the eMMC issue observed on some devices, which may otherwise
686 # get starving for clean blocks and thus fail the update. (b/28347095)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700687 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700688 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
689 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
Tao Bao66f1fa62016-05-03 10:02:01 -0700690
691 erase_first = new_dontcare.subtract(self.touched_src_ranges)
692 if erase_first:
693 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
694
695 erase_last = new_dontcare.subtract(erase_first)
696 if erase_last:
697 out.append("erase %s\n" % (erase_last.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700698
699 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700700 out.insert(1, "%d\n" % (total,))
Tao Bao8fad03e2017-03-01 14:36:26 -0800701 # v3+: the number of stash slots is unused.
702 out.insert(2, "0\n")
703 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700704
705 with open(prefix + ".transfer.list", "wb") as f:
706 for i in out:
707 f.write(i)
708
Tao Bao8fad03e2017-03-01 14:36:26 -0800709 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
710 OPTIONS = common.OPTIONS
711 if OPTIONS.cache_size is not None:
712 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
Tao Bao32fcdab2018-10-12 10:30:39 -0700713 logger.info(
714 "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
715 max_stashed_blocks, self._max_stashed_size, max_allowed,
716 self._max_stashed_size * 100.0 / max_allowed)
Tao Bao8fad03e2017-03-01 14:36:26 -0800717 else:
Tao Bao32fcdab2018-10-12 10:30:39 -0700718 logger.info(
719 "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
720 max_stashed_blocks, self._max_stashed_size)
Doug Zongker62338182014-09-08 08:29:55 -0700721
xunchang3df4d5e2018-12-06 15:03:45 -0800722 def ReviseStashSize(self, ignore_stash_limit=False):
723 """ Revises the transfers to keep the stash size within the size limit.
724
725 Iterates through the transfer list and calculates the stash size each
726 transfer generates. Converts the affected transfers to new if we reach the
727 stash limit.
728
729 Args:
730 ignore_stash_limit: Ignores the stash limit and calculates the max
731 simultaneous stashed blocks instead. No change will be made to the
732 transfer list with this flag.
733
734 Return:
735 A tuple of (tgt blocks converted to new, max stashed blocks)
736 """
Tao Bao32fcdab2018-10-12 10:30:39 -0700737 logger.info("Revising stash size...")
Tao Baoe27acfd2016-12-16 11:13:55 -0800738 stash_map = {}
Tao Bao82c47982015-08-17 09:45:13 -0700739
740 # Create the map between a stash and its def/use points. For example, for a
Tao Bao3c5a16d2017-02-13 11:42:50 -0800741 # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
Tao Bao82c47982015-08-17 09:45:13 -0700742 for xf in self.transfers:
743 # Command xf defines (stores) all the stashes in stash_before.
Tao Bao3a2e3502016-12-28 09:14:39 -0800744 for stash_raw_id, sr in xf.stash_before:
745 stash_map[stash_raw_id] = (sr, xf)
Tao Bao82c47982015-08-17 09:45:13 -0700746
747 # Record all the stashes command xf uses.
Tao Bao3a2e3502016-12-28 09:14:39 -0800748 for stash_raw_id, _ in xf.use_stash:
749 stash_map[stash_raw_id] += (xf,)
Tao Bao82c47982015-08-17 09:45:13 -0700750
xunchang3df4d5e2018-12-06 15:03:45 -0800751 max_allowed_blocks = None
752 if not ignore_stash_limit:
753 # Compute the maximum blocks available for stash based on /cache size and
754 # the threshold.
755 cache_size = common.OPTIONS.cache_size
756 stash_threshold = common.OPTIONS.stash_threshold
757 max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize
Tao Bao82c47982015-08-17 09:45:13 -0700758
Tao Bao3a2e3502016-12-28 09:14:39 -0800759 # See the comments for 'stashes' in WriteTransfers().
Tao Baoe27acfd2016-12-16 11:13:55 -0800760 stashes = {}
Tao Bao82c47982015-08-17 09:45:13 -0700761 stashed_blocks = 0
Tao Bao9a5caf22015-08-25 15:10:10 -0700762 new_blocks = 0
xunchang3df4d5e2018-12-06 15:03:45 -0800763 max_stashed_blocks = 0
Tao Bao82c47982015-08-17 09:45:13 -0700764
765 # Now go through all the commands. Compute the required stash size on the
766 # fly. If a command requires excess stash than available, it deletes the
767 # stash by replacing the command that uses the stash with a "new" command
768 # instead.
769 for xf in self.transfers:
770 replaced_cmds = []
771
772 # xf.stash_before generates explicit stash commands.
Tao Bao3a2e3502016-12-28 09:14:39 -0800773 for stash_raw_id, sr in xf.stash_before:
Tao Baoe27acfd2016-12-16 11:13:55 -0800774 # Check the post-command stashed_blocks.
775 stashed_blocks_after = stashed_blocks
Tao Bao8fad03e2017-03-01 14:36:26 -0800776 sh = self.src.RangeSha1(sr)
777 if sh not in stashes:
Tao Baoe27acfd2016-12-16 11:13:55 -0800778 stashed_blocks_after += sr.size()
Tao Baoe27acfd2016-12-16 11:13:55 -0800779
xunchang3df4d5e2018-12-06 15:03:45 -0800780 if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
Tao Bao82c47982015-08-17 09:45:13 -0700781 # We cannot stash this one for a later command. Find out the command
782 # that will use this stash and replace the command with "new".
Tao Bao3a2e3502016-12-28 09:14:39 -0800783 use_cmd = stash_map[stash_raw_id][2]
Tao Bao82c47982015-08-17 09:45:13 -0700784 replaced_cmds.append(use_cmd)
Tao Bao32fcdab2018-10-12 10:30:39 -0700785 logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)
Tao Bao82c47982015-08-17 09:45:13 -0700786 else:
Tao Bao3c5a16d2017-02-13 11:42:50 -0800787 # Update the stashes map.
Tao Bao8fad03e2017-03-01 14:36:26 -0800788 if sh in stashes:
789 stashes[sh] += 1
Tao Bao3c5a16d2017-02-13 11:42:50 -0800790 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800791 stashes[sh] = 1
Tao Baoe27acfd2016-12-16 11:13:55 -0800792 stashed_blocks = stashed_blocks_after
xunchang3df4d5e2018-12-06 15:03:45 -0800793 max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)
Tao Bao82c47982015-08-17 09:45:13 -0700794
795 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
796 # ComputePatches(), they both have the style of "diff".
Tao Bao8fad03e2017-03-01 14:36:26 -0800797 if xf.style == "diff":
Tao Bao82c47982015-08-17 09:45:13 -0700798 assert xf.tgt_ranges and xf.src_ranges
799 if xf.src_ranges.overlaps(xf.tgt_ranges):
xunchang3df4d5e2018-12-06 15:03:45 -0800800 if (max_allowed_blocks and
801 stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
Tao Bao82c47982015-08-17 09:45:13 -0700802 replaced_cmds.append(xf)
Tao Bao32fcdab2018-10-12 10:30:39 -0700803 logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)
xunchang3df4d5e2018-12-06 15:03:45 -0800804 else:
805 # The whole source ranges will be stashed for implicit stashes.
806 max_stashed_blocks = max(max_stashed_blocks,
807 stashed_blocks + xf.src_ranges.size())
Tao Bao82c47982015-08-17 09:45:13 -0700808
809 # Replace the commands in replaced_cmds with "new"s.
810 for cmd in replaced_cmds:
811 # It no longer uses any commands in "use_stash". Remove the def points
812 # for all those stashes.
Tao Bao3a2e3502016-12-28 09:14:39 -0800813 for stash_raw_id, sr in cmd.use_stash:
814 def_cmd = stash_map[stash_raw_id][1]
815 assert (stash_raw_id, sr) in def_cmd.stash_before
816 def_cmd.stash_before.remove((stash_raw_id, sr))
Tao Bao82c47982015-08-17 09:45:13 -0700817
Tianjie Xuebe39a02016-01-14 14:12:26 -0800818 # Add up blocks that violates space limit and print total number to
819 # screen later.
820 new_blocks += cmd.tgt_ranges.size()
Tao Bao82c47982015-08-17 09:45:13 -0700821 cmd.ConvertToNew()
822
Tao Bao3a2e3502016-12-28 09:14:39 -0800823 # xf.use_stash may generate free commands.
Tao Bao8fad03e2017-03-01 14:36:26 -0800824 for _, sr in xf.use_stash:
825 sh = self.src.RangeSha1(sr)
826 assert sh in stashes
827 stashes[sh] -= 1
828 if stashes[sh] == 0:
Tao Baoe27acfd2016-12-16 11:13:55 -0800829 stashed_blocks -= sr.size()
Tao Bao8fad03e2017-03-01 14:36:26 -0800830 stashes.pop(sh)
Tao Baoe27acfd2016-12-16 11:13:55 -0800831
Tianjie Xuebe39a02016-01-14 14:12:26 -0800832 num_of_bytes = new_blocks * self.tgt.blocksize
Tao Bao32fcdab2018-10-12 10:30:39 -0700833 logger.info(
834 " Total %d blocks (%d bytes) are packed as new blocks due to "
xunchangb6105dc2018-12-06 16:39:46 -0800835 "insufficient cache size. Maximum blocks stashed simultaneously: %d",
836 new_blocks, num_of_bytes, max_stashed_blocks)
xunchang3df4d5e2018-12-06 15:03:45 -0800837 return new_blocks, max_stashed_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700838
Doug Zongkerfc44a512014-08-26 13:10:25 -0700839 def ComputePatches(self, prefix):
Tao Bao32fcdab2018-10-12 10:30:39 -0700840 logger.info("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800841 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700842 patch_num = 0
843 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800844 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700845 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700846 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao32fcdab2018-10-12 10:30:39 -0700847 logger.info(
848 "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
849 xf.style, xf.tgt_name, str(xf.tgt_ranges))
Tao Bao08c85832016-09-19 22:26:30 -0700850
Doug Zongkerfc44a512014-08-26 13:10:25 -0700851 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800852 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700853 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao32fcdab2018-10-12 10:30:39 -0700854 logger.info(
855 "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
856 xf.style, xf.tgt_name, str(xf.tgt_ranges))
Tao Bao08c85832016-09-19 22:26:30 -0700857
Doug Zongkerfc44a512014-08-26 13:10:25 -0700858 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700859 # We can't compare src and tgt directly because they may have
860 # the same content but be broken up into blocks differently, eg:
861 #
862 # ["he", "llo"] vs ["h", "ello"]
863 #
864 # We want those to compare equal, ideally without having to
865 # actually concatenate the strings (these may be tens of
866 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800867 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700868 # These are identical; we don't need to generate a patch,
869 # just issue copy commands on the device.
870 xf.style = "move"
xunchang21531832018-12-06 14:20:05 -0800871 xf.patch_info = None
Tao Bao183e56e2017-03-05 17:05:09 -0800872 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700873 if xf.src_ranges != xf.tgt_ranges:
Tao Bao32fcdab2018-10-12 10:30:39 -0700874 logger.info(
875 "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size, tgt_size,
876 100.0, xf.style,
Tao Bao08c85832016-09-19 22:26:30 -0700877 xf.tgt_name if xf.tgt_name == xf.src_name else (
878 xf.tgt_name + " (from " + xf.src_name + ")"),
Tao Bao32fcdab2018-10-12 10:30:39 -0700879 str(xf.tgt_ranges), str(xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700880 else:
xunchang21531832018-12-06 14:20:05 -0800881 if xf.patch_info:
882 # We have already generated the patch (e.g. during split of large
883 # APKs or reduction of stash size)
884 imgdiff = xf.patch_info.imgdiff
Tianjie Xu25366072017-09-08 17:19:02 -0700885 else:
Tao Baocb73aed2018-01-31 17:32:40 -0800886 imgdiff = self.CanUseImgdiff(
887 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700888 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800889 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700890 patch_num += 1
891
892 else:
893 assert False, "unknown style " + xf.style
894
xunchang21531832018-12-06 14:20:05 -0800895 patches = self.ComputePatchesForInputList(diff_queue, False)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700896
Tao Bao183e56e2017-03-05 17:05:09 -0800897 offset = 0
898 with open(prefix + ".patch.dat", "wb") as patch_fd:
xunchang21531832018-12-06 14:20:05 -0800899 for index, patch_info, _ in patches:
Tao Bao183e56e2017-03-05 17:05:09 -0800900 xf = self.transfers[index]
xunchang21531832018-12-06 14:20:05 -0800901 xf.patch_len = len(patch_info.content)
Tao Bao183e56e2017-03-05 17:05:09 -0800902 xf.patch_start = offset
903 offset += xf.patch_len
xunchang21531832018-12-06 14:20:05 -0800904 patch_fd.write(patch_info.content)
Tao Bao183e56e2017-03-05 17:05:09 -0800905
Tao Bao32fcdab2018-10-12 10:30:39 -0700906 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
907 logger.info(
908 "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
909 xf.patch_len * 100.0 / tgt_size, xf.style,
910 xf.tgt_name if xf.tgt_name == xf.src_name else (
911 xf.tgt_name + " (from " + xf.src_name + ")"),
912 xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700913
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800914 def AssertSha1Good(self):
915 """Check the SHA-1 of the src & tgt blocks in the transfer list.
916
917 Double check the SHA-1 value to avoid the issue in b/71908713, where
918 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
919 environment. That specific problem has been fixed by protecting the
920 underlying generator function 'SparseImage._GetRangeData()' with lock.
921 """
922 for xf in self.transfers:
923 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
924 assert xf.tgt_sha1 == tgt_sha1
925 if xf.style == "diff":
926 src_sha1 = self.src.RangeSha1(xf.src_ranges)
927 assert xf.src_sha1 == src_sha1
928
Doug Zongkerfc44a512014-08-26 13:10:25 -0700929 def AssertSequenceGood(self):
930 # Simulate the sequences of transfers we will output, and check that:
931 # - we never read a block after writing it, and
932 # - we write every block we care about exactly once.
933
934 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800935 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700936
937 # Imagine processing the transfers in order.
938 for xf in self.transfers:
939 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700940
941 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800942 for _, sr in xf.use_stash:
943 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700944
Doug Zongker6ab2a502016-02-09 08:28:09 -0800945 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800946 # Source image could be larger. Don't check the blocks that are in the
947 # source image only. Since they are not in 'touched', and won't ever
948 # be touched.
949 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800950 assert touched[i] == 0
951
952 # Check that the output blocks for this transfer haven't yet
953 # been touched, and touch all the blocks written by this
954 # transfer.
955 for s, e in xf.tgt_ranges:
956 for i in range(s, e):
957 assert touched[i] == 0
958 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700959
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700960 if self.tgt.hashtree_info:
961 for s, e in self.tgt.hashtree_info.hashtree_range:
962 for i in range(s, e):
963 assert touched[i] == 0
964 touched[i] = 1
965
Doug Zongkerfc44a512014-08-26 13:10:25 -0700966 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800967 for s, e in self.tgt.care_map:
968 for i in range(s, e):
969 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700970
xunchang21531832018-12-06 14:20:05 -0800971 def FindSequenceForTransfers(self):
972 """Finds a sequence for the given transfers.
973
974 The goal is to minimize the violation of order dependencies between these
975 transfers, so that fewer blocks are stashed when applying the update.
976 """
977
978 # Clear the existing dependency between transfers
979 for xf in self.transfers:
980 xf.goes_before = OrderedDict()
981 xf.goes_after = OrderedDict()
982
983 xf.stash_before = []
984 xf.use_stash = []
985
986 # Find the ordering dependencies among transfers (this is O(n^2)
987 # in the number of transfers).
988 self.GenerateDigraph()
989 # Find a sequence of transfers that satisfies as many ordering
990 # dependencies as possible (heuristically).
991 self.FindVertexSequence()
992 # Fix up the ordering dependencies that the sequence didn't
993 # satisfy.
994 self.ReverseBackwardEdges()
995 self.ImproveVertexSequence()
996
Doug Zongker62338182014-09-08 08:29:55 -0700997 def ImproveVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -0700998 logger.info("Improving vertex order...")
Doug Zongker62338182014-09-08 08:29:55 -0700999
1000 # At this point our digraph is acyclic; we reversed any edges that
1001 # were backwards in the heuristically-generated sequence. The
1002 # previously-generated order is still acceptable, but we hope to
1003 # find a better order that needs less memory for stashed data.
1004 # Now we do a topological sort to generate a new vertex order,
1005 # using a greedy algorithm to choose which vertex goes next
1006 # whenever we have a choice.
1007
1008 # Make a copy of the edge set; this copy will get destroyed by the
1009 # algorithm.
1010 for xf in self.transfers:
1011 xf.incoming = xf.goes_after.copy()
1012 xf.outgoing = xf.goes_before.copy()
1013
1014 L = [] # the new vertex order
1015
1016 # S is the set of sources in the remaining graph; we always choose
1017 # the one that leaves the least amount of stashed data after it's
1018 # executed.
1019 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1020 if not u.incoming]
1021 heapq.heapify(S)
1022
1023 while S:
1024 _, _, xf = heapq.heappop(S)
1025 L.append(xf)
1026 for u in xf.outgoing:
1027 del u.incoming[xf]
1028 if not u.incoming:
1029 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1030
1031 # if this fails then our graph had a cycle.
1032 assert len(L) == len(self.transfers)
1033
1034 self.transfers = L
1035 for i, xf in enumerate(L):
1036 xf.order = i
1037
Doug Zongker62338182014-09-08 08:29:55 -07001038 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001039 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1040
1041 For each transfer, make sure it properly stashes the blocks it touches and
1042 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1043 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1044 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1045 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1046 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1047 blocks will be written to the same stash slot in WriteTransfers().
1048 """
1049
Tao Bao32fcdab2018-10-12 10:30:39 -07001050 logger.info("Reversing backward edges...")
Doug Zongker62338182014-09-08 08:29:55 -07001051 in_order = 0
1052 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001053 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001054 stash_size = 0
1055
1056 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001057 for u in xf.goes_before.copy():
1058 # xf should go before u
1059 if xf.order < u.order:
1060 # it does, hurray!
1061 in_order += 1
1062 else:
1063 # it doesn't, boo. modify u to stash the blocks that it
1064 # writes that xf wants to read, and then require u to go
1065 # before xf.
1066 out_of_order += 1
1067
1068 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1069 assert overlap
1070
Tao Bao3a2e3502016-12-28 09:14:39 -08001071 u.stash_before.append((stash_raw_id, overlap))
1072 xf.use_stash.append((stash_raw_id, overlap))
1073 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001074 stash_size += overlap.size()
1075
1076 # reverse the edge direction; now xf must go after u
1077 del xf.goes_before[u]
1078 del u.goes_after[xf]
1079 xf.goes_after[u] = None # value doesn't matter
1080 u.goes_before[xf] = None
1081
Tao Bao32fcdab2018-10-12 10:30:39 -07001082 logger.info(
1083 " %d/%d dependencies (%.2f%%) were violated; %d source blocks "
1084 "stashed.", out_of_order, in_order + out_of_order,
1085 (out_of_order * 100.0 / (in_order + out_of_order)) if (
1086 in_order + out_of_order) else 0.0,
1087 stash_size)
Doug Zongker62338182014-09-08 08:29:55 -07001088
Doug Zongkerfc44a512014-08-26 13:10:25 -07001089 def FindVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001090 logger.info("Finding vertex sequence...")
Doug Zongkerfc44a512014-08-26 13:10:25 -07001091
1092 # This is based on "A Fast & Effective Heuristic for the Feedback
1093 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1094 # it as starting with the digraph G and moving all the vertices to
1095 # be on a horizontal line in some order, trying to minimize the
1096 # number of edges that end up pointing to the left. Left-pointing
1097 # edges will get removed to turn the digraph into a DAG. In this
1098 # case each edge has a weight which is the number of source blocks
1099 # we'll lose if that edge is removed; we try to minimize the total
1100 # weight rather than just the number of edges.
1101
1102 # Make a copy of the edge set; this copy will get destroyed by the
1103 # algorithm.
1104 for xf in self.transfers:
1105 xf.incoming = xf.goes_after.copy()
1106 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001107 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001108
1109 # We use an OrderedDict instead of just a set so that the output
1110 # is repeatable; otherwise it would depend on the hash values of
1111 # the transfer objects.
1112 G = OrderedDict()
1113 for xf in self.transfers:
1114 G[xf] = None
1115 s1 = deque() # the left side of the sequence, built from left to right
1116 s2 = deque() # the right side of the sequence, built from right to left
1117
Doug Zongker6ab2a502016-02-09 08:28:09 -08001118 heap = []
1119 for xf in self.transfers:
1120 xf.heap_item = HeapItem(xf)
1121 heap.append(xf.heap_item)
1122 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001123
Tao Bao33482282016-10-24 16:49:08 -07001124 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1125 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1126 # { key1: None, key2: None, ... }.
1127 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1128 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001129
1130 def adjust_score(iu, delta):
1131 iu.score += delta
1132 iu.heap_item.clear()
1133 iu.heap_item = HeapItem(iu)
1134 heapq.heappush(heap, iu.heap_item)
1135
1136 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001137 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001138 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001139 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001140 for u in sinks:
Tao Bao508b0872018-02-09 13:44:43 -08001141 if u not in G:
1142 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001143 s2.appendleft(u)
1144 del G[u]
1145 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001146 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001147 if not iu.outgoing:
1148 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001149 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001150
1151 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001152 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001153 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001154 for u in sources:
Tao Bao508b0872018-02-09 13:44:43 -08001155 if u not in G:
1156 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001157 s1.append(u)
1158 del G[u]
1159 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001160 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001161 if not iu.incoming:
1162 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001163 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001164
Tao Bao508b0872018-02-09 13:44:43 -08001165 if not G:
1166 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001167
1168 # Find the "best" vertex to put next. "Best" is the one that
1169 # maximizes the net difference in source blocks saved we get by
1170 # pretending it's a source rather than a sink.
1171
Doug Zongker6ab2a502016-02-09 08:28:09 -08001172 while True:
1173 u = heapq.heappop(heap)
1174 if u and u.item in G:
1175 u = u.item
1176 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001177
Doug Zongkerfc44a512014-08-26 13:10:25 -07001178 s1.append(u)
1179 del G[u]
1180 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001181 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001182 if not iu.incoming:
1183 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001184
Doug Zongkerfc44a512014-08-26 13:10:25 -07001185 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001186 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001187 if not iu.outgoing:
1188 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001189
1190 # Now record the sequence in the 'order' field of each transfer,
1191 # and by rearranging self.transfers to be in the chosen sequence.
1192
1193 new_transfers = []
1194 for x in itertools.chain(s1, s2):
1195 x.order = len(new_transfers)
1196 new_transfers.append(x)
1197 del x.incoming
1198 del x.outgoing
1199
1200 self.transfers = new_transfers
1201
  def GenerateDigraph(self):
    """Builds the ordering-dependency digraph among all transfers.

    For every pair of transfers (a, b) where a writes blocks that b reads,
    adds an edge requiring b to go before a. The edge weight is the number
    of overlapping blocks — the cost, in source blocks, of breaking the
    dependency.
    """
    logger.info("Generating digraph...")

    # Each item of source_ranges will be:
    # - None, if that block is not used as a source,
    # - an ordered set of transfers.
    source_ranges = []
    for b in self.transfers:
      for s, e in b.src_ranges:
        if e > len(source_ranges):
          # Grow the per-block table lazily up to the highest source block.
          source_ranges.extend([None] * (e-len(source_ranges)))
        for i in range(s, e):
          if source_ranges[i] is None:
            source_ranges[i] = OrderedDict.fromkeys([b])
          else:
            source_ranges[i][b] = None

    for a in self.transfers:
      # Collect every transfer that reads a block which a writes.
      intersections = OrderedDict()
      for s, e in a.tgt_ranges:
        for i in range(s, e):
          if i >= len(source_ranges):
            break
          # Add all the Transfers in source_ranges[i] to the (ordered) set.
          if source_ranges[i] is not None:
            for j in source_ranges[i]:
              intersections[j] = None

      for b in intersections:
        if a is b:
          continue

        # If the blocks written by A are read by B, then B needs to go before A.
        i = a.tgt_ranges.intersect(b.src_ranges)
        if i:
          if b.src_name == "__ZERO":
            # the cost of removing source blocks for the __ZERO domain
            # is (nearly) zero.
            size = 0
          else:
            size = i.size()
          b.goes_before[a] = size
          a.goes_after[b] = size
1245
xunchang21531832018-12-06 14:20:05 -08001246 def ComputePatchesForInputList(self, diff_queue, compress_target):
1247 """Returns a list of patch information for the input list of transfers.
1248
1249 Args:
1250 diff_queue: a list of transfers with style 'diff'
1251 compress_target: If True, compresses the target ranges of each
1252 transfers; and save the size.
1253
1254 Returns:
1255 A list of (transfer order, patch_info, compressed_size) tuples.
1256 """
1257
1258 if not diff_queue:
1259 return []
1260
1261 if self.threads > 1:
1262 logger.info("Computing patches (using %d threads)...", self.threads)
1263 else:
1264 logger.info("Computing patches...")
1265
1266 diff_total = len(diff_queue)
1267 patches = [None] * diff_total
1268 error_messages = []
1269
1270 # Using multiprocessing doesn't give additional benefits, due to the
1271 # pattern of the code. The diffing work is done by subprocess.call, which
1272 # already runs in a separate process (not affected much by the GIL -
1273 # Global Interpreter Lock). Using multiprocess also requires either a)
1274 # writing the diff input files in the main process before forking, or b)
1275 # reopening the image file (SparseImage) in the worker processes. Doing
1276 # neither of them further improves the performance.
1277 lock = threading.Lock()
1278
1279 def diff_worker():
1280 while True:
1281 with lock:
1282 if not diff_queue:
1283 return
1284 xf_index, imgdiff, patch_index = diff_queue.pop()
1285 xf = self.transfers[xf_index]
1286
1287 message = []
1288 compressed_size = None
1289
1290 patch_info = xf.patch_info
1291 if not patch_info:
1292 src_file = common.MakeTempFile(prefix="src-")
1293 with open(src_file, "wb") as fd:
1294 self.src.WriteRangeDataToFd(xf.src_ranges, fd)
1295
1296 tgt_file = common.MakeTempFile(prefix="tgt-")
1297 with open(tgt_file, "wb") as fd:
1298 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)
1299
1300 try:
1301 patch_info = compute_patch(src_file, tgt_file, imgdiff)
1302 except ValueError as e:
1303 message.append(
1304 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
1305 "imgdiff" if imgdiff else "bsdiff",
1306 xf.tgt_name if xf.tgt_name == xf.src_name else
1307 xf.tgt_name + " (from " + xf.src_name + ")",
1308 xf.tgt_ranges, xf.src_ranges, e.message))
1309
1310 if compress_target:
1311 tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
1312 try:
1313 # Compresses with the default level
1314 compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
1315 compressed_data = (compress_obj.compress("".join(tgt_data))
1316 + compress_obj.flush())
1317 compressed_size = len(compressed_data)
1318 except zlib.error as e:
1319 message.append(
1320 "Failed to compress the data in target range {} for {}:\n"
1321 "{}".format(xf.tgt_ranges, xf.tgt_name, e.message))
1322
1323 if message:
1324 with lock:
1325 error_messages.extend(message)
1326
1327 with lock:
1328 patches[patch_index] = (xf_index, patch_info, compressed_size)
1329
1330 threads = [threading.Thread(target=diff_worker)
1331 for _ in range(self.threads)]
1332 for th in threads:
1333 th.start()
1334 while threads:
1335 threads.pop().join()
1336
1337 if error_messages:
1338 logger.error('ERROR:')
1339 logger.error('\n'.join(error_messages))
1340 logger.error('\n\n\n')
1341 sys.exit(1)
1342
1343 return patches
1344
xunchangb6105dc2018-12-06 16:39:46 -08001345 def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):
xunchang3df4d5e2018-12-06 15:03:45 -08001346 """Converts the diff transfers to reduce the max simultaneous stash.
1347
1348 Since the 'new' data is compressed with deflate, we can select the 'diff'
1349 transfers for conversion by comparing its patch size with the size of the
1350 compressed data. Ideally, we want to convert the transfers with a small
1351 size increase, but using a large number of stashed blocks.
1352 """
xunchangb6105dc2018-12-06 16:39:46 -08001353 TransferSizeScore = namedtuple("TransferSizeScore",
1354 "xf, used_stash_blocks, score")
xunchang3df4d5e2018-12-06 15:03:45 -08001355
1356 logger.info("Selecting diff commands to convert to new.")
1357 diff_queue = []
1358 for xf in self.transfers:
1359 if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
1360 use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
1361 xf.src_ranges)
1362 diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))
1363
1364 # Remove the 'move' transfers, and compute the patch & compressed size
1365 # for the remaining.
1366 result = self.ComputePatchesForInputList(diff_queue, True)
1367
xunchangb6105dc2018-12-06 16:39:46 -08001368 conversion_candidates = []
xunchang3df4d5e2018-12-06 15:03:45 -08001369 for xf_index, patch_info, compressed_size in result:
1370 xf = self.transfers[xf_index]
1371 if not xf.patch_info:
1372 xf.patch_info = patch_info
1373
1374 size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
1375 diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
xunchangb6105dc2018-12-06 16:39:46 -08001376 logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"
xunchang3df4d5e2018-12-06 15:03:45 -08001377 " compression_size: %d, ratio %.2f%%", xf.tgt_name,
1378 xf.tgt_ranges.size(), diff_style,
1379 len(xf.patch_info.content), compressed_size, size_ratio)
1380
xunchangb6105dc2018-12-06 16:39:46 -08001381 used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)
xunchang3df4d5e2018-12-06 15:03:45 -08001382 # Convert the transfer to new if the compressed size is smaller or equal.
1383 # We don't need to maintain the stash_before lists here because the
1384 # graph will be regenerated later.
1385 if len(xf.patch_info.content) >= compressed_size:
xunchangb6105dc2018-12-06 16:39:46 -08001386 # Add the transfer to the candidate list with negative score. And it
1387 # will be converted later.
1388 conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
1389 -1))
1390 elif used_stash_blocks > 0:
1391 # This heuristic represents the size increase in the final package to
1392 # remove per unit of stashed data.
1393 score = ((compressed_size - len(xf.patch_info.content)) * 100.0
1394 / used_stash_blocks)
1395 conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
1396 score))
1397 # Transfers with lower score (i.e. less expensive to convert) will be
1398 # converted first.
1399 conversion_candidates.sort(key=lambda x: x.score)
xunchang3df4d5e2018-12-06 15:03:45 -08001400
xunchangb6105dc2018-12-06 16:39:46 -08001401 # TODO(xunchang), improve the logic to find the transfers to convert, e.g.
1402 # convert the ones that contribute to the max stash, run ReviseStashSize
1403 # multiple times etc.
1404 removed_stashed_blocks = 0
1405 for xf, used_stash_blocks, _ in conversion_candidates:
1406 logger.info("Converting %s to new", xf.tgt_name)
1407 xf.ConvertToNew()
1408 removed_stashed_blocks += used_stash_blocks
1409 # Experiments show that we will get a smaller package size if we remove
1410 # slightly more stashed blocks than the violated stash blocks.
1411 if removed_stashed_blocks >= violated_stash_blocks:
1412 break
xunchang3df4d5e2018-12-06 15:03:45 -08001413
1414 logger.info("Removed %d stashed blocks", removed_stashed_blocks)
1415
  def FindTransfers(self):
    """Parse the file_map to generate all the transfers.

    For each target file, a source counterpart is searched in order: exact
    pathname match, exact basename match, then digit-collapsed basename
    match ("libfoo12.so" -> "libfoo#.so"); matches become "diff" transfers,
    unmatched files become "new" (and "__ZERO" blocks become "zero").
    Oversized diff transfers are split into chunks; large APKs are instead
    split by imgdiff on worker threads so each piece carries a precomputed
    patch. Every Transfer() is constructed with self.transfers as its
    registry (by_id).
    """

    def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                             src_ranges, style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""

      pieces = 0
      # Peel off full-size chunks from the front of both range sets until one
      # of them fits within the per-piece limit.
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
                 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
                 style, by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
                          by_id):
      """Find all the zip files and split the others with a fixed chunk size.

      This function will construct a list of zip archives, which will later be
      split by imgdiff to reduce the final patch size. For the other files,
      we will plainly split them based on a fixed chunk size with the potential
      patch size penalty.
      """

      assert style == "diff"

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Split large APKs with imgdiff, if possible. We're intentionally checking
      # file types one more time (CanUseImgdiff() checks that as well), before
      # calling the costly RangeSha1()s.
      if (self.FileTypeSupportedByImgdiff(tgt_name) and
          self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
        if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
          # Deferred: the worker threads started at the end of FindTransfers
          # will pop these entries and run imgdiff on them.
          large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
          return

      AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                           src_ranges, style, by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer().

      With split=True and style "diff", large files are chunked and .odex
      files get block-wise cropping of unchanged regions; otherwise a single
      Transfer() is added as-is.
      """

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only few changes (e.g. header),
      # we will patch the changed blocks only. This avoids stashing unchanged
      # blocks while patching. We limit the analysis to files without size
      # changes only. This is to avoid sacrificing the OTA generation cost too
      # much.
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff
        # that may have better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        # Compare the files block by block; accumulate identical blocks into
        # the skipped sets and count the changed target blocks.
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if no clear sign of benefits.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          logger.info(
              '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
              tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd as
          # the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
                                  split_info):
      """Parse the split_info and return a list of info tuples.

      Args:
        patch_size: total size of the patch file.
        tgt_ranges: Ranges of the target file within the original image.
        src_ranges: Ranges of the source file within the original image.
        split_info format:
          imgdiff version#
          count of pieces
          <patch_size_1> <tgt_size_1> <src_ranges_1>
          ...
          <patch_size_n> <tgt_size_n> <src_ranges_n>

      Returns:
        [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
      """

      version = int(split_info[0])
      assert version == 2
      count = int(split_info[1])
      assert len(split_info) - 2 == count

      split_info_list = []
      patch_start = 0
      tgt_remain = copy.deepcopy(tgt_ranges)
      # each line has the format <patch_size>, <tgt_size>, <src_ranges>
      for line in split_info[2:]:
        info = line.split()
        assert len(info) == 3
        patch_length = int(info[0])

        split_tgt_size = int(info[1])
        assert split_tgt_size % 4096 == 0
        # NOTE(review): relies on Python 2 integer division ('/'); under
        # Python 3 this yields a float — confirm before migrating.
        assert split_tgt_size / 4096 <= tgt_remain.size()
        split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
        tgt_remain = tgt_remain.subtract(split_tgt_ranges)

        # Find the split_src_ranges within the image file from its relative
        # position in file.
        split_src_indices = RangeSet.parse_raw(info[2])
        split_src_ranges = RangeSet()
        for r in split_src_indices:
          curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
          assert not split_src_ranges.overlaps(curr_range)
          split_src_ranges = split_src_ranges.union(curr_range)

        split_info_list.append((patch_start, patch_length,
                                split_tgt_ranges, split_src_ranges))
        patch_start += patch_length

      # Check that the sizes of all the split pieces add up to the final file
      # size for patch and target.
      assert tgt_remain.size() == 0
      assert patch_start == patch_size
      return split_info_list

    def SplitLargeApks():
      """Split the large apks files.

      Example: Chrome.apk will be split into
        src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
        src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
        ...

      After the split, the target pieces are continuous and block aligned; and
      the source pieces are mutually exclusive. During the split, we also
      generate and save the image patch between src-X & tgt-X. This patch will
      be valid because the block ranges of src-X & tgt-X will always stay the
      same afterwards; but there's a chance we don't use the patch if we
      convert the "diff" command into "new" or "move" later.
      """

      while True:
        # Only the shared work-queue pop is serialized; the expensive imgdiff
        # invocation below runs outside the lock.
        with transfer_lock:
          if not large_apks:
            return
          tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)

        src_file = common.MakeTempFile(prefix="src-")
        tgt_file = common.MakeTempFile(prefix="tgt-")
        with open(src_file, "wb") as src_fd:
          self.src.WriteRangeDataToFd(src_ranges, src_fd)
        with open(tgt_file, "wb") as tgt_fd:
          self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)

        patch_file = common.MakeTempFile(prefix="patch-")
        patch_info_file = common.MakeTempFile(prefix="split_info-")
        cmd = ["imgdiff", "-z",
               "--block-limit={}".format(max_blocks_per_transfer),
               "--split-info=" + patch_info_file,
               src_file, tgt_file, patch_file]
        proc = common.Run(cmd)
        imgdiff_output, _ = proc.communicate()
        assert proc.returncode == 0, \
            "Failed to create imgdiff patch between {} and {}:\n{}".format(
                src_name, tgt_name, imgdiff_output)

        with open(patch_info_file) as patch_info:
          lines = patch_info.readlines()

        patch_size_total = os.path.getsize(patch_file)
        split_info_list = ParseAndValidateSplitInfo(patch_size_total,
                                                    tgt_ranges, src_ranges,
                                                    lines)
        for index, (patch_start, patch_length, split_tgt_ranges,
                    split_src_ranges) in enumerate(split_info_list):
          # NOTE(review): the patch is binary but the file is opened in text
          # mode; behaves the same as binary on POSIX (no newline
          # translation) — confirm if portability ever matters.
          with open(patch_file) as f:
            f.seek(patch_start)
            patch_content = f.read(patch_length)

          split_src_name = "{}-{}".format(src_name, index)
          split_tgt_name = "{}-{}".format(tgt_name, index)
          split_large_apks.append((split_tgt_name,
                                   split_src_name,
                                   split_tgt_ranges,
                                   split_src_ranges,
                                   patch_content))

    logger.info("Finding transfers...")

    # Shared state for the nested helpers and the SplitLargeApks workers.
    large_apks = []
    split_large_apks = []
    cache_size = common.OPTIONS.cache_size
    # Per-piece limit: 1/8 of the cache size (see the chunk-split docstring
    # above for why this ratio was chosen).
    split_threshold = 0.125
    max_blocks_per_transfer = int(cache_size * split_threshold /
                                  self.tgt.blocksize)
    # Shared empty RangeSet, used as the source for "new"/"zero" transfers.
    empty = RangeSet()
    # Walk the target files in sorted order so the transfer list (and hence
    # the package) is generated deterministically.
    for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros. We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn == "__HASHTREE":
        # Hashtree blocks are regenerated on the device; no transfer needed.
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, True)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#"). (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      # No source counterpart found: ship the whole file as "new" data.
      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

    # The lock guards 'large_apks'; it must exist before the workers start.
    transfer_lock = threading.Lock()
    threads = [threading.Thread(target=SplitLargeApks)
               for _ in range(self.threads)]
    for th in threads:
      th.start()
    while threads:
      threads.pop().join()

    # Sort the split transfers for large apks to generate a determinate package.
    split_large_apks.sort()
    for (tgt_name, src_name, tgt_ranges, src_ranges,
         patch) in split_large_apks:
      transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                                self.tgt.RangeSha1(tgt_ranges),
                                self.src.RangeSha1(src_ranges),
                                "diff", self.transfers)
      # Attach the imgdiff patch computed during the split, so it doesn't
      # have to be recomputed later (unless the command gets converted).
      transfer_split.patch_info = PatchInfo(True, patch)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001754
Doug Zongkerfc44a512014-08-26 13:10:25 -07001755 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001756 for k in self.src.file_map.keys():
1757 b = os.path.basename(k)
1758 self.src_basenames[b] = k
1759 b = re.sub("[0-9]+", "#", b)
1760 self.src_numpatterns[b] = k
1761
1762 @staticmethod
1763 def AssertPartition(total, seq):
1764 """Assert that all the RangeSets in 'seq' form a partition of the
1765 'total' RangeSet (ie, they are nonintersecting and their union
1766 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001767
Doug Zongkerfc44a512014-08-26 13:10:25 -07001768 so_far = RangeSet()
1769 for i in seq:
1770 assert not so_far.overlaps(i)
1771 so_far = so_far.union(i)
1772 assert so_far == total