# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import array
import copy
import functools
import heapq
import itertools
import logging
import multiprocessing
import os
import os.path
import re
import sys
import threading
import zlib
from collections import deque, namedtuple, OrderedDict
from hashlib import sha1

import common
from rangelib import RangeSet

__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

logger = logging.getLogger(__name__)

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])


def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Calls bsdiff|imgdiff to compute the patch data, returns a PatchInfo."""
  patchfile = common.MakeTempFile(prefix='patch-')

  cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
  cmd.extend([srcfile, tgtfile, patchfile])

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())

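# Illustrative usage only (hypothetical file paths, not part of this
# module's flow):
#
#   info = compute_patch('/tmp/src.img', '/tmp/tgt.img', imgdiff=True)
#   assert info.imgdiff
#   patch_bytes = info.content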

class Image(object):
  def RangeSha1(self, ranges):
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    raise NotImplementedError


class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}

  def RangeSha1(self, ranges):
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")


class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    self.total_blocks = len(self.data) / self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Otherwise the last block may get skipped if unchanged
    # for an incremental, but would fail the post-install verification if it
    # has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

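    # A note on the encoding above (descriptive only): the paired append(i)
    # and append(i+1) calls build the flattened begin/end list that
    # RangeSet(data=...) expects. For instance, if blocks 0 and 1 are both
    # zero, zero_blocks becomes [0, 1, 1, 2], i.e. the pairs (0,1) and (1,2),
    # which RangeSet merges into a single range covering blocks 0 and 1.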
    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)


class Transfer(object):
  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
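    # Net change in the number of stashed blocks if this transfer runs:
    # blocks it asks to stash for later transfers, minus stashed blocks it
    # consumes.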
224 return (sum(sr.size() for (_, sr) in self.stash_before) -
225 sum(sr.size() for (_, sr) in self.use_stash))
226
Tao Bao82c47982015-08-17 09:45:13 -0700227 def ConvertToNew(self):
228 assert self.style != "new"
229 self.use_stash = []
230 self.style = "new"
231 self.src_ranges = RangeSet()
xunchang21531832018-12-06 14:20:05 -0800232 self.patch_info = None
Tao Bao82c47982015-08-17 09:45:13 -0700233
Doug Zongkerfc44a512014-08-26 13:10:25 -0700234 def __str__(self):
235 return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
236 " to " + str(self.tgt_ranges) + ">")
237
238
Doug Zongker6ab2a502016-02-09 08:28:09 -0800239@functools.total_ordering
240class HeapItem(object):
241 def __init__(self, item):
242 self.item = item
Tao Bao186ec992017-12-23 11:50:52 -0800243 # Negate the score since python's heap is a min-heap and we want the
244 # maximum score.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800245 self.score = -item.score
Tao Bao186ec992017-12-23 11:50:52 -0800246
Doug Zongker6ab2a502016-02-09 08:28:09 -0800247 def clear(self):
248 self.item = None
Tao Bao186ec992017-12-23 11:50:52 -0800249
Doug Zongker6ab2a502016-02-09 08:28:09 -0800250 def __bool__(self):
Tao Bao186ec992017-12-23 11:50:52 -0800251 return self.item is not None
252
253 # Python 2 uses __nonzero__, while Python 3 uses __bool__.
254 __nonzero__ = __bool__
255
256 # The rest operations are generated by functools.total_ordering decorator.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800257 def __eq__(self, other):
258 return self.score == other.score
Tao Bao186ec992017-12-23 11:50:52 -0800259
Doug Zongker6ab2a502016-02-09 08:28:09 -0800260 def __le__(self, other):
261 return self.score <= other.score
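
  # For example (illustrative): items with scores 5 and 3 are stored with
  # keys -5 and -3, so the min-heap pops the score-5 item first.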


class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files to which imgdiff will be applied while
  generating BlockImageDiff. It also logs the ones that cannot use imgdiff,
  with specific reasons. The stats are only meaningful when imgdiff is not
  disabled by the caller of BlockImageDiff. In addition, only files with
  supported types (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed
  to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    if reason not in self.stats:
      self.stats[reason] = set()
    self.stats[reason].add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      if key not in self.stats:
        continue
      values = self.stats[key]
      section_header = ' {} (count: {}) '.format(key, len(values))
      print_header(section_header, '-')
      logger.info(''.join(['  {}\n'.format(name) for name in values]))


class BlockImageDiff(object):
  """Generates the diff of two block image objects.

  BlockImageDiff works on two image objects. An image object is anything that
  provides the following attributes:

     blocksize: the size in bytes of a block, currently must be 4096.

     total_blocks: the total size of the partition/image, in blocks.

     care_map: a RangeSet containing which blocks (in the range [0,
       total_blocks) we actually care about; i.e. which blocks contain data.

     file_map: a dict that partitions the blocks contained in care_map into
       smaller domains that are useful for doing diffs on. (Typically a domain
       is a file, and the key in file_map is the pathname.)

     clobbered_blocks: a RangeSet containing which blocks contain data but may
       be altered by the FS. They need to be excluded when verifying the
       partition integrity.

     ReadRangeSet(): a function that takes a RangeSet and returns the data
       contained in the image blocks of that RangeSet. The data is returned as
       a list or tuple of strings; concatenating the elements together should
       produce the requested data. Implementations are free to break up the
       data into list/tuple elements in any way that is convenient.

     RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
       all the data in the specified range.

     TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
       all the data in the image (i.e., all the blocks in the care_map minus
       clobbered_blocks, or including the clobbered blocks if
       include_clobbered_blocks is True).

  When creating a BlockImageDiff, the src image may be None, in which case the
  list of transfers produced will never read from the original image.
  """

  def __init__(self, tgt, src=None, threads=None, version=4,
               disable_imgdiff=False):
    if threads is None:
      threads = multiprocessing.cpu_count() // 2
      if threads == 0:
        threads = 1
    self.threads = threads
    self.version = version
    self.transfers = []
    self.src_basenames = {}
    self.src_numpatterns = {}
    self._max_stashed_size = 0
    self.touched_src_ranges = RangeSet()
    self.touched_src_sha1 = None
    self.disable_imgdiff = disable_imgdiff
    self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

    assert version in (3, 4)

    self.tgt = tgt
    if src is None:
      src = EmptyImage()
    self.src = src

    # The updater code that installs the patch always uses 4k blocks.
    assert tgt.blocksize == 4096
    assert src.blocksize == 4096

    # The range sets in each filemap should comprise a partition of
    # the care map.
    self.AssertPartition(src.care_map, src.file_map.values())
    self.AssertPartition(tgt.care_map, tgt.file_map.values())

  @property
  def max_stashed_size(self):
    return self._max_stashed_size

  @staticmethod
  def FileTypeSupportedByImgdiff(filename):
    """Returns whether the file type is supported by imgdiff."""
    return filename.lower().endswith(('.apk', '.jar', '.zip'))

  def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
    """Checks whether we can apply imgdiff for the given RangeSets.

    For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
    'imgdiff -z' if possible, because it usually produces significantly
    smaller patches than bsdiff.

    This is permissible if all of the following conditions hold:
      - imgdiff hasn't been disabled by the caller (e.g. squashfs);
      - the file type is supported by imgdiff;
      - the source and target blocks are monotonic (i.e. the data is stored
        with blocks in increasing order);
      - neither file contains shared blocks;
      - both files have complete lists of blocks;
      - we haven't removed any blocks from the source set.

    If all these conditions are satisfied, concatenating all the blocks in the
    RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
    in the last block). imgdiff is fine with extra zeros at the end of the
    file.

    Args:
      name: The filename to be diff'd.
      tgt_ranges: The target RangeSet.
      src_ranges: The source RangeSet.
      large_apk: Whether this is to split a large APK.

    Returns:
      A boolean result.
    """
    if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
      return False

    if not tgt_ranges.monotonic or not src_ranges.monotonic:
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
      return False

    if (tgt_ranges.extra.get('uses_shared_blocks') or
        src_ranges.extra.get('uses_shared_blocks')):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
      return False

    if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
      return False

    reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
              else ImgdiffStats.USED_IMGDIFF)
    self.imgdiff_stats.Log(name, reason)
    return True

  def Compute(self, prefix):
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    self.FindSequenceForTransfers()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      stash_limit = (common.OPTIONS.cache_size *
                     common.OPTIONS.stash_threshold / self.tgt.blocksize)
      # Ignore the stash limit and calculate the maximum simultaneously stashed
      # blocks needed.
      _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

      # We cannot stash more blocks than the stash limit simultaneously. As a
      # result, some 'diff' commands will be converted to 'new', leading to an
      # unexpectedly large package. To mitigate this issue, we can carefully
      # choose the transfers for conversion. The number '1024' can be further
      # tweaked here to balance the package size and build time.
      if max_stashed_blocks > stash_limit + 1024:
        self.SelectAndConvertDiffTransfersToNew()
        # Regenerate the sequence as the graph has changed.
        self.FindSequenceForTransfers()

      # Revise the stash size again to keep the size under limit.
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()

  def WriteTransfers(self, prefix):
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large, and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total
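
    # For example (illustrative sizes): a single "new" transfer covering 2500
    # blocks would be emitted by WriteSplitTransfers() as three commands of
    # 1024, 1024 and 452 blocks.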

    out = []
    total = 0

    # In BBOTA v3+, we use the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # <# blocks> <src ranges>
      #    OR
      # <# blocks> <src ranges> <src locs> <stash refs...>
      #    OR
      # <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

    if common.OPTIONS.cache_size is not None:
      # Sanity check: abort if we're going to need more stash space than
      # the allowed size (cache_size * threshold). There are two purposes
      # of having a threshold here: a) part of the cache may have been
      # occupied by some recovery logs; b) it will buy us some time to deal
      # with the oversize issue.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed = cache_size * stash_threshold
      assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
          'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
              max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
              self.tgt.blocksize, max_allowed, cache_size,
              stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    if self.tgt.hashtree_info:
      out.append("compute_hash_tree {} {} {} {} {}\n".format(
          self.tgt.hashtree_info.hashtree_range.to_string_raw(),
          self.tgt.hashtree_info.filesystem_range.to_string_raw(),
          self.tgt.hashtree_info.hash_algorithm,
          self.tgt.hashtree_info.salt,
          self.tgt.hashtree_info.root_hash))

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # become starved of clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")
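
    # After these inserts, the transfer list starts with a four-line header
    # (values illustrative):
    #   4        <- format version
    #   123456   <- total blocks written
    #   0        <- stash slots (unused in v3+)
    #   512      <- max blocks stashed simultaneously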

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
          max_stashed_blocks, self._max_stashed_size, max_allowed,
          self._max_stashed_size * 100.0 / max_allowed)
    else:
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
          max_stashed_blocks, self._max_stashed_size)

  def ReviseStashSize(self, ignore_stash_limit=False):
    """Revises the transfers to keep the stash size within the size limit.

    Iterates through the transfer list and calculates the stash size each
    transfer generates. Converts the affected transfers to new if we reach the
    stash limit.

    Args:
      ignore_stash_limit: Ignores the stash limit and calculates the max
        simultaneous stashed blocks instead. No change will be made to the
        transfer list with this flag.

    Returns:
      A tuple of (tgt blocks converted to new, max stashed blocks)
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    max_allowed_blocks = None
    if not ignore_stash_limit:
      # Compute the maximum blocks available for stash based on /cache size and
      # the threshold.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0
    max_stashed_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires more stash than is available, delete that
    # stash by replacing the command that uses it with a "new" command instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after
          max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if (max_allowed_blocks and
              stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
            replaced_cmds.append(xf)
            logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)
          else:
            # The whole source ranges will be stashed for implicit stashes.
            max_stashed_blocks = max(max_stashed_blocks,
                                     stashed_blocks + xf.src_ranges.size())

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violate the space limit and print the total
        # number to screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        "  Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size.", new_blocks, num_of_bytes)
    return new_blocks, max_stashed_blocks

  def ComputePatches(self, prefix):
    logger.info("Reticulating splines...")
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, e.g.:
          #
          #   ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch_info = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              logger.info(
                  "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size, tgt_size,
                  100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges))
          else:
            if xf.patch_info:
              # We have already generated the patch (e.g. during splitting of
              # large APKs, or when reducing the stash size).
              imgdiff = xf.patch_info.imgdiff
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    patches = self.ComputePatchesForInputList(diff_queue, False)

    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch_info, _ in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch_info.content)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch_info.content)

        tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
        logger.info(
            "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
            xf.patch_len * 100.0 / tgt_size, xf.style,
            xf.tgt_name if xf.tgt_name == xf.src_name else (
                xf.tgt_name + " (from " + xf.src_name + ")"),
            xf.tgt_ranges, xf.src_ranges)

  def AssertSha1Good(self):
    """Checks the SHA-1 of the src & tgt blocks in the transfer list.

    Double checks the SHA-1 values to avoid the issue in b/71908713, where
    SparseImage.RangeSha1() messed up the hash calculation in a multi-threaded
    environment. That specific problem has been fixed by protecting the
    underlying generator function 'SparseImage._GetRangeData()' with a lock.
    """
    for xf in self.transfers:
      tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
      assert xf.tgt_sha1 == tgt_sha1
      if xf.style == "diff":
        src_sha1 = self.src.RangeSha1(xf.src_ranges)
        assert xf.src_sha1 == src_sha1

  def AssertSequenceGood(self):
    # Simulate the sequences of transfers we will output, and check that:
    # - we never read a block after writing it, and
    # - we write every block we care about exactly once.

    # Start with no blocks having been touched yet.
    touched = array.array("B", "\0" * self.tgt.total_blocks)

    # Imagine processing the transfers in order.
    for xf in self.transfers:
      # Check that the input blocks for this transfer haven't yet been touched.

      x = xf.src_ranges
      for _, sr in xf.use_stash:
        x = x.subtract(sr)

      for s, e in x:
        # The source image could be larger. Don't check the blocks that are in
        # the source image only, since they are not in 'touched' and won't
        # ever be touched.
        for i in range(s, min(e, self.tgt.total_blocks)):
          assert touched[i] == 0

      # Check that the output blocks for this transfer haven't yet
      # been touched, and touch all the blocks written by this
      # transfer.
      for s, e in xf.tgt_ranges:
        for i in range(s, e):
          assert touched[i] == 0
          touched[i] = 1

    if self.tgt.hashtree_info:
      for s, e in self.tgt.hashtree_info.hashtree_range:
        for i in range(s, e):
          assert touched[i] == 0
          touched[i] = 1

    # Check that we've written every target block.
    for s, e in self.tgt.care_map:
      for i in range(s, e):
        assert touched[i] == 1

  def FindSequenceForTransfers(self):
    """Finds a sequence for the given transfers.

    The goal is to minimize the violation of order dependencies between these
    transfers, so that fewer blocks are stashed when applying the update.
    """

    # Clear the existing dependencies between transfers.
    for xf in self.transfers:
      xf.goes_before = OrderedDict()
      xf.goes_after = OrderedDict()

      xf.stash_before = []
      xf.use_stash = []

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

  def ImproveVertexSequence(self):
    logger.info("Improving vertex order...")

    # At this point our digraph is acyclic; we reversed any edges that
    # were backwards in the heuristically-generated sequence. The
    # previously-generated order is still acceptable, but we hope to
    # find a better order that needs less memory for stashed data.
    # Now we do a topological sort to generate a new vertex order,
    # using a greedy algorithm to choose which vertex goes next
    # whenever we have a choice.
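    # (Descriptive note: this is essentially Kahn's topological sort, with a
    # min-heap keyed by NetStashChange() to break ties among the currently
    # available sources.)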

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    L = []  # the new vertex order

    # S is the set of sources in the remaining graph; we always choose
    # the one that leaves the least amount of stashed data after it's
    # executed.
    S = [(u.NetStashChange(), u.order, u) for u in self.transfers
         if not u.incoming]
    heapq.heapify(S)

    while S:
      _, _, xf = heapq.heappop(S)
      L.append(xf)
      for u in xf.outgoing:
        del u.incoming[xf]
        if not u.incoming:
          heapq.heappush(S, (u.NetStashChange(), u.order, u))

    # if this fails then our graph had a cycle.
    assert len(L) == len(self.transfers)

    self.transfers = L
    for i, xf in enumerate(L):
      xf.order = i

  def ReverseBackwardEdges(self):
    """Reverses unsatisfied edges and computes pairs of stashed blocks.

    For each transfer, make sure it properly stashes the blocks it touches
    that will be used by later transfers. It uses pairs of (stash_raw_id,
    range) to record the blocks to be stashed. 'stash_raw_id' is an id that
    uniquely identifies each pair. Note that for the same range (e.g.
    RangeSet("1-5")), it is possible to have multiple pairs with different
    'stash_raw_id's. Each 'stash_raw_id' will be consumed by one transfer. In
    BBOTA v3+, identical blocks will be written to the same stash slot in
    WriteTransfers().
    """

    logger.info("Reversing backward edges...")
    in_order = 0
    out_of_order = 0
    stash_raw_id = 0
    stash_size = 0

    for xf in self.transfers:
      for u in xf.goes_before.copy():
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo. modify u to stash the blocks that it
          # writes that xf wants to read, and then require u to go
          # before xf.
          out_of_order += 1

          overlap = xf.src_ranges.intersect(u.tgt_ranges)
          assert overlap

          u.stash_before.append((stash_raw_id, overlap))
          xf.use_stash.append((stash_raw_id, overlap))
          stash_raw_id += 1
          stash_size += overlap.size()

          # reverse the edge direction; now xf must go after u
          del xf.goes_before[u]
          del u.goes_after[xf]
          xf.goes_after[u] = None  # value doesn't matter
          u.goes_before[xf] = None

    logger.info(
        "  %d/%d dependencies (%.2f%%) were violated; %d source blocks "
        "stashed.", out_of_order, in_order + out_of_order,
        (out_of_order * 100.0 / (in_order + out_of_order)) if (
            in_order + out_of_order) else 0.0,
        stash_size)

  def FindVertexSequence(self):
    logger.info("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.
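    #
    # For example (illustrative): with an edge A->B of weight 3 and an edge
    # B->A of weight 1, placing A before B violates only B->A, losing 1
    # source block rather than 3.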

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add a key into the set. sinks will look
    # like { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001126
1127 def adjust_score(iu, delta):
1128 iu.score += delta
1129 iu.heap_item.clear()
1130 iu.heap_item = HeapItem(iu)
1131 heapq.heappush(heap, iu.heap_item)
1132
1133 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001134 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001135 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001136 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001137 for u in sinks:
Tao Bao508b0872018-02-09 13:44:43 -08001138 if u not in G:
1139 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001140 s2.appendleft(u)
1141 del G[u]
1142 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001143 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001144 if not iu.outgoing:
1145 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001146 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001147
1148 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001149 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001150 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001151 for u in sources:
Tao Bao508b0872018-02-09 13:44:43 -08001152 if u not in G:
1153 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001154 s1.append(u)
1155 del G[u]
1156 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001157 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001158 if not iu.incoming:
1159 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001160 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001161
Tao Bao508b0872018-02-09 13:44:43 -08001162 if not G:
1163 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001164
1165 # Find the "best" vertex to put next. "Best" is the one that
1166 # maximizes the net difference in source blocks saved we get by
1167 # pretending it's a source rather than a sink.
1168
Doug Zongker6ab2a502016-02-09 08:28:09 -08001169 while True:
1170 u = heapq.heappop(heap)
1171 if u and u.item in G:
1172 u = u.item
1173 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001174
Doug Zongkerfc44a512014-08-26 13:10:25 -07001175 s1.append(u)
1176 del G[u]
1177 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001178 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001179 if not iu.incoming:
1180 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001181
Doug Zongkerfc44a512014-08-26 13:10:25 -07001182 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001183 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001184 if not iu.outgoing:
1185 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001186
1187 # Now record the sequence in the 'order' field of each transfer, and
1188 # rearrange self.transfers to be in the chosen sequence.
1189
1190 new_transfers = []
1191 for x in itertools.chain(s1, s2):
1192 x.order = len(new_transfers)
1193 new_transfers.append(x)
1194 del x.incoming
1195 del x.outgoing
1196
1197 self.transfers = new_transfers
1198
1199 def GenerateDigraph(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001200 logger.info("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001201
1202 # Each item of source_ranges will be:
1203 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001204 # - an ordered set of the transfers that read that block as a source.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001205 source_ranges = []
1206 for b in self.transfers:
1207 for s, e in b.src_ranges:
1208 if e > len(source_ranges):
1209 source_ranges.extend([None] * (e-len(source_ranges)))
1210 for i in range(s, e):
1211 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001212 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001213 else:
Tao Bao33482282016-10-24 16:49:08 -07001214 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001215
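# For illustration (hypothetical transfers): if t1 reads blocks 0-2 and
# t2 reads blocks 2-3, source_ranges becomes
#   [{t1: None}, {t1: None}, {t1: None, t2: None}, {t2: None}]
# i.e. a per-block index from source block number to the ordered set of
# transfers that read it.
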
Doug Zongkerfc44a512014-08-26 13:10:25 -07001216 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001217 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001218 for s, e in a.tgt_ranges:
1219 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001220 if i >= len(source_ranges):
1221 break
Tao Bao33482282016-10-24 16:49:08 -07001222 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1223 if source_ranges[i] is not None:
1224 for j in source_ranges[i]:
1225 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001226
1227 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001228 if a is b:
1229 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001230
1231 # If the blocks written by A are read by B, then B needs to go before A.
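# For example (hypothetical ranges): if a writes blocks 10-19 while b
# reads blocks 12-14, the intersection has 3 blocks, so
# b.goes_before[a] = 3; violating that edge would force us to stash
# those 3 blocks of b's source.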
1232 i = a.tgt_ranges.intersect(b.src_ranges)
1233 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001234 if b.src_name == "__ZERO":
1235 # the cost of removing source blocks for the __ZERO domain
1236 # is (nearly) zero.
1237 size = 0
1238 else:
1239 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001240 b.goes_before[a] = size
1241 a.goes_after[b] = size
1242
xunchang21531832018-12-06 14:20:05 -08001243 def ComputePatchesForInputList(self, diff_queue, compress_target):
1244 """Returns a list of patch information for the input list of transfers.
1245
1246 Args:
1247 diff_queue: a list of transfers with style 'diff'
1248 compress_target: If True, compresses the target ranges of each
1249 transfer and saves the compressed size.
1250
1251 Returns:
1252 A list of (transfer order, patch_info, compressed_size) tuples.
1253 """
1254
1255 if not diff_queue:
1256 return []
1257
1258 if self.threads > 1:
1259 logger.info("Computing patches (using %d threads)...", self.threads)
1260 else:
1261 logger.info("Computing patches...")
1262
1263 diff_total = len(diff_queue)
1264 patches = [None] * diff_total
1265 error_messages = []
1266
1267 # Using multiprocessing doesn't give additional benefits, due to the
1268 # pattern of the code. The diffing work is done by subprocess.call, which
1269 # already runs in a separate process (not affected much by the GIL -
1270 # Global Interpreter Lock). Using multiprocess also requires either a)
1271 # writing the diff input files in the main process before forking, or b)
1272 # reopening the image file (SparseImage) in the worker processes. Doing
1273 # neither of them further improves the performance.
1274 lock = threading.Lock()
1275
1276 def diff_worker():
1277 while True:
1278 with lock:
1279 if not diff_queue:
1280 return
1281 xf_index, imgdiff, patch_index = diff_queue.pop()
1282 xf = self.transfers[xf_index]
1283
1284 message = []
1285 compressed_size = None
1286
1287 patch_info = xf.patch_info
1288 if not patch_info:
1289 src_file = common.MakeTempFile(prefix="src-")
1290 with open(src_file, "wb") as fd:
1291 self.src.WriteRangeDataToFd(xf.src_ranges, fd)
1292
1293 tgt_file = common.MakeTempFile(prefix="tgt-")
1294 with open(tgt_file, "wb") as fd:
1295 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)
1296
1297 try:
1298 patch_info = compute_patch(src_file, tgt_file, imgdiff)
1299 except ValueError as e:
1300 message.append(
1301 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
1302 "imgdiff" if imgdiff else "bsdiff",
1303 xf.tgt_name if xf.tgt_name == xf.src_name else
1304 xf.tgt_name + " (from " + xf.src_name + ")",
1305 xf.tgt_ranges, xf.src_ranges, str(e)))
1306
1307 if compress_target:
1308 tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
1309 try:
1310 # Compress with level 6 (zlib's default); -MAX_WBITS selects a raw
# deflate stream (no zlib header), matching zip's deflate format.
1311 compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
1312 compressed_data = (compress_obj.compress("".join(tgt_data))
1313 + compress_obj.flush())
1314 compressed_size = len(compressed_data)
1315 except zlib.error as e:
1316 message.append(
1317 "Failed to compress the data in target range {} for {}:\n"
1318 "{}".format(xf.tgt_ranges, xf.tgt_name, e.message))
1319
1320 if message:
1321 with lock:
1322 error_messages.extend(message)
1323
1324 with lock:
1325 patches[patch_index] = (xf_index, patch_info, compressed_size)
1326
1327 threads = [threading.Thread(target=diff_worker)
1328 for _ in range(self.threads)]
1329 for th in threads:
1330 th.start()
1331 while threads:
1332 threads.pop().join()
1333
1334 if error_messages:
1335 logger.error('ERROR:')
1336 logger.error('\n'.join(error_messages))
1337 logger.error('\n\n\n')
1338 sys.exit(1)
1339
1340 return patches
1341
xunchang3df4d5e2018-12-06 15:03:45 -08001342 def SelectAndConvertDiffTransfersToNew(self):
1343 """Converts the diff transfers to reduce the max simultaneous stash.
1344
1345 Since the 'new' data is compressed with deflate, we can select the 'diff'
1346 transfers for conversion by comparing each patch size with the size of the
1347 deflate-compressed target data. Ideally, we want to convert the transfers
1348 that add little package size but free a large number of stashed blocks.
1349 """
1350
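# Worked example (hypothetical numbers): if a transfer's imgdiff patch is
# 120 KiB but its deflate-compressed target data is only 100 KiB, writing
# the data as a 'new' command is at least as cheap, so the transfer is
# converted and every block it would have stashed is freed. A transfer
# with a 5 KiB patch against a 2 MiB compressed target stays a 'diff'.
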
1351 logger.info("Selecting diff commands to convert to new.")
1352 diff_queue = []
1353 for xf in self.transfers:
1354 if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
1355 use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
1356 xf.src_ranges)
1357 diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))
1358
1359 # The 'move' transfers (identical source and target SHA-1) were skipped
1360 # when building diff_queue; compute the patch & compressed size for the rest.
1361 result = self.ComputePatchesForInputList(diff_queue, True)
1362
1363 removed_stashed_blocks = 0
1364 for xf_index, patch_info, compressed_size in result:
1365 xf = self.transfers[xf_index]
1366 if not xf.patch_info:
1367 xf.patch_info = patch_info
1368
1369 size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
1370 diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
1371 logger.info("%s, target size: %d, style: %s, patch size: %d,"
1372 " compression_size: %d, ratio %.2f%%", xf.tgt_name,
1373 xf.tgt_ranges.size(), diff_style,
1374 len(xf.patch_info.content), compressed_size, size_ratio)
1375
1376 # Convert the transfer to new if the compressed size is smaller or equal.
1377 # We don't need to maintain the stash_before lists here because the
1378 # graph will be regenerated later.
1379 if len(xf.patch_info.content) >= compressed_size:
1380 removed_stashed_blocks += sum(sr.size() for _, sr in xf.use_stash)
1381 logger.info("Converting %s to new", xf.tgt_name)
1382 xf.ConvertToNew()
1383
1384 # TODO(xunchang) convert more transfers by sorting:
1385 # (compressed size - patch_size) / used_stashed_blocks
1386
1387 logger.info("Removed %d stashed blocks", removed_stashed_blocks)
1388
Doug Zongkerfc44a512014-08-26 13:10:25 -07001389 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001390 """Parse the file_map to generate all the transfers."""
1391
Tianjie Xu41390d42017-11-22 11:35:18 -08001392 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1393 src_ranges, style, by_id):
1394 """Add one or multiple Transfer()s by splitting large files.
1395
1396 For BBOTA v3, we need to stash source blocks for the resumable feature.
1397 However, as files grow and the cache partition shrinks, the source
1398 blocks of a single file may become too large to stash. If a file
1399 occupies too many blocks, we split it into smaller pieces, i.e. into
1400 multiple Transfer()s.
1401
1402 The downside is that after splitting, we may increase the package size
1403 since the split pieces don't align well. According to our experiments,
1404 1/8 of the cache size as the per-piece limit appears to be optimal.
1405 Compared to the fixed 1024-block limit, it reduces the overall package
1406 size by 30% for volantis, and 20% for angler and bullhead."""
1407
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001408 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001409 while (tgt_ranges.size() > max_blocks_per_transfer and
1410 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001411 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1412 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001413 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1414 src_first = src_ranges.first(max_blocks_per_transfer)
1415
Tao Bao183e56e2017-03-05 17:05:09 -08001416 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1417 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1418 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001419
1420 tgt_ranges = tgt_ranges.subtract(tgt_first)
1421 src_ranges = src_ranges.subtract(src_first)
1422 pieces += 1
1423
1424 # Handle remaining blocks.
1425 if tgt_ranges.size() or src_ranges.size():
1426 # Must be both non-empty.
1427 assert tgt_ranges.size() and src_ranges.size()
1428 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1429 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001430 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1431 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1432 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001433
Tianjie Xu41390d42017-11-22 11:35:18 -08001434 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1435 by_id):
1436 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001437
Tianjie Xu41390d42017-11-22 11:35:18 -08001438 This function constructs a list of zip archives, which will later be
1439 split by imgdiff to reduce the final patch size. The other files are
1440 simply split at a fixed chunk size, at the cost of a potential patch
1441 size penalty.
1442 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001443
1444 assert style == "diff"
1445
1446 # Change nothing for small files.
1447 if (tgt_ranges.size() <= max_blocks_per_transfer and
1448 src_ranges.size() <= max_blocks_per_transfer):
1449 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1450 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1451 style, by_id)
1452 return
1453
Tao Baocb73aed2018-01-31 17:32:40 -08001454 # Split large APKs with imgdiff, if possible. We're intentionally checking
1455 # file types one more time (CanUseImgdiff() checks that as well), before
1456 # calling the costly RangeSha1()s.
1457 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1458 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001459 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001460 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1461 return
1462
Tianjie Xu41390d42017-11-22 11:35:18 -08001463 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1464 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001465
Tao Bao08c85832016-09-19 22:26:30 -07001466 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1467 split=False):
1468 """Wrapper function for adding a Transfer()."""
1469
1470 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1471 # otherwise add the Transfer() as is.
1472 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001473 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1474 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1475 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001476 return
1477
1478 # Handle .odex files specially to analyze the block-wise difference. If
1479 # most of the blocks are identical with only a few changes (e.g. the
1480 # header), we will patch the changed blocks only. This avoids stashing
1481 # unchanged blocks while patching. We limit the analysis to files whose
1482 # size stays the same, so as not to increase the OTA generation cost
1483 # too much.
1484 if (tgt_name.split(".")[-1].lower() == 'odex' and
1485 tgt_ranges.size() == src_ranges.size()):
1486
1487 # The 0.5 threshold can be further tuned. The tradeoff: if only very
1488 # few blocks remain identical, we lose the opportunity to use imgdiff,
1489 # which may achieve a better compression ratio than bsdiff.
1490 crop_threshold = 0.5
1491
1492 tgt_skipped = RangeSet()
1493 src_skipped = RangeSet()
1494 tgt_size = tgt_ranges.size()
1495 tgt_changed = 0
1496 for src_block, tgt_block in zip(src_ranges.next_item(),
1497 tgt_ranges.next_item()):
1498 src_rs = RangeSet(str(src_block))
1499 tgt_rs = RangeSet(str(tgt_block))
1500 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1501 tgt_skipped = tgt_skipped.union(tgt_rs)
1502 src_skipped = src_skipped.union(src_rs)
1503 else:
1504 tgt_changed += tgt_rs.size()
1505
1506 # Terminate early if no clear sign of benefits.
1507 if tgt_changed > tgt_size * crop_threshold:
1508 break
1509
1510 if tgt_changed < tgt_size * crop_threshold:
1511 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao32fcdab2018-10-12 10:30:39 -07001512 logger.info(
1513 '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
1514 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
Tianjie Xu41390d42017-11-22 11:35:18 -08001515 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001516 "%s-skipped" % (tgt_name,),
1517 "%s-skipped" % (src_name,),
1518 tgt_skipped, src_skipped, style, by_id)
1519
1520 # Intentionally change the file extension to avoid being imgdiff'd as
1521 # the files are no longer in their original format.
1522 tgt_name = "%s-cropped" % (tgt_name,)
1523 src_name = "%s-cropped" % (src_name,)
1524 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1525 src_ranges = src_ranges.subtract(src_skipped)
1526
1527 # Possibly having no changed blocks.
1528 if not tgt_ranges:
1529 return
1530
1531 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001532 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001533 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1534
Tianjie Xu25366072017-09-08 17:19:02 -07001535 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1536 split_info):
1537 """Parse the split_info and return a list of info tuples.
1538
1539 Args:
1540 patch_size: total size of the patch file.
1541 tgt_ranges: Ranges of the target file within the original image.
1542 src_ranges: Ranges of the source file within the original image.
1543 split_info: lines read from the split info file, in the format:
1544 imgdiff version#
1545 count of pieces
1546 <patch_size_1> <tgt_size_1> <src_ranges_1>
1547 ...
1548 <patch_size_n> <tgt_size_n> <src_ranges_n>
1549
1550 Returns:
1551 A list of (patch_start, patch_length, split_tgt_ranges, split_src_ranges) tuples.
1552 """
1553
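# For illustration (hypothetical values): with patch_size = 61440 and a
# split_info of version 2 and two pieces, the lines
#   '40960 8192 <raw src ranges>' and '20480 4096 <raw src ranges>'
# yield tuples with patch offsets/lengths (0, 40960) and (40960, 20480),
# target pieces of 2 blocks and 1 block, and the source blocks each
# piece reads.
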
1554 version = int(split_info[0])
1555 assert version == 2
1556 count = int(split_info[1])
1557 assert len(split_info) - 2 == count
1558
1559 split_info_list = []
1560 patch_start = 0
1561 tgt_remain = copy.deepcopy(tgt_ranges)
1562 # Each line has the format: <patch_size> <tgt_size> <src_ranges>
1563 for line in split_info[2:]:
1564 info = line.split()
1565 assert len(info) == 3
1566 patch_length = int(info[0])
1567
1568 split_tgt_size = int(info[1])
1569 assert split_tgt_size % 4096 == 0
1570 assert split_tgt_size // 4096 <= tgt_remain.size()
1571 split_tgt_ranges = tgt_remain.first(split_tgt_size // 4096)
1572 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1573
1574 # Find the split_src_ranges within the image file from its relative
1575 # position in file.
1576 split_src_indices = RangeSet.parse_raw(info[2])
1577 split_src_ranges = RangeSet()
1578 for r in split_src_indices:
1579 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1580 assert not split_src_ranges.overlaps(curr_range)
1581 split_src_ranges = split_src_ranges.union(curr_range)
1582
1583 split_info_list.append((patch_start, patch_length,
1584 split_tgt_ranges, split_src_ranges))
1585 patch_start += patch_length
1586
1587 # Check that the sizes of all the split pieces add up to the final file
1588 # size for patch and target.
1589 assert tgt_remain.size() == 0
1590 assert patch_start == patch_size
1591 return split_info_list
1592
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001593 def SplitLargeApks():
1594 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001595
1596 Example: Chrome.apk will be split into
1597 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1598 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1599 ...
1600
1601 After the split, the target pieces are contiguous and block aligned,
1602 while the source pieces are mutually exclusive. During the split, we also
1603 generate and save the image patch between src-X & tgt-X. This patch will
1604 be valid because the block ranges of src-X & tgt-X will always stay the
1605 same afterwards; but there's a chance we don't use the patch if we
1606 convert the "diff" command into "new" or "move" later.
1607 """
1608
1609 while True:
1610 with transfer_lock:
1611 if not large_apks:
1612 return
1613 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1614
1615 src_file = common.MakeTempFile(prefix="src-")
1616 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001617 with open(src_file, "wb") as src_fd:
1618 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1619 with open(tgt_file, "wb") as tgt_fd:
1620 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001621
1622 patch_file = common.MakeTempFile(prefix="patch-")
1623 patch_info_file = common.MakeTempFile(prefix="split_info-")
1624 cmd = ["imgdiff", "-z",
1625 "--block-limit={}".format(max_blocks_per_transfer),
1626 "--split-info=" + patch_info_file,
1627 src_file, tgt_file, patch_file]
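# A sketch of the resulting invocation (temp paths are illustrative):
#   imgdiff -z --block-limit=3200 --split-info=/tmp/split_info-XX \
#       /tmp/src-XX /tmp/tgt-XX /tmp/patch-XX
# --block-limit caps the target blocks per piece, and --split-info makes
# imgdiff write the per-piece metadata consumed by
# ParseAndValidateSplitInfo() below.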
Tao Bao73dd4f42018-10-04 16:25:33 -07001628 proc = common.Run(cmd)
1629 imgdiff_output, _ = proc.communicate()
1630 assert proc.returncode == 0, \
Tao Bao4ccea852018-02-06 15:16:41 -08001631 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1632 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001633
1634 with open(patch_info_file) as patch_info:
1635 lines = patch_info.readlines()
1636
1637 patch_size_total = os.path.getsize(patch_file)
1638 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1639 tgt_ranges, src_ranges,
1640 lines)
1641 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001642 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001643 with open(patch_file) as f:
1644 f.seek(patch_start)
1645 patch_content = f.read(patch_length)
1646
1647 split_src_name = "{}-{}".format(src_name, index)
1648 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001649 split_large_apks.append((split_tgt_name,
1650 split_src_name,
1651 split_tgt_ranges,
1652 split_src_ranges,
1653 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001654
Tao Bao32fcdab2018-10-12 10:30:39 -07001655 logger.info("Finding transfers...")
Tao Bao08c85832016-09-19 22:26:30 -07001656
Tianjie Xu25366072017-09-08 17:19:02 -07001657 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001658 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001659 cache_size = common.OPTIONS.cache_size
1660 split_threshold = 0.125
1661 max_blocks_per_transfer = int(cache_size * split_threshold /
1662 self.tgt.blocksize)
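# For instance (hypothetical cache size): with a 100 MiB cache and
# 4096-byte blocks, int(104857600 * 0.125 / 4096) = 3200 blocks, i.e.
# each piece is capped at 12.5 MiB, an eighth of the cache.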
Doug Zongkerfc44a512014-08-26 13:10:25 -07001663 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001664 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001665 if tgt_fn == "__ZERO":
1666 # The special "__ZERO" domain covers all the blocks that are not
1667 # contained in any file and are filled with zeros. We have a
1668 # special transfer style for zero blocks.
1669 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001670 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1671 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001672 continue
1673
Tao Baoff777812015-05-12 11:42:31 -07001674 elif tgt_fn == "__COPY":
1675 # "__COPY" domain includes all the blocks not contained in any
1676 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001677 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001678 continue
1679
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001680 elif tgt_fn == "__HASHTREE":
1681 continue
1682
Doug Zongkerfc44a512014-08-26 13:10:25 -07001683 elif tgt_fn in self.src.file_map:
1684 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001685 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001686 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001687 continue
1688
1689 b = os.path.basename(tgt_fn)
1690 if b in self.src_basenames:
1691 # Look for an exact basename match in the source.
1692 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001693 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001694 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001695 continue
1696
1697 b = re.sub("[0-9]+", "#", b)
1698 if b in self.src_numpatterns:
1699 # Look for a 'number pattern' match (a basename match after
1700 # all runs of digits are replaced by "#"). (This is useful
1701 # for .so files that contain version numbers in the filename
1702 # that get bumped.)
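# For example (hypothetical names), source "libfoo.so.1.2" and target
# "libfoo.so.1.3" both reduce to "libfoo.so.#.#", so they still get
# diffed against each other despite the version bump.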
1703 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001704 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001705 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001706 continue
1707
Tao Bao9a5caf22015-08-25 15:10:10 -07001708 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001709
Tianjie Xu25366072017-09-08 17:19:02 -07001710 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001711 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001712 for _ in range(self.threads)]
1713 for th in threads:
1714 th.start()
1715 while threads:
1716 threads.pop().join()
1717
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001718 # Sort the split transfers for large APKs to generate a deterministic package.
1719 split_large_apks.sort()
1720 for (tgt_name, src_name, tgt_ranges, src_ranges,
1721 patch) in split_large_apks:
1722 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1723 self.tgt.RangeSha1(tgt_ranges),
1724 self.src.RangeSha1(src_ranges),
1725 "diff", self.transfers)
xunchang21531832018-12-06 14:20:05 -08001726 transfer_split.patch_info = PatchInfo(True, patch)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001727
Doug Zongkerfc44a512014-08-26 13:10:25 -07001728 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001729 for k in self.src.file_map.keys():
1730 b = os.path.basename(k)
1731 self.src_basenames[b] = k
1732 b = re.sub("[0-9]+", "#", b)
1733 self.src_numpatterns[b] = k
1734
1735 @staticmethod
1736 def AssertPartition(total, seq):
1737 """Assert that all the RangeSets in 'seq' form a partition of the
1738 'total' RangeSet (i.e., they are non-intersecting and their union
1739 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001740
Doug Zongkerfc44a512014-08-26 13:10:25 -07001741 so_far = RangeSet()
1742 for i in seq:
1743 assert not so_far.overlaps(i)
1744 so_far = so_far.union(i)
1745 assert so_far == total
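# For example, AssertPartition(RangeSet("0-9"),
#     [RangeSet("0-2"), RangeSet("3-9")]) passes, while overlapping or
# incomplete pieces would trip one of the asserts above.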