blob: b5e01d332424f312b121c6e418cd4dc1f9342ae2 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tianjie Xu25366072017-09-08 17:19:02 -070018import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
Tao Bao32fcdab2018-10-12 10:30:39 -070022import logging
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
xunchang21531832018-12-06 14:20:05 -080029import zlib
30from collections import deque, namedtuple, OrderedDict
Tao Bao3a2e3502016-12-28 09:14:39 -080031from hashlib import sha1
Tao Bao508b0872018-02-09 13:44:43 -080032
33import common
Dan Albert8b72aef2015-03-23 19:13:21 -070034from rangelib import RangeSet
35
# Public API of this module; internal helpers/classes stay unexported.
__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

logger = logging.getLogger(__name__)

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
# 'imgdiff' is a bool (True when the patch was produced by imgdiff);
# 'content' holds the raw patch bytes.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])
42
Dan Albert8b72aef2015-03-23 19:13:21 -070043
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Runs bsdiff (or imgdiff when requested) and returns a PatchInfo.

  Args:
    srcfile: Path to the source file.
    tgtfile: Path to the target file.
    imgdiff: If True, invoke 'imgdiff -z' instead of 'bsdiff'.

  Returns:
    A PatchInfo with the patch style flag and the raw patch bytes.

  Raises:
    ValueError: If the external diff tool exits with a non-zero status.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Suppress the command dump: it would only show temp filenames, which are
  # not useful for debugging.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()
  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())
Doug Zongkerfc44a512014-08-26 13:10:25 -070061
Dan Albert8b72aef2015-03-23 19:13:21 -070062
class Image(object):
  """Abstract interface that BlockImageDiff expects from an image object."""

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data contained in the given RangeSet."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns the data of the given RangeSet as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the whole image, optionally with clobbered blocks."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data of the given RangeSet into the file object fd."""
    raise NotImplementedError
75
Dan Albert8b72aef2015-03-23 19:13:21 -070076
class EmptyImage(Image):
  """A zero-length image: no blocks, no data."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data, so every range hashes to the SHA-1 of nothing.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # clobbered_blocks is always empty for EmptyImage, so the
    # include_clobbered_blocks flag makes no difference.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
102
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700103
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Initializes the image from a raw data string.

    Args:
      data: The raw image contents.
      trim: If True, drop a trailing partial block.
      pad: If True, zero-pad a trailing partial block. The padded block is
          recorded in clobbered_blocks so it is always rewritten.

    Raises:
      ValueError: If data is not block-aligned and neither trim nor pad is
          specified.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int on Python 3 as well;
    # plain '/' would produce a float there (same value on Python 2).
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block (excluding a padded last block) as zero/nonzero.
    # The lists hold [start, end) pairs consumed by RangeSet below.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yields the data chunk for each (start, end) block pair in ranges."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 over the data of the given ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    """Returns the range data wrapped in a single-element list (lazy)."""
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the image, optionally excluding clobbered blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data of the given ranges into the file object fd."""
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)
187
Doug Zongkerfc44a512014-08-26 13:10:25 -0700188
class Transfer(object):
  """One transfer command mapping a source range onto a target range."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register this transfer; its id is its index in by_id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    # Only diff-style transfers may carry patch data.
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
    """Returns blocks stashed minus blocks unstashed by this transfer."""
    stashed = sum(sr.size() for (_, sr) in self.stash_before)
    unstashed = sum(sr.size() for (_, sr) in self.use_stash)
    return stashed - unstashed

  def ConvertToNew(self):
    """Rewrites this transfer as a plain 'new', dropping source/patch data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch_info = None

  def __str__(self):
    return "{}: <{} {} to {}>".format(
        self.id, self.src_ranges, self.style, self.tgt_ranges)
237
238
@functools.total_ordering
class HeapItem(object):
  """Wraps a scored item so heapq's min-heap pops the highest score first."""

  def __init__(self, item):
    self.item = item
    # heapq implements a min-heap; storing the negated score makes pops
    # return the item with the maximum original score.
    self.score = -item.score

  def clear(self):
    """Drops the wrapped item; the entry then evaluates as falsy."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 spells the truthiness hook __nonzero__.
  __nonzero__ = __bool__

  # functools.total_ordering fills in the remaining comparison operators
  # from __eq__ plus __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
262
263
class ImgdiffStats(object):
  """Collects statistics about imgdiff usage while generating a BlockImageDiff.

  Tracks which files will be diff'd with imgdiff, and which ones cannot be,
  together with the specific reason. The stats are only meaningful when
  imgdiff is not disabled by the caller of BlockImageDiff. In addition, only
  files with supported types (BlockImageDiff.FileTypeSupportedByImgdiff())
  are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which is also the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason constant to the set of filenames logged under it.
    self.stats = {}

  def Log(self, filename, reason):
    """Records why imgdiff can or cannot be applied to the given file.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS
    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      if key not in self.stats:
        continue
      values = self.stats[key]
      print_header(' {} (count: {}) '.format(key, len(values)), '-')
      logger.info(''.join([' {}\n'.format(name) for name in values]))
Tao Bao294651d2018-02-08 23:21:52 -0800326
327
Doug Zongkerfc44a512014-08-26 13:10:25 -0700328class BlockImageDiff(object):
Tao Bao76def242017-11-21 09:25:31 -0800329 """Generates the diff of two block image objects.
330
331 BlockImageDiff works on two image objects. An image object is anything that
332 provides the following attributes:
333
334 blocksize: the size in bytes of a block, currently must be 4096.
335
336 total_blocks: the total size of the partition/image, in blocks.
337
338 care_map: a RangeSet containing which blocks (in the range [0,
339 total_blocks) we actually care about; i.e. which blocks contain data.
340
341 file_map: a dict that partitions the blocks contained in care_map into
342 smaller domains that are useful for doing diffs on. (Typically a domain
343 is a file, and the key in file_map is the pathname.)
344
345 clobbered_blocks: a RangeSet containing which blocks contain data but may
346 be altered by the FS. They need to be excluded when verifying the
347 partition integrity.
348
349 ReadRangeSet(): a function that takes a RangeSet and returns the data
350 contained in the image blocks of that RangeSet. The data is returned as
351 a list or tuple of strings; concatenating the elements together should
352 produce the requested data. Implementations are free to break up the
353 data into list/tuple elements in any way that is convenient.
354
355 RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
356 all the data in the specified range.
357
358 TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
359 all the data in the image (ie, all the blocks in the care_map minus
360 clobbered_blocks, or including the clobbered blocks if
361 include_clobbered_blocks is True).
362
363 When creating a BlockImageDiff, the src image may be None, in which case the
364 list of transfers produced will never read from the original image.
365 """
366
Tao Bao293fd132016-06-11 12:19:23 -0700367 def __init__(self, tgt, src=None, threads=None, version=4,
368 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700369 if threads is None:
370 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700371 if threads == 0:
372 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700373 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700374 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700375 self.transfers = []
376 self.src_basenames = {}
377 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800378 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700379 self.touched_src_ranges = RangeSet()
380 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700381 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800382 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700383
Tao Bao8fad03e2017-03-01 14:36:26 -0800384 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700385
386 self.tgt = tgt
387 if src is None:
388 src = EmptyImage()
389 self.src = src
390
391 # The updater code that installs the patch always uses 4k blocks.
392 assert tgt.blocksize == 4096
393 assert src.blocksize == 4096
394
395 # The range sets in each filemap should comprise a partition of
396 # the care map.
397 self.AssertPartition(src.care_map, src.file_map.values())
398 self.AssertPartition(tgt.care_map, tgt.file_map.values())
399
  @property
  def max_stashed_size(self):
    # Peak stash usage in bytes (max stashed blocks * target block size),
    # computed by WriteTransfers(); 0 until that has run.
    return self._max_stashed_size
403
Tao Baocb73aed2018-01-31 17:32:40 -0800404 @staticmethod
405 def FileTypeSupportedByImgdiff(filename):
406 """Returns whether the file type is supported by imgdiff."""
407 return filename.lower().endswith(('.apk', '.jar', '.zip'))
408
Tao Bao294651d2018-02-08 23:21:52 -0800409 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800410 """Checks whether we can apply imgdiff for the given RangeSets.
411
412 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
413 'imgdiff -z' if possible. Because it usually produces significantly smaller
414 patches than bsdiff.
415
416 This is permissible if all of the following conditions hold.
417 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
418 - The file type is supported by imgdiff;
419 - The source and target blocks are monotonic (i.e. the data is stored with
420 blocks in increasing order);
Tao Baoe709b092018-02-07 12:40:00 -0800421 - Both files don't contain shared blocks;
Tao Bao4ccea852018-02-06 15:16:41 -0800422 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800423 - We haven't removed any blocks from the source set.
424
425 If all these conditions are satisfied, concatenating all the blocks in the
426 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
427 in the last block). imgdiff is fine with extra zeros at the end of the file.
428
429 Args:
430 name: The filename to be diff'd.
431 tgt_ranges: The target RangeSet.
432 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800433 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800434
435 Returns:
436 A boolean result.
437 """
Tao Bao508b0872018-02-09 13:44:43 -0800438 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800439 return False
440
441 if not tgt_ranges.monotonic or not src_ranges.monotonic:
442 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
443 return False
444
Tao Baoe709b092018-02-07 12:40:00 -0800445 if (tgt_ranges.extra.get('uses_shared_blocks') or
446 src_ranges.extra.get('uses_shared_blocks')):
447 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
448 return False
449
Tao Bao4ccea852018-02-06 15:16:41 -0800450 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
451 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
452 return False
453
Tao Bao294651d2018-02-08 23:21:52 -0800454 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
455 else ImgdiffStats.USED_IMGDIFF)
456 self.imgdiff_stats.Log(name, reason)
457 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800458
  def Compute(self, prefix):
    """Runs the full diff pipeline and writes outputs under the given prefix.

    Finds and orders the transfers, caps runtime stash usage, verifies the
    result, then emits the patch data and the transfer list.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Order the transfers (and insert stash commands) so that each source
    # range is read before it gets overwritten.
    self.FindSequenceForTransfers()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()
486
  def WriteTransfers(self, prefix):
    """Writes the ordered transfers to '<prefix>.transfer.list'.

    Emits the v3/v4 command stream (stash/free/new/move/bsdiff/imgdiff/zero/
    erase), tracking stash usage by the hash of the stashed blocks. Records
    the peak stash usage in self._max_stashed_size and the SHA-1 of all
    touched source blocks in self.touched_src_sha1.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit a 'stash' command for each new stash this transfer requires;
      # identical contents (same hash) share one slot via refcounting.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # Build the source spec ('src_str') in one of three forms:
      #   <# blocks> <src ranges>
      # OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      # OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference: free the slot after this command runs.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op: emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    if self.tgt.hashtree_info:
      out.append("compute_hash_tree {} {} {} {} {}\n".format(
          self.tgt.hashtree_info.hashtree_range.to_string_raw(),
          self.tgt.hashtree_info.filesystem_range.to_string_raw(),
          self.tgt.hashtree_info.hash_algorithm,
          self.tgt.hashtree_info.salt,
          self.tgt.hashtree_info.root_hash))

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    # Header: version, total target blocks written, stash slot count
    # (unused in v3+), and peak stash usage in blocks.
    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
691 OPTIONS = common.OPTIONS
692 if OPTIONS.cache_size is not None:
693 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
Tao Bao32fcdab2018-10-12 10:30:39 -0700694 logger.info(
695 "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
696 max_stashed_blocks, self._max_stashed_size, max_allowed,
697 self._max_stashed_size * 100.0 / max_allowed)
Tao Bao8fad03e2017-03-01 14:36:26 -0800698 else:
Tao Bao32fcdab2018-10-12 10:30:39 -0700699 logger.info(
700 "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
701 max_stashed_blocks, self._max_stashed_size)
Doug Zongker62338182014-09-08 08:29:55 -0700702
  def ReviseStashSize(self):
    """Rewrites transfers that would overflow the stash limit as "new".

    Walks the transfers in their current order, tracking how many blocks
    would be stashed at each point. Whenever stashing one more range --
    explicitly via stash_before, or implicitly by a "diff" whose source and
    target ranges overlap -- would exceed cache_size * stash_threshold, the
    command that consumes that stash is converted to a "new" command, which
    removes the need for the stash entirely (at the cost of carrying the
    full target data in the package).

    Returns:
      The total number of target blocks converted to "new" commands.
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers(). Maps the SHA-1 of a
    # stashed range to its reference count; identical ranges share one slot.
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          # Only a brand-new hash consumes additional stash space.
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map (reference-counted by content hash).
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands: drop a slot once its
      # reference count reaches zero.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        "  Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size.", new_blocks, num_of_bytes)
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700795
  def ComputePatches(self, prefix):
    """Computes patches for the "diff" transfers and writes output files.

    Side effects:
      - Writes the raw data of every "new" transfer to '<prefix>.new.dat'.
      - Writes all generated bsdiff/imgdiff patches, concatenated, to
        '<prefix>.patch.dat', and records each transfer's patch_start /
        patch_len offsets into that file.
      - "diff" transfers whose source and target SHA-1s match are downgraded
        to "move" (no patch needed); the rest become "imgdiff" or "bsdiff".

    Args:
      prefix: Path prefix for the output files.
    """
    logger.info("Reticulating splines...")
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #   ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch_info = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              logger.info(
                  "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size, tgt_size,
                  100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges))
          else:
            if xf.patch_info:
              # We have already generated the patch (e.g. during split of large
              # APKs or reduction of stash size)
              imgdiff = xf.patch_info.imgdiff
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    patches = self.ComputePatchesForInputList(diff_queue, False)

    # Lay the patches out back to back and record each transfer's offset.
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch_info, _ in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch_info.content)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch_info.content)

        tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
        logger.info(
            "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
            xf.patch_len * 100.0 / tgt_size, xf.style,
            xf.tgt_name if xf.tgt_name == xf.src_name else (
                xf.tgt_name + " (from " + xf.src_name + ")"),
            xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700870
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800871 def AssertSha1Good(self):
872 """Check the SHA-1 of the src & tgt blocks in the transfer list.
873
874 Double check the SHA-1 value to avoid the issue in b/71908713, where
875 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
876 environment. That specific problem has been fixed by protecting the
877 underlying generator function 'SparseImage._GetRangeData()' with lock.
878 """
879 for xf in self.transfers:
880 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
881 assert xf.tgt_sha1 == tgt_sha1
882 if xf.style == "diff":
883 src_sha1 = self.src.RangeSha1(xf.src_ranges)
884 assert xf.src_sha1 == src_sha1
885
Doug Zongkerfc44a512014-08-26 13:10:25 -0700886 def AssertSequenceGood(self):
887 # Simulate the sequences of transfers we will output, and check that:
888 # - we never read a block after writing it, and
889 # - we write every block we care about exactly once.
890
891 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800892 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700893
894 # Imagine processing the transfers in order.
895 for xf in self.transfers:
896 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700897
898 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800899 for _, sr in xf.use_stash:
900 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700901
Doug Zongker6ab2a502016-02-09 08:28:09 -0800902 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800903 # Source image could be larger. Don't check the blocks that are in the
904 # source image only. Since they are not in 'touched', and won't ever
905 # be touched.
906 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800907 assert touched[i] == 0
908
909 # Check that the output blocks for this transfer haven't yet
910 # been touched, and touch all the blocks written by this
911 # transfer.
912 for s, e in xf.tgt_ranges:
913 for i in range(s, e):
914 assert touched[i] == 0
915 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700916
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700917 if self.tgt.hashtree_info:
918 for s, e in self.tgt.hashtree_info.hashtree_range:
919 for i in range(s, e):
920 assert touched[i] == 0
921 touched[i] = 1
922
Doug Zongkerfc44a512014-08-26 13:10:25 -0700923 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800924 for s, e in self.tgt.care_map:
925 for i in range(s, e):
926 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700927
xunchang21531832018-12-06 14:20:05 -0800928 def FindSequenceForTransfers(self):
929 """Finds a sequence for the given transfers.
930
931 The goal is to minimize the violation of order dependencies between these
932 transfers, so that fewer blocks are stashed when applying the update.
933 """
934
935 # Clear the existing dependency between transfers
936 for xf in self.transfers:
937 xf.goes_before = OrderedDict()
938 xf.goes_after = OrderedDict()
939
940 xf.stash_before = []
941 xf.use_stash = []
942
943 # Find the ordering dependencies among transfers (this is O(n^2)
944 # in the number of transfers).
945 self.GenerateDigraph()
946 # Find a sequence of transfers that satisfies as many ordering
947 # dependencies as possible (heuristically).
948 self.FindVertexSequence()
949 # Fix up the ordering dependencies that the sequence didn't
950 # satisfy.
951 self.ReverseBackwardEdges()
952 self.ImproveVertexSequence()
953
Doug Zongker62338182014-09-08 08:29:55 -0700954 def ImproveVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -0700955 logger.info("Improving vertex order...")
Doug Zongker62338182014-09-08 08:29:55 -0700956
957 # At this point our digraph is acyclic; we reversed any edges that
958 # were backwards in the heuristically-generated sequence. The
959 # previously-generated order is still acceptable, but we hope to
960 # find a better order that needs less memory for stashed data.
961 # Now we do a topological sort to generate a new vertex order,
962 # using a greedy algorithm to choose which vertex goes next
963 # whenever we have a choice.
964
965 # Make a copy of the edge set; this copy will get destroyed by the
966 # algorithm.
967 for xf in self.transfers:
968 xf.incoming = xf.goes_after.copy()
969 xf.outgoing = xf.goes_before.copy()
970
971 L = [] # the new vertex order
972
973 # S is the set of sources in the remaining graph; we always choose
974 # the one that leaves the least amount of stashed data after it's
975 # executed.
976 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
977 if not u.incoming]
978 heapq.heapify(S)
979
980 while S:
981 _, _, xf = heapq.heappop(S)
982 L.append(xf)
983 for u in xf.outgoing:
984 del u.incoming[xf]
985 if not u.incoming:
986 heapq.heappush(S, (u.NetStashChange(), u.order, u))
987
988 # if this fails then our graph had a cycle.
989 assert len(L) == len(self.transfers)
990
991 self.transfers = L
992 for i, xf in enumerate(L):
993 xf.order = i
994
  def ReverseBackwardEdges(self):
    """Reverse unsatisfying edges and compute pairs of stashed blocks.

    For each transfer, make sure it properly stashes the blocks it touches and
    will be used by later transfers. It uses pairs of (stash_raw_id, range) to
    record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
    identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
    it is possible to have multiple pairs with different 'stash_raw_id's. Each
    'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
    blocks will be written to the same stash slot in WriteTransfers().
    """

    logger.info("Reversing backward edges...")
    in_order = 0
    out_of_order = 0
    stash_raw_id = 0     # unique id for each (stash, range) pair created here
    stash_size = 0       # total source blocks stashed, for the summary log

    for xf in self.transfers:
      # Iterate over a copy: the loop body deletes and inserts edges on
      # xf.goes_before while walking it.
      for u in xf.goes_before.copy():
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo. modify u to stash the blocks that it
          # writes that xf wants to read, and then require u to go
          # before xf.
          out_of_order += 1

          # The edge existed because u writes blocks xf reads; those are
          # exactly the blocks to save.
          overlap = xf.src_ranges.intersect(u.tgt_ranges)
          assert overlap

          u.stash_before.append((stash_raw_id, overlap))
          xf.use_stash.append((stash_raw_id, overlap))
          stash_raw_id += 1
          stash_size += overlap.size()

          # reverse the edge direction; now xf must go after u
          del xf.goes_before[u]
          del u.goes_after[xf]
          xf.goes_after[u] = None    # value doesn't matter
          u.goes_before[xf] = None

    logger.info(
        "  %d/%d dependencies (%.2f%%) were violated; %d source blocks "
        "stashed.", out_of_order, in_order + out_of_order,
        (out_of_order * 100.0 / (in_order + out_of_order)) if (
            in_order + out_of_order) else 0.0,
        stash_size)
Doug Zongker62338182014-09-08 08:29:55 -07001045
  def FindVertexSequence(self):
    """Heuristically orders the transfers to minimize violated edge weight.

    On return, self.transfers is reordered and each transfer's 'order' field
    holds its position in the new sequence. Edges that still point backwards
    after this pass are handled later by ReverseBackwardEdges().
    """
    logger.info("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm. 'score' is the net benefit of placing the vertex on the
    # left (outgoing weight) versus the right (incoming weight).
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    # Max-score heap over all remaining vertices; stale entries are lazily
    # discarded via HeapItem.clear() in adjust_score() below.
    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Invalidate the old heap entry and push a fresh one with the new
      # score; cheaper than re-heapifying.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Lazily skip heap entries that were invalidated by adjust_score()
      # or whose vertex has already been placed.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1158
1159 def GenerateDigraph(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001160 logger.info("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001161
1162 # Each item of source_ranges will be:
1163 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001164 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001165 source_ranges = []
1166 for b in self.transfers:
1167 for s, e in b.src_ranges:
1168 if e > len(source_ranges):
1169 source_ranges.extend([None] * (e-len(source_ranges)))
1170 for i in range(s, e):
1171 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001172 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001173 else:
Tao Bao33482282016-10-24 16:49:08 -07001174 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001175
Doug Zongkerfc44a512014-08-26 13:10:25 -07001176 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001177 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001178 for s, e in a.tgt_ranges:
1179 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001180 if i >= len(source_ranges):
1181 break
Tao Bao33482282016-10-24 16:49:08 -07001182 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1183 if source_ranges[i] is not None:
1184 for j in source_ranges[i]:
1185 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001186
1187 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001188 if a is b:
1189 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001190
1191 # If the blocks written by A are read by B, then B needs to go before A.
1192 i = a.tgt_ranges.intersect(b.src_ranges)
1193 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001194 if b.src_name == "__ZERO":
1195 # the cost of removing source blocks for the __ZERO domain
1196 # is (nearly) zero.
1197 size = 0
1198 else:
1199 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001200 b.goes_before[a] = size
1201 a.goes_after[b] = size
1202
  def ComputePatchesForInputList(self, diff_queue, compress_target):
    """Returns a list of patch information for the input list of transfers.

    Args:
      diff_queue: a list of (transfer index, imgdiff flag, patch index)
          tuples for transfers with style 'diff'. Consumed destructively.
      compress_target: If True, compresses the target ranges of each
          transfers; and save the size.

    Returns:
      A list of (transfer order, patch_info, compressed_size) tuples, where
      compressed_size is None unless compress_target is True.

    Exits the process (sys.exit(1)) if any diff or compression fails.
    """

    if not diff_queue:
      return []

    if self.threads > 1:
      logger.info("Computing patches (using %d threads)...", self.threads)
    else:
      logger.info("Computing patches...")

    diff_total = len(diff_queue)
    patches = [None] * diff_total
    error_messages = []

    # Using multiprocessing doesn't give additional benefits, due to the
    # pattern of the code. The diffing work is done by subprocess.call, which
    # already runs in a separate process (not affected much by the GIL -
    # Global Interpreter Lock). Using multiprocess also requires either a)
    # writing the diff input files in the main process before forking, or b)
    # reopening the image file (SparseImage) in the worker processes. Doing
    # neither of them further improves the performance.
    lock = threading.Lock()

    def diff_worker():
      # Worker loop: pop one queued transfer at a time (under the lock) and
      # diff/compress it until the queue is drained.
      while True:
        with lock:
          if not diff_queue:
            return
          xf_index, imgdiff, patch_index = diff_queue.pop()
          xf = self.transfers[xf_index]

        message = []
        compressed_size = None

        # Reuse a pre-computed patch when the transfer already carries one;
        # otherwise materialize the src/tgt ranges and run the differ.
        patch_info = xf.patch_info
        if not patch_info:
          src_file = common.MakeTempFile(prefix="src-")
          with open(src_file, "wb") as fd:
            self.src.WriteRangeDataToFd(xf.src_ranges, fd)

          tgt_file = common.MakeTempFile(prefix="tgt-")
          with open(tgt_file, "wb") as fd:
            self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)

          try:
            patch_info = compute_patch(src_file, tgt_file, imgdiff)
          except ValueError as e:
            # NOTE(review): e.message is Python 2 only; str(e) would be
            # needed on Python 3.
            message.append(
                "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                    "imgdiff" if imgdiff else "bsdiff",
                    xf.tgt_name if xf.tgt_name == xf.src_name else
                    xf.tgt_name + " (from " + xf.src_name + ")",
                    xf.tgt_ranges, xf.src_ranges, e.message))

        if compress_target:
          tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
          try:
            # Compresses with the default level
            compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
            compressed_data = (compress_obj.compress("".join(tgt_data))
                               + compress_obj.flush())
            compressed_size = len(compressed_data)
          except zlib.error as e:
            message.append(
                "Failed to compress the data in target range {} for {}:\n"
                "{}".format(xf.tgt_ranges, xf.tgt_name, e.message))

        if message:
          with lock:
            error_messages.extend(message)

        # patches is pre-sized, so each worker writes a disjoint slot.
        with lock:
          patches[patch_index] = (xf_index, patch_info, compressed_size)

    threads = [threading.Thread(target=diff_worker)
               for _ in range(self.threads)]
    for th in threads:
      th.start()
    while threads:
      threads.pop().join()

    if error_messages:
      logger.error('ERROR:')
      logger.error('\n'.join(error_messages))
      logger.error('\n\n\n')
      sys.exit(1)

    return patches
1301
Doug Zongkerfc44a512014-08-26 13:10:25 -07001302 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001303 """Parse the file_map to generate all the transfers."""
1304
    def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                             src_ranges, style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead.

      Args:
        tgt_name/src_name: Base names for the split pieces ("<name>-<i>").
        tgt_ranges/src_ranges: The target/source RangeSets to split.
        style: Transfer style, passed through to Transfer().
        by_id: The transfer list that Transfer() registers itself into.
      """

      pieces = 0
      # Peel off fixed-size chunks while both sides still exceed the limit
      # ('max_blocks_per_transfer' comes from the enclosing scope).
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
                 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
                 style, by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001346
Tianjie Xu41390d42017-11-22 11:35:18 -08001347 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1348 by_id):
1349 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001350
Tianjie Xu41390d42017-11-22 11:35:18 -08001351 This function will construct a list of zip archives, which will later be
1352 split by imgdiff to reduce the final patch size. For the other files,
1353 we will plainly split them based on a fixed chunk size with the potential
1354 patch size penalty.
1355 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001356
1357 assert style == "diff"
1358
1359 # Change nothing for small files.
1360 if (tgt_ranges.size() <= max_blocks_per_transfer and
1361 src_ranges.size() <= max_blocks_per_transfer):
1362 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1363 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1364 style, by_id)
1365 return
1366
Tao Baocb73aed2018-01-31 17:32:40 -08001367 # Split large APKs with imgdiff, if possible. We're intentionally checking
1368 # file types one more time (CanUseImgdiff() checks that as well), before
1369 # calling the costly RangeSha1()s.
1370 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1371 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001372 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001373 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1374 return
1375
Tianjie Xu41390d42017-11-22 11:35:18 -08001376 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1377 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001378
Tao Bao08c85832016-09-19 22:26:30 -07001379 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1380 split=False):
1381 """Wrapper function for adding a Transfer()."""
1382
1383 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1384 # otherwise add the Transfer() as is.
1385 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001386 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1387 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1388 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001389 return
1390
1391 # Handle .odex files specially to analyze the block-wise difference. If
1392 # most of the blocks are identical with only few changes (e.g. header),
1393 # we will patch the changed blocks only. This avoids stashing unchanged
1394 # blocks while patching. We limit the analysis to files without size
1395 # changes only. This is to avoid sacrificing the OTA generation cost too
1396 # much.
1397 if (tgt_name.split(".")[-1].lower() == 'odex' and
1398 tgt_ranges.size() == src_ranges.size()):
1399
1400 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1401 # few blocks remain identical, we lose the opportunity to use imgdiff
1402 # that may have better compression ratio than bsdiff.
1403 crop_threshold = 0.5
1404
1405 tgt_skipped = RangeSet()
1406 src_skipped = RangeSet()
1407 tgt_size = tgt_ranges.size()
1408 tgt_changed = 0
1409 for src_block, tgt_block in zip(src_ranges.next_item(),
1410 tgt_ranges.next_item()):
1411 src_rs = RangeSet(str(src_block))
1412 tgt_rs = RangeSet(str(tgt_block))
1413 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1414 tgt_skipped = tgt_skipped.union(tgt_rs)
1415 src_skipped = src_skipped.union(src_rs)
1416 else:
1417 tgt_changed += tgt_rs.size()
1418
1419 # Terminate early if no clear sign of benefits.
1420 if tgt_changed > tgt_size * crop_threshold:
1421 break
1422
1423 if tgt_changed < tgt_size * crop_threshold:
1424 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao32fcdab2018-10-12 10:30:39 -07001425 logger.info(
1426 '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
1427 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
Tianjie Xu41390d42017-11-22 11:35:18 -08001428 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001429 "%s-skipped" % (tgt_name,),
1430 "%s-skipped" % (src_name,),
1431 tgt_skipped, src_skipped, style, by_id)
1432
1433 # Intentionally change the file extension to avoid being imgdiff'd as
1434 # the files are no longer in their original format.
1435 tgt_name = "%s-cropped" % (tgt_name,)
1436 src_name = "%s-cropped" % (src_name,)
1437 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1438 src_ranges = src_ranges.subtract(src_skipped)
1439
1440 # Possibly having no changed blocks.
1441 if not tgt_ranges:
1442 return
1443
1444 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001445 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001446 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1447
Tianjie Xu25366072017-09-08 17:19:02 -07001448 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1449 split_info):
1450 """Parse the split_info and return a list of info tuples.
1451
1452 Args:
1453 patch_size: total size of the patch file.
1454 tgt_ranges: Ranges of the target file within the original image.
1455 src_ranges: Ranges of the source file within the original image.
1456 split_info format:
1457 imgdiff version#
1458 count of pieces
1459 <patch_size_1> <tgt_size_1> <src_ranges_1>
1460 ...
1461 <patch_size_n> <tgt_size_n> <src_ranges_n>
1462
1463 Returns:
1464 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1465 """
1466
1467 version = int(split_info[0])
1468 assert version == 2
1469 count = int(split_info[1])
1470 assert len(split_info) - 2 == count
1471
1472 split_info_list = []
1473 patch_start = 0
1474 tgt_remain = copy.deepcopy(tgt_ranges)
1475 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1476 for line in split_info[2:]:
1477 info = line.split()
1478 assert len(info) == 3
1479 patch_length = int(info[0])
1480
1481 split_tgt_size = int(info[1])
1482 assert split_tgt_size % 4096 == 0
1483 assert split_tgt_size / 4096 <= tgt_remain.size()
1484 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1485 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1486
1487 # Find the split_src_ranges within the image file from its relative
1488 # position in file.
1489 split_src_indices = RangeSet.parse_raw(info[2])
1490 split_src_ranges = RangeSet()
1491 for r in split_src_indices:
1492 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1493 assert not split_src_ranges.overlaps(curr_range)
1494 split_src_ranges = split_src_ranges.union(curr_range)
1495
1496 split_info_list.append((patch_start, patch_length,
1497 split_tgt_ranges, split_src_ranges))
1498 patch_start += patch_length
1499
1500 # Check that the sizes of all the split pieces add up to the final file
1501 # size for patch and target.
1502 assert tgt_remain.size() == 0
1503 assert patch_start == patch_size
1504 return split_info_list
1505
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001506 def SplitLargeApks():
1507 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001508
1509 Example: Chrome.apk will be split into
1510 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1511 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1512 ...
1513
1514 After the split, the target pieces are continuous and block aligned; and
1515 the source pieces are mutually exclusive. During the split, we also
1516 generate and save the image patch between src-X & tgt-X. This patch will
1517 be valid because the block ranges of src-X & tgt-X will always stay the
1518 same afterwards; but there's a chance we don't use the patch if we
1519 convert the "diff" command into "new" or "move" later.
1520 """
1521
1522 while True:
1523 with transfer_lock:
1524 if not large_apks:
1525 return
1526 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1527
1528 src_file = common.MakeTempFile(prefix="src-")
1529 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001530 with open(src_file, "wb") as src_fd:
1531 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1532 with open(tgt_file, "wb") as tgt_fd:
1533 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001534
1535 patch_file = common.MakeTempFile(prefix="patch-")
1536 patch_info_file = common.MakeTempFile(prefix="split_info-")
1537 cmd = ["imgdiff", "-z",
1538 "--block-limit={}".format(max_blocks_per_transfer),
1539 "--split-info=" + patch_info_file,
1540 src_file, tgt_file, patch_file]
Tao Bao73dd4f42018-10-04 16:25:33 -07001541 proc = common.Run(cmd)
1542 imgdiff_output, _ = proc.communicate()
1543 assert proc.returncode == 0, \
Tao Bao4ccea852018-02-06 15:16:41 -08001544 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1545 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001546
1547 with open(patch_info_file) as patch_info:
1548 lines = patch_info.readlines()
1549
1550 patch_size_total = os.path.getsize(patch_file)
1551 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1552 tgt_ranges, src_ranges,
1553 lines)
1554 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001555 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001556 with open(patch_file) as f:
1557 f.seek(patch_start)
1558 patch_content = f.read(patch_length)
1559
1560 split_src_name = "{}-{}".format(src_name, index)
1561 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001562 split_large_apks.append((split_tgt_name,
1563 split_src_name,
1564 split_tgt_ranges,
1565 split_src_ranges,
1566 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001567
Tao Bao32fcdab2018-10-12 10:30:39 -07001568 logger.info("Finding transfers...")
Tao Bao08c85832016-09-19 22:26:30 -07001569
Tianjie Xu25366072017-09-08 17:19:02 -07001570 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001571 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001572 cache_size = common.OPTIONS.cache_size
1573 split_threshold = 0.125
1574 max_blocks_per_transfer = int(cache_size * split_threshold /
1575 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001576 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001577 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001578 if tgt_fn == "__ZERO":
1579 # the special "__ZERO" domain is all the blocks not contained
1580 # in any file and that are filled with zeros. We have a
1581 # special transfer style for zero blocks.
1582 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001583 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1584 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001585 continue
1586
Tao Baoff777812015-05-12 11:42:31 -07001587 elif tgt_fn == "__COPY":
1588 # "__COPY" domain includes all the blocks not contained in any
1589 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001590 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001591 continue
1592
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001593 elif tgt_fn == "__HASHTREE":
1594 continue
1595
Doug Zongkerfc44a512014-08-26 13:10:25 -07001596 elif tgt_fn in self.src.file_map:
1597 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001598 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001599 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001600 continue
1601
1602 b = os.path.basename(tgt_fn)
1603 if b in self.src_basenames:
1604 # Look for an exact basename match in the source.
1605 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001606 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001607 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001608 continue
1609
1610 b = re.sub("[0-9]+", "#", b)
1611 if b in self.src_numpatterns:
1612 # Look for a 'number pattern' match (a basename match after
1613 # all runs of digits are replaced by "#"). (This is useful
1614 # for .so files that contain version numbers in the filename
1615 # that get bumped.)
1616 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001617 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001618 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001619 continue
1620
Tao Bao9a5caf22015-08-25 15:10:10 -07001621 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001622
Tianjie Xu25366072017-09-08 17:19:02 -07001623 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001624 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001625 for _ in range(self.threads)]
1626 for th in threads:
1627 th.start()
1628 while threads:
1629 threads.pop().join()
1630
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001631 # Sort the split transfers for large apks to generate a determinate package.
1632 split_large_apks.sort()
1633 for (tgt_name, src_name, tgt_ranges, src_ranges,
1634 patch) in split_large_apks:
1635 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1636 self.tgt.RangeSha1(tgt_ranges),
1637 self.src.RangeSha1(src_ranges),
1638 "diff", self.transfers)
xunchang21531832018-12-06 14:20:05 -08001639 transfer_split.patch_info = PatchInfo(True, patch)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001640
Doug Zongkerfc44a512014-08-26 13:10:25 -07001641 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001642 for k in self.src.file_map.keys():
1643 b = os.path.basename(k)
1644 self.src_basenames[b] = k
1645 b = re.sub("[0-9]+", "#", b)
1646 self.src_numpatterns[b] = k
1647
1648 @staticmethod
1649 def AssertPartition(total, seq):
1650 """Assert that all the RangeSets in 'seq' form a partition of the
1651 'total' RangeSet (ie, they are nonintersecting and their union
1652 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001653
Doug Zongkerfc44a512014-08-26 13:10:25 -07001654 so_far = RangeSet()
1655 for i in seq:
1656 assert not so_far.overlaps(i)
1657 so_far = so_far.union(i)
1658 assert so_far == total