# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import array
import copy
import functools
import heapq
import itertools
import logging
import multiprocessing
import os
import os.path
import re
import sys
import threading
import zlib
from collections import deque, namedtuple, OrderedDict
from hashlib import sha1

import common
from rangelib import RangeSet

__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

logger = logging.getLogger(__name__)

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])


def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Calls bsdiff|imgdiff to compute the patch data, returns a PatchInfo."""
  patchfile = common.MakeTempFile(prefix='patch-')

  cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
  cmd.extend([srcfile, tgtfile, patchfile])

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())

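# A minimal usage sketch (illustrative only; assumes the bsdiff/imgdiff host
# tools are on PATH, as common.Run() requires, and that both files exist):
#
#   patch_info = compute_patch("/tmp/src.bin", "/tmp/tgt.bin", imgdiff=False)
#   assert not patch_info.imgdiff    # a bsdiff-style patch
#   patch_bytes = patch_info.content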

class Image(object):
  def RangeSha1(self, ranges):
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    raise NotImplementedError


class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}
    self.hashtree_info = None

  def RangeSha1(self, ranges):
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")


class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)


class FileImage(Image):
  """An image wrapped around a raw image file."""

  def __init__(self, path, hashtree_info_generator=None):
    self.path = path
    self.blocksize = 4096
    self._file_size = os.path.getsize(self.path)
    self._file = open(self.path, 'rb')

    if self._file_size % self.blocksize != 0:
      raise ValueError(
          "Size of file %s must be multiple of %d bytes, but is %d" %
          (self.path, self.blocksize, self._file_size))

    self.total_blocks = self._file_size // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

    self.generator_lock = threading.Lock()

    self.hashtree_info = None
    if hashtree_info_generator:
      self.hashtree_info = hashtree_info_generator.Generate(self)

    zero_blocks = []
    nonzero_blocks = []
    reference = b'\0' * self.blocksize  # bytes, to match binary reads

    for i in range(self.total_blocks):
      d = self._file.read(self.blocksize)
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks

    self.file_map = {}
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if self.hashtree_info:
      self.file_map["__HASHTREE"] = self.hashtree_info.hashtree_range

  def __del__(self):
    self._file.close()

  def _GetRangeData(self, ranges):
    # Use a lock to protect the generator so that we will not run two
    # instances of this generator on the same object simultaneously.
    with self.generator_lock:
      for s, e in ranges:
        self._file.seek(s * self.blocksize)
        for _ in range(s, e):
          yield self._file.read(self.blocksize)

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    assert not self.clobbered_blocks
    return self.RangeSha1(self.care_map)

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)

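# Illustrative usage sketch (the path is hypothetical; the file size must be
# a multiple of the 4096-byte block size):
#
#   img = FileImage("/tmp/system.img")
#   img.TotalSha1()                    # SHA-1 over the whole care_map
#   img.ReadRangeSet(RangeSet("0-1"))  # the first two blocks, as read chunks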

class Transfer(object):
  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
    return (sum(sr.size() for (_, sr) in self.stash_before) -
            sum(sr.size() for (_, sr) in self.use_stash))

  def ConvertToNew(self):
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch_info = None

  def __str__(self):
    return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
            " to " + str(self.tgt_ranges) + ">")

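# Ordering sketch (illustrative): if transfer b overwrites source blocks that
# transfer a still needs to read (a.src_ranges intersects b.tgt_ranges), then
# a must run first; b is recorded in a.goes_before and a in b.goes_after.
# When the final sequence violates such an edge, the overlapping blocks are
# carried across via matching (stash_before, use_stash) pairs instead.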

@functools.total_ordering
class HeapItem(object):
  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__, while Python 3 uses __bool__.
  __nonzero__ = __bool__

  # The rest of the operations are generated by the functools.total_ordering
  # decorator.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score

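# A minimal sketch of the negated-score trick (illustrative; _Xfer stands in
# for a Transfer-like object that only needs a .score attribute):
#
#   import heapq
#   class _Xfer(object):
#     def __init__(self, score): self.score = score
#   heap = [HeapItem(_Xfer(s)) for s in (1, 5, 3)]
#   heapq.heapify(heap)
#   heapq.heappop(heap).item.score   # => 5; the highest score pops first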

class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files to which imgdiff is applied while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats are only meaningful when imgdiff is not disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    if reason not in self.stats:
      self.stats[reason] = set()
    self.stats[reason].add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info('%s\n', separator * len(header))

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      if key not in self.stats:
        continue
      values = self.stats[key]
      section_header = ' {} (count: {}) '.format(key, len(values))
      print_header(section_header, '-')
      logger.info(''.join([' {}\n'.format(name) for name in values]))

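# Illustrative usage (file names are hypothetical):
#
#   stats = ImgdiffStats()
#   stats.Log("Settings.apk", ImgdiffStats.USED_IMGDIFF)
#   stats.Log("Maps.apk", ImgdiffStats.SKIPPED_NONMONOTONIC)
#   stats.Report()   # dumps each reason bucket and its files via the logger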

class BlockImageDiff(object):
  """Generates the diff of two block image objects.

  BlockImageDiff works on two image objects. An image object is anything that
  provides the following attributes:

     blocksize: the size in bytes of a block, currently must be 4096.

     total_blocks: the total size of the partition/image, in blocks.

     care_map: a RangeSet containing which blocks (in the range [0,
       total_blocks) we actually care about; i.e. which blocks contain data.

     file_map: a dict that partitions the blocks contained in care_map into
       smaller domains that are useful for doing diffs on. (Typically a domain
       is a file, and the key in file_map is the pathname.)

     clobbered_blocks: a RangeSet containing which blocks contain data but may
       be altered by the FS. They need to be excluded when verifying the
       partition integrity.

     ReadRangeSet(): a function that takes a RangeSet and returns the data
       contained in the image blocks of that RangeSet. The data is returned as
       a list or tuple of strings; concatenating the elements together should
       produce the requested data. Implementations are free to break up the
       data into list/tuple elements in any way that is convenient.

     RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
       all the data in the specified range.

     TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
       all the data in the image (ie, all the blocks in the care_map minus
       clobbered_blocks, or including the clobbered blocks if
       include_clobbered_blocks is True).

  When creating a BlockImageDiff, the src image may be None, in which case the
  list of transfers produced will never read from the original image.
  """

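  # A minimal end-to-end sketch (illustrative; the path prefix below is
  # hypothetical). Compute() writes <prefix>.transfer.list, <prefix>.new.dat
  # and <prefix>.patch.dat:
  #
  #   diff = BlockImageDiff(tgt_image, src_image, version=4)
  #   diff.Compute("/tmp/system")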
  def __init__(self, tgt, src=None, threads=None, version=4,
               disable_imgdiff=False):
    if threads is None:
      threads = multiprocessing.cpu_count() // 2
      if threads == 0:
        threads = 1
    self.threads = threads
    self.version = version
    self.transfers = []
    self.src_basenames = {}
    self.src_numpatterns = {}
    self._max_stashed_size = 0
    self.touched_src_ranges = RangeSet()
    self.touched_src_sha1 = None
    self.disable_imgdiff = disable_imgdiff
    self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

    assert version in (3, 4)

    self.tgt = tgt
    if src is None:
      src = EmptyImage()
    self.src = src

    # The updater code that installs the patch always uses 4k blocks.
    assert tgt.blocksize == 4096
    assert src.blocksize == 4096

    # The range sets in each filemap should comprise a partition of
    # the care map.
    self.AssertPartition(src.care_map, src.file_map.values())
    self.AssertPartition(tgt.care_map, tgt.file_map.values())

  @property
  def max_stashed_size(self):
    return self._max_stashed_size

  @staticmethod
  def FileTypeSupportedByImgdiff(filename):
    """Returns whether the file type is supported by imgdiff."""
    return filename.lower().endswith(('.apk', '.jar', '.zip'))

  def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
    """Checks whether we can apply imgdiff for the given RangeSets.

    For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
    'imgdiff -z' if possible, because it usually produces significantly
    smaller patches than bsdiff.

    This is permissible if all of the following conditions hold.
      - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
      - The file type is supported by imgdiff;
      - The source and target blocks are monotonic (i.e. the data is stored
        with blocks in increasing order);
      - Both files don't contain shared blocks;
      - Both files have complete lists of blocks;
      - We haven't removed any blocks from the source set.

    If all these conditions are satisfied, concatenating all the blocks in the
    RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
    in the last block). imgdiff is fine with extra zeros at the end of the
    file.

    Args:
      name: The filename to be diff'd.
      tgt_ranges: The target RangeSet.
      src_ranges: The source RangeSet.
      large_apk: Whether this is to split a large APK.

    Returns:
      A boolean result.
    """
    if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
      return False

    if not tgt_ranges.monotonic or not src_ranges.monotonic:
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
      return False

    if (tgt_ranges.extra.get('uses_shared_blocks') or
        src_ranges.extra.get('uses_shared_blocks')):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
      return False

    if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
      return False

    reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
              else ImgdiffStats.USED_IMGDIFF)
    self.imgdiff_stats.Log(name, reason)
    return True

  def Compute(self, prefix):
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    self.FindSequenceForTransfers()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      stash_limit = (common.OPTIONS.cache_size *
                     common.OPTIONS.stash_threshold / self.tgt.blocksize)
      # Ignore the stash limit and calculate the maximum simultaneously stashed
      # blocks needed.
      _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

      # We cannot stash more blocks than the stash limit simultaneously. As a
      # result, some 'diff' commands will be converted to new, leading to an
      # unintended large package. To mitigate this issue, we can carefully
      # choose the transfers for conversion. The number '1024' can be further
      # tweaked here to balance the package size and build time.
      if max_stashed_blocks > stash_limit + 1024:
        self.SelectAndConvertDiffTransfersToNew(
            max_stashed_blocks - stash_limit)
        # Regenerate the sequence as the graph has changed.
        self.FindSequenceForTransfers()

      # Revise the stash size again to keep the size under limit.
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()

  def WriteTransfers(self, prefix):
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large, and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    if self.tgt.hashtree_info:
      out.append("compute_hash_tree {} {} {} {} {}\n".format(
          self.tgt.hashtree_info.hashtree_range.to_string_raw(),
          self.tgt.hashtree_info.filesystem_range.to_string_raw(),
          self.tgt.hashtree_info.hash_algorithm,
          self.tgt.hashtree_info.salt,
          self.tgt.hashtree_info.root_hash))

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starved for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "w") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
          max_stashed_blocks, self._max_stashed_size, max_allowed,
          self._max_stashed_size * 100.0 / max_allowed)
    else:
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
          max_stashed_blocks, self._max_stashed_size)

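  # For reference, a tiny transfer.list produced by WriteTransfers() could
  # look like this (contents illustrative, not from a real build; raw
  # RangeSet strings are "<count>,<start>,<end>,..."):
  #
  #   4            <- transfer list format version
  #   8            <- total blocks written
  #   0            <- stash slots needed (unused in v3+)
  #   0            <- maximum blocks stashed simultaneously
  #   zero 2,0,1
  #   new 2,1,8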
  def ReviseStashSize(self, ignore_stash_limit=False):
    """Revises the transfers to keep the stash size within the size limit.

    Iterates through the transfer list and calculates the stash size each
    transfer generates. Converts the affected transfers to new if we reach the
    stash limit.

    Args:
      ignore_stash_limit: Ignores the stash limit and calculates the max
        simultaneous stashed blocks instead. No change will be made to the
        transfer list with this flag.

    Returns:
      A tuple of (tgt blocks converted to new, max stashed blocks)
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    max_allowed_blocks = None
    if not ignore_stash_limit:
      # Compute the maximum blocks available for stash based on /cache size and
      # the threshold.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0
    max_stashed_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires more stash than is available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after
          max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if (max_allowed_blocks and
              stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
            replaced_cmds.append(xf)
            logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)
          else:
            # The whole source ranges will be stashed for implicit stashes.
            max_stashed_blocks = max(max_stashed_blocks,
                                     stashed_blocks + xf.src_ranges.size())

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violate the space limit and print the total
        # number to screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        "  Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size. Maximum blocks stashed simultaneously: %d",
        new_blocks, num_of_bytes, max_stashed_blocks)
    return new_blocks, max_stashed_blocks

  def ComputePatches(self, prefix):
    logger.info("Reticulating splines...")
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #    ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch_info = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              logger.info(
                  "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size,
                  tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges))
          else:
            if xf.patch_info:
              # We have already generated the patch (e.g. during split of
              # large APKs or reduction of stash size)
              imgdiff = xf.patch_info.imgdiff
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    patches = self.ComputePatchesForInputList(diff_queue, False)

    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch_info, _ in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch_info.content)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch_info.content)

        tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
        logger.info(
            "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
            xf.patch_len * 100.0 / tgt_size, xf.style,
            xf.tgt_name if xf.tgt_name == xf.src_name else (
                xf.tgt_name + " (from " + xf.src_name + ")"),
            xf.tgt_ranges, xf.src_ranges)

  def AssertSha1Good(self):
    """Check the SHA-1 of the src & tgt blocks in the transfer list.

    Double check the SHA-1 value to avoid the issue in b/71908713, where
    SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
    environment. That specific problem has been fixed by protecting the
    underlying generator function 'SparseImage._GetRangeData()' with lock.
    """
    for xf in self.transfers:
      tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
      assert xf.tgt_sha1 == tgt_sha1
      if xf.style == "diff":
        src_sha1 = self.src.RangeSha1(xf.src_ranges)
        assert xf.src_sha1 == src_sha1

  def AssertSequenceGood(self):
    # Simulate the sequences of transfers we will output, and check that:
    # - we never read a block after writing it, and
    # - we write every block we care about exactly once.

    # Start with no blocks having been touched yet.
    touched = array.array("B", b"\0" * self.tgt.total_blocks)

    # Imagine processing the transfers in order.
    for xf in self.transfers:
      # Check that the input blocks for this transfer haven't yet been touched.

      x = xf.src_ranges
      for _, sr in xf.use_stash:
        x = x.subtract(sr)

      for s, e in x:
        # Source image could be larger. Don't check the blocks that are in the
        # source image only, since they are not in 'touched' and won't ever
        # be touched.
        for i in range(s, min(e, self.tgt.total_blocks)):
          assert touched[i] == 0

      # Check that the output blocks for this transfer haven't yet
      # been touched, and touch all the blocks written by this
      # transfer.
      for s, e in xf.tgt_ranges:
        for i in range(s, e):
          assert touched[i] == 0
          touched[i] = 1

    if self.tgt.hashtree_info:
      for s, e in self.tgt.hashtree_info.hashtree_range:
        for i in range(s, e):
          assert touched[i] == 0
          touched[i] = 1

    # Check that we've written every target block.
    for s, e in self.tgt.care_map:
      for i in range(s, e):
        assert touched[i] == 1

  def FindSequenceForTransfers(self):
    """Finds a sequence for the given transfers.

    The goal is to minimize the violation of order dependencies between these
    transfers, so that fewer blocks are stashed when applying the update.
    """

    # Clear the existing dependency between transfers
    for xf in self.transfers:
      xf.goes_before = OrderedDict()
      xf.goes_after = OrderedDict()

      xf.stash_before = []
      xf.use_stash = []

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

  def ImproveVertexSequence(self):
    logger.info("Improving vertex order...")

    # At this point our digraph is acyclic; we reversed any edges that
    # were backwards in the heuristically-generated sequence. The
    # previously-generated order is still acceptable, but we hope to
    # find a better order that needs less memory for stashed data.
    # Now we do a topological sort to generate a new vertex order,
    # using a greedy algorithm to choose which vertex goes next
    # whenever we have a choice.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    L = []  # the new vertex order

    # S is the set of sources in the remaining graph; we always choose
    # the one that leaves the least amount of stashed data after it's
    # executed.
    S = [(u.NetStashChange(), u.order, u) for u in self.transfers
         if not u.incoming]
    heapq.heapify(S)

    while S:
      _, _, xf = heapq.heappop(S)
      L.append(xf)
      for u in xf.outgoing:
        del u.incoming[xf]
        if not u.incoming:
          heapq.heappush(S, (u.NetStashChange(), u.order, u))

    # if this fails then our graph had a cycle.
    assert len(L) == len(self.transfers)

    self.transfers = L
    for i, xf in enumerate(L):
      xf.order = i

  def ReverseBackwardEdges(self):
    """Reverse unsatisfying edges and compute pairs of stashed blocks.

    For each transfer, make sure it properly stashes the blocks it touches and
    will be used by later transfers. It uses pairs of (stash_raw_id, range) to
    record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
    identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
    it is possible to have multiple pairs with different 'stash_raw_id's. Each
    'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
    blocks will be written to the same stash slot in WriteTransfers().
    """

    logger.info("Reversing backward edges...")
    in_order = 0
    out_of_order = 0
    stash_raw_id = 0
    stash_size = 0

    for xf in self.transfers:
      for u in xf.goes_before.copy():
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo. modify u to stash the blocks that it
          # writes that xf wants to read, and then require u to go
          # before xf.
          out_of_order += 1

          overlap = xf.src_ranges.intersect(u.tgt_ranges)
          assert overlap

          u.stash_before.append((stash_raw_id, overlap))
          xf.use_stash.append((stash_raw_id, overlap))
          stash_raw_id += 1
          stash_size += overlap.size()

          # reverse the edge direction; now xf must go after u
          del xf.goes_before[u]
          del u.goes_after[xf]
          xf.goes_after[u] = None  # value doesn't matter
          u.goes_before[xf] = None

    logger.info(
        "  %d/%d dependencies (%.2f%%) were violated; %d source blocks "
        "stashed.", out_of_order, in_order + out_of_order,
        (out_of_order * 100.0 / (in_order + out_of_order)) if (
            in_order + out_of_order) else 0.0,
        stash_size)

  def FindVertexSequence(self):
    logger.info("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.
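    # Illustrative sketch (hypothetical graph): for a cycle A -> B -> C -> A
    # with edge weights 10, 10 and 1, laying the vertices out as A, B, C
    # leaves only the weight-1 edge C -> A pointing left; the heuristic aims
    # for such a layout, so ReverseBackwardEdges() later stashes 1 block
    # instead of 10.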

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net number of source blocks we save by treating
      # it as a source rather than a sink.

      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and rearrange self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers

  def GenerateDigraph(self):
    logger.info("Generating digraph...")

    # Each item of source_ranges will be:
    #   - None, if that block is not used as a source,
    #   - an ordered set of transfers.
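    # Illustrative sketch (hypothetical transfers): if t1 reads blocks 0-1
    # and t2 reads blocks 1-2, source_ranges ends up as
    #   [{t1: None}, {t1: None, t2: None}, {t2: None}].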
    source_ranges = []
    for b in self.transfers:
      for s, e in b.src_ranges:
        if e > len(source_ranges):
          source_ranges.extend([None] * (e - len(source_ranges)))
        for i in range(s, e):
          if source_ranges[i] is None:
            source_ranges[i] = OrderedDict.fromkeys([b])
          else:
            source_ranges[i][b] = None

    for a in self.transfers:
      intersections = OrderedDict()
      for s, e in a.tgt_ranges:
        for i in range(s, e):
          if i >= len(source_ranges):
            break
          # Add all the Transfers in source_ranges[i] to the (ordered) set.
          if source_ranges[i] is not None:
            for j in source_ranges[i]:
              intersections[j] = None

      for b in intersections:
        if a is b:
          continue

        # If the blocks written by A are read by B, then B needs to go
        # before A.
        i = a.tgt_ranges.intersect(b.src_ranges)
        if i:
          if b.src_name == "__ZERO":
            # the cost of removing source blocks for the __ZERO domain
            # is (nearly) zero.
            size = 0
          else:
            size = i.size()
          b.goes_before[a] = size
          a.goes_after[b] = size

  def ComputePatchesForInputList(self, diff_queue, compress_target):
    """Returns a list of patch information for the input list of transfers.

    Args:
      diff_queue: a list of (transfer order, imgdiff flag, patch index)
          tuples, one per transfer with style 'diff'.
      compress_target: If True, compresses the target ranges of each
          transfer and saves the compressed size.

    Returns:
      A list of (transfer order, patch_info, compressed_size) tuples.
    """
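    # Illustrative usage (hypothetical values): an entry (42, True, 7) in
    # diff_queue requests an imgdiff patch for self.transfers[42]; the
    # resulting tuple is stored at patches[7].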

    if not diff_queue:
      return []

    if self.threads > 1:
      logger.info("Computing patches (using %d threads)...", self.threads)
    else:
      logger.info("Computing patches...")

    diff_total = len(diff_queue)
    patches = [None] * diff_total
    error_messages = []

    # Using multiprocessing doesn't give additional benefits, due to the
    # pattern of the code. The diffing work is done by subprocess.call, which
    # already runs in a separate process (not affected much by the GIL -
    # Global Interpreter Lock). Using multiprocessing also requires either a)
    # writing the diff input files in the main process before forking, or b)
    # reopening the image file (SparseImage) in the worker processes. Doing
    # neither of them further improves the performance.
    lock = threading.Lock()

    def diff_worker():
      while True:
        with lock:
          if not diff_queue:
            return
          xf_index, imgdiff, patch_index = diff_queue.pop()
          xf = self.transfers[xf_index]

        message = []
        compressed_size = None

        patch_info = xf.patch_info
        if not patch_info:
          src_file = common.MakeTempFile(prefix="src-")
          with open(src_file, "wb") as fd:
            self.src.WriteRangeDataToFd(xf.src_ranges, fd)

          tgt_file = common.MakeTempFile(prefix="tgt-")
          with open(tgt_file, "wb") as fd:
            self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)

          try:
            patch_info = compute_patch(src_file, tgt_file, imgdiff)
          except ValueError as e:
            message.append(
                "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                    "imgdiff" if imgdiff else "bsdiff",
                    xf.tgt_name if xf.tgt_name == xf.src_name else
                    xf.tgt_name + " (from " + xf.src_name + ")",
                    xf.tgt_ranges, xf.src_ranges, e))

        if compress_target:
          tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
          try:
            # Compresses with the default level (6).
            compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
            compressed_data = (compress_obj.compress("".join(tgt_data))
                               + compress_obj.flush())
            compressed_size = len(compressed_data)
          except zlib.error as e:
            message.append(
                "Failed to compress the data in target range {} for {}:\n"
                "{}".format(xf.tgt_ranges, xf.tgt_name, e))

        if message:
          with lock:
            error_messages.extend(message)

        with lock:
          patches[patch_index] = (xf_index, patch_info, compressed_size)

    threads = [threading.Thread(target=diff_worker)
               for _ in range(self.threads)]
    for th in threads:
      th.start()
    while threads:
      threads.pop().join()

    if error_messages:
      logger.error('ERROR:')
      logger.error('\n'.join(error_messages))
      logger.error('\n\n\n')
      sys.exit(1)

    return patches

  def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):
    """Converts diff transfers to new to reduce the max simultaneous stash.

    Since the 'new' data is compressed with deflate, we can select the 'diff'
    transfers for conversion by comparing each transfer's patch size with the
    size of its compressed target data. Ideally, we want to convert the
    transfers that use a large number of stashed blocks but incur only a
    small size increase.
    """
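    # Illustrative sketch (hypothetical sizes): a transfer with an
    # 800000-byte patch, a 1000000-byte compressed target and 50 stashed
    # blocks scores (1000000 - 800000) * 100.0 / 50 = 400000.0, and is
    # converted before any candidate with a higher per-block cost.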
    TransferSizeScore = namedtuple("TransferSizeScore",
                                   "xf, used_stash_blocks, score")

    logger.info("Selecting diff commands to convert to new.")
    diff_queue = []
    for xf in self.transfers:
      if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
        use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
                                         xf.src_ranges)
        diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))

    # The 'move' transfers (identical source and target hashes) were filtered
    # out above; compute the patch & compressed size for the remaining ones.
    result = self.ComputePatchesForInputList(diff_queue, True)

    conversion_candidates = []
    for xf_index, patch_info, compressed_size in result:
      xf = self.transfers[xf_index]
      if not xf.patch_info:
        xf.patch_info = patch_info

      size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
      diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
      logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"
                  " compression_size: %d, ratio %.2f%%", xf.tgt_name,
                  xf.tgt_ranges.size(), diff_style,
                  len(xf.patch_info.content), compressed_size, size_ratio)

      used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)
      # Convert the transfer to new if the compressed size is smaller or
      # equal. We don't need to maintain the stash_before lists here because
      # the graph will be regenerated later.
      if len(xf.patch_info.content) >= compressed_size:
        # Add the transfer to the candidate list with a negative score, so
        # that it is always converted.
        conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
                                                       -1))
      elif used_stash_blocks > 0:
        # This heuristic represents the size increase in the final package
        # per unit of stashed data removed.
        score = ((compressed_size - len(xf.patch_info.content)) * 100.0
                 / used_stash_blocks)
        conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
                                                       score))
    # Transfers with a lower score (i.e. less expensive to convert) will be
    # converted first.
    conversion_candidates.sort(key=lambda x: x.score)

    # TODO(xunchang) Improve the logic to find the transfers to convert, e.g.
    # convert the ones that contribute to the max stash, run ReviseStashSize
    # multiple times etc.
    removed_stashed_blocks = 0
    for xf, used_stash_blocks, _ in conversion_candidates:
      logger.info("Converting %s to new", xf.tgt_name)
      xf.ConvertToNew()
      removed_stashed_blocks += used_stash_blocks
      # Experiments show that we will get a smaller package size if we remove
      # slightly more stashed blocks than the violated stash blocks.
      if removed_stashed_blocks >= violated_stash_blocks:
        break

    logger.info("Removed %d stashed blocks", removed_stashed_blocks)

  def FindTransfers(self):
    """Parse the file_map to generate all the transfers."""

    def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                             src_ranges, style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for the resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition, source blocks may be too large to be stashed. If a file
      occupies too many blocks, we split it into smaller pieces by creating
      multiple Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""
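      # Illustrative sketch (hypothetical numbers): with a 256 MiB cache and
      # 4096-byte blocks, max_blocks_per_transfer is
      # int(268435456 * 0.125 / 4096) = 8192, so a 20000-block "foo" becomes
      # "foo-0" and "foo-1" (8192 blocks each) plus "foo-2" (3616 blocks).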

      pieces = 0
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
                 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
                 style, by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges),
                 self.src.RangeSha1(src_ranges), style, by_id)

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
                          by_id):
      """Find all the zip files and split the others by a fixed chunk size.

      This function will construct a list of zip archives, which will later
      be split by imgdiff to reduce the final patch size. The other files
      are plainly split based on a fixed chunk size, with a potential patch
      size penalty.
      """

      assert style == "diff"

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges),
                 self.src.RangeSha1(src_ranges), style, by_id)
        return

      # Split large APKs with imgdiff, if possible. We're intentionally
      # checking file types one more time (CanUseImgdiff() checks that as
      # well), before calling the costly RangeSha1()s.
      if (self.FileTypeSupportedByImgdiff(tgt_name) and
          self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
        if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
          large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
          return

      AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                           src_ranges, style, by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer()."""

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges),
                 self.src.RangeSha1(src_ranges), style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only a few changes (e.g. the
      # header), we will patch the changed blocks only. This avoids stashing
      # unchanged blocks while patching. We limit the analysis to files whose
      # size stays the same, to avoid paying too much extra cost at OTA
      # generation time.
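      # Illustrative sketch (hypothetical file): for a 1000-block .odex where
      # only 16 header blocks changed, the scan below skips the 984 identical
      # blocks, emitting "-skipped" transfers for them and "-cropped"
      # transfers for the 16 blocks that actually need diffing and stashing.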
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # The 0.5 threshold can be further tuned. The tradeoff is: if only
        # very few blocks remain identical, we lose the opportunity to use
        # imgdiff, which may have a better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if there is no clear sign of benefit.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          logger.info(
              '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
              tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd
          # as the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
                                  split_info):
      """Parse the split_info and return a list of info tuples.

      Args:
        patch_size: total size of the patch file.
        tgt_ranges: Ranges of the target file within the original image.
        src_ranges: Ranges of the source file within the original image.
        split_info format:
          imgdiff version#
          count of pieces
          <patch_size_1> <tgt_size_1> <src_ranges_1>
          ...
          <patch_size_n> <tgt_size_n> <src_ranges_n>

      Returns:
        A list of (patch_start, patch_len, split_tgt_ranges,
        split_src_ranges) tuples.
      """
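      # Illustrative sketch (hypothetical split_info): ["2", "2",
      # "524288 4194304 <raw ranges>", "131072 2097152 <raw ranges>"]
      # describes two pieces: a 512 KiB patch producing the first 1024
      # target blocks, then a 128 KiB patch starting at offset 524288 in
      # the patch file.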

      version = int(split_info[0])
      assert version == 2
      count = int(split_info[1])
      assert len(split_info) - 2 == count

      split_info_list = []
      patch_start = 0
      tgt_remain = copy.deepcopy(tgt_ranges)
      # Each line has the format <patch_size>, <tgt_size>, <src_ranges>.
      for line in split_info[2:]:
        info = line.split()
        assert len(info) == 3
        patch_length = int(info[0])

        split_tgt_size = int(info[1])
        assert split_tgt_size % 4096 == 0
        assert split_tgt_size // 4096 <= tgt_remain.size()
        split_tgt_ranges = tgt_remain.first(split_tgt_size // 4096)
        tgt_remain = tgt_remain.subtract(split_tgt_ranges)

        # Find the split_src_ranges within the image file from its relative
        # position in file.
        split_src_indices = RangeSet.parse_raw(info[2])
        split_src_ranges = RangeSet()
        for r in split_src_indices:
          curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
          assert not split_src_ranges.overlaps(curr_range)
          split_src_ranges = split_src_ranges.union(curr_range)

        split_info_list.append((patch_start, patch_length,
                                split_tgt_ranges, split_src_ranges))
        patch_start += patch_length

      # Check that the sizes of all the split pieces add up to the final file
      # size for patch and target.
      assert tgt_remain.size() == 0
      assert patch_start == patch_size
      return split_info_list

    def SplitLargeApks():
      """Split the large apk files.

      Example: Chrome.apk will be split into
        src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
        src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
        ...

      After the split, the target pieces are continuous and block aligned; and
      the source pieces are mutually exclusive. During the split, we also
      generate and save the image patch between src-X & tgt-X. This patch will
      be valid because the block ranges of src-X & tgt-X will always stay the
      same afterwards; but there's a chance we don't use the patch if we
      convert the "diff" command into "new" or "move" later.
      """

      while True:
        with transfer_lock:
          if not large_apks:
            return
          tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)

        src_file = common.MakeTempFile(prefix="src-")
        tgt_file = common.MakeTempFile(prefix="tgt-")
        with open(src_file, "wb") as src_fd:
          self.src.WriteRangeDataToFd(src_ranges, src_fd)
        with open(tgt_file, "wb") as tgt_fd:
          self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)

        patch_file = common.MakeTempFile(prefix="patch-")
        patch_info_file = common.MakeTempFile(prefix="split_info-")
        cmd = ["imgdiff", "-z",
               "--block-limit={}".format(max_blocks_per_transfer),
               "--split-info=" + patch_info_file,
               src_file, tgt_file, patch_file]
        proc = common.Run(cmd)
        imgdiff_output, _ = proc.communicate()
        assert proc.returncode == 0, \
            "Failed to create imgdiff patch between {} and {}:\n{}".format(
                src_name, tgt_name, imgdiff_output)

        with open(patch_info_file) as patch_info:
          lines = patch_info.readlines()

        patch_size_total = os.path.getsize(patch_file)
        split_info_list = ParseAndValidateSplitInfo(patch_size_total,
                                                    tgt_ranges, src_ranges,
                                                    lines)
        for index, (patch_start, patch_length, split_tgt_ranges,
                    split_src_ranges) in enumerate(split_info_list):
          with open(patch_file, 'rb') as f:
            f.seek(patch_start)
            patch_content = f.read(patch_length)

          split_src_name = "{}-{}".format(src_name, index)
          split_tgt_name = "{}-{}".format(tgt_name, index)
          split_large_apks.append((split_tgt_name,
                                   split_src_name,
                                   split_tgt_ranges,
                                   split_src_ranges,
                                   patch_content))

    logger.info("Finding transfers...")

    large_apks = []
    split_large_apks = []
    cache_size = common.OPTIONS.cache_size
    split_threshold = 0.125
    max_blocks_per_transfer = int(cache_size * split_threshold /
                                  self.tgt.blocksize)
    empty = RangeSet()
    for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros. We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn == "__HASHTREE":
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, True)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#"). (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

    transfer_lock = threading.Lock()
    threads = [threading.Thread(target=SplitLargeApks)
               for _ in range(self.threads)]
    for th in threads:
      th.start()
    while threads:
      threads.pop().join()

    # Sort the split transfers for large apks to generate a deterministic
    # package.
    split_large_apks.sort()
    for (tgt_name, src_name, tgt_ranges, src_ranges,
         patch) in split_large_apks:
      transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                                self.tgt.RangeSha1(tgt_ranges),
                                self.src.RangeSha1(src_ranges),
                                "diff", self.transfers)
      transfer_split.patch_info = PatchInfo(True, patch)

  def AbbreviateSourceNames(self):
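    # Illustrative sketch (hypothetical name): "lib/libfoo.so.1.2" is indexed
    # under basename "libfoo.so.1.2" and number pattern "libfoo.so.#.#", so
    # FindTransfers() can still pick it as a diff source for a renamed
    # "lib64/libfoo.so.1.3".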
    for k in self.src.file_map.keys():
      b = os.path.basename(k)
      self.src_basenames[b] = k
      b = re.sub("[0-9]+", "#", b)
      self.src_numpatterns[b] = k

  @staticmethod
  def AssertPartition(total, seq):
    """Assert that all the RangeSets in 'seq' form a partition of the
    'total' RangeSet (ie, they are nonintersecting and their union
    equals 'total')."""
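    # Illustrative usage: AssertPartition(RangeSet("0-9"),
    # [RangeSet("0-3"), RangeSet("4-9")]) passes; overlapping or missing
    # blocks would trip one of the assertions below.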

    so_far = RangeSet()
    for i in seq:
      assert not so_far.overlaps(i)
      so_far = so_far.union(i)
    assert so_far == total