# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import array
import copy
import functools
import heapq
import itertools
import logging
import multiprocessing
import os
import os.path
import re
import sys
import threading
import zlib
from collections import deque, namedtuple, OrderedDict
from hashlib import sha1

import common
from rangelib import RangeSet

__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

logger = logging.getLogger(__name__)

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])


def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Calls bsdiff|imgdiff to compute the patch data, returns a PatchInfo."""
  patchfile = common.MakeTempFile(prefix='patch-')

  cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
  cmd.extend([srcfile, tgtfile, patchfile])

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())

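# A usage sketch for compute_patch() (illustrative only; the file names below
# are hypothetical, and in this tool the function is normally invoked from
# ComputePatchesForInputList() on worker threads):
#
#   patch_info = compute_patch('/tmp/src.img', '/tmp/tgt.img', imgdiff=True)
#   patch_info.imgdiff          # True: produced by 'imgdiff -z'
#   len(patch_info.content)     # patch size in bytes
#
# The content bytes of each PatchInfo are later concatenated into the
# <prefix>.patch.dat payload by ComputePatches().
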
class Image(object):
  def RangeSha1(self, ranges):
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    raise NotImplementedError


class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}
    self.hashtree_info = None

  def RangeSha1(self, ranges):
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")


class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    self.total_blocks = len(self.data) / self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs, because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = RangeSet(data=clobbered_blocks)
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)

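# A minimal DataImage sketch (illustrative): one zero block plus a 100-byte
# tail, with pad=True. The tail is zero-padded to a full block and recorded in
# clobbered_blocks, so it is always written in full (see Bug: 23828506 above):
#
#   img = DataImage('\0' * 4096 + '\1' * 100, pad=True)
#   img.total_blocks       # -> 2
#   sorted(img.file_map)   # -> ['__COPY', '__ZERO']
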
class FileImage(Image):
  """An image wrapped around a raw image file."""

  def __init__(self, path, hashtree_info_generator=None):
    self.path = path
    self.blocksize = 4096
    self._file_size = os.path.getsize(self.path)
    self._file = open(self.path, 'rb')

    if self._file_size % self.blocksize != 0:
      raise ValueError(
          "Size of file %s must be multiple of %d bytes, but is %d" %
          (self.path, self.blocksize, self._file_size))

    self.total_blocks = self._file_size / self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

    self.hashtree_info = None
    if hashtree_info_generator:
      self.hashtree_info = hashtree_info_generator.Generate(self)

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks):
      d = self._file.read(self.blocksize)
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks

    self.file_map = {}
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if self.hashtree_info:
      self.file_map["__HASHTREE"] = self.hashtree_info.hashtree_range

  def __del__(self):
    self._file.close()

  def _GetRangeData(self, ranges):
    for s, e in ranges:
      self._file.seek(s * self.blocksize)
      for _ in range(s, e):
        yield self._file.read(self.blocksize)

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    assert not self.clobbered_blocks
    return self.RangeSha1(self.care_map)

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)


class Transfer(object):
  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
    return (sum(sr.size() for (_, sr) in self.stash_before) -
            sum(sr.size() for (_, sr) in self.use_stash))

  def ConvertToNew(self):
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch_info = None

  def __str__(self):
    return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
            " to " + str(self.tgt_ranges) + ">")


@functools.total_ordering
class HeapItem(object):
  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__, while Python 3 uses __bool__.
  __nonzero__ = __bool__

  # The rest of the operations are generated by the functools.total_ordering
  # decorator.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score

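# A sketch of how HeapItem turns heapq's min-heap into a max-heap by negating
# the score ('Fake' objects below stand in for Transfers):
#
#   >>> class Fake(object): pass
#   >>> a, b = Fake(), Fake()
#   >>> a.score, b.score = 1, 5
#   >>> heap = [HeapItem(a), HeapItem(b)]
#   >>> heapq.heapify(heap)
#   >>> heapq.heappop(heap).item is b    # the highest score pops first
#   True
#
# clear() marks an entry stale instead of removing it; stale items evaluate
# falsy, so consumers can pop and skip them (see adjust_score() in
# FindVertexSequence()).
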
class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files that imgdiff will be applied to while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats are only meaningful when imgdiff is not disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    if reason not in self.stats:
      self.stats[reason] = set()
    self.stats[reason].add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      if key not in self.stats:
        continue
      values = self.stats[key]
      section_header = ' {} (count: {}) '.format(key, len(values))
      print_header(section_header, '-')
      logger.info(''.join(['  {}\n'.format(name) for name in values]))

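# A usage sketch for ImgdiffStats (illustrative; in practice BlockImageDiff
# drives this through CanUseImgdiff()):
#
#   stats = ImgdiffStats()
#   stats.Log("base.apk", ImgdiffStats.USED_IMGDIFF)
#   stats.Log("theme.apk", ImgdiffStats.SKIPPED_NONMONOTONIC)
#   stats.Report()    # dumps each reason with its count and filenames
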
class BlockImageDiff(object):
  """Generates the diff of two block image objects.

  BlockImageDiff works on two image objects. An image object is anything that
  provides the following attributes:

     blocksize: the size in bytes of a block, currently must be 4096.

     total_blocks: the total size of the partition/image, in blocks.

     care_map: a RangeSet containing which blocks (in the range [0,
       total_blocks) we actually care about; i.e. which blocks contain data.

     file_map: a dict that partitions the blocks contained in care_map into
         smaller domains that are useful for doing diffs on. (Typically a
         domain is a file, and the key in file_map is the pathname.)

     clobbered_blocks: a RangeSet containing which blocks contain data but may
         be altered by the FS. They need to be excluded when verifying the
         partition integrity.

     ReadRangeSet(): a function that takes a RangeSet and returns the data
         contained in the image blocks of that RangeSet. The data is returned
         as a list or tuple of strings; concatenating the elements together
         should produce the requested data. Implementations are free to break
         up the data into list/tuple elements in any way that is convenient.

     RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
         all the data in the specified range.

     TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
         all the data in the image (i.e. all the blocks in the care_map minus
         clobbered_blocks, or including the clobbered blocks if
         include_clobbered_blocks is True).

  When creating a BlockImageDiff, the src image may be None, in which case the
  list of transfers produced will never read from the original image.
  """

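  # A typical construction sketch (illustrative; tgt_img/src_img are any
  # objects satisfying the interface above, e.g. DataImage, FileImage, or
  # sparse_img.SparseImage):
  #
  #   bid = BlockImageDiff(tgt_img, src_img, version=4)
  #   bid.Compute("out/system")   # writes out/system.transfer.list,
  #                               # out/system.new.dat, out/system.patch.dat
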
  def __init__(self, tgt, src=None, threads=None, version=4,
               disable_imgdiff=False):
    if threads is None:
      threads = multiprocessing.cpu_count() // 2
      if threads == 0:
        threads = 1
    self.threads = threads
    self.version = version
    self.transfers = []
    self.src_basenames = {}
    self.src_numpatterns = {}
    self._max_stashed_size = 0
    self.touched_src_ranges = RangeSet()
    self.touched_src_sha1 = None
    self.disable_imgdiff = disable_imgdiff
    self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

    assert version in (3, 4)

    self.tgt = tgt
    if src is None:
      src = EmptyImage()
    self.src = src

    # The updater code that installs the patch always uses 4k blocks.
    assert tgt.blocksize == 4096
    assert src.blocksize == 4096

    # The range sets in each filemap should comprise a partition of
    # the care map.
    self.AssertPartition(src.care_map, src.file_map.values())
    self.AssertPartition(tgt.care_map, tgt.file_map.values())

  @property
  def max_stashed_size(self):
    return self._max_stashed_size

  @staticmethod
  def FileTypeSupportedByImgdiff(filename):
    """Returns whether the file type is supported by imgdiff."""
    return filename.lower().endswith(('.apk', '.jar', '.zip'))

  def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
    """Checks whether we can apply imgdiff for the given RangeSets.

    For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
    'imgdiff -z' if possible, because it usually produces significantly
    smaller patches than bsdiff.

    This is permissible if all of the following conditions hold.
      - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
      - The file type is supported by imgdiff;
      - The source and target blocks are monotonic (i.e. the data is stored
        with blocks in increasing order);
      - Both files don't contain shared blocks;
      - Both files have complete lists of blocks;
      - We haven't removed any blocks from the source set.

    If all these conditions are satisfied, concatenating all the blocks in the
    RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
    in the last block). imgdiff is fine with extra zeros at the end of the
    file.

    Args:
      name: The filename to be diff'd.
      tgt_ranges: The target RangeSet.
      src_ranges: The source RangeSet.
      large_apk: Whether this is to split a large APK.

    Returns:
      A boolean result.
    """
    if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
      return False

    if not tgt_ranges.monotonic or not src_ranges.monotonic:
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
      return False

    if (tgt_ranges.extra.get('uses_shared_blocks') or
        src_ranges.extra.get('uses_shared_blocks')):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
      return False

    if (tgt_ranges.extra.get('incomplete') or
        src_ranges.extra.get('incomplete')):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
      return False

    reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
              else ImgdiffStats.USED_IMGDIFF)
    self.imgdiff_stats.Log(name, reason)
    return True

  def Compute(self, prefix):
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available (see the sketch after this method), otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    self.FindSequenceForTransfers()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      stash_limit = (common.OPTIONS.cache_size *
                     common.OPTIONS.stash_threshold / self.tgt.blocksize)
      # Ignore the stash limit and calculate the maximum simultaneously stashed
      # blocks needed.
      _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

      # We cannot stash more blocks than the stash limit simultaneously. As a
      # result, some 'diff' commands will be converted to new, leading to an
      # unintended large package. To mitigate this issue, we can carefully
      # choose the transfers for conversion. The number '1024' can be further
      # tweaked here to balance the package size and build time.
      if max_stashed_blocks > stash_limit + 1024:
        self.SelectAndConvertDiffTransfersToNew(
            max_stashed_blocks - stash_limit)
        # Regenerate the sequence as the graph has changed.
        self.FindSequenceForTransfers()

      # Revise the stash size again to keep the size under limit.
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()

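  # A sketch of the run-of-digits normalization in step 3 above (illustrative;
  # AbbreviateSourceNames() builds the real lookup tables):
  #
  #   re.sub(r"[0-9]+", "#", "Chrome64.apk")   # -> 'Chrome#.apk'
  #
  # Matching on the normalized basename lets a target file find a renamed
  # source file that differs only in embedded version numbers.
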
  def WriteTransfers(self, prefix):
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large, and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    if self.tgt.hashtree_info:
      out.append("compute_hash_tree {} {} {} {} {}\n".format(
          self.tgt.hashtree_info.hashtree_range.to_string_raw(),
          self.tgt.hashtree_info.filesystem_range.to_string_raw(),
          self.tgt.hashtree_info.hash_algorithm,
          self.tgt.hashtree_info.salt,
          self.tgt.hashtree_info.root_hash))

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # be starved for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      logger.info(
          "max stashed blocks: %d  (%d bytes), limit: %d bytes (%.2f%%)\n",
          max_stashed_blocks, self._max_stashed_size, max_allowed,
          self._max_stashed_size * 100.0 / max_allowed)
    else:
      logger.info(
          "max stashed blocks: %d  (%d bytes), limit: <unknown>\n",
          max_stashed_blocks, self._max_stashed_size)

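  # A sketch of the emitted transfer list (illustrative values; rangesets are
  # in RangeSet.to_string_raw() form, "<count>,<start1>,<end1>,...", and
  # <sha1> stands for a 40-char hex digest):
  #
  #   4                                            <- format version
  #   256                                          <- total blocks written
  #   0                                            <- stash slots (unused, v3+)
  #   96                                           <- max blocks stashed at once
  #   stash <sha1> 2,0,96
  #   bsdiff 0 1024 <sha1> <sha1> 2,512,608 96 - <sha1>:2,0,96
  #   new 2,608,768
  #   free <sha1>
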
  def ReviseStashSize(self, ignore_stash_limit=False):
    """Revises the transfers to keep the stash size within the size limit.

    Iterates through the transfer list and calculates the stash size each
    transfer generates. Converts the affected transfers to new if we reach the
    stash limit.

    Args:
      ignore_stash_limit: Ignores the stash limit and calculates the max
        simultaneous stashed blocks instead. No change will be made to the
        transfer list with this flag.

    Returns:
      A tuple of (tgt blocks converted to new, max stashed blocks)
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    max_allowed_blocks = None
    if not ignore_stash_limit:
      # Compute the maximum blocks available for stash based on /cache size and
      # the threshold.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0
    max_stashed_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires more stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d  %9s  %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after
          max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if (max_allowed_blocks and
              stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
            replaced_cmds.append(xf)
            logger.info("%10d  %9s  %s", xf.src_ranges.size(), "implicit", xf)
          else:
            # The whole source ranges will be stashed for implicit stashes.
            max_stashed_blocks = max(max_stashed_blocks,
                                     stashed_blocks + xf.src_ranges.size())

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violate the space limit and print the total number
        # to screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        "  Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size. Maximum blocks stashed simultaneously: %d",
        new_blocks, num_of_bytes, max_stashed_blocks)
    return new_blocks, max_stashed_blocks

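  # The stash limit arithmetic with concrete (illustrative) numbers: a 100 MiB
  # cache partition and a 0.8 threshold allow
  #
  #   max_allowed_blocks = 100 * 1024 * 1024 * 0.8 / 4096   # = 20480 blocks
  #
  # stashed simultaneously; any transfer that would push the total beyond that
  # gets converted to "new" instead.
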
  def ComputePatches(self, prefix):
    logger.info("Reticulating splines...")
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #   ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch_info = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              logger.info(
                  "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size,
                  tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges))
          else:
            if xf.patch_info:
              # We have already generated the patch (e.g. during split of
              # large APKs or reduction of stash size).
              imgdiff = xf.patch_info.imgdiff
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    patches = self.ComputePatchesForInputList(diff_queue, False)

    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch_info, _ in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch_info.content)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch_info.content)

        tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
        logger.info(
            "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
            xf.patch_len * 100.0 / tgt_size, xf.style,
            xf.tgt_name if xf.tgt_name == xf.src_name else (
                xf.tgt_name + " (from " + xf.src_name + ")"),
            xf.tgt_ranges, xf.src_ranges)

  def AssertSha1Good(self):
    """Check the SHA-1 of the src & tgt blocks in the transfer list.

    Double check the SHA-1 value to avoid the issue in b/71908713, where
    SparseImage.RangeSha1() messed up the hash calculation in a multi-threaded
    environment. That specific problem has been fixed by protecting the
    underlying generator function 'SparseImage._GetRangeData()' with a lock.
    """
    for xf in self.transfers:
      tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
      assert xf.tgt_sha1 == tgt_sha1
      if xf.style == "diff":
        src_sha1 = self.src.RangeSha1(xf.src_ranges)
        assert xf.src_sha1 == src_sha1

  def AssertSequenceGood(self):
    # Simulate the sequences of transfers we will output, and check that:
    # - we never read a block after writing it, and
    # - we write every block we care about exactly once.

    # Start with no blocks having been touched yet.
    touched = array.array("B", "\0" * self.tgt.total_blocks)

    # Imagine processing the transfers in order.
    for xf in self.transfers:
      # Check that the input blocks for this transfer haven't yet been touched.

      x = xf.src_ranges
      for _, sr in xf.use_stash:
        x = x.subtract(sr)

      for s, e in x:
        # Source image could be larger. Don't check the blocks that are in the
        # source image only, since they are not in 'touched' and won't ever
        # be touched.
        for i in range(s, min(e, self.tgt.total_blocks)):
          assert touched[i] == 0

      # Check that the output blocks for this transfer haven't yet
      # been touched, and touch all the blocks written by this
      # transfer.
      for s, e in xf.tgt_ranges:
        for i in range(s, e):
          assert touched[i] == 0
          touched[i] = 1

    if self.tgt.hashtree_info:
      for s, e in self.tgt.hashtree_info.hashtree_range:
        for i in range(s, e):
          assert touched[i] == 0
          touched[i] = 1

    # Check that we've written every target block.
    for s, e in self.tgt.care_map:
      for i in range(s, e):
        assert touched[i] == 1

  def FindSequenceForTransfers(self):
    """Finds a sequence for the given transfers.

    The goal is to minimize the violation of order dependencies between these
    transfers, so that fewer blocks are stashed when applying the update.
    """

    # Clear the existing dependencies between transfers.
    for xf in self.transfers:
      xf.goes_before = OrderedDict()
      xf.goes_after = OrderedDict()

      xf.stash_before = []
      xf.use_stash = []

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

  def ImproveVertexSequence(self):
    logger.info("Improving vertex order...")

    # At this point our digraph is acyclic; we reversed any edges that
    # were backwards in the heuristically-generated sequence.  The
    # previously-generated order is still acceptable, but we hope to
    # find a better order that needs less memory for stashed data.
    # Now we do a topological sort to generate a new vertex order,
    # using a greedy algorithm to choose which vertex goes next
    # whenever we have a choice.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    L = []  # the new vertex order

    # S is the set of sources in the remaining graph; we always choose
    # the one that leaves the least amount of stashed data after it's
    # executed.
    S = [(u.NetStashChange(), u.order, u) for u in self.transfers
         if not u.incoming]
    heapq.heapify(S)

    while S:
      _, _, xf = heapq.heappop(S)
      L.append(xf)
      for u in xf.outgoing:
        del u.incoming[xf]
        if not u.incoming:
          heapq.heappush(S, (u.NetStashChange(), u.order, u))

    # if this fails then our graph had a cycle.
    assert len(L) == len(self.transfers)

    self.transfers = L
    for i, xf in enumerate(L):
      xf.order = i

  def ReverseBackwardEdges(self):
    """Reverse unsatisfying edges and compute pairs of stashed blocks.

    For each transfer, make sure it properly stashes the blocks it touches
    that will be used by later transfers. It uses pairs of (stash_raw_id,
    range) to record the blocks to be stashed. 'stash_raw_id' is an id that
    uniquely identifies each pair. Note that for the same range (e.g.
    RangeSet("1-5")), it is possible to have multiple pairs with different
    'stash_raw_id's. Each 'stash_raw_id' will be consumed by one transfer. In
    BBOTA v3+, identical blocks will be written to the same stash slot in
    WriteTransfers().
    """

    logger.info("Reversing backward edges...")
    in_order = 0
    out_of_order = 0
    stash_raw_id = 0
    stash_size = 0

    for xf in self.transfers:
      for u in xf.goes_before.copy():
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo.  modify u to stash the blocks that it
          # writes that xf wants to read, and then require u to go
          # before xf.
          out_of_order += 1

          overlap = xf.src_ranges.intersect(u.tgt_ranges)
          assert overlap

          u.stash_before.append((stash_raw_id, overlap))
          xf.use_stash.append((stash_raw_id, overlap))
          stash_raw_id += 1
          stash_size += overlap.size()

          # reverse the edge direction; now xf must go after u
          del xf.goes_before[u]
          del u.goes_after[xf]
          xf.goes_after[u] = None  # value doesn't matter
          u.goes_before[xf] = None

    logger.info(
        "  %d/%d dependencies (%.2f%%) were violated; %d source blocks "
        "stashed.", out_of_order, in_order + out_of_order,
        (out_of_order * 100.0 / (in_order + out_of_order)) if (
            in_order + out_of_order) else 0.0,
        stash_size)

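  # A worked sketch of the reversal (illustrative): transfer A reads blocks
  # that transfer B writes, so A should run first; but the chosen order puts B
  # before A. Instead of reordering, B stashes the overlap before writing:
  #
  #   overlap = A.src_ranges.intersect(B.tgt_ranges)  # e.g. RangeSet("10-19")
  #   B.stash_before.append((stash_raw_id, overlap))
  #   A.use_stash.append((stash_raw_id, overlap))
  #
  # and the edge is flipped so that B officially goes before A.
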
Doug Zongkerfc44a512014-08-26 13:10:25 -07001161 def FindVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001162 logger.info("Finding vertex sequence...")
Doug Zongkerfc44a512014-08-26 13:10:25 -07001163
1164 # This is based on "A Fast & Effective Heuristic for the Feedback
1165 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1166 # it as starting with the digraph G and moving all the vertices to
1167 # be on a horizontal line in some order, trying to minimize the
1168 # number of edges that end up pointing to the left. Left-pointing
1169 # edges will get removed to turn the digraph into a DAG. In this
1170 # case each edge has a weight which is the number of source blocks
1171 # we'll lose if that edge is removed; we try to minimize the total
1172 # weight rather than just the number of edges.
1173
1174 # Make a copy of the edge set; this copy will get destroyed by the
1175 # algorithm.
1176 for xf in self.transfers:
1177 xf.incoming = xf.goes_after.copy()
1178 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001179 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001180
1181 # We use an OrderedDict instead of just a set so that the output
1182 # is repeatable; otherwise it would depend on the hash values of
1183 # the transfer objects.
1184 G = OrderedDict()
1185 for xf in self.transfers:
1186 G[xf] = None
1187 s1 = deque() # the left side of the sequence, built from left to right
1188 s2 = deque() # the right side of the sequence, built from right to left
1189
Doug Zongker6ab2a502016-02-09 08:28:09 -08001190 heap = []
1191 for xf in self.transfers:
1192 xf.heap_item = HeapItem(xf)
1193 heap.append(xf.heap_item)
1194 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001195
Tao Bao33482282016-10-24 16:49:08 -07001196 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1197 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1198 # { key1: None, key2: None, ... }.
1199 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1200 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001201
1202 def adjust_score(iu, delta):
1203 iu.score += delta
1204 iu.heap_item.clear()
1205 iu.heap_item = HeapItem(iu)
1206 heapq.heappush(heap, iu.heap_item)
1207
1208 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001209 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001210 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001211 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001212 for u in sinks:
Tao Bao508b0872018-02-09 13:44:43 -08001213 if u not in G:
1214 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001215 s2.appendleft(u)
1216 del G[u]
1217 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001218 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001219 if not iu.outgoing:
1220 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001221 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001222
1223 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001224 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001225 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001226 for u in sources:
Tao Bao508b0872018-02-09 13:44:43 -08001227 if u not in G:
1228 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001229 s1.append(u)
1230 del G[u]
1231 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001232 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001233 if not iu.incoming:
1234 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001235 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001236
Tao Bao508b0872018-02-09 13:44:43 -08001237 if not G:
1238 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001239
1240 # Find the "best" vertex to put next. "Best" is the one that
1241 # maximizes the net difference in source blocks saved that we get by
1242 # pretending it's a source rather than a sink.
1243
Doug Zongker6ab2a502016-02-09 08:28:09 -08001244 while True:
1245 u = heapq.heappop(heap)
1246 if u and u.item in G:
1247 u = u.item
1248 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001249
Doug Zongkerfc44a512014-08-26 13:10:25 -07001250 s1.append(u)
1251 del G[u]
1252 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001253 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001254 if not iu.incoming:
1255 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001256
Doug Zongkerfc44a512014-08-26 13:10:25 -07001257 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001258 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001259 if not iu.outgoing:
1260 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001261
1262 # Now record the sequence in the 'order' field of each transfer,
1263 # and rearrange self.transfers to be in the chosen sequence.
1264
1265 new_transfers = []
1266 for x in itertools.chain(s1, s2):
1267 x.order = len(new_transfers)
1268 new_transfers.append(x)
1269 del x.incoming
1270 del x.outgoing
1271
1272 self.transfers = new_transfers
1273
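# A compact, non-incremental sketch of the Eades/Lin/Smyth heuristic used
# above: peel off sinks to the right and sources to the left, and when only
# cycles remain, move the vertex with the best out-weight minus in-weight
# score to the left. 'edges' is a hypothetical {u: {v: weight}} digraph, a
# simplified stand-in for the transfer graph.
def _eades_lin_smyth_sketch(edges):
  from collections import deque
  nodes = set(edges)
  outgoing = {u: dict(vs) for u, vs in edges.items()}
  incoming = {u: {} for u in edges}
  for u, vs in edges.items():
    for v, w in vs.items():
      incoming[v][u] = w
  s1, s2 = deque(), deque()
  def remove(u):
    nodes.discard(u)
    for v in outgoing[u]:
      del incoming[v][u]
    for v in incoming[u]:
      del outgoing[v][u]
  while nodes:
    progressed = True
    while progressed:
      progressed = False
      for u in [n for n in nodes if not outgoing[n]]:  # sinks go right
        s2.appendleft(u)
        remove(u)
        progressed = True
      for u in [n for n in nodes if not incoming[n]]:  # sources go left
        s1.append(u)
        remove(u)
        progressed = True
    if nodes:  # only cycles remain; break one greedily
      u = max(nodes, key=lambda n: sum(outgoing[n].values()) -
              sum(incoming[n].values()))
      s1.append(u)
      remove(u)
  # The total weight of left-pointing edges is (heuristically) small.
  return list(s1) + list(s2)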
1274 def GenerateDigraph(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001275 logger.info("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001276
1277 # Each item of source_ranges will be:
1278 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001279 # - otherwise, an ordered set of the transfers that read that block.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001280 source_ranges = []
1281 for b in self.transfers:
1282 for s, e in b.src_ranges:
1283 if e > len(source_ranges):
1284 source_ranges.extend([None] * (e-len(source_ranges)))
1285 for i in range(s, e):
1286 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001287 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001288 else:
Tao Bao33482282016-10-24 16:49:08 -07001289 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001290
Doug Zongkerfc44a512014-08-26 13:10:25 -07001291 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001292 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001293 for s, e in a.tgt_ranges:
1294 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001295 if i >= len(source_ranges):
1296 break
Tao Bao33482282016-10-24 16:49:08 -07001297 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1298 if source_ranges[i] is not None:
1299 for j in source_ranges[i]:
1300 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001301
1302 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001303 if a is b:
1304 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001305
1306 # If the blocks written by A are read by B, then B needs to go before A.
1307 i = a.tgt_ranges.intersect(b.src_ranges)
1308 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001309 if b.src_name == "__ZERO":
1310 # the cost of removing source blocks for the __ZERO domain
1311 # is (nearly) zero.
1312 size = 0
1313 else:
1314 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001315 b.goes_before[a] = size
1316 a.goes_after[b] = size
1317
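# A toy run of the digraph construction above: map every source block to the
# transfers that read it, then intersect each transfer's target blocks with
# that index. The transfers below are hypothetical, and plain sets stand in
# for RangeSets.
def _digraph_sketch():
  # (name, src_blocks, tgt_blocks)
  transfers = [("a", {0, 1}, {4, 5}), ("b", {4}, {9})]
  readers = {}
  for name, src, _ in transfers:
    for blk in src:
      readers.setdefault(blk, set()).add(name)
  goes_before = {}
  for name, _, tgt in transfers:
    for blk in tgt:
      for other in readers.get(blk, ()):
        if other != name:
          # 'other' reads a block that 'name' writes, so it must run first.
          goes_before.setdefault(other, set()).add(name)
  return goes_before  # {"b": set(["a"])}: b must go before a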
xunchang21531832018-12-06 14:20:05 -08001318 def ComputePatchesForInputList(self, diff_queue, compress_target):
1319 """Returns a list of patch information for the input list of transfers.
1320
1321 Args:
1322 diff_queue: a list of transfers with style 'diff'
1323 compress_target: If True, compresses the target ranges of each
1324 transfer and saves the compressed size.
1325
1326 Returns:
1327 A list of (transfer order, patch_info, compressed_size) tuples.
1328 """
1329
1330 if not diff_queue:
1331 return []
1332
1333 if self.threads > 1:
1334 logger.info("Computing patches (using %d threads)...", self.threads)
1335 else:
1336 logger.info("Computing patches...")
1337
1338 diff_total = len(diff_queue)
1339 patches = [None] * diff_total
1340 error_messages = []
1341
1342 # Using multiprocessing doesn't give additional benefits, due to the
1343 # pattern of the code. The diffing work is done by subprocess.call, which
1344 # already runs in a separate process (not affected much by the GIL -
1345 # Global Interpreter Lock). Using multiprocessing also requires either a)
1346 # writing the diff input files in the main process before forking, or b)
1347 # reopening the image file (SparseImage) in the worker processes. Doing
1348 # neither of them further improves the performance.
1349 lock = threading.Lock()
1350
1351 def diff_worker():
1352 while True:
1353 with lock:
1354 if not diff_queue:
1355 return
1356 xf_index, imgdiff, patch_index = diff_queue.pop()
1357 xf = self.transfers[xf_index]
1358
1359 message = []
1360 compressed_size = None
1361
1362 patch_info = xf.patch_info
1363 if not patch_info:
1364 src_file = common.MakeTempFile(prefix="src-")
1365 with open(src_file, "wb") as fd:
1366 self.src.WriteRangeDataToFd(xf.src_ranges, fd)
1367
1368 tgt_file = common.MakeTempFile(prefix="tgt-")
1369 with open(tgt_file, "wb") as fd:
1370 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)
1371
1372 try:
1373 patch_info = compute_patch(src_file, tgt_file, imgdiff)
1374 except ValueError as e:
1375 message.append(
1376 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
1377 "imgdiff" if imgdiff else "bsdiff",
1378 xf.tgt_name if xf.tgt_name == xf.src_name else
1379 xf.tgt_name + " (from " + xf.src_name + ")",
1380 xf.tgt_ranges, xf.src_ranges, e.message))
1381
1382 if compress_target:
1383 tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
1384 try:
1385 # Compresses with the default level
1386 compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
1387 compressed_data = (compress_obj.compress("".join(tgt_data))
1388 + compress_obj.flush())
1389 compressed_size = len(compressed_data)
1390 except zlib.error as e:
1391 message.append(
1392 "Failed to compress the data in target range {} for {}:\n"
1393 "{}".format(xf.tgt_ranges, xf.tgt_name, e.message))
1394
1395 if message:
1396 with lock:
1397 error_messages.extend(message)
1398
1399 with lock:
1400 patches[patch_index] = (xf_index, patch_info, compressed_size)
1401
1402 threads = [threading.Thread(target=diff_worker)
1403 for _ in range(self.threads)]
1404 for th in threads:
1405 th.start()
1406 while threads:
1407 threads.pop().join()
1408
1409 if error_messages:
1410 logger.error('ERROR:')
1411 logger.error('\n'.join(error_messages))
1412 logger.error('\n\n\n')
1413 sys.exit(1)
1414
1415 return patches
1416
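# The same fan-out pattern in miniature: N threads pop work items from a
# shared list under a lock and record results by index, so the output layout
# is independent of thread scheduling. 'work' is a hypothetical callable; the
# heavy lifting happens outside the lock, as in diff_worker() above.
def _worker_pool_sketch(items, work, num_threads=4):
  import threading
  results = [None] * len(items)
  pending = list(enumerate(items))
  lock = threading.Lock()
  def worker():
    while True:
      with lock:
        if not pending:
          return
        index, item = pending.pop()
      results[index] = work(item)
  threads = [threading.Thread(target=worker) for _ in range(num_threads)]
  for th in threads:
    th.start()
  while threads:
    threads.pop().join()
  return results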
xunchangb6105dc2018-12-06 16:39:46 -08001417 def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):
xunchang3df4d5e2018-12-06 15:03:45 -08001418 """Converts the diff transfers to reduce the max simultaneous stash.
1419
1420 Since the 'new' data is compressed with deflate, we can select the 'diff'
1421 transfers for conversion by comparing each patch size with the size of the
1422 compressed target data. Ideally, we want to convert the transfers with a
1423 small size increase but a large number of stashed blocks.
1424 """
xunchangb6105dc2018-12-06 16:39:46 -08001425 TransferSizeScore = namedtuple("TransferSizeScore",
1426 "xf, used_stash_blocks, score")
xunchang3df4d5e2018-12-06 15:03:45 -08001427
1428 logger.info("Selecting diff commands to convert to new.")
1429 diff_queue = []
1430 for xf in self.transfers:
1431 if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
1432 use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
1433 xf.src_ranges)
1434 diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))
1435
1436 # Remove the 'move' transfers, and compute the patch & compressed size
1437 # for the remaining ones.
1438 result = self.ComputePatchesForInputList(diff_queue, True)
1439
xunchangb6105dc2018-12-06 16:39:46 -08001440 conversion_candidates = []
xunchang3df4d5e2018-12-06 15:03:45 -08001441 for xf_index, patch_info, compressed_size in result:
1442 xf = self.transfers[xf_index]
1443 if not xf.patch_info:
1444 xf.patch_info = patch_info
1445
1446 size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
1447 diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
xunchangb6105dc2018-12-06 16:39:46 -08001448 logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"
xunchang3df4d5e2018-12-06 15:03:45 -08001449 " compressed size: %d, ratio %.2f%%", xf.tgt_name,
1450 xf.tgt_ranges.size(), diff_style,
1451 len(xf.patch_info.content), compressed_size, size_ratio)
1452
xunchangb6105dc2018-12-06 16:39:46 -08001453 used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)
xunchang3df4d5e2018-12-06 15:03:45 -08001454 # Convert the transfer to new if the compressed size is smaller or equal.
1455 # We don't need to maintain the stash_before lists here because the
1456 # graph will be regenerated later.
1457 if len(xf.patch_info.content) >= compressed_size:
xunchangb6105dc2018-12-06 16:39:46 -08001458 # Add the transfer to the candidate list with a negative score, so it
1459 # will always be converted first.
1460 conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
1461 -1))
1462 elif used_stash_blocks > 0:
1463 # This heuristic represents the size increase in the final package per
1464 # unit of stashed data removed.
1465 score = ((compressed_size - len(xf.patch_info.content)) * 100.0
1466 / used_stash_blocks)
1467 conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
1468 score))
1469 # Transfers with lower score (i.e. less expensive to convert) will be
1470 # converted first.
1471 conversion_candidates.sort(key=lambda x: x.score)
xunchang3df4d5e2018-12-06 15:03:45 -08001472
xunchangb6105dc2018-12-06 16:39:46 -08001473 # TODO(xunchang): improve the logic to find the transfers to convert, e.g.
1474 # convert the ones that contribute to the max stash, run ReviseStashSize
1475 # multiple times etc.
1476 removed_stashed_blocks = 0
1477 for xf, used_stash_blocks, _ in conversion_candidates:
1478 logger.info("Converting %s to new", xf.tgt_name)
1479 xf.ConvertToNew()
1480 removed_stashed_blocks += used_stash_blocks
1481 # Experiments show that we will get a smaller package size if we remove
1482 # slightly more stashed blocks than the violated stash blocks.
1483 if removed_stashed_blocks >= violated_stash_blocks:
1484 break
xunchang3df4d5e2018-12-06 15:03:45 -08001485
1486 logger.info("Removed %d stashed blocks", removed_stashed_blocks)
1487
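# A worked example of the conversion score above, with made-up numbers: if
# switching a transfer from "diff" to "new" grows the package by
# (compressed_size - patch_size) bytes while freeing used_stash_blocks
# stashed blocks, the score is the cost per freed block; smaller scores get
# converted first.
def _conversion_score_sketch():
  patch_size = 800        # bytes of the bsdiff/imgdiff patch
  compressed_size = 1000  # bytes of the deflate-compressed target
  used_stash_blocks = 50
  score = (compressed_size - patch_size) * 100.0 / used_stash_blocks
  return score  # 400.0, i.e. 4 bytes of growth per freed block, times 100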
Doug Zongkerfc44a512014-08-26 13:10:25 -07001488 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001489 """Parse the file_map to generate all the transfers."""
1490
Tianjie Xu41390d42017-11-22 11:35:18 -08001491 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1492 src_ranges, style, by_id):
1493 """Add one or multiple Transfer()s by splitting large files.
1494
1495 For BBOTA v3, we need to stash source blocks for the resumable feature.
1496 However, with the growth of file sizes and the shrink of the cache
1497 partition, source blocks may be too large to be stashed. If a file occupies
1498 too many blocks, we split it into smaller pieces by getting multiple
1499 Transfer()s.
1500
1501 The downside is that after splitting, we may increase the package size
1502 since the split pieces don't align well. According to our experiments,
1503 1/8 of the cache size as the per-piece limit appears to be optimal.
1504 Compared to the fixed 1024-block limit, it reduces the overall package
1505 size by 30% for volantis, and 20% for angler and bullhead."""
1506
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001507 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001508 while (tgt_ranges.size() > max_blocks_per_transfer and
1509 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001510 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1511 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001512 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1513 src_first = src_ranges.first(max_blocks_per_transfer)
1514
Tao Bao183e56e2017-03-05 17:05:09 -08001515 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1516 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1517 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001518
1519 tgt_ranges = tgt_ranges.subtract(tgt_first)
1520 src_ranges = src_ranges.subtract(src_first)
1521 pieces += 1
1522
1523 # Handle remaining blocks.
1524 if tgt_ranges.size() or src_ranges.size():
1525 # Must be both non-empty.
1526 assert tgt_ranges.size() and src_ranges.size()
1527 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1528 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001529 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1530 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1531 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001532
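# Numeric illustration of the per-piece limit used by the splitter above:
# with a hypothetical 64 MiB cache partition and 4 KiB blocks, the 1/8
# threshold caps each piece at 64 MiB * 0.125 / 4 KiB = 2048 blocks.
def _max_blocks_sketch():
  cache_size = 64 * 1024 * 1024  # hypothetical cache size, in bytes
  split_threshold = 0.125        # matches the value set in FindTransfers()
  blocksize = 4096
  return int(cache_size * split_threshold / blocksize)  # 2048 blocks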
Tianjie Xu41390d42017-11-22 11:35:18 -08001533 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1534 by_id):
1535 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001536
Tianjie Xu41390d42017-11-22 11:35:18 -08001537 This function will construct a list of zip archives, which will later be
1538 split by imgdiff to reduce the final patch size. For the other files,
1539 we will plainly split them based on a fixed chunk size, at the cost of a
1540 potential patch size penalty.
1541 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001542
1543 assert style == "diff"
1544
1545 # Change nothing for small files.
1546 if (tgt_ranges.size() <= max_blocks_per_transfer and
1547 src_ranges.size() <= max_blocks_per_transfer):
1548 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1549 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1550 style, by_id)
1551 return
1552
Tao Baocb73aed2018-01-31 17:32:40 -08001553 # Split large APKs with imgdiff, if possible. We're intentionally checking
1554 # file types one more time (CanUseImgdiff() checks that as well), before
1555 # calling the costly RangeSha1()s.
1556 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1557 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001558 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001559 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1560 return
1561
Tianjie Xu41390d42017-11-22 11:35:18 -08001562 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1563 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001564
Tao Bao08c85832016-09-19 22:26:30 -07001565 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1566 split=False):
1567 """Wrapper function for adding a Transfer()."""
1568
1569 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1570 # otherwise add the Transfer() as is.
1571 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001572 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1573 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1574 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001575 return
1576
1577 # Handle .odex files specially to analyze the block-wise difference. If
1578 # most of the blocks are identical with only few changes (e.g. header),
1579 # we will patch the changed blocks only. This avoids stashing unchanged
1580 # blocks while patching. We limit the analysis to files without size
1581 # changes. This is to avoid increasing the OTA generation cost too
1582 # much.
1583 if (tgt_name.split(".")[-1].lower() == 'odex' and
1584 tgt_ranges.size() == src_ranges.size()):
1585
1586 # The 0.5 threshold can be further tuned. The tradeoff is: if only very
1587 # few blocks remain identical, we lose the opportunity to use imgdiff,
1588 # which may have a better compression ratio than bsdiff.
1589 crop_threshold = 0.5
1590
1591 tgt_skipped = RangeSet()
1592 src_skipped = RangeSet()
1593 tgt_size = tgt_ranges.size()
1594 tgt_changed = 0
1595 for src_block, tgt_block in zip(src_ranges.next_item(),
1596 tgt_ranges.next_item()):
1597 src_rs = RangeSet(str(src_block))
1598 tgt_rs = RangeSet(str(tgt_block))
1599 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1600 tgt_skipped = tgt_skipped.union(tgt_rs)
1601 src_skipped = src_skipped.union(src_rs)
1602 else:
1603 tgt_changed += tgt_rs.size()
1604
1605 # Terminate early if no clear sign of benefits.
1606 if tgt_changed > tgt_size * crop_threshold:
1607 break
1608
1609 if tgt_changed < tgt_size * crop_threshold:
1610 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao32fcdab2018-10-12 10:30:39 -07001611 logger.info(
1612 '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
1613 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
Tianjie Xu41390d42017-11-22 11:35:18 -08001614 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001615 "%s-skipped" % (tgt_name,),
1616 "%s-skipped" % (src_name,),
1617 tgt_skipped, src_skipped, style, by_id)
1618
1619 # Intentionally change the file extension to avoid being imgdiff'd as
1620 # the files are no longer in their original format.
1621 tgt_name = "%s-cropped" % (tgt_name,)
1622 src_name = "%s-cropped" % (src_name,)
1623 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1624 src_ranges = src_ranges.subtract(src_skipped)
1625
1626 # Possibly having no changed blocks.
1627 if not tgt_ranges:
1628 return
1629
1630 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001631 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001632 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1633
Tianjie Xu25366072017-09-08 17:19:02 -07001634 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1635 split_info):
1636 """Parse the split_info and return a list of info tuples.
1637
1638 Args:
1639 patch_size: total size of the patch file.
1640 tgt_ranges: Ranges of the target file within the original image.
1641 src_ranges: Ranges of the source file within the original image.
1642 split_info format:
1643 imgdiff version#
1644 count of pieces
1645 <patch_size_1> <tgt_size_1> <src_ranges_1>
1646 ...
1647 <patch_size_n> <tgt_size_n> <src_ranges_n>
1648
1649 Returns:
1650 A list of (patch_start, patch_len, split_tgt_ranges, split_src_ranges)
1651 """
1652
1653 version = int(split_info[0])
1654 assert version == 2
1655 count = int(split_info[1])
1656 assert len(split_info) - 2 == count
1657
1658 split_info_list = []
1659 patch_start = 0
1660 tgt_remain = copy.deepcopy(tgt_ranges)
1661 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1662 for line in split_info[2:]:
1663 info = line.split()
1664 assert len(info) == 3
1665 patch_length = int(info[0])
1666
1667 split_tgt_size = int(info[1])
1668 assert split_tgt_size % 4096 == 0
1669 assert split_tgt_size / 4096 <= tgt_remain.size()
1670 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1671 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1672
1673 # Find the split_src_ranges within the image file from its relative
1674 # position in file.
1675 split_src_indices = RangeSet.parse_raw(info[2])
1676 split_src_ranges = RangeSet()
1677 for r in split_src_indices:
1678 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1679 assert not split_src_ranges.overlaps(curr_range)
1680 split_src_ranges = split_src_ranges.union(curr_range)
1681
1682 split_info_list.append((patch_start, patch_length,
1683 split_tgt_ranges, split_src_ranges))
1684 patch_start += patch_length
1685
1686 # Check that the sizes of all the split pieces add up to the final file
1687 # size for patch and target.
1688 assert tgt_remain.size() == 0
1689 assert patch_start == patch_size
1690 return split_info_list
1691
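# A made-up split_info payload in the format validated above. Each piece line
# holds <patch_size> <tgt_size> <src_ranges>, where tgt_size is block aligned
# and src_ranges uses RangeSet's raw "count,start,end,..." encoding relative
# to the source file.
def _split_info_sketch():
  split_info = [
      "2",                # imgdiff version
      "2",                # count of pieces
      "1024 8192 2,0,2",  # piece 0: 1 KiB patch, 2 tgt blocks, src blocks [0,2)
      "512 4096 2,2,3",   # piece 1: 512 B patch, 1 tgt block, src block [2,3)
  ]
  assert int(split_info[0]) == 2
  assert len(split_info) - 2 == int(split_info[1])
  return split_info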
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001692 def SplitLargeApks():
1693 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001694
1695 Example: Chrome.apk will be split into
1696 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1697 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1698 ...
1699
1700 After the split, the target pieces are continuous and block aligned; and
1701 the source pieces are mutually exclusive. During the split, we also
1702 generate and save the image patch between src-X & tgt-X. This patch will
1703 be valid because the block ranges of src-X & tgt-X will always stay the
1704 same afterwards; but there's a chance we don't use the patch if we
1705 convert the "diff" command into "new" or "move" later.
1706 """
1707
1708 while True:
1709 with transfer_lock:
1710 if not large_apks:
1711 return
1712 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1713
1714 src_file = common.MakeTempFile(prefix="src-")
1715 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001716 with open(src_file, "wb") as src_fd:
1717 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1718 with open(tgt_file, "wb") as tgt_fd:
1719 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001720
1721 patch_file = common.MakeTempFile(prefix="patch-")
1722 patch_info_file = common.MakeTempFile(prefix="split_info-")
1723 cmd = ["imgdiff", "-z",
1724 "--block-limit={}".format(max_blocks_per_transfer),
1725 "--split-info=" + patch_info_file,
1726 src_file, tgt_file, patch_file]
Tao Bao73dd4f42018-10-04 16:25:33 -07001727 proc = common.Run(cmd)
1728 imgdiff_output, _ = proc.communicate()
1729 assert proc.returncode == 0, \
Tao Bao4ccea852018-02-06 15:16:41 -08001730 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1731 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001732
1733 with open(patch_info_file) as patch_info:
1734 lines = patch_info.readlines()
1735
1736 patch_size_total = os.path.getsize(patch_file)
1737 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1738 tgt_ranges, src_ranges,
1739 lines)
1740 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001741 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001742 with open(patch_file, "rb") as f:
1743 f.seek(patch_start)
1744 patch_content = f.read(patch_length)
1745
1746 split_src_name = "{}-{}".format(src_name, index)
1747 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001748 split_large_apks.append((split_tgt_name,
1749 split_src_name,
1750 split_tgt_ranges,
1751 split_src_ranges,
1752 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001753
Tao Bao32fcdab2018-10-12 10:30:39 -07001754 logger.info("Finding transfers...")
Tao Bao08c85832016-09-19 22:26:30 -07001755
Tianjie Xu25366072017-09-08 17:19:02 -07001756 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001757 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001758 cache_size = common.OPTIONS.cache_size
1759 split_threshold = 0.125
1760 max_blocks_per_transfer = int(cache_size * split_threshold /
1761 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001762 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001763 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001764 if tgt_fn == "__ZERO":
1765 # the special "__ZERO" domain is all the blocks not contained
1766 # in any file and that are filled with zeros. We have a
1767 # special transfer style for zero blocks.
1768 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001769 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1770 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001771 continue
1772
Tao Baoff777812015-05-12 11:42:31 -07001773 elif tgt_fn == "__COPY":
1774 # "__COPY" domain includes all the blocks not contained in any
1775 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001776 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001777 continue
1778
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001779 elif tgt_fn == "__HASHTREE":
1780 continue
1781
Doug Zongkerfc44a512014-08-26 13:10:25 -07001782 elif tgt_fn in self.src.file_map:
1783 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001784 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001785 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001786 continue
1787
1788 b = os.path.basename(tgt_fn)
1789 if b in self.src_basenames:
1790 # Look for an exact basename match in the source.
1791 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001792 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001793 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001794 continue
1795
1796 b = re.sub("[0-9]+", "#", b)
1797 if b in self.src_numpatterns:
1798 # Look for a 'number pattern' match (a basename match after
1799 # all runs of digits are replaced by "#"). (This is useful
1800 # for .so files that contain version numbers in the filename
1801 # that get bumped.)
1802 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001803 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001804 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001805 continue
1806
Tao Bao9a5caf22015-08-25 15:10:10 -07001807 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001808
Tianjie Xu25366072017-09-08 17:19:02 -07001809 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001810 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001811 for _ in range(self.threads)]
1812 for th in threads:
1813 th.start()
1814 while threads:
1815 threads.pop().join()
1816
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001817 # Sort the split transfers for large APKs to generate a deterministic package.
1818 split_large_apks.sort()
1819 for (tgt_name, src_name, tgt_ranges, src_ranges,
1820 patch) in split_large_apks:
1821 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1822 self.tgt.RangeSha1(tgt_ranges),
1823 self.src.RangeSha1(src_ranges),
1824 "diff", self.transfers)
xunchang21531832018-12-06 14:20:05 -08001825 transfer_split.patch_info = PatchInfo(True, patch)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001826
Doug Zongkerfc44a512014-08-26 13:10:25 -07001827 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001828 for k in self.src.file_map.keys():
1829 b = os.path.basename(k)
1830 self.src_basenames[b] = k
1831 b = re.sub("[0-9]+", "#", b)
1832 self.src_numpatterns[b] = k
1833
1834 @staticmethod
1835 def AssertPartition(total, seq):
1836 """Assert that all the RangeSets in 'seq' form a partition of the
1837 'total' RangeSet (i.e., they are non-intersecting and their union
1838 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001839
Doug Zongkerfc44a512014-08-26 13:10:25 -07001840 so_far = RangeSet()
1841 for i in seq:
1842 assert not so_far.overlaps(i)
1843 so_far = so_far.union(i)
1844 assert so_far == total
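# Example use of AssertPartition(): the three disjoint pieces below exactly
# cover blocks 0-9, so the assertion holds; dropping or overlapping any piece
# would trip one of the asserts.
def _assert_partition_example():
  from rangelib import RangeSet
  total = RangeSet("0-9")
  pieces = [RangeSet("0-2"), RangeSet("3-5"), RangeSet("6-9")]
  BlockImageDiff.AssertPartition(total, pieces)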