# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import array
import copy
import functools
import heapq
import itertools
import logging
import multiprocessing
import os
import os.path
import re
import sys
import threading
import zlib
from collections import deque, namedtuple, OrderedDict
from hashlib import sha1

import common
from rangelib import RangeSet

__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

logger = logging.getLogger(__name__)

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])


def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Calls bsdiff|imgdiff to compute the patch data, returns a PatchInfo."""
  patchfile = common.MakeTempFile(prefix='patch-')

  cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
  cmd.extend([srcfile, tgtfile, patchfile])

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())

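# A minimal usage sketch for compute_patch() (the filenames below are
# hypothetical): the two inputs are temp files holding the source and target
# bytes of one transfer, and the returned PatchInfo.content is what later
# gets appended to the *.patch.dat blob.
#
#   patch_info = compute_patch("/tmp/src-blocks", "/tmp/tgt-blocks",
#                              imgdiff=False)
#   assert not patch_info.imgdiff       # bsdiff-style patch
#   patch_len = len(patch_info.content)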

class Image(object):
  def RangeSha1(self, ranges):
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    raise NotImplementedError


class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}
    self.hashtree_info = None

  def RangeSha1(self, ranges):
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")


class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    self.total_blocks = len(self.data) / self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Otherwise the last block may get skipped if it is
    # unchanged for an incremental, but it would then fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)

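# For example (illustrative input, not used by this module): constructing
# DataImage('\0' * 8192) yields total_blocks == 2, an empty clobbered_blocks,
# and file_map == {"__ZERO": <RangeSet covering blocks 0-1>}.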

class FileImage(Image):
  """An image wrapped around a raw image file."""

  def __init__(self, path, hashtree_info_generator=None):
    self.path = path
    self.blocksize = 4096
    self._file_size = os.path.getsize(self.path)
    self._file = open(self.path, 'r')

    if self._file_size % self.blocksize != 0:
      raise ValueError("Size of file %s must be multiple of %d bytes, but is %d"
                       % (self.path, self.blocksize, self._file_size))

    self.total_blocks = self._file_size / self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

    self.generator_lock = threading.Lock()

    self.hashtree_info = None
    if hashtree_info_generator:
      self.hashtree_info = hashtree_info_generator.Generate(self)

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks):
      d = self._file.read(self.blocksize)
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks

    self.file_map = {}
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if self.hashtree_info:
      self.file_map["__HASHTREE"] = self.hashtree_info.hashtree_range

  def __del__(self):
    self._file.close()

  def _GetRangeData(self, ranges):
    # Use a lock to protect the generator so that we will not run two
    # instances of this generator on the same object simultaneously.
    with self.generator_lock:
      for s, e in ranges:
        self._file.seek(s * self.blocksize)
        for _ in range(s, e):
          yield self._file.read(self.blocksize)

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    assert not self.clobbered_blocks
    return self.RangeSha1(self.care_map)

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)


class Transfer(object):
  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
    return (sum(sr.size() for (_, sr) in self.stash_before) -
            sum(sr.size() for (_, sr) in self.use_stash))

  def ConvertToNew(self):
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch_info = None

  def __str__(self):
    return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
            " to " + str(self.tgt_ranges) + ">")


@functools.total_ordering
class HeapItem(object):
  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__, while Python 3 uses __bool__.
  __nonzero__ = __bool__

  # The rest of the operations are generated by the functools.total_ordering
  # decorator.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score

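# An illustrative consequence of the score negation in HeapItem (hypothetical
# scores): for transfers scoring 5 and 9, HeapItem stores -5 and -9, so
# heapq.heappop() on a list of HeapItems returns the higher-scoring transfer
# first, i.e. the min-heap behaves as a max-heap.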

class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files to which imgdiff will be applied while
  generating BlockImageDiff. It also logs the ones that cannot use imgdiff,
  with specific reasons. The stats are only meaningful when imgdiff is not
  disabled by the caller of BlockImageDiff. In addition, only files with
  supported types (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed
  to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    if reason not in self.stats:
      self.stats[reason] = set()
    self.stats[reason].add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      if key not in self.stats:
        continue
      values = self.stats[key]
      section_header = ' {} (count: {}) '.format(key, len(values))
      print_header(section_header, '-')
      logger.info(''.join([' {}\n'.format(name) for name in values]))

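# A usage sketch for ImgdiffStats (the filenames are illustrative):
#
#   stats = ImgdiffStats()
#   stats.Log("/system/app/Foo.apk", ImgdiffStats.USED_IMGDIFF)
#   stats.Log("/system/app/Bar.apk", ImgdiffStats.SKIPPED_NONMONOTONIC)
#   stats.Report()
#
# BlockImageDiff below does exactly this: Log() is called from
# CanUseImgdiff(), and Report() runs once at the end of Compute().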

class BlockImageDiff(object):
  """Generates the diff of two block image objects.

  BlockImageDiff works on two image objects. An image object is anything that
  provides the following attributes:

     blocksize: the size in bytes of a block, currently must be 4096.

     total_blocks: the total size of the partition/image, in blocks.

     care_map: a RangeSet containing which blocks (in the range
         [0, total_blocks)) we actually care about; i.e. which blocks contain
         data.

     file_map: a dict that partitions the blocks contained in care_map into
         smaller domains that are useful for doing diffs on. (Typically a
         domain is a file, and the key in file_map is the pathname.)

     clobbered_blocks: a RangeSet containing which blocks contain data but may
         be altered by the FS. They need to be excluded when verifying the
         partition integrity.

     ReadRangeSet(): a function that takes a RangeSet and returns the data
         contained in the image blocks of that RangeSet. The data is returned
         as a list or tuple of strings; concatenating the elements together
         should produce the requested data. Implementations are free to break
         up the data into list/tuple elements in any way that is convenient.

     RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
         all the data in the specified range.

     TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
         all the data in the image (i.e., all the blocks in the care_map minus
         clobbered_blocks, or including the clobbered blocks if
         include_clobbered_blocks is True).

  When creating a BlockImageDiff, the src image may be None, in which case the
  list of transfers produced will never read from the original image.
  """

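  # As a concrete reference for the interface above, DataImage and FileImage
  # defined earlier in this module provide exactly these attributes. A minimal
  # hand-rolled image object might look like the (illustrative) sketch below:
  #
  #   class InMemoryImage(object):
  #     def __init__(self, data):
  #       self.blocksize = 4096
  #       self.total_blocks = len(data) // self.blocksize
  #       self.care_map = RangeSet(data=(0, self.total_blocks))
  #       self.clobbered_blocks = RangeSet()
  #       self.extended = RangeSet()
  #       self.hashtree_info = None
  #       self.file_map = {"__NONZERO": self.care_map}
  #       ...  # plus ReadRangeSet/RangeSha1/TotalSha1/WriteRangeDataToFd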
  def __init__(self, tgt, src=None, threads=None, version=4,
               disable_imgdiff=False):
    if threads is None:
      threads = multiprocessing.cpu_count() // 2
      if threads == 0:
        threads = 1
    self.threads = threads
    self.version = version
    self.transfers = []
    self.src_basenames = {}
    self.src_numpatterns = {}
    self._max_stashed_size = 0
    self.touched_src_ranges = RangeSet()
    self.touched_src_sha1 = None
    self.disable_imgdiff = disable_imgdiff
    self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

    assert version in (3, 4)

    self.tgt = tgt
    if src is None:
      src = EmptyImage()
    self.src = src

    # The updater code that installs the patch always uses 4k blocks.
    assert tgt.blocksize == 4096
    assert src.blocksize == 4096

    # The range sets in each filemap should comprise a partition of
    # the care map.
    self.AssertPartition(src.care_map, src.file_map.values())
    self.AssertPartition(tgt.care_map, tgt.file_map.values())

  @property
  def max_stashed_size(self):
    return self._max_stashed_size

  @staticmethod
  def FileTypeSupportedByImgdiff(filename):
    """Returns whether the file type is supported by imgdiff."""
    return filename.lower().endswith(('.apk', '.jar', '.zip'))

  def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
    """Checks whether we can apply imgdiff for the given RangeSets.

    For files in ZIP format (e.g., APKs, JARs, etc.), we would like to use
    'imgdiff -z' if possible, because it usually produces significantly
    smaller patches than bsdiff.

    This is permissible if all of the following conditions hold.
    - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
    - The file type is supported by imgdiff;
    - The source and target blocks are monotonic (i.e. the data is stored with
      blocks in increasing order);
    - Both files don't contain shared blocks;
    - Both files have complete lists of blocks;
    - We haven't removed any blocks from the source set.

    If all these conditions are satisfied, concatenating all the blocks in the
    RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
    in the last block). imgdiff is fine with extra zeros at the end of the
    file.

    Args:
      name: The filename to be diff'd.
      tgt_ranges: The target RangeSet.
      src_ranges: The source RangeSet.
      large_apk: Whether this is to split a large APK.

    Returns:
      A boolean result.
    """
    if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
      return False

    if not tgt_ranges.monotonic or not src_ranges.monotonic:
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
      return False

    if (tgt_ranges.extra.get('uses_shared_blocks') or
        src_ranges.extra.get('uses_shared_blocks')):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
      return False

    if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
      return False

    reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
              else ImgdiffStats.USED_IMGDIFF)
    self.imgdiff_stats.Log(name, reason)
    return True

  def Compute(self, prefix):
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    self.FindSequenceForTransfers()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      stash_limit = (common.OPTIONS.cache_size *
                     common.OPTIONS.stash_threshold / self.tgt.blocksize)
      # Ignore the stash limit and calculate the maximum simultaneously stashed
      # blocks needed.
      _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

      # We cannot stash more blocks than the stash limit simultaneously. As a
      # result, some 'diff' commands will be converted to new; leading to an
      # unintended large package. To mitigate this issue, we can carefully
      # choose the transfers for conversion. The number '1024' can be further
      # tweaked here to balance the package size and build time.
      if max_stashed_blocks > stash_limit + 1024:
        self.SelectAndConvertDiffTransfersToNew(
            max_stashed_blocks - stash_limit)
        # Regenerate the sequence as the graph has changed.
        self.FindSequenceForTransfers()

      # Revise the stash size again to keep the size under limit.
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()

  def WriteTransfers(self, prefix):
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    if self.tgt.hashtree_info:
      out.append("compute_hash_tree {} {} {} {} {}\n".format(
          self.tgt.hashtree_info.hashtree_range.to_string_raw(),
          self.tgt.hashtree_info.filesystem_range.to_string_raw(),
          self.tgt.hashtree_info.hash_algorithm,
          self.tgt.hashtree_info.salt,
          self.tgt.hashtree_info.root_hash))

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starved of clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")
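    # For example, the four header lines inserted above might read (with
    # illustrative numbers):
    #   4        <- transfer list format version
    #   193290   <- total blocks written
    #   0        <- stash slots (unused in v3+)
    #   12       <- maximum number of simultaneously stashed blocks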

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
          max_stashed_blocks, self._max_stashed_size, max_allowed,
          self._max_stashed_size * 100.0 / max_allowed)
    else:
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
          max_stashed_blocks, self._max_stashed_size)

  def ReviseStashSize(self, ignore_stash_limit=False):
    """Revises the transfers to keep the stash size within the size limit.

    Iterates through the transfer list and calculates the stash size each
    transfer generates. Converts the affected transfers to new if we reach the
    stash limit.

    Args:
      ignore_stash_limit: Ignores the stash limit and calculates the max
        simultaneous stashed blocks instead. No change will be made to the
        transfer list with this flag.

    Return:
      A tuple of (tgt blocks converted to new, max stashed blocks)
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    max_allowed_blocks = None
    if not ignore_stash_limit:
      # Compute the maximum blocks available for stash based on /cache size and
      # the threshold.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0
    max_stashed_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires more stash than is available, drop that stash
    # by replacing the command that uses it with a "new" command instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after
          max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if (max_allowed_blocks and
              stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
            replaced_cmds.append(xf)
            logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)
          else:
            # The whole source ranges will be stashed for implicit stashes.
            max_stashed_blocks = max(max_stashed_blocks,
                                     stashed_blocks + xf.src_ranges.size())

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up the blocks that violate the space limit and log the total
        # number later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        " Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size. Maximum blocks stashed simultaneously: %d",
        new_blocks, num_of_bytes, max_stashed_blocks)
    return new_blocks, max_stashed_blocks

  def ComputePatches(self, prefix):
    logger.info("Reticulating splines...")
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #    ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch_info = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              logger.info(
                  "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size,
                  tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges))
          else:
            if xf.patch_info:
              # We have already generated the patch (e.g. during split of large
              # APKs or reduction of stash size)
              imgdiff = xf.patch_info.imgdiff
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    patches = self.ComputePatchesForInputList(diff_queue, False)

    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch_info, _ in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch_info.content)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch_info.content)

        tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
        logger.info(
            "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
            xf.patch_len * 100.0 / tgt_size, xf.style,
            xf.tgt_name if xf.tgt_name == xf.src_name else (
                xf.tgt_name + " (from " + xf.src_name + ")"),
            xf.tgt_ranges, xf.src_ranges)

  def AssertSha1Good(self):
    """Check the SHA-1 of the src & tgt blocks in the transfer list.

    Double check the SHA-1 value to avoid the issue in b/71908713, where
    SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
    environment. That specific problem has been fixed by protecting the
    underlying generator function 'SparseImage._GetRangeData()' with lock.
    """
    for xf in self.transfers:
      tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
      assert xf.tgt_sha1 == tgt_sha1
      if xf.style == "diff":
        src_sha1 = self.src.RangeSha1(xf.src_ranges)
        assert xf.src_sha1 == src_sha1

  def AssertSequenceGood(self):
    # Simulate the sequences of transfers we will output, and check that:
    # - we never read a block after writing it, and
    # - we write every block we care about exactly once.

    # Start with no blocks having been touched yet.
    touched = array.array("B", "\0" * self.tgt.total_blocks)

    # Imagine processing the transfers in order.
    for xf in self.transfers:
      # Check that the input blocks for this transfer haven't yet been touched.

      x = xf.src_ranges
      for _, sr in xf.use_stash:
        x = x.subtract(sr)

      for s, e in x:
        # The source image could be larger. Don't check the blocks that are in
        # the source image only, since they are not in 'touched' and won't
        # ever be touched.
1026 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -08001027 assert touched[i] == 0
1028
1029 # Check that the output blocks for this transfer haven't yet
1030 # been touched, and touch all the blocks written by this
1031 # transfer.
1032 for s, e in xf.tgt_ranges:
1033 for i in range(s, e):
1034 assert touched[i] == 0
1035 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001036
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001037 if self.tgt.hashtree_info:
1038 for s, e in self.tgt.hashtree_info.hashtree_range:
1039 for i in range(s, e):
1040 assert touched[i] == 0
1041 touched[i] = 1
1042
Doug Zongkerfc44a512014-08-26 13:10:25 -07001043 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001044 for s, e in self.tgt.care_map:
1045 for i in range(s, e):
1046 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001047
xunchang21531832018-12-06 14:20:05 -08001048 def FindSequenceForTransfers(self):
1049 """Finds a sequence for the given transfers.
1050
1051 The goal is to minimize the violation of order dependencies between these
1052 transfers, so that fewer blocks are stashed when applying the update.
1053 """
1054
1055 # Clear the existing dependency between transfers
1056 for xf in self.transfers:
1057 xf.goes_before = OrderedDict()
1058 xf.goes_after = OrderedDict()
1059
1060 xf.stash_before = []
1061 xf.use_stash = []
1062
1063 # Find the ordering dependencies among transfers (this is O(n^2)
1064 # in the number of transfers).
1065 self.GenerateDigraph()
1066 # Find a sequence of transfers that satisfies as many ordering
1067 # dependencies as possible (heuristically).
1068 self.FindVertexSequence()
1069 # Fix up the ordering dependencies that the sequence didn't
1070 # satisfy.
1071 self.ReverseBackwardEdges()
1072 self.ImproveVertexSequence()
1073
Doug Zongker62338182014-09-08 08:29:55 -07001074 def ImproveVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001075 logger.info("Improving vertex order...")
Doug Zongker62338182014-09-08 08:29:55 -07001076
1077 # At this point our digraph is acyclic; we reversed any edges that
1078 # were backwards in the heuristically-generated sequence. The
1079 # previously-generated order is still acceptable, but we hope to
1080 # find a better order that needs less memory for stashed data.
1081 # Now we do a topological sort to generate a new vertex order,
1082 # using a greedy algorithm to choose which vertex goes next
1083 # whenever we have a choice.
1084
1085 # Make a copy of the edge set; this copy will get destroyed by the
1086 # algorithm.
1087 for xf in self.transfers:
1088 xf.incoming = xf.goes_after.copy()
1089 xf.outgoing = xf.goes_before.copy()
1090
1091 L = [] # the new vertex order
1092
1093 # S is the set of sources in the remaining graph; we always choose
1094 # the one that leaves the least amount of stashed data after it's
1095 # executed.
1096 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1097 if not u.incoming]
1098 heapq.heapify(S)
1099
1100 while S:
1101 _, _, xf = heapq.heappop(S)
1102 L.append(xf)
1103 for u in xf.outgoing:
1104 del u.incoming[xf]
1105 if not u.incoming:
1106 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1107
1108 # if this fails then our graph had a cycle.
1109 assert len(L) == len(self.transfers)
1110
1111 self.transfers = L
1112 for i, xf in enumerate(L):
1113 xf.order = i
1114
Doug Zongker62338182014-09-08 08:29:55 -07001115 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001116 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1117
1118 For each transfer, make sure it properly stashes the blocks it touches and
1119 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1120 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1121 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1122 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1123 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1124 blocks will be written to the same stash slot in WriteTransfers().
1125 """
1126
Tao Bao32fcdab2018-10-12 10:30:39 -07001127 logger.info("Reversing backward edges...")
Doug Zongker62338182014-09-08 08:29:55 -07001128 in_order = 0
1129 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001130 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001131 stash_size = 0
1132
1133 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001134 for u in xf.goes_before.copy():
1135 # xf should go before u
1136 if xf.order < u.order:
1137 # it does, hurray!
1138 in_order += 1
1139 else:
1140 # it doesn't, boo. modify u to stash the blocks that it
1141 # writes that xf wants to read, and then require u to go
1142 # before xf.
1143 out_of_order += 1
1144
1145 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1146 assert overlap
1147
Tao Bao3a2e3502016-12-28 09:14:39 -08001148 u.stash_before.append((stash_raw_id, overlap))
1149 xf.use_stash.append((stash_raw_id, overlap))
1150 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001151 stash_size += overlap.size()
1152
1153 # reverse the edge direction; now xf must go after u
1154 del xf.goes_before[u]
1155 del u.goes_after[xf]
1156 xf.goes_after[u] = None # value doesn't matter
1157 u.goes_before[xf] = None
1158
Tao Bao32fcdab2018-10-12 10:30:39 -07001159 logger.info(
1160 " %d/%d dependencies (%.2f%%) were violated; %d source blocks "
1161 "stashed.", out_of_order, in_order + out_of_order,
1162 (out_of_order * 100.0 / (in_order + out_of_order)) if (
1163 in_order + out_of_order) else 0.0,
1164 stash_size)
Doug Zongker62338182014-09-08 08:29:55 -07001165
Doug Zongkerfc44a512014-08-26 13:10:25 -07001166 def FindVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001167 logger.info("Finding vertex sequence...")
Doug Zongkerfc44a512014-08-26 13:10:25 -07001168
1169 # This is based on "A Fast & Effective Heuristic for the Feedback
1170 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1171 # it as starting with the digraph G and moving all the vertices to
1172 # be on a horizontal line in some order, trying to minimize the
1173 # number of edges that end up pointing to the left. Left-pointing
1174 # edges will get removed to turn the digraph into a DAG. In this
1175 # case each edge has a weight which is the number of source blocks
1176 # we'll lose if that edge is removed; we try to minimize the total
1177 # weight rather than just the number of edges.
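# Sketch of the greedy construction below: repeatedly peel off sinks (pushed
# onto the right-hand deque s2) and sources (appended to the left-hand deque
# s1); when neither exists, take the highest-scoring remaining vertex and
# treat it as a source. The result is s1 followed by s2, and whatever
# left-pointing edges remain are the violations ReverseBackwardEdges() will
# patch up with stashes.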
1178
1179 # Make a copy of the edge set; this copy will get destroyed by the
1180 # algorithm.
1181 for xf in self.transfers:
1182 xf.incoming = xf.goes_after.copy()
1183 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001184 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001185
1186 # We use an OrderedDict instead of just a set so that the output
1187 # is repeatable; otherwise it would depend on the hash values of
1188 # the transfer objects.
1189 G = OrderedDict()
1190 for xf in self.transfers:
1191 G[xf] = None
1192 s1 = deque() # the left side of the sequence, built from left to right
1193 s2 = deque() # the right side of the sequence, built from right to left
1194
Doug Zongker6ab2a502016-02-09 08:28:09 -08001195 heap = []
1196 for xf in self.transfers:
1197 xf.heap_item = HeapItem(xf)
1198 heap.append(xf.heap_item)
1199 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001200
Tao Bao33482282016-10-24 16:49:08 -07001201 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1202 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1203 # { key1: None, key2: None, ... }.
1204 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1205 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001206
1207 def adjust_score(iu, delta):
1208 iu.score += delta
1209 iu.heap_item.clear()
1210 iu.heap_item = HeapItem(iu)
1211 heapq.heappush(heap, iu.heap_item)
1212
1213 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001214 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001215 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001216 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001217 for u in sinks:
Tao Bao508b0872018-02-09 13:44:43 -08001218 if u not in G:
1219 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001220 s2.appendleft(u)
1221 del G[u]
1222 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001223 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001224 if not iu.outgoing:
1225 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001226 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001227
1228 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001229 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001230 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001231 for u in sources:
Tao Bao508b0872018-02-09 13:44:43 -08001232 if u not in G:
1233 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001234 s1.append(u)
1235 del G[u]
1236 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001237 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001238 if not iu.incoming:
1239 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001240 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001241
Tao Bao508b0872018-02-09 13:44:43 -08001242 if not G:
1243 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001244
1245 # Find the "best" vertex to put next. "Best" is the one that
1246 # maximizes the net difference in source blocks saved that we get by
1247 # pretending it's a source rather than a sink.
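# (That net difference is xf.score, kept current by adjust_score(); the heap
# is assumed to surface the highest-scoring live vertex, and stale or removed
# entries are skipped by the checks below.)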
1248
Doug Zongker6ab2a502016-02-09 08:28:09 -08001249 while True:
1250 u = heapq.heappop(heap)
1251 if u and u.item in G:
1252 u = u.item
1253 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001254
Doug Zongkerfc44a512014-08-26 13:10:25 -07001255 s1.append(u)
1256 del G[u]
1257 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001258 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001259 if not iu.incoming:
1260 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001261
Doug Zongkerfc44a512014-08-26 13:10:25 -07001262 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001263 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001264 if not iu.outgoing:
1265 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001266
1267 # Now record the sequence in the 'order' field of each transfer,
1268 # and by rearranging self.transfers to be in the chosen sequence.
1269
1270 new_transfers = []
1271 for x in itertools.chain(s1, s2):
1272 x.order = len(new_transfers)
1273 new_transfers.append(x)
1274 del x.incoming
1275 del x.outgoing
1276
1277 self.transfers = new_transfers
1278
1279 def GenerateDigraph(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001280 logger.info("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001281
1282 # Each item of source_ranges will be:
1283 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001284 # - an ordered set of transfers.
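# For example (hypothetical transfers), if block 7 is read by b1 and b2,
# source_ranges[7] would be OrderedDict([(b1, None), (b2, None)]).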
Doug Zongker6ab2a502016-02-09 08:28:09 -08001285 source_ranges = []
1286 for b in self.transfers:
1287 for s, e in b.src_ranges:
1288 if e > len(source_ranges):
1289 source_ranges.extend([None] * (e-len(source_ranges)))
1290 for i in range(s, e):
1291 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001292 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001293 else:
Tao Bao33482282016-10-24 16:49:08 -07001294 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001295
Doug Zongkerfc44a512014-08-26 13:10:25 -07001296 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001297 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001298 for s, e in a.tgt_ranges:
1299 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001300 if i >= len(source_ranges):
1301 break
Tao Bao33482282016-10-24 16:49:08 -07001302 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1303 if source_ranges[i] is not None:
1304 for j in source_ranges[i]:
1305 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001306
1307 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001308 if a is b:
1309 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001310
1311 # If the blocks written by A are read by B, then B needs to go before A.
1312 i = a.tgt_ranges.intersect(b.src_ranges)
1313 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001314 if b.src_name == "__ZERO":
1315 # the cost of removing source blocks for the __ZERO domain
1316 # is (nearly) zero.
1317 size = 0
1318 else:
1319 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001320 b.goes_before[a] = size
1321 a.goes_after[b] = size
1322
xunchang21531832018-12-06 14:20:05 -08001323 def ComputePatchesForInputList(self, diff_queue, compress_target):
1324 """Returns a list of patch information for the input list of transfers.
1325
1326 Args:
1327 diff_queue: a list of (transfer order, imgdiff flag, patch index) tuples for transfers with style 'diff'.
1328 compress_target: If True, compresses the target ranges of each
1329 transfer and saves the compressed size.
1330
1331 Returns:
1332 A list of (transfer order, patch_info, compressed_size) tuples.
1333 """
1334
1335 if not diff_queue:
1336 return []
1337
1338 if self.threads > 1:
1339 logger.info("Computing patches (using %d threads)...", self.threads)
1340 else:
1341 logger.info("Computing patches...")
1342
1343 diff_total = len(diff_queue)
1344 patches = [None] * diff_total
1345 error_messages = []
1346
1347 # Using multiprocessing doesn't give additional benefits, due to the
1348 # pattern of the code. The diffing work is done by subprocess.call, which
1349 # already runs in a separate process (not affected much by the GIL -
1350 # Global Interpreter Lock). Using multiprocessing also requires either a)
1351 # writing the diff input files in the main process before forking, or b)
1352 # reopening the image file (SparseImage) in the worker processes. Doing
1353 # neither of them further improves the performance.
1354 lock = threading.Lock()
1355
1356 def diff_worker():
1357 while True:
1358 with lock:
1359 if not diff_queue:
1360 return
1361 xf_index, imgdiff, patch_index = diff_queue.pop()
1362 xf = self.transfers[xf_index]
1363
1364 message = []
1365 compressed_size = None
1366
1367 patch_info = xf.patch_info
1368 if not patch_info:
1369 src_file = common.MakeTempFile(prefix="src-")
1370 with open(src_file, "wb") as fd:
1371 self.src.WriteRangeDataToFd(xf.src_ranges, fd)
1372
1373 tgt_file = common.MakeTempFile(prefix="tgt-")
1374 with open(tgt_file, "wb") as fd:
1375 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)
1376
1377 try:
1378 patch_info = compute_patch(src_file, tgt_file, imgdiff)
1379 except ValueError as e:
1380 message.append(
1381 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
1382 "imgdiff" if imgdiff else "bsdiff",
1383 xf.tgt_name if xf.tgt_name == xf.src_name else
1384 xf.tgt_name + " (from " + xf.src_name + ")",
1385 xf.tgt_ranges, xf.src_ranges, e.message))
1386
1387 if compress_target:
1388 tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
1389 try:
1390 # Compresses with the default level
1391 compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
1392 compressed_data = (compress_obj.compress("".join(tgt_data))
1393 + compress_obj.flush())
1394 compressed_size = len(compressed_data)
1395 except zlib.error as e:
1396 message.append(
1397 "Failed to compress the data in target range {} for {}:\n"
1398 "{}".format(xf.tgt_ranges, xf.tgt_name, e.message))
1399
1400 if message:
1401 with lock:
1402 error_messages.extend(message)
1403
1404 with lock:
1405 patches[patch_index] = (xf_index, patch_info, compressed_size)
1406
1407 threads = [threading.Thread(target=diff_worker)
1408 for _ in range(self.threads)]
1409 for th in threads:
1410 th.start()
1411 while threads:
1412 threads.pop().join()
1413
1414 if error_messages:
1415 logger.error('ERROR:')
1416 logger.error('\n'.join(error_messages))
1417 logger.error('\n\n\n')
1418 sys.exit(1)
1419
1420 return patches
1421
xunchangb6105dc2018-12-06 16:39:46 -08001422 def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):
xunchang3df4d5e2018-12-06 15:03:45 -08001423 """Converts the diff transfers to reduce the max simultaneous stash.
1424
1425 Since the 'new' data is compressed with deflate, we can select the 'diff'
1426 transfers for conversion by comparing each patch size with the size of the
1427 compressed target data. Ideally, we want to convert transfers that cause only
1428 a small size increase but free a large number of stashed blocks.
1429 """
xunchangb6105dc2018-12-06 16:39:46 -08001430 TransferSizeScore = namedtuple("TransferSizeScore",
1431 "xf, used_stash_blocks, score")
xunchang3df4d5e2018-12-06 15:03:45 -08001432
1433 logger.info("Selecting diff commands to convert to new.")
1434 diff_queue = []
1435 for xf in self.transfers:
1436 if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
1437 use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
1438 xf.src_ranges)
1439 diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))
1440
1441 # Remove the 'move' transfers, and compute the patch & compressed size
1442 # for the remaining.
1443 result = self.ComputePatchesForInputList(diff_queue, True)
1444
xunchangb6105dc2018-12-06 16:39:46 -08001445 conversion_candidates = []
xunchang3df4d5e2018-12-06 15:03:45 -08001446 for xf_index, patch_info, compressed_size in result:
1447 xf = self.transfers[xf_index]
1448 if not xf.patch_info:
1449 xf.patch_info = patch_info
1450
1451 size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
1452 diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
xunchangb6105dc2018-12-06 16:39:46 -08001453 logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"
xunchang3df4d5e2018-12-06 15:03:45 -08001454 " compression_size: %d, ratio %.2f%%", xf.tgt_name,
1455 xf.tgt_ranges.size(), diff_style,
1456 len(xf.patch_info.content), compressed_size, size_ratio)
1457
xunchangb6105dc2018-12-06 16:39:46 -08001458 used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)
xunchang3df4d5e2018-12-06 15:03:45 -08001459 # Convert the transfer to new if the compressed size is smaller or equal.
1460 # We don't need to maintain the stash_before lists here because the
1461 # graph will be regenerated later.
1462 if len(xf.patch_info.content) >= compressed_size:
xunchangb6105dc2018-12-06 16:39:46 -08001463 # Add the transfer to the candidate list with a negative score, so it
1464 # sorts first and gets converted before the others.
1465 conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
1466 -1))
1467 elif used_stash_blocks > 0:
1468 # This heuristic estimates the size increase in the final package per
1469 # stashed block removed.
1470 score = ((compressed_size - len(xf.patch_info.content)) * 100.0
1471 / used_stash_blocks)
1472 conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
1473 score))
1474 # Transfers with lower score (i.e. less expensive to convert) will be
1475 # converted first.
1476 conversion_candidates.sort(key=lambda x: x.score)
xunchang3df4d5e2018-12-06 15:03:45 -08001477
xunchangb6105dc2018-12-06 16:39:46 -08001478 # TODO(xunchang), improve the logic to find the transfers to convert, e.g.
1479 # convert the ones that contribute to the max stash, run ReviseStashSize
1480 # multiple times etc.
1481 removed_stashed_blocks = 0
1482 for xf, used_stash_blocks, _ in conversion_candidates:
1483 logger.info("Converting %s to new", xf.tgt_name)
1484 xf.ConvertToNew()
1485 removed_stashed_blocks += used_stash_blocks
1486 # Experiments show that we will get a smaller package size if we remove
1487 # slightly more stashed blocks than the violated stash blocks.
1488 if removed_stashed_blocks >= violated_stash_blocks:
1489 break
xunchang3df4d5e2018-12-06 15:03:45 -08001490
1491 logger.info("Removed %d stashed blocks", removed_stashed_blocks)
1492
Doug Zongkerfc44a512014-08-26 13:10:25 -07001493 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001494 """Parse the file_map to generate all the transfers."""
1495
Tianjie Xu41390d42017-11-22 11:35:18 -08001496 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1497 src_ranges, style, by_id):
1498 """Add one or multiple Transfer()s by splitting large files.
1499
1500 For BBOTA v3, we need to stash source blocks for the resumable feature.
1501 However, with the growth of file sizes and the shrinking of the cache
1502 partition, source blocks may become too large to be stashed. If a file occupies
1503 too many blocks, we split it into smaller pieces by getting multiple
1504 Transfer()s.
1505
1506 The downside is that after splitting, we may increase the package size
1507 since the split pieces don't align well. According to our experiments,
1508 1/8 of the cache size as the per-piece limit appears to be optimal.
1509 Compared to the fixed 1024-block limit, it reduces the overall package
1510 size by 30% for volantis, and 20% for angler and bullhead."""
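# For example (hypothetical numbers): with a 64 MiB cache and 4 KiB blocks,
# max_blocks_per_transfer = 64 MiB * 0.125 / 4 KiB = 2048 blocks, so a
# 100 MiB file would be emitted as 13 Transfer() pieces.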
1511
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001512 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001513 while (tgt_ranges.size() > max_blocks_per_transfer and
1514 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001515 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1516 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001517 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1518 src_first = src_ranges.first(max_blocks_per_transfer)
1519
Tao Bao183e56e2017-03-05 17:05:09 -08001520 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1521 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1522 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001523
1524 tgt_ranges = tgt_ranges.subtract(tgt_first)
1525 src_ranges = src_ranges.subtract(src_first)
1526 pieces += 1
1527
1528 # Handle remaining blocks.
1529 if tgt_ranges.size() or src_ranges.size():
1530 # Must be both non-empty.
1531 assert tgt_ranges.size() and src_ranges.size()
1532 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1533 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001534 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1535 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1536 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001537
Tianjie Xu41390d42017-11-22 11:35:18 -08001538 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1539 by_id):
1540 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001541
Tianjie Xu41390d42017-11-22 11:35:18 -08001542 This function will construct a list of zip archives, which will later be
1543 split by imgdiff to reduce the final patch size. For the other files,
1544 we simply split them based on a fixed chunk size, at the cost of a potential
1545 patch size penalty.
1546 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001547
1548 assert style == "diff"
1549
1550 # Change nothing for small files.
1551 if (tgt_ranges.size() <= max_blocks_per_transfer and
1552 src_ranges.size() <= max_blocks_per_transfer):
1553 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1554 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1555 style, by_id)
1556 return
1557
Tao Baocb73aed2018-01-31 17:32:40 -08001558 # Split large APKs with imgdiff, if possible. We're intentionally checking
1559 # file types one more time (CanUseImgdiff() checks that as well), before
1560 # calling the costly RangeSha1()s.
1561 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1562 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001563 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001564 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1565 return
1566
Tianjie Xu41390d42017-11-22 11:35:18 -08001567 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1568 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001569
Tao Bao08c85832016-09-19 22:26:30 -07001570 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1571 split=False):
1572 """Wrapper function for adding a Transfer()."""
1573
1574 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1575 # otherwise add the Transfer() as is.
1576 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001577 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1578 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1579 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001580 return
1581
1582 # Handle .odex files specially to analyze the block-wise difference. If
1583 # most of the blocks are identical with only few changes (e.g. header),
1584 # we will patch the changed blocks only. This avoids stashing unchanged
1585 # blocks while patching. We limit the analysis to files without size
1586 changes only. This is to avoid increasing the OTA generation cost too
1587 much.
1588 if (tgt_name.split(".")[-1].lower() == 'odex' and
1589 tgt_ranges.size() == src_ranges.size()):
1590
1591 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1592 # few blocks remain identical, we lose the opportunity to use imgdiff
1593 # that may have better compression ratio than bsdiff.
1594 crop_threshold = 0.5
1595
1596 tgt_skipped = RangeSet()
1597 src_skipped = RangeSet()
1598 tgt_size = tgt_ranges.size()
1599 tgt_changed = 0
1600 for src_block, tgt_block in zip(src_ranges.next_item(),
1601 tgt_ranges.next_item()):
1602 src_rs = RangeSet(str(src_block))
1603 tgt_rs = RangeSet(str(tgt_block))
1604 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1605 tgt_skipped = tgt_skipped.union(tgt_rs)
1606 src_skipped = src_skipped.union(src_rs)
1607 else:
1608 tgt_changed += tgt_rs.size()
1609
1610 # Terminate early if no clear sign of benefits.
1611 if tgt_changed > tgt_size * crop_threshold:
1612 break
1613
1614 if tgt_changed < tgt_size * crop_threshold:
1615 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao32fcdab2018-10-12 10:30:39 -07001616 logger.info(
1617 '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
1618 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
Tianjie Xu41390d42017-11-22 11:35:18 -08001619 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001620 "%s-skipped" % (tgt_name,),
1621 "%s-skipped" % (src_name,),
1622 tgt_skipped, src_skipped, style, by_id)
1623
1624 # Intentionally change the file extension to avoid being imgdiff'd as
1625 # the files are no longer in their original format.
1626 tgt_name = "%s-cropped" % (tgt_name,)
1627 src_name = "%s-cropped" % (src_name,)
1628 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1629 src_ranges = src_ranges.subtract(src_skipped)
1630
1631 # Possibly having no changed blocks.
1632 if not tgt_ranges:
1633 return
1634
1635 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001636 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001637 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1638
Tianjie Xu25366072017-09-08 17:19:02 -07001639 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1640 split_info):
1641 """Parse the split_info and return a list of info tuples.
1642
1643 Args:
1644 patch_size: total size of the patch file.
1645 tgt_ranges: Ranges of the target file within the original image.
1646 src_ranges: Ranges of the source file within the original image.
1647 split_info format:
1648 imgdiff version#
1649 count of pieces
1650 <patch_size_1> <tgt_size_1> <src_ranges_1>
1651 ...
1652 <patch_size_n> <tgt_size_n> <src_ranges_n>
1653
1654 Returns:
1655 A list of (patch_start, patch_length, split_tgt_ranges, split_src_ranges) tuples.
1656 """
1657
1658 version = int(split_info[0])
1659 assert version == 2
1660 count = int(split_info[1])
1661 assert len(split_info) - 2 == count
1662
1663 split_info_list = []
1664 patch_start = 0
1665 tgt_remain = copy.deepcopy(tgt_ranges)
1666 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1667 for line in split_info[2:]:
1668 info = line.split()
1669 assert len(info) == 3
1670 patch_length = int(info[0])
1671
1672 split_tgt_size = int(info[1])
1673 assert split_tgt_size % 4096 == 0
1674 assert split_tgt_size / 4096 <= tgt_remain.size()
1675 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1676 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1677
1678 # Find the split_src_ranges within the image file from its relative
1679 # position in file.
1680 split_src_indices = RangeSet.parse_raw(info[2])
1681 split_src_ranges = RangeSet()
1682 for r in split_src_indices:
1683 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1684 assert not split_src_ranges.overlaps(curr_range)
1685 split_src_ranges = split_src_ranges.union(curr_range)
1686
1687 split_info_list.append((patch_start, patch_length,
1688 split_tgt_ranges, split_src_ranges))
1689 patch_start += patch_length
1690
1691 # Check that the sizes of all the split pieces add up to the final file
1692 # size for patch and target.
1693 assert tgt_remain.size() == 0
1694 assert patch_start == patch_size
1695 return split_info_list
1696
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001697 def SplitLargeApks():
1698 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001699
1700 Example: Chrome.apk will be split into
1701 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1702 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1703 ...
1704
1705 After the split, the target pieces are continuous and block aligned; and
1706 the source pieces are mutually exclusive. During the split, we also
1707 generate and save the image patch between src-X & tgt-X. This patch will
1708 be valid because the block ranges of src-X & tgt-X will always stay the
1709 same afterwards; but there's a chance we don't use the patch if we
1710 convert the "diff" command into "new" or "move" later.
1711 """
1712
1713 while True:
1714 with transfer_lock:
1715 if not large_apks:
1716 return
1717 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1718
1719 src_file = common.MakeTempFile(prefix="src-")
1720 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001721 with open(src_file, "wb") as src_fd:
1722 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1723 with open(tgt_file, "wb") as tgt_fd:
1724 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001725
1726 patch_file = common.MakeTempFile(prefix="patch-")
1727 patch_info_file = common.MakeTempFile(prefix="split_info-")
1728 cmd = ["imgdiff", "-z",
1729 "--block-limit={}".format(max_blocks_per_transfer),
1730 "--split-info=" + patch_info_file,
1731 src_file, tgt_file, patch_file]
Tao Bao73dd4f42018-10-04 16:25:33 -07001732 proc = common.Run(cmd)
1733 imgdiff_output, _ = proc.communicate()
1734 assert proc.returncode == 0, \
Tao Bao4ccea852018-02-06 15:16:41 -08001735 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1736 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001737
1738 with open(patch_info_file) as patch_info:
1739 lines = patch_info.readlines()
1740
1741 patch_size_total = os.path.getsize(patch_file)
1742 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1743 tgt_ranges, src_ranges,
1744 lines)
1745 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001746 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001747 with open(patch_file) as f:
1748 f.seek(patch_start)
1749 patch_content = f.read(patch_length)
1750
1751 split_src_name = "{}-{}".format(src_name, index)
1752 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001753 split_large_apks.append((split_tgt_name,
1754 split_src_name,
1755 split_tgt_ranges,
1756 split_src_ranges,
1757 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001758
Tao Bao32fcdab2018-10-12 10:30:39 -07001759 logger.info("Finding transfers...")
Tao Bao08c85832016-09-19 22:26:30 -07001760
Tianjie Xu25366072017-09-08 17:19:02 -07001761 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001762 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001763 cache_size = common.OPTIONS.cache_size
1764 split_threshold = 0.125
1765 max_blocks_per_transfer = int(cache_size * split_threshold /
1766 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001767 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001768 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001769 if tgt_fn == "__ZERO":
1770 # the special "__ZERO" domain is all the blocks not contained
1771 # in any file and that are filled with zeros. We have a
1772 # special transfer style for zero blocks.
1773 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001774 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1775 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001776 continue
1777
Tao Baoff777812015-05-12 11:42:31 -07001778 elif tgt_fn == "__COPY":
1779 # "__COPY" domain includes all the blocks not contained in any
1780 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001781 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001782 continue
1783
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001784 elif tgt_fn == "__HASHTREE":
1785 continue
1786
Doug Zongkerfc44a512014-08-26 13:10:25 -07001787 elif tgt_fn in self.src.file_map:
1788 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001789 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001790 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001791 continue
1792
1793 b = os.path.basename(tgt_fn)
1794 if b in self.src_basenames:
1795 # Look for an exact basename match in the source.
1796 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001797 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001798 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001799 continue
1800
1801 b = re.sub("[0-9]+", "#", b)
1802 if b in self.src_numpatterns:
1803 # Look for a 'number pattern' match (a basename match after
1804 # all runs of digits are replaced by "#"). (This is useful
1805 # for .so files that contain version numbers in the filename
1806 # that get bumped.)
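# For example, "libfoo-1.2.so" and "libfoo-1.3.so" both reduce to the
# pattern "libfoo-#.#.so", so the renamed source file can still be found
# and diffed against.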
1807 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001808 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001809 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001810 continue
1811
Tao Bao9a5caf22015-08-25 15:10:10 -07001812 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001813
Tianjie Xu25366072017-09-08 17:19:02 -07001814 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001815 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001816 for _ in range(self.threads)]
1817 for th in threads:
1818 th.start()
1819 while threads:
1820 threads.pop().join()
1821
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001822 # Sort the split transfers for large apks to generate a deterministic package.
1823 split_large_apks.sort()
1824 for (tgt_name, src_name, tgt_ranges, src_ranges,
1825 patch) in split_large_apks:
1826 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1827 self.tgt.RangeSha1(tgt_ranges),
1828 self.src.RangeSha1(src_ranges),
1829 "diff", self.transfers)
xunchang21531832018-12-06 14:20:05 -08001830 transfer_split.patch_info = PatchInfo(True, patch)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001831
Doug Zongkerfc44a512014-08-26 13:10:25 -07001832 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001833 for k in self.src.file_map.keys():
1834 b = os.path.basename(k)
1835 self.src_basenames[b] = k
1836 b = re.sub("[0-9]+", "#", b)
1837 self.src_numpatterns[b] = k
1838
1839 @staticmethod
1840 def AssertPartition(total, seq):
1841 """Assert that all the RangeSets in 'seq' form a partition of the
1842 'total' RangeSet (ie, they are nonintersecting and their union
1843 equals 'total')."""
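# For example, AssertPartition(RangeSet("0-9"),
# [RangeSet("0-3"), RangeSet("4-9")]) passes, while overlapping or
# incomplete pieces would trip one of the asserts below.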
Doug Zongker6ab2a502016-02-09 08:28:09 -08001844
Doug Zongkerfc44a512014-08-26 13:10:25 -07001845 so_far = RangeSet()
1846 for i in seq:
1847 assert not so_far.overlaps(i)
1848 so_far = so_far.union(i)
1849 assert so_far == total