# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Tianjie Xu25366072017-09-08 17:19:02 -070019import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080020import functools
Doug Zongker62338182014-09-08 08:29:55 -070021import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070022import itertools
23import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080028import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070029import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070030
Tao Bao3a2e3502016-12-28 09:14:39 -080031from collections import deque, OrderedDict
32from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070036__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Generate a binary patch that turns srcfile into tgtfile.

  Runs the external 'imgdiff -z' tool when imgdiff is True, otherwise
  'bsdiff', and returns the produced patch as a byte string.

  Raises:
    ValueError: if the external diff tool exits with a non-zero status;
        the tool's combined stdout/stderr is used as the message.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z']
  else:
    cmd = ['bsdiff']
  cmd += [srcfile, tgtfile, patchfile]

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  tool_output, _ = proc.communicate()
  if proc.returncode != 0:
    raise ValueError(tool_output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
58class Image(object):
Tao Bao183e56e2017-03-05 17:05:09 -080059 def RangeSha1(self, ranges):
60 raise NotImplementedError
61
Dan Albert8b72aef2015-03-23 19:13:21 -070062 def ReadRangeSet(self, ranges):
63 raise NotImplementedError
64
Tao Bao68658c02015-06-01 13:40:49 -070065 def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert8b72aef2015-03-23 19:13:21 -070066 raise NotImplementedError
67
Tao Bao183e56e2017-03-05 17:05:09 -080068 def WriteRangeDataToFd(self, ranges, fd):
69 raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image: no blocks, no data."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    # Nothing to care about, clobber, or extend in an empty image.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data anywhere, so every range hashes like empty input.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so the
    # include_clobbered_blocks flag makes no difference either way.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as an image.

    Args:
      data: the raw image contents; must be a whole number of 4096-byte
          blocks unless trim or pad is given.
      trim: if True, drop a trailing partial block.
      pad: if True, zero-fill a trailing partial block. Mutually exclusive
          with trim.

    Raises:
      ValueError: if len(data) is not block-aligned and neither trim nor
          pad was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int under Python 3 (or
    # with "from __future__ import division"); true division would yield a
    # float, which breaks range() and RangeSet below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # NOTE(review): clobbered_blocks is stored as a flat list here, but
    # TotalSha1() passes it to care_map.subtract(), which presumably
    # expects a RangeSet — verify against rangelib.
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each (non-padded) block as all-zero or not; the lists hold
    # flattened (start, end) pairs in the form RangeSet's data= expects.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yield the raw data for each (start, end) block range in 'ranges'."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Return (as a hex string) the SHA-1 of the data in 'ranges'."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    # Returned as a one-element list holding a generator, per the
    # "list/tuple of strings" Image contract.
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return (as a hex string) the SHA-1 over the image.

    Excludes the clobbered (padded) blocks by default; hashes every byte
    of self.data when include_clobbered_blocks is True.
    """
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in 'ranges' to the file object 'fd'."""
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """One update command: produce tgt_ranges from src_ranges via 'style'.

  Each instance registers itself in 'by_id' (its index there becomes its
  id) and carries the ordering/stash bookkeeping that BlockImageDiff
  fills in later.
  """

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style
    # Intact only when both range sets report themselves monotonic.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    # Only "diff"-style transfers may carry patch data; clearing with a
    # falsy value is always allowed.
    if patch:
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Blocks this transfer newly stashes minus blocks it releases."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    released = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - released

  def ConvertToNew(self):
    """Downgrade this transfer to a plain "new" command with no source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
234
235
@functools.total_ordering
class HeapItem(object):
  """Wrapper that lets Transfer objects live in a heapq min-heap.

  Heap entries are invalidated lazily via clear(); truthiness tells the
  consumer whether the wrapped item is still live.
  """

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    self.item = None

  def __bool__(self):
    # Truthy while the wrapped item is still live. (The previous version
    # returned `self.item is None`, inverting the check; the bug was
    # masked because Python 2 consults __nonzero__, not __bool__.)
    return self.item is not None

  # Python 2 uses __nonzero__ for truth testing, while Python 3 uses
  # __bool__; alias so both see the same behavior.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
251
252
# BlockImageDiff works on two image objects.  An image object is
# anything that provides the following attributes:
#
#    blocksize: the size in bytes of a block, currently must be 4096.
#
#    total_blocks: the total size of the partition/image, in blocks.
#
#    care_map: a RangeSet containing which blocks (in the range [0,
#      total_blocks) we actually care about; i.e. which blocks contain
#      data.
#
#    file_map: a dict that partitions the blocks contained in care_map
#      into smaller domains that are useful for doing diffs on.
#      (Typically a domain is a file, and the key in file_map is the
#      pathname.)
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS. They need to be excluded when
#      verifying the partition integrity.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet.  The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    RangeSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the specified range.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (ie, all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.
291
292class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700293 def __init__(self, tgt, src=None, threads=None, version=4,
294 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700295 if threads is None:
296 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700297 if threads == 0:
298 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700299 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700300 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700301 self.transfers = []
302 self.src_basenames = {}
303 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800304 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700305 self.touched_src_ranges = RangeSet()
306 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700307 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700308
Tao Bao8fad03e2017-03-01 14:36:26 -0800309 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700310
311 self.tgt = tgt
312 if src is None:
313 src = EmptyImage()
314 self.src = src
315
316 # The updater code that installs the patch always uses 4k blocks.
317 assert tgt.blocksize == 4096
318 assert src.blocksize == 4096
319
320 # The range sets in each filemap should comprise a partition of
321 # the care map.
322 self.AssertPartition(src.care_map, src.file_map.values())
323 self.AssertPartition(tgt.care_map, tgt.file_map.values())
324
  @property
  def max_stashed_size(self):
    """Peak stash usage in bytes; set by WriteTransfers(), 0 before then."""
    return self._max_stashed_size
328
  def Compute(self, prefix):
    """Compute the transfer list and write all output files.

    Produces "<prefix>.transfer.list" (via WriteTransfers) plus the
    patch/new-data files (via ComputePatches).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
360
  def WriteTransfers(self, prefix):
    """Write the ordered transfer list to "<prefix>.transfer.list".

    Emits one command per line (stash/free/new/zero/move/bsdiff/imgdiff/
    erase), tracks peak stash usage, and records which source ranges the
    update touches (self.touched_src_ranges / self.touched_src_sha1).
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          # Identical data is already stashed; just bump the ref count.
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      # 'free' commands generated by this transfer; appended after the
      # command itself, and their sizes subtracted from stashed_blocks.
      free_string = []
      free_size = 0

      # Build src_str, the source specifier of the command:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        # Express the stash's destination relative to the command's source.
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference: free the slot after this command runs.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        # Everything comes from stashes.
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # Command formats, version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op: emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    # Four header lines precede the commands.
    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700567
  def ReviseStashSize(self):
    """Revise the plan so runtime stash usage fits under the cache limit.

    Walks the transfers in order, simulating stash usage; whenever a stash
    would push usage past max_allowed blocks, the command that would use
    that stash (or the command itself, for implicit diff stashes) is
    converted to a plain "new" command.

    Returns:
      The number of target blocks converted to "new" commands.
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        # Identical data already stashed costs nothing extra.
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700659
  def ComputePatches(self, prefix):
    """Computes patches for all "diff" transfers and emits the data files.

    Writes the raw data of "new" transfers to "<prefix>.new.dat" and the
    concatenated patch blobs to "<prefix>.patch.dat", recording each
    transfer's patch_start / patch_len. A "diff" transfer whose source and
    target SHA-1s match is converted to "move" (no patch needed); the rest
    become "imgdiff" or "bsdiff" and are diffed by a pool of worker
    threads. If any patch cannot be generated (even after falling back
    from imgdiff to bsdiff), the accumulated errors are printed and the
    process terminates via sys.exit(1).

    Args:
      prefix: path prefix for the output data files.
    """
    print("Reticulating splines...")
    # Each entry: (index into self.transfers, use imgdiff?, slot in the
    # 'patches' result list).
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
              str(xf.tgt_ranges)))

        elif xf.style == "new":
          # "new" data is emitted verbatim into the .new.dat stream.
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style,
              xf.tgt_name, str(xf.tgt_ranges)))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          # ["he", "llo"] vs ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
                  tgt_size, tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges)))
          else:
            if xf.patch:
              # We have already generated the patch with imgdiff. Check if the
              # transfer is intact.
              # (NOTE(review): pre-computed patches presumably come from the
              # large-APK splitting path in FindTransfers — confirm.)
              assert not self.disable_imgdiff
              imgdiff = True
              if not xf.intact:
                imgdiff = False
                xf.patch = None
            else:
              # For files in zip format (eg, APKs, JARs, etc.) we would
              # like to use imgdiff -z if possible (because it usually
              # produces significantly smaller patches than bsdiff).
              # This is permissible if:
              #
              # - imgdiff is not disabled, and
              # - the source and target files are monotonic (ie, the
              # data is stored with blocks in increasing order), and
              # - we haven't removed any blocks from the source set.
              #
              # If these conditions are satisfied then appending all the
              # blocks in the set together in order will produce a valid
              # zip file (plus possibly extra zeros in the last block),
              # which is what imgdiff needs to operate. (imgdiff is
              # fine with extra zeros at the end of the file.)
              imgdiff = (not self.disable_imgdiff and xf.intact and
                         xf.tgt_name.split(".")[-1].lower()
                         in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_queue:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")

      diff_total = len(diff_queue)
      # Results are stored by pre-assigned slot so the output order is
      # deterministic regardless of which worker finishes first.
      patches = [None] * diff_total
      error_messages = []
      warning_messages = []
      if sys.stdout.isatty():
        # Progress counter shared by the worker threads; declared as a
        # module-level global so the nested diff_worker can update it.
        global diff_done
        diff_done = 0

      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call, which
      # already runs in a separate process (not affected much by the GIL -
      # Global Interpreter Lock). Using multiprocess also requires either a)
      # writing the diff input files in the main process before forking, or b)
      # reopening the image file (SparseImage) in the worker processes. Doing
      # neither of them further improves the performance.
      lock = threading.Lock()
      def diff_worker():
        # Worker loop: pull one queued diff at a time (from the tail of
        # diff_queue) until the queue is drained.
        while True:
          with lock:
            if not diff_queue:
              return
            xf_index, imgdiff, patch_index = diff_queue.pop()

          xf = self.transfers[xf_index]
          patch = xf.patch
          if not patch:
            src_ranges = xf.src_ranges
            tgt_ranges = xf.tgt_ranges

            # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
            with lock:
              src_file = common.MakeTempFile(prefix="src-")
              with open(src_file, "wb") as fd:
                self.src.WriteRangeDataToFd(src_ranges, fd)

              tgt_file = common.MakeTempFile(prefix="tgt-")
              with open(tgt_file, "wb") as fd:
                self.tgt.WriteRangeDataToFd(tgt_ranges, fd)

            message = []
            try:
              patch = compute_patch(src_file, tgt_file, imgdiff)
            except ValueError as e:
              # NOTE(review): e.message is Python 2 only; str(e) would be
              # needed under Python 3.
              message.append(
                  "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                      "imgdiff" if imgdiff else "bsdiff",
                      xf.tgt_name if xf.tgt_name == xf.src_name else
                          xf.tgt_name + " (from " + xf.src_name + ")",
                      xf.tgt_ranges, xf.src_ranges, e.message))
              # TODO(b/68016761): Better handle the holes in mke2fs created
              # images.
              if imgdiff:
                # imgdiff failed: retry with bsdiff and downgrade the
                # failure to a warning if that succeeds.
                try:
                  patch = compute_patch(src_file, tgt_file, imgdiff=False)
                  message.append(
                      "Fell back and generated with bsdiff instead for %s" % (
                          xf.tgt_name,))
                  xf.style = "bsdiff"
                  with lock:
                    warning_messages.extend(message)
                    del message[:]
                except ValueError as e:
                  message.append(
                      "Also failed to generate with bsdiff for %s:\n%s" % (
                          xf.tgt_name, e.message))

            if message:
              with lock:
                error_messages.extend(message)

          with lock:
            patches[patch_index] = (xf_index, patch)
            if sys.stdout.isatty():
              global diff_done
              diff_done += 1
              progress = diff_done * 100 / diff_total
              # '\033[K' is to clear to EOL.
              print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
              sys.stdout.flush()

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()

      if sys.stdout.isatty():
        print('\n')

      if warning_messages:
        print('WARNING:')
        print('\n'.join(warning_messages))
        print('\n\n\n')

      if error_messages:
        print('ERROR:')
        print('\n'.join(error_messages))
        print('\n\n\n')
        sys.exit(1)
    else:
      patches = []

    # Concatenate all patches into .patch.dat, recording each transfer's
    # offset and length within that stream.
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch)

        if common.OPTIONS.verbose:
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
              xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
              xf.style,
              xf.tgt_name if xf.tgt_name == xf.src_name else (
                  xf.tgt_name + " (from " + xf.src_name + ")"),
              xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700861
862 def AssertSequenceGood(self):
863 # Simulate the sequences of transfers we will output, and check that:
864 # - we never read a block after writing it, and
865 # - we write every block we care about exactly once.
866
867 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800868 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700869
870 # Imagine processing the transfers in order.
871 for xf in self.transfers:
872 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700873
874 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800875 for _, sr in xf.use_stash:
876 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700877
Doug Zongker6ab2a502016-02-09 08:28:09 -0800878 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800879 # Source image could be larger. Don't check the blocks that are in the
880 # source image only. Since they are not in 'touched', and won't ever
881 # be touched.
882 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800883 assert touched[i] == 0
884
885 # Check that the output blocks for this transfer haven't yet
886 # been touched, and touch all the blocks written by this
887 # transfer.
888 for s, e in xf.tgt_ranges:
889 for i in range(s, e):
890 assert touched[i] == 0
891 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700892
893 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800894 for s, e in self.tgt.care_map:
895 for i in range(s, e):
896 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700897
Doug Zongker62338182014-09-08 08:29:55 -0700898 def ImproveVertexSequence(self):
899 print("Improving vertex order...")
900
901 # At this point our digraph is acyclic; we reversed any edges that
902 # were backwards in the heuristically-generated sequence. The
903 # previously-generated order is still acceptable, but we hope to
904 # find a better order that needs less memory for stashed data.
905 # Now we do a topological sort to generate a new vertex order,
906 # using a greedy algorithm to choose which vertex goes next
907 # whenever we have a choice.
908
909 # Make a copy of the edge set; this copy will get destroyed by the
910 # algorithm.
911 for xf in self.transfers:
912 xf.incoming = xf.goes_after.copy()
913 xf.outgoing = xf.goes_before.copy()
914
915 L = [] # the new vertex order
916
917 # S is the set of sources in the remaining graph; we always choose
918 # the one that leaves the least amount of stashed data after it's
919 # executed.
920 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
921 if not u.incoming]
922 heapq.heapify(S)
923
924 while S:
925 _, _, xf = heapq.heappop(S)
926 L.append(xf)
927 for u in xf.outgoing:
928 del u.incoming[xf]
929 if not u.incoming:
930 heapq.heappush(S, (u.NetStashChange(), u.order, u))
931
932 # if this fails then our graph had a cycle.
933 assert len(L) == len(self.transfers)
934
935 self.transfers = L
936 for i, xf in enumerate(L):
937 xf.order = i
938
Doug Zongkerfc44a512014-08-26 13:10:25 -0700939 def RemoveBackwardEdges(self):
940 print("Removing backward edges...")
941 in_order = 0
942 out_of_order = 0
943 lost_source = 0
944
945 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700946 lost = 0
947 size = xf.src_ranges.size()
948 for u in xf.goes_before:
949 # xf should go before u
950 if xf.order < u.order:
951 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700952 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700953 else:
954 # it doesn't, boo. trim the blocks that u writes from xf's
955 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700956 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700957 assert xf.src_ranges.overlaps(u.tgt_ranges)
958 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
959 xf.intact = False
960
961 if xf.style == "diff" and not xf.src_ranges:
962 # nothing left to diff from; treat as new data
963 xf.style = "new"
964
965 lost = size - xf.src_ranges.size()
966 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700967
968 print((" %d/%d dependencies (%.2f%%) were violated; "
969 "%d source blocks removed.") %
970 (out_of_order, in_order + out_of_order,
971 (out_of_order * 100.0 / (in_order + out_of_order))
972 if (in_order + out_of_order) else 0.0,
973 lost_source))
974
Doug Zongker62338182014-09-08 08:29:55 -0700975 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -0800976 """Reverse unsatisfying edges and compute pairs of stashed blocks.
977
978 For each transfer, make sure it properly stashes the blocks it touches and
979 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
980 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
981 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
982 it is possible to have multiple pairs with different 'stash_raw_id's. Each
983 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
984 blocks will be written to the same stash slot in WriteTransfers().
985 """
986
Doug Zongker62338182014-09-08 08:29:55 -0700987 print("Reversing backward edges...")
988 in_order = 0
989 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -0800990 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -0700991 stash_size = 0
992
993 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700994 for u in xf.goes_before.copy():
995 # xf should go before u
996 if xf.order < u.order:
997 # it does, hurray!
998 in_order += 1
999 else:
1000 # it doesn't, boo. modify u to stash the blocks that it
1001 # writes that xf wants to read, and then require u to go
1002 # before xf.
1003 out_of_order += 1
1004
1005 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1006 assert overlap
1007
Tao Bao3a2e3502016-12-28 09:14:39 -08001008 u.stash_before.append((stash_raw_id, overlap))
1009 xf.use_stash.append((stash_raw_id, overlap))
1010 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001011 stash_size += overlap.size()
1012
1013 # reverse the edge direction; now xf must go after u
1014 del xf.goes_before[u]
1015 del u.goes_after[xf]
1016 xf.goes_after[u] = None # value doesn't matter
1017 u.goes_before[xf] = None
1018
1019 print((" %d/%d dependencies (%.2f%%) were violated; "
1020 "%d source blocks stashed.") %
1021 (out_of_order, in_order + out_of_order,
1022 (out_of_order * 100.0 / (in_order + out_of_order))
1023 if (in_order + out_of_order) else 0.0,
1024 stash_size))
1025
  def FindVertexSequence(self):
    """Chooses a transfer order that (heuristically) minimizes lost edges.

    Arranges the transfer digraph's vertices on a line, trying to keep
    the total weight of left-pointing (i.e. violated) edges small; those
    edges are later removed/reversed to make the graph a DAG. On return,
    self.transfers is reordered and each transfer's 'order' field is set
    to its position in the new sequence.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = (weight of outgoing edges) - (weight of incoming edges);
      # the higher the score, the more this vertex wants to be a source.
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    # Max-score heap of candidate vertices. HeapItem is defined earlier
    # in this file; stale entries are invalidated via clear() and skipped
    # by the truthiness test when popped.
    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Lazy heap update: instead of re-sorting, invalidate the old entry
      # and push a fresh one carrying the new score.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G: continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G: continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G: break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until we find a live (non-cleared) entry whose vertex is
      # still in G.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1135
1136 def GenerateDigraph(self):
1137 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001138
1139 # Each item of source_ranges will be:
1140 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001141 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001142 source_ranges = []
1143 for b in self.transfers:
1144 for s, e in b.src_ranges:
1145 if e > len(source_ranges):
1146 source_ranges.extend([None] * (e-len(source_ranges)))
1147 for i in range(s, e):
1148 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001149 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001150 else:
Tao Bao33482282016-10-24 16:49:08 -07001151 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001152
Doug Zongkerfc44a512014-08-26 13:10:25 -07001153 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001154 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001155 for s, e in a.tgt_ranges:
1156 for i in range(s, e):
1157 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001158 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1159 if source_ranges[i] is not None:
1160 for j in source_ranges[i]:
1161 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001162
1163 for b in intersections:
1164 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001165
1166 # If the blocks written by A are read by B, then B needs to go before A.
1167 i = a.tgt_ranges.intersect(b.src_ranges)
1168 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001169 if b.src_name == "__ZERO":
1170 # the cost of removing source blocks for the __ZERO domain
1171 # is (nearly) zero.
1172 size = 0
1173 else:
1174 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001175 b.goes_before[a] = size
1176 a.goes_after[b] = size
1177
1178 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001179 """Parse the file_map to generate all the transfers."""
1180
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001181 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1182 by_id):
1183 """Add one or multiple Transfer()s by splitting large files."""
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001184 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001185 while (tgt_ranges.size() > max_blocks_per_transfer and
1186 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001187 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1188 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001189 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1190 src_first = src_ranges.first(max_blocks_per_transfer)
1191
Tao Bao183e56e2017-03-05 17:05:09 -08001192 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1193 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1194 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001195
1196 tgt_ranges = tgt_ranges.subtract(tgt_first)
1197 src_ranges = src_ranges.subtract(src_first)
1198 pieces += 1
1199
1200 # Handle remaining blocks.
1201 if tgt_ranges.size() or src_ranges.size():
1202 # Must be both non-empty.
1203 assert tgt_ranges.size() and src_ranges.size()
1204 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1205 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001206 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1207 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1208 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001209
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001210 def FindZipsAndAddSplitTransfers(tgt_name, src_name, tgt_ranges,
1211 src_ranges, style, by_id):
1212 """Find all the zip archives and add split transfers for the other files.
1213
1214 For BBOTA v3, we need to stash source blocks for resumable feature.
1215 However, with the growth of file size and the shrink of the cache
1216 partition source blocks are too large to be stashed. If a file occupies
1217 too many blocks, we split it into smaller pieces by getting multiple
1218 Transfer()s.
1219
1220 The downside is that after splitting, we may increase the package size
1221 since the split pieces don't align well. According to our experiments,
1222 1/8 of the cache size as the per-piece limit appears to be optimal.
1223 Compared to the fixed 1024-block limit, it reduces the overall package
1224 size by 30% for volantis, and 20% for angler and bullhead."""
1225
1226 assert style == "diff"
1227
1228 # Change nothing for small files.
1229 if (tgt_ranges.size() <= max_blocks_per_transfer and
1230 src_ranges.size() <= max_blocks_per_transfer):
1231 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1232 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1233 style, by_id)
1234 return
1235
1236 if tgt_name.split(".")[-1].lower() in ("apk", "jar", "zip"):
1237 split_enable = (not self.disable_imgdiff and src_ranges.monotonic and
1238 tgt_ranges.monotonic)
1239 if split_enable and (self.tgt.RangeSha1(tgt_ranges) !=
1240 self.src.RangeSha1(src_ranges)):
1241 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1242 return
1243
1244 AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
1245 style, by_id)
1246
Tao Bao08c85832016-09-19 22:26:30 -07001247 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1248 split=False):
1249 """Wrapper function for adding a Transfer()."""
1250
1251 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1252 # otherwise add the Transfer() as is.
1253 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001254 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1255 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1256 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001257 return
1258
1259 # Handle .odex files specially to analyze the block-wise difference. If
1260 # most of the blocks are identical with only few changes (e.g. header),
1261 # we will patch the changed blocks only. This avoids stashing unchanged
1262 # blocks while patching. We limit the analysis to files without size
1263 # changes only. This is to avoid sacrificing the OTA generation cost too
1264 # much.
1265 if (tgt_name.split(".")[-1].lower() == 'odex' and
1266 tgt_ranges.size() == src_ranges.size()):
1267
1268 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1269 # few blocks remain identical, we lose the opportunity to use imgdiff
1270 # that may have better compression ratio than bsdiff.
1271 crop_threshold = 0.5
1272
1273 tgt_skipped = RangeSet()
1274 src_skipped = RangeSet()
1275 tgt_size = tgt_ranges.size()
1276 tgt_changed = 0
1277 for src_block, tgt_block in zip(src_ranges.next_item(),
1278 tgt_ranges.next_item()):
1279 src_rs = RangeSet(str(src_block))
1280 tgt_rs = RangeSet(str(tgt_block))
1281 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1282 tgt_skipped = tgt_skipped.union(tgt_rs)
1283 src_skipped = src_skipped.union(src_rs)
1284 else:
1285 tgt_changed += tgt_rs.size()
1286
1287 # Terminate early if no clear sign of benefits.
1288 if tgt_changed > tgt_size * crop_threshold:
1289 break
1290
1291 if tgt_changed < tgt_size * crop_threshold:
1292 assert tgt_changed + tgt_skipped.size() == tgt_size
1293 print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
1294 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001295 FindZipsAndAddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001296 "%s-skipped" % (tgt_name,),
1297 "%s-skipped" % (src_name,),
1298 tgt_skipped, src_skipped, style, by_id)
1299
1300 # Intentionally change the file extension to avoid being imgdiff'd as
1301 # the files are no longer in their original format.
1302 tgt_name = "%s-cropped" % (tgt_name,)
1303 src_name = "%s-cropped" % (src_name,)
1304 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1305 src_ranges = src_ranges.subtract(src_skipped)
1306
1307 # Possibly having no changed blocks.
1308 if not tgt_ranges:
1309 return
1310
1311 # Add the transfer(s).
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001312 FindZipsAndAddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001313 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1314
Tianjie Xu25366072017-09-08 17:19:02 -07001315 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1316 split_info):
1317 """Parse the split_info and return a list of info tuples.
1318
1319 Args:
1320 patch_size: total size of the patch file.
1321 tgt_ranges: Ranges of the target file within the original image.
1322 src_ranges: Ranges of the source file within the original image.
1323 split_info format:
1324 imgdiff version#
1325 count of pieces
1326 <patch_size_1> <tgt_size_1> <src_ranges_1>
1327 ...
1328 <patch_size_n> <tgt_size_n> <src_ranges_n>
1329
1330 Returns:
1331 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1332 """
1333
1334 version = int(split_info[0])
1335 assert version == 2
1336 count = int(split_info[1])
1337 assert len(split_info) - 2 == count
1338
1339 split_info_list = []
1340 patch_start = 0
1341 tgt_remain = copy.deepcopy(tgt_ranges)
1342 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1343 for line in split_info[2:]:
1344 info = line.split()
1345 assert len(info) == 3
1346 patch_length = int(info[0])
1347
1348 split_tgt_size = int(info[1])
1349 assert split_tgt_size % 4096 == 0
1350 assert split_tgt_size / 4096 <= tgt_remain.size()
1351 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1352 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1353
1354 # Find the split_src_ranges within the image file from its relative
1355 # position in file.
1356 split_src_indices = RangeSet.parse_raw(info[2])
1357 split_src_ranges = RangeSet()
1358 for r in split_src_indices:
1359 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1360 assert not split_src_ranges.overlaps(curr_range)
1361 split_src_ranges = split_src_ranges.union(curr_range)
1362
1363 split_info_list.append((patch_start, patch_length,
1364 split_tgt_ranges, split_src_ranges))
1365 patch_start += patch_length
1366
1367 # Check that the sizes of all the split pieces add up to the final file
1368 # size for patch and target.
1369 assert tgt_remain.size() == 0
1370 assert patch_start == patch_size
1371 return split_info_list
1372
1373 def AddSplitTransferForLargeApks():
1374 """Create split transfers for large apk files.
1375
1376 Example: Chrome.apk will be split into
1377 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1378 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1379 ...
1380
1381 After the split, the target pieces are continuous and block aligned; and
1382 the source pieces are mutually exclusive. During the split, we also
1383 generate and save the image patch between src-X & tgt-X. This patch will
1384 be valid because the block ranges of src-X & tgt-X will always stay the
1385 same afterwards; but there's a chance we don't use the patch if we
1386 convert the "diff" command into "new" or "move" later.
1387 """
1388
1389 while True:
1390 with transfer_lock:
1391 if not large_apks:
1392 return
1393 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1394
1395 src_file = common.MakeTempFile(prefix="src-")
1396 tgt_file = common.MakeTempFile(prefix="tgt-")
1397 with transfer_lock:
1398 with open(src_file, "wb") as src_fd:
1399 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1400 with open(tgt_file, "wb") as tgt_fd:
1401 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
1402
1403 patch_file = common.MakeTempFile(prefix="patch-")
1404 patch_info_file = common.MakeTempFile(prefix="split_info-")
1405 cmd = ["imgdiff", "-z",
1406 "--block-limit={}".format(max_blocks_per_transfer),
1407 "--split-info=" + patch_info_file,
1408 src_file, tgt_file, patch_file]
1409 p = common.Run(cmd, stdout=subprocess.PIPE)
1410 p.communicate()
Tianjie Xu25366072017-09-08 17:19:02 -07001411 if p.returncode != 0:
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001412 print("Failed to create patch between {} and {},"
1413 " falling back to bsdiff".format(src_name, tgt_name))
1414 with transfer_lock:
1415 AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
1416 "diff", self.transfers)
1417 continue
Tianjie Xu25366072017-09-08 17:19:02 -07001418
1419 with open(patch_info_file) as patch_info:
1420 lines = patch_info.readlines()
1421
1422 patch_size_total = os.path.getsize(patch_file)
1423 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1424 tgt_ranges, src_ranges,
1425 lines)
1426 for index, (patch_start, patch_length, split_tgt_ranges,
1427 split_src_ranges) in enumerate(split_info_list):
1428 with open(patch_file) as f:
1429 f.seek(patch_start)
1430 patch_content = f.read(patch_length)
1431
1432 split_src_name = "{}-{}".format(src_name, index)
1433 split_tgt_name = "{}-{}".format(tgt_name, index)
1434 transfer_split = Transfer(split_tgt_name, split_src_name,
1435 split_tgt_ranges, split_src_ranges,
1436 self.tgt.RangeSha1(split_tgt_ranges),
1437 self.src.RangeSha1(split_src_ranges),
1438 "diff", self.transfers)
1439 transfer_split.patch = patch_content
1440
Tao Bao08c85832016-09-19 22:26:30 -07001441 print("Finding transfers...")
1442
Tianjie Xu25366072017-09-08 17:19:02 -07001443 large_apks = []
1444 cache_size = common.OPTIONS.cache_size
1445 split_threshold = 0.125
1446 max_blocks_per_transfer = int(cache_size * split_threshold /
1447 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001448 empty = RangeSet()
1449 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1450 if tgt_fn == "__ZERO":
1451 # the special "__ZERO" domain is all the blocks not contained
1452 # in any file and that are filled with zeros. We have a
1453 # special transfer style for zero blocks.
1454 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001455 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1456 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001457 continue
1458
Tao Baoff777812015-05-12 11:42:31 -07001459 elif tgt_fn == "__COPY":
1460 # "__COPY" domain includes all the blocks not contained in any
1461 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001462 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001463 continue
1464
Doug Zongkerfc44a512014-08-26 13:10:25 -07001465 elif tgt_fn in self.src.file_map:
1466 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001467 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001468 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001469 continue
1470
1471 b = os.path.basename(tgt_fn)
1472 if b in self.src_basenames:
1473 # Look for an exact basename match in the source.
1474 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001475 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001476 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001477 continue
1478
1479 b = re.sub("[0-9]+", "#", b)
1480 if b in self.src_numpatterns:
1481 # Look for a 'number pattern' match (a basename match after
1482 # all runs of digits are replaced by "#"). (This is useful
1483 # for .so files that contain version numbers in the filename
1484 # that get bumped.)
1485 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001486 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001487 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001488 continue
1489
Tao Bao9a5caf22015-08-25 15:10:10 -07001490 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001491
Tianjie Xu25366072017-09-08 17:19:02 -07001492 transfer_lock = threading.Lock()
1493 threads = [threading.Thread(target=AddSplitTransferForLargeApks)
1494 for _ in range(self.threads)]
1495 for th in threads:
1496 th.start()
1497 while threads:
1498 threads.pop().join()
1499
Doug Zongkerfc44a512014-08-26 13:10:25 -07001500 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001501 for k in self.src.file_map.keys():
1502 b = os.path.basename(k)
1503 self.src_basenames[b] = k
1504 b = re.sub("[0-9]+", "#", b)
1505 self.src_numpatterns[b] = k
1506
1507 @staticmethod
1508 def AssertPartition(total, seq):
1509 """Assert that all the RangeSets in 'seq' form a partition of the
1510 'total' RangeSet (ie, they are nonintersecting and their union
1511 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001512
Doug Zongkerfc44a512014-08-26 13:10:25 -07001513 so_far = RangeSet()
1514 for i in seq:
1515 assert not so_far.overlaps(i)
1516 so_far = so_far.union(i)
1517 assert so_far == total