blob: b1ad8b636aa6b3ccf798c89cfdb7573c8088e1cc [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Tianjie Xu25366072017-09-08 17:19:02 -070019import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080020import functools
Doug Zongker62338182014-09-08 08:29:55 -070021import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070022import itertools
23import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080028import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070029import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070030
Tao Bao3a2e3502016-12-28 09:14:39 -080031from collections import deque, OrderedDict
32from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
# Public API of this module; everything else is an implementation detail.
__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Return a binary patch that transforms srcfile into tgtfile.

  Runs imgdiff (zip-aware, via '-z') when imgdiff is True, plain bsdiff
  otherwise, writing the result to a temp file and reading it back.

  Raises:
    ValueError: if the external diff tool exits with a non-zero status;
        the tool's combined stdout/stderr is used as the message.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
class Image(object):
  """Abstract interface that BlockImageDiff requires of image objects."""

  def RangeSha1(self, ranges):
    """Return (as a hex string) the SHA-1 of the data in 'ranges'."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Return the data in 'ranges' as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return (as a hex string) the SHA-1 over the whole image."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in 'ranges' to the file object 'fd'."""
    raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data, so every range hashes like the empty string.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as an image.

    Args:
      data: raw image contents.
      trim: if True, drop a trailing partial block instead of failing.
      pad: if True, zero-pad a trailing partial block instead of failing.

    Raises:
      ValueError: if len(data) is not a multiple of the block size and
          neither trim nor pad was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # BUGFIX: use floor division so total_blocks stays an int under
    # Python 3 as well; plain "/" would yield a float there and break
    # the range() call below. (Identical result under Python 2.)
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block (excluding a padded final block) as all-zero or
    # not; block lists are stored as flat [start, end, start, end, ...].
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yield the data chunk for each (start, end) block pair in 'ranges'."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    # Returned as a single-element list holding a generator; callers only
    # need an iterable of chunks whose concatenation is the range data.
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """One command in the transfer list: produce tgt_ranges from src_ranges."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # "Intact" only when both range sets are monotonic (data laid out in
    # order on disk).
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # OrderedDict rather than dict keeps the output repeatable; plain dict
    # iteration order would depend on the Transfer objects' hash values.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register with the global list; the index doubles as the transfer id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    # Only "diff"-style transfers may carry patch data.
    if patch:
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Return blocks this command stashes minus blocks it frees."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Rewrite this transfer as a "new" command reading no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
234
235
@functools.total_ordering
class HeapItem(object):
  """Adapter that turns python's min-heap into a max-heap over item.score."""

  def __init__(self, item):
    self.item = item
    # Store the negated score: heapq pops the smallest element first, and
    # we want the item with the largest original score.
    self.score = -item.score

  def clear(self):
    # Mark this entry as dead; cleared entries are falsy (see __bool__).
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 spells the truthiness hook __nonzero__ instead of __bool__.
  __nonzero__ = __bool__

  # functools.total_ordering fills in the remaining comparisons from
  # __eq__ plus __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
259
260
Doug Zongkerfc44a512014-08-26 13:10:25 -0700261# BlockImageDiff works on two image objects. An image object is
262# anything that provides the following attributes:
263#
264# blocksize: the size in bytes of a block, currently must be 4096.
265#
266# total_blocks: the total size of the partition/image, in blocks.
267#
268# care_map: a RangeSet containing which blocks (in the range [0,
269# total_blocks) we actually care about; i.e. which blocks contain
270# data.
271#
272# file_map: a dict that partitions the blocks contained in care_map
273# into smaller domains that are useful for doing diffs on.
274# (Typically a domain is a file, and the key in file_map is the
275# pathname.)
276#
Tao Baoff777812015-05-12 11:42:31 -0700277# clobbered_blocks: a RangeSet containing which blocks contain data
278# but may be altered by the FS. They need to be excluded when
279# verifying the partition integrity.
280#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700281# ReadRangeSet(): a function that takes a RangeSet and returns the
282# data contained in the image blocks of that RangeSet. The data
283# is returned as a list or tuple of strings; concatenating the
284# elements together should produce the requested data.
285# Implementations are free to break up the data into list/tuple
286# elements in any way that is convenient.
287#
Tao Bao183e56e2017-03-05 17:05:09 -0800288# RangeSha1(): a function that returns (as a hex string) the SHA-1
289# hash of all the data in the specified range.
290#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700291# TotalSha1(): a function that returns (as a hex string) the SHA-1
292# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700293# care_map minus clobbered_blocks, or including the clobbered
294# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700295#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700296# When creating a BlockImageDiff, the src image may be None, in which
297# case the list of transfers produced will never read from the
298# original image.
299
300class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700301 def __init__(self, tgt, src=None, threads=None, version=4,
302 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700303 if threads is None:
304 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700305 if threads == 0:
306 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700307 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700308 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700309 self.transfers = []
310 self.src_basenames = {}
311 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800312 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700313 self.touched_src_ranges = RangeSet()
314 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700315 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700316
Tao Bao8fad03e2017-03-01 14:36:26 -0800317 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700318
319 self.tgt = tgt
320 if src is None:
321 src = EmptyImage()
322 self.src = src
323
324 # The updater code that installs the patch always uses 4k blocks.
325 assert tgt.blocksize == 4096
326 assert src.blocksize == 4096
327
328 # The range sets in each filemap should comprise a partition of
329 # the care map.
330 self.AssertPartition(src.care_map, src.file_map.values())
331 self.AssertPartition(tgt.care_map, tgt.file_map.values())
332
Tao Baob4cfca52016-02-04 14:26:02 -0800333 @property
334 def max_stashed_size(self):
335 return self._max_stashed_size
336
Doug Zongkerfc44a512014-08-26 13:10:25 -0700337 def Compute(self, prefix):
338 # When looking for a source file to use as the diff input for a
339 # target file, we try:
340 # 1) an exact path match if available, otherwise
341 # 2) a exact basename match if available, otherwise
342 # 3) a basename match after all runs of digits are replaced by
343 # "#" if available, otherwise
344 # 4) we have no source for this target.
345 self.AbbreviateSourceNames()
346 self.FindTransfers()
347
348 # Find the ordering dependencies among transfers (this is O(n^2)
349 # in the number of transfers).
350 self.GenerateDigraph()
351 # Find a sequence of transfers that satisfies as many ordering
352 # dependencies as possible (heuristically).
353 self.FindVertexSequence()
354 # Fix up the ordering dependencies that the sequence didn't
355 # satisfy.
Tao Bao8fad03e2017-03-01 14:36:26 -0800356 self.ReverseBackwardEdges()
357 self.ImproveVertexSequence()
Doug Zongker62338182014-09-08 08:29:55 -0700358
Tao Bao82c47982015-08-17 09:45:13 -0700359 # Ensure the runtime stash size is under the limit.
Tao Bao8fad03e2017-03-01 14:36:26 -0800360 if common.OPTIONS.cache_size is not None:
Tao Bao82c47982015-08-17 09:45:13 -0700361 self.ReviseStashSize()
362
Doug Zongkerfc44a512014-08-26 13:10:25 -0700363 # Double-check our work.
364 self.AssertSequenceGood()
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800365 self.AssertSha1Good()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700366
367 self.ComputePatches(prefix)
368 self.WriteTransfers(prefix)
369
370 def WriteTransfers(self, prefix):
Tianjie Xu37e29302016-06-23 16:10:35 -0700371 def WriteSplitTransfers(out, style, target_blocks):
372 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
Tianjie Xubb848c52016-06-21 15:54:09 -0700373
374 This prevents the target size of one command from being too large; and
375 might help to avoid fsync errors on some devices."""
376
Tao Bao3a2e3502016-12-28 09:14:39 -0800377 assert style == "new" or style == "zero"
Tianjie Xu37e29302016-06-23 16:10:35 -0700378 blocks_limit = 1024
Tianjie Xubb848c52016-06-21 15:54:09 -0700379 total = 0
Tianjie Xu37e29302016-06-23 16:10:35 -0700380 while target_blocks:
381 blocks_to_write = target_blocks.first(blocks_limit)
382 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
383 total += blocks_to_write.size()
384 target_blocks = target_blocks.subtract(blocks_to_write)
Tianjie Xubb848c52016-06-21 15:54:09 -0700385 return total
386
Doug Zongkerfc44a512014-08-26 13:10:25 -0700387 out = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700388 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700389
Tao Bao3a2e3502016-12-28 09:14:39 -0800390 # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
391 # id. 'stashes' records the map from 'hash' to the ref count. The stash
392 # will be freed only if the count decrements to zero.
Doug Zongker62338182014-09-08 08:29:55 -0700393 stashes = {}
394 stashed_blocks = 0
395 max_stashed_blocks = 0
396
Doug Zongkerfc44a512014-08-26 13:10:25 -0700397 for xf in self.transfers:
398
Tao Bao8fad03e2017-03-01 14:36:26 -0800399 for _, sr in xf.stash_before:
400 sh = self.src.RangeSha1(sr)
401 if sh in stashes:
402 stashes[sh] += 1
Sami Tolvanendd67a292014-12-09 16:40:34 +0000403 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800404 stashes[sh] = 1
405 stashed_blocks += sr.size()
406 self.touched_src_ranges = self.touched_src_ranges.union(sr)
407 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700408
409 if stashed_blocks > max_stashed_blocks:
410 max_stashed_blocks = stashed_blocks
411
Jesse Zhao7b985f62015-03-02 16:53:08 -0800412 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800413 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800414
Tao Bao8fad03e2017-03-01 14:36:26 -0800415 # <# blocks> <src ranges>
416 # OR
417 # <# blocks> <src ranges> <src locs> <stash refs...>
418 # OR
419 # <# blocks> - <stash refs...>
Doug Zongker62338182014-09-08 08:29:55 -0700420
Tao Bao8fad03e2017-03-01 14:36:26 -0800421 size = xf.src_ranges.size()
422 src_str = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700423
Tao Bao8fad03e2017-03-01 14:36:26 -0800424 unstashed_src_ranges = xf.src_ranges
425 mapped_stashes = []
426 for _, sr in xf.use_stash:
427 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
428 sh = self.src.RangeSha1(sr)
429 sr = xf.src_ranges.map_within(sr)
430 mapped_stashes.append(sr)
431 assert sh in stashes
432 src_str.append("%s:%s" % (sh, sr.to_string_raw()))
433 stashes[sh] -= 1
434 if stashes[sh] == 0:
435 free_string.append("free %s\n" % (sh,))
436 free_size += sr.size()
437 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700438
Tao Bao8fad03e2017-03-01 14:36:26 -0800439 if unstashed_src_ranges:
440 src_str.insert(1, unstashed_src_ranges.to_string_raw())
441 if xf.use_stash:
442 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
443 src_str.insert(2, mapped_unstashed.to_string_raw())
444 mapped_stashes.append(mapped_unstashed)
Doug Zongker62338182014-09-08 08:29:55 -0700445 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Tao Bao8fad03e2017-03-01 14:36:26 -0800446 else:
447 src_str.insert(1, "-")
448 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Doug Zongker62338182014-09-08 08:29:55 -0700449
Tao Bao8fad03e2017-03-01 14:36:26 -0800450 src_str = " ".join(src_str)
Doug Zongker62338182014-09-08 08:29:55 -0700451
Tao Bao8fad03e2017-03-01 14:36:26 -0800452 # version 3+:
Doug Zongker62338182014-09-08 08:29:55 -0700453 # zero <rangeset>
454 # new <rangeset>
455 # erase <rangeset>
Dan Albert8b72aef2015-03-23 19:13:21 -0700456 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
457 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
458 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700459
460 tgt_size = xf.tgt_ranges.size()
461
462 if xf.style == "new":
463 assert xf.tgt_ranges
Tianjie Xu37e29302016-06-23 16:10:35 -0700464 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700465 total += tgt_size
466 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700467 assert xf.tgt_ranges
468 assert xf.src_ranges.size() == tgt_size
469 if xf.src_ranges != xf.tgt_ranges:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100470 # take into account automatic stashing of overlapping blocks
471 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700472 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100473 if temp_stash_usage > max_stashed_blocks:
474 max_stashed_blocks = temp_stash_usage
475
Tao Baod522bdc2016-04-12 15:53:16 -0700476 self.touched_src_ranges = self.touched_src_ranges.union(
477 xf.src_ranges)
478
Tao Bao8fad03e2017-03-01 14:36:26 -0800479 out.append("%s %s %s %s\n" % (
Sami Tolvanendd67a292014-12-09 16:40:34 +0000480 xf.style,
Tao Bao183e56e2017-03-05 17:05:09 -0800481 xf.tgt_sha1,
Dan Albert8b72aef2015-03-23 19:13:21 -0700482 xf.tgt_ranges.to_string_raw(), src_str))
Tao Bao8fad03e2017-03-01 14:36:26 -0800483 total += tgt_size
484 elif xf.style in ("bsdiff", "imgdiff"):
485 assert xf.tgt_ranges
486 assert xf.src_ranges
487 # take into account automatic stashing of overlapping blocks
488 if xf.src_ranges.overlaps(xf.tgt_ranges):
489 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
490 if temp_stash_usage > max_stashed_blocks:
491 max_stashed_blocks = temp_stash_usage
492
493 self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
494
495 out.append("%s %d %d %s %s %s %s\n" % (
496 xf.style,
497 xf.patch_start, xf.patch_len,
498 xf.src_sha1,
499 xf.tgt_sha1,
500 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700501 total += tgt_size
502 elif xf.style == "zero":
503 assert xf.tgt_ranges
504 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
Tianjie Xu37e29302016-06-23 16:10:35 -0700505 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
Tianjie Xubb848c52016-06-21 15:54:09 -0700506 total += to_zero.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700507 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700508 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700509
Sami Tolvanendd67a292014-12-09 16:40:34 +0000510 if free_string:
511 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800512 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000513
Tao Bao8fad03e2017-03-01 14:36:26 -0800514 if common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700515 # Sanity check: abort if we're going to need more stash space than
516 # the allowed size (cache_size * threshold). There are two purposes
517 # of having a threshold here. a) Part of the cache may have been
518 # occupied by some recovery logs. b) It will buy us some time to deal
519 # with the oversize issue.
520 cache_size = common.OPTIONS.cache_size
521 stash_threshold = common.OPTIONS.stash_threshold
522 max_allowed = cache_size * stash_threshold
Tao Baoe8c68a02017-02-26 10:48:11 -0800523 assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
Tao Bao8dcf7382015-05-21 14:09:49 -0700524 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
525 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
526 self.tgt.blocksize, max_allowed, cache_size,
527 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700528
Tao Bao8fad03e2017-03-01 14:36:26 -0800529 self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
Tao Baod522bdc2016-04-12 15:53:16 -0700530
Tao Baoe9b61912015-07-09 17:37:49 -0700531 # Zero out extended blocks as a workaround for bug 20881595.
532 if self.tgt.extended:
Tianjie Xu37e29302016-06-23 16:10:35 -0700533 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
Tianjie Xubb848c52016-06-21 15:54:09 -0700534 self.tgt.extended.size())
Tao Baob32d56e2015-09-09 11:55:01 -0700535 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700536
537 # We erase all the blocks on the partition that a) don't contain useful
Tao Bao66f1fa62016-05-03 10:02:01 -0700538 # data in the new image; b) will not be touched by dm-verity. Out of those
539 # blocks, we erase the ones that won't be used in this update at the
540 # beginning of an update. The rest would be erased at the end. This is to
541 # work around the eMMC issue observed on some devices, which may otherwise
542 # get starving for clean blocks and thus fail the update. (b/28347095)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700543 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700544 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
545 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
Tao Bao66f1fa62016-05-03 10:02:01 -0700546
547 erase_first = new_dontcare.subtract(self.touched_src_ranges)
548 if erase_first:
549 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
550
551 erase_last = new_dontcare.subtract(erase_first)
552 if erase_last:
553 out.append("erase %s\n" % (erase_last.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700554
555 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700556 out.insert(1, "%d\n" % (total,))
Tao Bao8fad03e2017-03-01 14:36:26 -0800557 # v3+: the number of stash slots is unused.
558 out.insert(2, "0\n")
559 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700560
561 with open(prefix + ".transfer.list", "wb") as f:
562 for i in out:
563 f.write(i)
564
Tao Bao8fad03e2017-03-01 14:36:26 -0800565 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
566 OPTIONS = common.OPTIONS
567 if OPTIONS.cache_size is not None:
568 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
569 print("max stashed blocks: %d (%d bytes), "
570 "limit: %d bytes (%.2f%%)\n" % (
571 max_stashed_blocks, self._max_stashed_size, max_allowed,
572 self._max_stashed_size * 100.0 / max_allowed))
573 else:
574 print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
575 max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700576
Tao Bao82c47982015-08-17 09:45:13 -0700577 def ReviseStashSize(self):
578 print("Revising stash size...")
Tao Baoe27acfd2016-12-16 11:13:55 -0800579 stash_map = {}
Tao Bao82c47982015-08-17 09:45:13 -0700580
581 # Create the map between a stash and its def/use points. For example, for a
Tao Bao3c5a16d2017-02-13 11:42:50 -0800582 # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
Tao Bao82c47982015-08-17 09:45:13 -0700583 for xf in self.transfers:
584 # Command xf defines (stores) all the stashes in stash_before.
Tao Bao3a2e3502016-12-28 09:14:39 -0800585 for stash_raw_id, sr in xf.stash_before:
586 stash_map[stash_raw_id] = (sr, xf)
Tao Bao82c47982015-08-17 09:45:13 -0700587
588 # Record all the stashes command xf uses.
Tao Bao3a2e3502016-12-28 09:14:39 -0800589 for stash_raw_id, _ in xf.use_stash:
590 stash_map[stash_raw_id] += (xf,)
Tao Bao82c47982015-08-17 09:45:13 -0700591
592 # Compute the maximum blocks available for stash based on /cache size and
593 # the threshold.
594 cache_size = common.OPTIONS.cache_size
595 stash_threshold = common.OPTIONS.stash_threshold
596 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
597
Tao Bao3a2e3502016-12-28 09:14:39 -0800598 # See the comments for 'stashes' in WriteTransfers().
Tao Baoe27acfd2016-12-16 11:13:55 -0800599 stashes = {}
Tao Bao82c47982015-08-17 09:45:13 -0700600 stashed_blocks = 0
Tao Bao9a5caf22015-08-25 15:10:10 -0700601 new_blocks = 0
Tao Bao82c47982015-08-17 09:45:13 -0700602
603 # Now go through all the commands. Compute the required stash size on the
604 # fly. If a command requires excess stash than available, it deletes the
605 # stash by replacing the command that uses the stash with a "new" command
606 # instead.
607 for xf in self.transfers:
608 replaced_cmds = []
609
610 # xf.stash_before generates explicit stash commands.
Tao Bao3a2e3502016-12-28 09:14:39 -0800611 for stash_raw_id, sr in xf.stash_before:
Tao Baoe27acfd2016-12-16 11:13:55 -0800612 # Check the post-command stashed_blocks.
613 stashed_blocks_after = stashed_blocks
Tao Bao8fad03e2017-03-01 14:36:26 -0800614 sh = self.src.RangeSha1(sr)
615 if sh not in stashes:
Tao Baoe27acfd2016-12-16 11:13:55 -0800616 stashed_blocks_after += sr.size()
Tao Baoe27acfd2016-12-16 11:13:55 -0800617
618 if stashed_blocks_after > max_allowed:
Tao Bao82c47982015-08-17 09:45:13 -0700619 # We cannot stash this one for a later command. Find out the command
620 # that will use this stash and replace the command with "new".
Tao Bao3a2e3502016-12-28 09:14:39 -0800621 use_cmd = stash_map[stash_raw_id][2]
Tao Bao82c47982015-08-17 09:45:13 -0700622 replaced_cmds.append(use_cmd)
Tao Bao9a5caf22015-08-25 15:10:10 -0700623 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao82c47982015-08-17 09:45:13 -0700624 else:
Tao Bao3c5a16d2017-02-13 11:42:50 -0800625 # Update the stashes map.
Tao Bao8fad03e2017-03-01 14:36:26 -0800626 if sh in stashes:
627 stashes[sh] += 1
Tao Bao3c5a16d2017-02-13 11:42:50 -0800628 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800629 stashes[sh] = 1
Tao Baoe27acfd2016-12-16 11:13:55 -0800630 stashed_blocks = stashed_blocks_after
Tao Bao82c47982015-08-17 09:45:13 -0700631
632 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
633 # ComputePatches(), they both have the style of "diff".
Tao Bao8fad03e2017-03-01 14:36:26 -0800634 if xf.style == "diff":
Tao Bao82c47982015-08-17 09:45:13 -0700635 assert xf.tgt_ranges and xf.src_ranges
636 if xf.src_ranges.overlaps(xf.tgt_ranges):
637 if stashed_blocks + xf.src_ranges.size() > max_allowed:
638 replaced_cmds.append(xf)
Tao Bao9a5caf22015-08-25 15:10:10 -0700639 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao82c47982015-08-17 09:45:13 -0700640
641 # Replace the commands in replaced_cmds with "new"s.
642 for cmd in replaced_cmds:
643 # It no longer uses any commands in "use_stash". Remove the def points
644 # for all those stashes.
Tao Bao3a2e3502016-12-28 09:14:39 -0800645 for stash_raw_id, sr in cmd.use_stash:
646 def_cmd = stash_map[stash_raw_id][1]
647 assert (stash_raw_id, sr) in def_cmd.stash_before
648 def_cmd.stash_before.remove((stash_raw_id, sr))
Tao Bao82c47982015-08-17 09:45:13 -0700649
Tianjie Xuebe39a02016-01-14 14:12:26 -0800650 # Add up blocks that violates space limit and print total number to
651 # screen later.
652 new_blocks += cmd.tgt_ranges.size()
Tao Bao82c47982015-08-17 09:45:13 -0700653 cmd.ConvertToNew()
654
Tao Bao3a2e3502016-12-28 09:14:39 -0800655 # xf.use_stash may generate free commands.
Tao Bao8fad03e2017-03-01 14:36:26 -0800656 for _, sr in xf.use_stash:
657 sh = self.src.RangeSha1(sr)
658 assert sh in stashes
659 stashes[sh] -= 1
660 if stashes[sh] == 0:
Tao Baoe27acfd2016-12-16 11:13:55 -0800661 stashed_blocks -= sr.size()
Tao Bao8fad03e2017-03-01 14:36:26 -0800662 stashes.pop(sh)
Tao Baoe27acfd2016-12-16 11:13:55 -0800663
Tianjie Xuebe39a02016-01-14 14:12:26 -0800664 num_of_bytes = new_blocks * self.tgt.blocksize
665 print(" Total %d blocks (%d bytes) are packed as new blocks due to "
666 "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao304ee272016-12-19 11:01:38 -0800667 return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700668
Doug Zongkerfc44a512014-08-26 13:10:25 -0700669 def ComputePatches(self, prefix):
670 print("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800671 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700672 patch_num = 0
673 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800674 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700675 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700676 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
677 print("%10d %10d (%6.2f%%) %7s %s %s" % (
678 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
679 str(xf.tgt_ranges)))
680
Doug Zongkerfc44a512014-08-26 13:10:25 -0700681 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800682 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700683 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
684 print("%10d %10d (%6.2f%%) %7s %s %s" % (
685 tgt_size, tgt_size, 100.0, xf.style,
686 xf.tgt_name, str(xf.tgt_ranges)))
687
Doug Zongkerfc44a512014-08-26 13:10:25 -0700688 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700689 # We can't compare src and tgt directly because they may have
690 # the same content but be broken up into blocks differently, eg:
691 #
692 # ["he", "llo"] vs ["h", "ello"]
693 #
694 # We want those to compare equal, ideally without having to
695 # actually concatenate the strings (these may be tens of
696 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800697 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700698 # These are identical; we don't need to generate a patch,
699 # just issue copy commands on the device.
700 xf.style = "move"
Tianjie Xu25366072017-09-08 17:19:02 -0700701 xf.patch = None
Tao Bao183e56e2017-03-05 17:05:09 -0800702 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700703 if xf.src_ranges != xf.tgt_ranges:
704 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
705 tgt_size, tgt_size, 100.0, xf.style,
706 xf.tgt_name if xf.tgt_name == xf.src_name else (
707 xf.tgt_name + " (from " + xf.src_name + ")"),
708 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700709 else:
Tianjie Xu25366072017-09-08 17:19:02 -0700710 if xf.patch:
711 # We have already generated the patch with imgdiff. Check if the
712 # transfer is intact.
713 assert not self.disable_imgdiff
714 imgdiff = True
715 if not xf.intact:
716 imgdiff = False
717 xf.patch = None
718 else:
719 # For files in zip format (eg, APKs, JARs, etc.) we would
720 # like to use imgdiff -z if possible (because it usually
721 # produces significantly smaller patches than bsdiff).
722 # This is permissible if:
723 #
724 # - imgdiff is not disabled, and
725 # - the source and target files are monotonic (ie, the
726 # data is stored with blocks in increasing order), and
727 # - we haven't removed any blocks from the source set.
728 #
729 # If these conditions are satisfied then appending all the
730 # blocks in the set together in order will produce a valid
731 # zip file (plus possibly extra zeros in the last block),
732 # which is what imgdiff needs to operate. (imgdiff is
733 # fine with extra zeros at the end of the file.)
734 imgdiff = (not self.disable_imgdiff and xf.intact and
735 xf.tgt_name.split(".")[-1].lower()
736 in ("apk", "jar", "zip"))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700737 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800738 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700739 patch_num += 1
740
741 else:
742 assert False, "unknown style " + xf.style
743
Tao Bao183e56e2017-03-05 17:05:09 -0800744 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700745 if self.threads > 1:
746 print("Computing patches (using %d threads)..." % (self.threads,))
747 else:
748 print("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700749
Tao Bao183e56e2017-03-05 17:05:09 -0800750 diff_total = len(diff_queue)
751 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700752 error_messages = []
Tao Baob937ead2017-10-19 16:51:53 -0700753 warning_messages = []
Tao Bao33635b12017-03-12 13:02:51 -0700754 if sys.stdout.isatty():
755 global diff_done
756 diff_done = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700757
Tao Bao183e56e2017-03-05 17:05:09 -0800758 # Using multiprocessing doesn't give additional benefits, due to the
759 # pattern of the code. The diffing work is done by subprocess.call, which
760 # already runs in a separate process (not affected much by the GIL -
761 # Global Interpreter Lock). Using multiprocess also requires either a)
762 # writing the diff input files in the main process before forking, or b)
763 # reopening the image file (SparseImage) in the worker processes. Doing
764 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700765 lock = threading.Lock()
766 def diff_worker():
767 while True:
768 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800769 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700770 return
Tao Bao183e56e2017-03-05 17:05:09 -0800771 xf_index, imgdiff, patch_index = diff_queue.pop()
772
773 xf = self.transfers[xf_index]
Tianjie Xu25366072017-09-08 17:19:02 -0700774 patch = xf.patch
775 if not patch:
776 src_ranges = xf.src_ranges
777 tgt_ranges = xf.tgt_ranges
Tao Bao183e56e2017-03-05 17:05:09 -0800778
Tianjie Xudf1166e2018-01-27 17:35:41 -0800779 src_file = common.MakeTempFile(prefix="src-")
780 with open(src_file, "wb") as fd:
781 self.src.WriteRangeDataToFd(src_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700782
Tianjie Xudf1166e2018-01-27 17:35:41 -0800783 tgt_file = common.MakeTempFile(prefix="tgt-")
784 with open(tgt_file, "wb") as fd:
785 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700786
787 message = []
788 try:
789 patch = compute_patch(src_file, tgt_file, imgdiff)
790 except ValueError as e:
791 message.append(
792 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
793 "imgdiff" if imgdiff else "bsdiff",
794 xf.tgt_name if xf.tgt_name == xf.src_name else
795 xf.tgt_name + " (from " + xf.src_name + ")",
796 xf.tgt_ranges, xf.src_ranges, e.message))
797 # TODO(b/68016761): Better handle the holes in mke2fs created
798 # images.
799 if imgdiff:
800 try:
801 patch = compute_patch(src_file, tgt_file, imgdiff=False)
802 message.append(
803 "Fell back and generated with bsdiff instead for %s" % (
804 xf.tgt_name,))
805 xf.style = "bsdiff"
806 with lock:
807 warning_messages.extend(message)
808 del message[:]
809 except ValueError as e:
810 message.append(
811 "Also failed to generate with bsdiff for %s:\n%s" % (
812 xf.tgt_name, e.message))
813
814 if message:
815 with lock:
816 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800817
818 with lock:
819 patches[patch_index] = (xf_index, patch)
820 if sys.stdout.isatty():
Tao Bao33635b12017-03-12 13:02:51 -0700821 global diff_done
822 diff_done += 1
823 progress = diff_done * 100 / diff_total
Tao Bao183e56e2017-03-05 17:05:09 -0800824 # '\033[K' is to clear to EOL.
825 print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
826 sys.stdout.flush()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700827
828 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700829 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700830 for th in threads:
831 th.start()
832 while threads:
833 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800834
835 if sys.stdout.isatty():
836 print('\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700837
Tao Baob937ead2017-10-19 16:51:53 -0700838 if warning_messages:
839 print('WARNING:')
840 print('\n'.join(warning_messages))
841 print('\n\n\n')
842
Tianjie Xub59c17f2016-10-28 17:55:53 -0700843 if error_messages:
Tao Baob937ead2017-10-19 16:51:53 -0700844 print('ERROR:')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700845 print('\n'.join(error_messages))
Tao Baob937ead2017-10-19 16:51:53 -0700846 print('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700847 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700848 else:
849 patches = []
850
Tao Bao183e56e2017-03-05 17:05:09 -0800851 offset = 0
852 with open(prefix + ".patch.dat", "wb") as patch_fd:
853 for index, patch in patches:
854 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700855 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800856 xf.patch_start = offset
857 offset += xf.patch_len
858 patch_fd.write(patch)
859
860 if common.OPTIONS.verbose:
861 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
862 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
863 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
864 xf.style,
865 xf.tgt_name if xf.tgt_name == xf.src_name else (
866 xf.tgt_name + " (from " + xf.src_name + ")"),
867 xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700868
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800869 def AssertSha1Good(self):
870 """Check the SHA-1 of the src & tgt blocks in the transfer list.
871
872 Double check the SHA-1 value to avoid the issue in b/71908713, where
873 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
874 environment. That specific problem has been fixed by protecting the
875 underlying generator function 'SparseImage._GetRangeData()' with lock.
876 """
877 for xf in self.transfers:
878 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
879 assert xf.tgt_sha1 == tgt_sha1
880 if xf.style == "diff":
881 src_sha1 = self.src.RangeSha1(xf.src_ranges)
882 assert xf.src_sha1 == src_sha1
883
Doug Zongkerfc44a512014-08-26 13:10:25 -0700884 def AssertSequenceGood(self):
885 # Simulate the sequences of transfers we will output, and check that:
886 # - we never read a block after writing it, and
887 # - we write every block we care about exactly once.
888
889 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800890 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700891
892 # Imagine processing the transfers in order.
893 for xf in self.transfers:
894 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700895
896 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800897 for _, sr in xf.use_stash:
898 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700899
Doug Zongker6ab2a502016-02-09 08:28:09 -0800900 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800901 # Source image could be larger. Don't check the blocks that are in the
902 # source image only. Since they are not in 'touched', and won't ever
903 # be touched.
904 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800905 assert touched[i] == 0
906
907 # Check that the output blocks for this transfer haven't yet
908 # been touched, and touch all the blocks written by this
909 # transfer.
910 for s, e in xf.tgt_ranges:
911 for i in range(s, e):
912 assert touched[i] == 0
913 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700914
915 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800916 for s, e in self.tgt.care_map:
917 for i in range(s, e):
918 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700919
Doug Zongker62338182014-09-08 08:29:55 -0700920 def ImproveVertexSequence(self):
921 print("Improving vertex order...")
922
923 # At this point our digraph is acyclic; we reversed any edges that
924 # were backwards in the heuristically-generated sequence. The
925 # previously-generated order is still acceptable, but we hope to
926 # find a better order that needs less memory for stashed data.
927 # Now we do a topological sort to generate a new vertex order,
928 # using a greedy algorithm to choose which vertex goes next
929 # whenever we have a choice.
930
931 # Make a copy of the edge set; this copy will get destroyed by the
932 # algorithm.
933 for xf in self.transfers:
934 xf.incoming = xf.goes_after.copy()
935 xf.outgoing = xf.goes_before.copy()
936
937 L = [] # the new vertex order
938
939 # S is the set of sources in the remaining graph; we always choose
940 # the one that leaves the least amount of stashed data after it's
941 # executed.
942 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
943 if not u.incoming]
944 heapq.heapify(S)
945
946 while S:
947 _, _, xf = heapq.heappop(S)
948 L.append(xf)
949 for u in xf.outgoing:
950 del u.incoming[xf]
951 if not u.incoming:
952 heapq.heappush(S, (u.NetStashChange(), u.order, u))
953
954 # if this fails then our graph had a cycle.
955 assert len(L) == len(self.transfers)
956
957 self.transfers = L
958 for i, xf in enumerate(L):
959 xf.order = i
960
Doug Zongkerfc44a512014-08-26 13:10:25 -0700961 def RemoveBackwardEdges(self):
962 print("Removing backward edges...")
963 in_order = 0
964 out_of_order = 0
965 lost_source = 0
966
967 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700968 lost = 0
969 size = xf.src_ranges.size()
970 for u in xf.goes_before:
971 # xf should go before u
972 if xf.order < u.order:
973 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700974 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700975 else:
976 # it doesn't, boo. trim the blocks that u writes from xf's
977 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700978 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700979 assert xf.src_ranges.overlaps(u.tgt_ranges)
980 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
981 xf.intact = False
982
983 if xf.style == "diff" and not xf.src_ranges:
984 # nothing left to diff from; treat as new data
985 xf.style = "new"
986
987 lost = size - xf.src_ranges.size()
988 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700989
990 print((" %d/%d dependencies (%.2f%%) were violated; "
991 "%d source blocks removed.") %
992 (out_of_order, in_order + out_of_order,
993 (out_of_order * 100.0 / (in_order + out_of_order))
994 if (in_order + out_of_order) else 0.0,
995 lost_source))
996
Doug Zongker62338182014-09-08 08:29:55 -0700997 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -0800998 """Reverse unsatisfying edges and compute pairs of stashed blocks.
999
1000 For each transfer, make sure it properly stashes the blocks it touches and
1001 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1002 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1003 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1004 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1005 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1006 blocks will be written to the same stash slot in WriteTransfers().
1007 """
1008
Doug Zongker62338182014-09-08 08:29:55 -07001009 print("Reversing backward edges...")
1010 in_order = 0
1011 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001012 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001013 stash_size = 0
1014
1015 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001016 for u in xf.goes_before.copy():
1017 # xf should go before u
1018 if xf.order < u.order:
1019 # it does, hurray!
1020 in_order += 1
1021 else:
1022 # it doesn't, boo. modify u to stash the blocks that it
1023 # writes that xf wants to read, and then require u to go
1024 # before xf.
1025 out_of_order += 1
1026
1027 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1028 assert overlap
1029
Tao Bao3a2e3502016-12-28 09:14:39 -08001030 u.stash_before.append((stash_raw_id, overlap))
1031 xf.use_stash.append((stash_raw_id, overlap))
1032 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001033 stash_size += overlap.size()
1034
1035 # reverse the edge direction; now xf must go after u
1036 del xf.goes_before[u]
1037 del u.goes_after[xf]
1038 xf.goes_after[u] = None # value doesn't matter
1039 u.goes_before[xf] = None
1040
1041 print((" %d/%d dependencies (%.2f%%) were violated; "
1042 "%d source blocks stashed.") %
1043 (out_of_order, in_order + out_of_order,
1044 (out_of_order * 100.0 / (in_order + out_of_order))
1045 if (in_order + out_of_order) else 0.0,
1046 stash_size))
1047
Doug Zongkerfc44a512014-08-26 13:10:25 -07001048 def FindVertexSequence(self):
1049 print("Finding vertex sequence...")
1050
1051 # This is based on "A Fast & Effective Heuristic for the Feedback
1052 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1053 # it as starting with the digraph G and moving all the vertices to
1054 # be on a horizontal line in some order, trying to minimize the
1055 # number of edges that end up pointing to the left. Left-pointing
1056 # edges will get removed to turn the digraph into a DAG. In this
1057 # case each edge has a weight which is the number of source blocks
1058 # we'll lose if that edge is removed; we try to minimize the total
1059 # weight rather than just the number of edges.
1060
1061 # Make a copy of the edge set; this copy will get destroyed by the
1062 # algorithm.
1063 for xf in self.transfers:
1064 xf.incoming = xf.goes_after.copy()
1065 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001066 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001067
1068 # We use an OrderedDict instead of just a set so that the output
1069 # is repeatable; otherwise it would depend on the hash values of
1070 # the transfer objects.
1071 G = OrderedDict()
1072 for xf in self.transfers:
1073 G[xf] = None
1074 s1 = deque() # the left side of the sequence, built from left to right
1075 s2 = deque() # the right side of the sequence, built from right to left
1076
Doug Zongker6ab2a502016-02-09 08:28:09 -08001077 heap = []
1078 for xf in self.transfers:
1079 xf.heap_item = HeapItem(xf)
1080 heap.append(xf.heap_item)
1081 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001082
Tao Bao33482282016-10-24 16:49:08 -07001083 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1084 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1085 # { key1: None, key2: None, ... }.
1086 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1087 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001088
1089 def adjust_score(iu, delta):
1090 iu.score += delta
1091 iu.heap_item.clear()
1092 iu.heap_item = HeapItem(iu)
1093 heapq.heappush(heap, iu.heap_item)
1094
1095 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001096 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001097 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001098 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001099 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001100 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001101 s2.appendleft(u)
1102 del G[u]
1103 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001104 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001105 if not iu.outgoing:
1106 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001107 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001108
1109 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001110 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001111 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001112 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001113 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001114 s1.append(u)
1115 del G[u]
1116 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001117 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001118 if not iu.incoming:
1119 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001120 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001121
Doug Zongker6ab2a502016-02-09 08:28:09 -08001122 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001123
1124 # Find the "best" vertex to put next. "Best" is the one that
1125 # maximizes the net difference in source blocks saved we get by
1126 # pretending it's a source rather than a sink.
1127
Doug Zongker6ab2a502016-02-09 08:28:09 -08001128 while True:
1129 u = heapq.heappop(heap)
1130 if u and u.item in G:
1131 u = u.item
1132 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001133
Doug Zongkerfc44a512014-08-26 13:10:25 -07001134 s1.append(u)
1135 del G[u]
1136 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001137 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001138 if not iu.incoming:
1139 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001140
Doug Zongkerfc44a512014-08-26 13:10:25 -07001141 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001142 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001143 if not iu.outgoing:
1144 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001145
1146 # Now record the sequence in the 'order' field of each transfer,
1147 # and by rearranging self.transfers to be in the chosen sequence.
1148
1149 new_transfers = []
1150 for x in itertools.chain(s1, s2):
1151 x.order = len(new_transfers)
1152 new_transfers.append(x)
1153 del x.incoming
1154 del x.outgoing
1155
1156 self.transfers = new_transfers
1157
1158 def GenerateDigraph(self):
1159 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001160
1161 # Each item of source_ranges will be:
1162 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001163 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001164 source_ranges = []
1165 for b in self.transfers:
1166 for s, e in b.src_ranges:
1167 if e > len(source_ranges):
1168 source_ranges.extend([None] * (e-len(source_ranges)))
1169 for i in range(s, e):
1170 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001171 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001172 else:
Tao Bao33482282016-10-24 16:49:08 -07001173 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001174
Doug Zongkerfc44a512014-08-26 13:10:25 -07001175 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001176 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001177 for s, e in a.tgt_ranges:
1178 for i in range(s, e):
1179 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001180 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1181 if source_ranges[i] is not None:
1182 for j in source_ranges[i]:
1183 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001184
1185 for b in intersections:
1186 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001187
1188 # If the blocks written by A are read by B, then B needs to go before A.
1189 i = a.tgt_ranges.intersect(b.src_ranges)
1190 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001191 if b.src_name == "__ZERO":
1192 # the cost of removing source blocks for the __ZERO domain
1193 # is (nearly) zero.
1194 size = 0
1195 else:
1196 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001197 b.goes_before[a] = size
1198 a.goes_after[b] = size
1199
1200 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001201 """Parse the file_map to generate all the transfers."""
1202
Tianjie Xu41390d42017-11-22 11:35:18 -08001203 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1204 src_ranges, style, by_id):
1205 """Add one or multiple Transfer()s by splitting large files.
1206
1207 For BBOTA v3, we need to stash source blocks for resumable feature.
1208 However, with the growth of file size and the shrink of the cache
1209 partition source blocks are too large to be stashed. If a file occupies
1210 too many blocks, we split it into smaller pieces by getting multiple
1211 Transfer()s.
1212
1213 The downside is that after splitting, we may increase the package size
1214 since the split pieces don't align well. According to our experiments,
1215 1/8 of the cache size as the per-piece limit appears to be optimal.
1216 Compared to the fixed 1024-block limit, it reduces the overall package
1217 size by 30% for volantis, and 20% for angler and bullhead."""
1218
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001219 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001220 while (tgt_ranges.size() > max_blocks_per_transfer and
1221 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001222 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1223 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001224 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1225 src_first = src_ranges.first(max_blocks_per_transfer)
1226
Tao Bao183e56e2017-03-05 17:05:09 -08001227 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1228 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1229 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001230
1231 tgt_ranges = tgt_ranges.subtract(tgt_first)
1232 src_ranges = src_ranges.subtract(src_first)
1233 pieces += 1
1234
1235 # Handle remaining blocks.
1236 if tgt_ranges.size() or src_ranges.size():
1237 # Must be both non-empty.
1238 assert tgt_ranges.size() and src_ranges.size()
1239 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1240 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001241 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1242 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1243 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001244
Tianjie Xu41390d42017-11-22 11:35:18 -08001245 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1246 by_id):
1247 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001248
Tianjie Xu41390d42017-11-22 11:35:18 -08001249 This function will construct a list of zip archives, which will later be
1250 split by imgdiff to reduce the final patch size. For the other files,
1251 we will plainly split them based on a fixed chunk size with the potential
1252 patch size penalty.
1253 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001254
1255 assert style == "diff"
1256
1257 # Change nothing for small files.
1258 if (tgt_ranges.size() <= max_blocks_per_transfer and
1259 src_ranges.size() <= max_blocks_per_transfer):
1260 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1261 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1262 style, by_id)
1263 return
1264
1265 if tgt_name.split(".")[-1].lower() in ("apk", "jar", "zip"):
1266 split_enable = (not self.disable_imgdiff and src_ranges.monotonic and
1267 tgt_ranges.monotonic)
1268 if split_enable and (self.tgt.RangeSha1(tgt_ranges) !=
1269 self.src.RangeSha1(src_ranges)):
1270 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1271 return
1272
Tianjie Xu41390d42017-11-22 11:35:18 -08001273 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1274 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001275
Tao Bao08c85832016-09-19 22:26:30 -07001276 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1277 split=False):
1278 """Wrapper function for adding a Transfer()."""
1279
1280 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1281 # otherwise add the Transfer() as is.
1282 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001283 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1284 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1285 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001286 return
1287
1288 # Handle .odex files specially to analyze the block-wise difference. If
1289 # most of the blocks are identical with only few changes (e.g. header),
1290 # we will patch the changed blocks only. This avoids stashing unchanged
1291 # blocks while patching. We limit the analysis to files without size
1292 # changes only. This is to avoid sacrificing the OTA generation cost too
1293 # much.
1294 if (tgt_name.split(".")[-1].lower() == 'odex' and
1295 tgt_ranges.size() == src_ranges.size()):
1296
1297 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1298 # few blocks remain identical, we lose the opportunity to use imgdiff
1299 # that may have better compression ratio than bsdiff.
1300 crop_threshold = 0.5
1301
1302 tgt_skipped = RangeSet()
1303 src_skipped = RangeSet()
1304 tgt_size = tgt_ranges.size()
1305 tgt_changed = 0
1306 for src_block, tgt_block in zip(src_ranges.next_item(),
1307 tgt_ranges.next_item()):
1308 src_rs = RangeSet(str(src_block))
1309 tgt_rs = RangeSet(str(tgt_block))
1310 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1311 tgt_skipped = tgt_skipped.union(tgt_rs)
1312 src_skipped = src_skipped.union(src_rs)
1313 else:
1314 tgt_changed += tgt_rs.size()
1315
1316 # Terminate early if no clear sign of benefits.
1317 if tgt_changed > tgt_size * crop_threshold:
1318 break
1319
1320 if tgt_changed < tgt_size * crop_threshold:
1321 assert tgt_changed + tgt_skipped.size() == tgt_size
1322 print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
1323 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001324 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001325 "%s-skipped" % (tgt_name,),
1326 "%s-skipped" % (src_name,),
1327 tgt_skipped, src_skipped, style, by_id)
1328
1329 # Intentionally change the file extension to avoid being imgdiff'd as
1330 # the files are no longer in their original format.
1331 tgt_name = "%s-cropped" % (tgt_name,)
1332 src_name = "%s-cropped" % (src_name,)
1333 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1334 src_ranges = src_ranges.subtract(src_skipped)
1335
1336 # Possibly having no changed blocks.
1337 if not tgt_ranges:
1338 return
1339
1340 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001341 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001342 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1343
Tianjie Xu25366072017-09-08 17:19:02 -07001344 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1345 split_info):
1346 """Parse the split_info and return a list of info tuples.
1347
1348 Args:
1349 patch_size: total size of the patch file.
1350 tgt_ranges: Ranges of the target file within the original image.
1351 src_ranges: Ranges of the source file within the original image.
1352 split_info format:
1353 imgdiff version#
1354 count of pieces
1355 <patch_size_1> <tgt_size_1> <src_ranges_1>
1356 ...
1357 <patch_size_n> <tgt_size_n> <src_ranges_n>
1358
1359 Returns:
1360 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1361 """
1362
1363 version = int(split_info[0])
1364 assert version == 2
1365 count = int(split_info[1])
1366 assert len(split_info) - 2 == count
1367
1368 split_info_list = []
1369 patch_start = 0
1370 tgt_remain = copy.deepcopy(tgt_ranges)
1371 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1372 for line in split_info[2:]:
1373 info = line.split()
1374 assert len(info) == 3
1375 patch_length = int(info[0])
1376
1377 split_tgt_size = int(info[1])
1378 assert split_tgt_size % 4096 == 0
1379 assert split_tgt_size / 4096 <= tgt_remain.size()
1380 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1381 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1382
1383 # Find the split_src_ranges within the image file from its relative
1384 # position in file.
1385 split_src_indices = RangeSet.parse_raw(info[2])
1386 split_src_ranges = RangeSet()
1387 for r in split_src_indices:
1388 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1389 assert not split_src_ranges.overlaps(curr_range)
1390 split_src_ranges = split_src_ranges.union(curr_range)
1391
1392 split_info_list.append((patch_start, patch_length,
1393 split_tgt_ranges, split_src_ranges))
1394 patch_start += patch_length
1395
1396 # Check that the sizes of all the split pieces add up to the final file
1397 # size for patch and target.
1398 assert tgt_remain.size() == 0
1399 assert patch_start == patch_size
1400 return split_info_list
1401
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001402 def SplitLargeApks():
1403 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001404
1405 Example: Chrome.apk will be split into
1406 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1407 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1408 ...
1409
1410 After the split, the target pieces are continuous and block aligned; and
1411 the source pieces are mutually exclusive. During the split, we also
1412 generate and save the image patch between src-X & tgt-X. This patch will
1413 be valid because the block ranges of src-X & tgt-X will always stay the
1414 same afterwards; but there's a chance we don't use the patch if we
1415 convert the "diff" command into "new" or "move" later.
Tianjie Xu41390d42017-11-22 11:35:18 -08001416
1417 The split will be attempted by calling imgdiff, which expects the input
1418 files to be valid zip archives. If imgdiff fails for some reason (i.e.
1419 holes in the APK file), we will fall back to split the failed APKs into
1420 fixed size chunks.
Tianjie Xu25366072017-09-08 17:19:02 -07001421 """
1422
1423 while True:
1424 with transfer_lock:
1425 if not large_apks:
1426 return
1427 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1428
1429 src_file = common.MakeTempFile(prefix="src-")
1430 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001431 with open(src_file, "wb") as src_fd:
1432 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1433 with open(tgt_file, "wb") as tgt_fd:
1434 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001435
1436 patch_file = common.MakeTempFile(prefix="patch-")
1437 patch_info_file = common.MakeTempFile(prefix="split_info-")
1438 cmd = ["imgdiff", "-z",
1439 "--block-limit={}".format(max_blocks_per_transfer),
1440 "--split-info=" + patch_info_file,
1441 src_file, tgt_file, patch_file]
1442 p = common.Run(cmd, stdout=subprocess.PIPE)
1443 p.communicate()
Tianjie Xu25366072017-09-08 17:19:02 -07001444 if p.returncode != 0:
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001445 print("Failed to create patch between {} and {},"
1446 " falling back to bsdiff".format(src_name, tgt_name))
1447 with transfer_lock:
Tianjie Xu41390d42017-11-22 11:35:18 -08001448 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name,
1449 tgt_ranges, src_ranges,
1450 "diff", self.transfers)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001451 continue
Tianjie Xu25366072017-09-08 17:19:02 -07001452
1453 with open(patch_info_file) as patch_info:
1454 lines = patch_info.readlines()
1455
1456 patch_size_total = os.path.getsize(patch_file)
1457 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1458 tgt_ranges, src_ranges,
1459 lines)
1460 for index, (patch_start, patch_length, split_tgt_ranges,
1461 split_src_ranges) in enumerate(split_info_list):
1462 with open(patch_file) as f:
1463 f.seek(patch_start)
1464 patch_content = f.read(patch_length)
1465
1466 split_src_name = "{}-{}".format(src_name, index)
1467 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001468 split_large_apks.append((split_tgt_name,
1469 split_src_name,
1470 split_tgt_ranges,
1471 split_src_ranges,
1472 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001473
Tao Bao08c85832016-09-19 22:26:30 -07001474 print("Finding transfers...")
1475
Tianjie Xu25366072017-09-08 17:19:02 -07001476 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001477 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001478 cache_size = common.OPTIONS.cache_size
1479 split_threshold = 0.125
1480 max_blocks_per_transfer = int(cache_size * split_threshold /
1481 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001482 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001483 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001484 if tgt_fn == "__ZERO":
1485 # the special "__ZERO" domain is all the blocks not contained
1486 # in any file and that are filled with zeros. We have a
1487 # special transfer style for zero blocks.
1488 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001489 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1490 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001491 continue
1492
Tao Baoff777812015-05-12 11:42:31 -07001493 elif tgt_fn == "__COPY":
1494 # "__COPY" domain includes all the blocks not contained in any
1495 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001496 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001497 continue
1498
Doug Zongkerfc44a512014-08-26 13:10:25 -07001499 elif tgt_fn in self.src.file_map:
1500 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001501 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001502 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001503 continue
1504
1505 b = os.path.basename(tgt_fn)
1506 if b in self.src_basenames:
1507 # Look for an exact basename match in the source.
1508 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001509 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001510 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001511 continue
1512
1513 b = re.sub("[0-9]+", "#", b)
1514 if b in self.src_numpatterns:
1515 # Look for a 'number pattern' match (a basename match after
1516 # all runs of digits are replaced by "#"). (This is useful
1517 # for .so files that contain version numbers in the filename
1518 # that get bumped.)
1519 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001520 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001521 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001522 continue
1523
Tao Bao9a5caf22015-08-25 15:10:10 -07001524 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001525
Tianjie Xu25366072017-09-08 17:19:02 -07001526 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001527 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001528 for _ in range(self.threads)]
1529 for th in threads:
1530 th.start()
1531 while threads:
1532 threads.pop().join()
1533
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001534 # Sort the split transfers for large apks to generate a determinate package.
1535 split_large_apks.sort()
1536 for (tgt_name, src_name, tgt_ranges, src_ranges,
1537 patch) in split_large_apks:
1538 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1539 self.tgt.RangeSha1(tgt_ranges),
1540 self.src.RangeSha1(src_ranges),
1541 "diff", self.transfers)
1542 transfer_split.patch = patch
1543
Doug Zongkerfc44a512014-08-26 13:10:25 -07001544 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001545 for k in self.src.file_map.keys():
1546 b = os.path.basename(k)
1547 self.src_basenames[b] = k
1548 b = re.sub("[0-9]+", "#", b)
1549 self.src_numpatterns[b] = k
1550
1551 @staticmethod
1552 def AssertPartition(total, seq):
1553 """Assert that all the RangeSets in 'seq' form a partition of the
1554 'total' RangeSet (ie, they are nonintersecting and their union
1555 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001556
Doug Zongkerfc44a512014-08-26 13:10:25 -07001557 so_far = RangeSet()
1558 for i in seq:
1559 assert not so_far.overlaps(i)
1560 so_far = so_far.union(i)
1561 assert so_far == total