blob: 931026b11992a494816d1ead75759322bbc995ee [file] [log] [blame]
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Tianjie Xu25366072017-09-08 17:19:02 -070019import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080020import functools
Doug Zongker62338182014-09-08 08:29:55 -070021import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070022import itertools
23import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080028import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070029import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070030
Tao Bao3a2e3502016-12-28 09:14:39 -080031from collections import deque, OrderedDict
32from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
# Public API of this module; everything else is an implementation detail.
__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Return a binary patch that transforms srcfile into tgtfile.

  Uses imgdiff (zip-aware, '-z') when imgdiff is True, bsdiff otherwise.
  Raises ValueError carrying the tool's combined stdout/stderr on failure.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z']
  else:
    cmd = ['bsdiff']
  cmd += [srcfile, tgtfile, patchfile]

  # The command lines only contain temp filenames, so dumping them is not
  # useful for debugging; run quietly.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  tool_output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(tool_output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
class Image(object):
  """Abstract interface that BlockImageDiff requires of an image object."""

  def RangeSha1(self, ranges):
    """Return (as a hex string) the SHA-1 of the data in 'ranges'."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Return the data in 'ranges' as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return (as a hex string) the SHA-1 over the whole care map."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in 'ranges' to the file object 'fd'."""
    raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # No data at all, so every range hashes like the empty string.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    # Nothing to read.
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as an image.

    Args:
      data: the raw byte string backing the image.
      trim: if True, drop a trailing partial block.
      pad: if True, zero-fill a trailing partial block (mutually exclusive
          with trim).

    Raises:
      ValueError: if data is not block-aligned and neither trim nor pad is
          specified.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int even under Python 3
    # (or future-division) semantics; a float here would corrupt the
    # clobbered_blocks indices and the range() below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # NOTE(review): kept as a raw list (as before); RangeSet-consuming
    # callers such as care_map.subtract() appear to accept it — confirm
    # against rangelib.
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block (excluding a padded final block) as zero/nonzero.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    # Generator yielding the raw bytes for each (start, end) block run.
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Return the hex SHA-1 of the data in 'ranges'."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the whole image.

    Clobbered blocks are excluded unless include_clobbered_blocks is True.
    """
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in 'ranges' to the file object 'fd'."""
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """One transfer command: produce one target range set from a source."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style
    # True only when both range sets are monotonic (strictly increasing).
    self.intact = tgt_ranges.monotonic and src_ranges.monotonic

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register with the caller-supplied list; the position doubles as the
    # transfer id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    # Only "diff"-style transfers may carry patch data; clearing (None) is
    # always allowed.
    if patch:
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Return the net block count this transfer adds to the stash."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Turn this transfer into a plain "new" command, dropping source use."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
233
234
@functools.total_ordering
class HeapItem(object):
  """Heap entry that orders score-carrying items by descending score."""

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    """Detach the wrapped item; a cleared entry evaluates as falsy."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__ where Python 3 uses __bool__.
  __nonzero__ = __bool__

  # functools.total_ordering derives the remaining rich comparisons from
  # __eq__ plus __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
258
259
# BlockImageDiff works on two image objects.  An image object is
# anything that provides the following attributes:
#
#    blocksize: the size in bytes of a block, currently must be 4096.
#
#    total_blocks: the total size of the partition/image, in blocks.
#
#    care_map: a RangeSet containing which blocks (in the range [0,
#      total_blocks) we actually care about; i.e. which blocks contain
#      data.
#
#    file_map: a dict that partitions the blocks contained in care_map
#      into smaller domains that are useful for doing diffs on.
#      (Typically a domain is a file, and the key in file_map is the
#      pathname.)
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS. They need to be excluded when
#      verifying the partition integrity.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet.  The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    RangeSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the specified range.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (ie, all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.
298
299class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700300 def __init__(self, tgt, src=None, threads=None, version=4,
301 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700302 if threads is None:
303 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700304 if threads == 0:
305 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700306 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700307 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700308 self.transfers = []
309 self.src_basenames = {}
310 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800311 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700312 self.touched_src_ranges = RangeSet()
313 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700314 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700315
Tao Bao8fad03e2017-03-01 14:36:26 -0800316 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700317
318 self.tgt = tgt
319 if src is None:
320 src = EmptyImage()
321 self.src = src
322
323 # The updater code that installs the patch always uses 4k blocks.
324 assert tgt.blocksize == 4096
325 assert src.blocksize == 4096
326
327 # The range sets in each filemap should comprise a partition of
328 # the care map.
329 self.AssertPartition(src.care_map, src.file_map.values())
330 self.AssertPartition(tgt.care_map, tgt.file_map.values())
331
  @property
  def max_stashed_size(self):
    # Peak number of bytes simultaneously stashed during the update, in
    # bytes; computed by WriteTransfers(), 0 before Compute() has run.
    return self._max_stashed_size
335
  def Compute(self, prefix):
    """Compute the transfer list and emit the output files.

    Runs the full pipeline: source-name abbreviation, transfer discovery,
    ordering, stash-size revision, validation, patch computation and
    serialization. Output files are written under the path 'prefix' (e.g.
    '<prefix>.new.dat' by ComputePatches() and '<prefix>.transfer.list' by
    WriteTransfers()).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
368
  def WriteTransfers(self, prefix):
    """Serialize the ordered transfers into '<prefix>.transfer.list'.

    Emits BBOTA v3+ command text (stash/free/new/zero/move/bsdiff/imgdiff/
    erase) and tracks the peak number of concurrently stashed blocks, which
    is stored in self._max_stashed_size (bytes) at the end.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit "stash" commands required before this transfer; a stash that is
      # already live (same hash) is only ref-counted, not re-stashed.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # Build the source spec ('src_str') in one of three forms:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>
      size = xf.src_ranges.size()
      src_str = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        # Drop one reference; free the stash slot once nothing uses it.
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op; emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700575
  def ReviseStashSize(self):
    """Rewrite transfers so peak stash usage fits under the cache limit.

    Walks the transfers in order, simulating stash usage. Whenever storing a
    stash would exceed the allowed size (cache_size * stash_threshold, in
    blocks), the command that would consume that stash — or an overlapping
    "diff" command's implicit stash — is converted to a plain "new" command.
    Returns the number of target blocks that were repacked as new data.
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          # Only a brand-new hash consumes additional stash space.
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700667
  def ComputePatches(self, prefix):
    """Computes patch data for all the transfers and writes the output files.

    Writes the raw data of "new" transfers to <prefix>.new.dat, turns
    hash-identical "diff" transfers into "move"s, diffs the remaining "diff"
    transfers in parallel (imgdiff where permissible, bsdiff otherwise), and
    concatenates the resulting patches into <prefix>.patch.dat, recording each
    transfer's patch offset/length on the Transfer object. Exits the process
    if any patch cannot be generated.

    Args:
      prefix: Path prefix for the output .new.dat / .patch.dat files.
    """
    print("Reticulating splines...")
    # Entries are (transfer index, use-imgdiff flag, patch index) triples.
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
              str(xf.tgt_ranges)))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style,
              xf.tgt_name, str(xf.tgt_ranges)))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #   ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
                  tgt_size, tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges)))
          else:
            if xf.patch:
              # We have already generated the patch with imgdiff. Check if the
              # transfer is intact.
              assert not self.disable_imgdiff
              imgdiff = True
              if not xf.intact:
                imgdiff = False
                xf.patch = None
            else:
              # For files in zip format (eg, APKs, JARs, etc.) we would
              # like to use imgdiff -z if possible (because it usually
              # produces significantly smaller patches than bsdiff).
              # This is permissible if:
              #
              #  - imgdiff is not disabled, and
              #  - the source and target files are monotonic (ie, the
              #    data is stored with blocks in increasing order), and
              #  - we haven't removed any blocks from the source set.
              #
              # If these conditions are satisfied then appending all the
              # blocks in the set together in order will produce a valid
              # zip file (plus possibly extra zeros in the last block),
              # which is what imgdiff needs to operate.  (imgdiff is
              # fine with extra zeros at the end of the file.)
              imgdiff = (not self.disable_imgdiff and xf.intact and
                         xf.tgt_name.split(".")[-1].lower()
                         in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_queue:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")

      diff_total = len(diff_queue)
      # patches[i] will be filled in (out of order) by the workers as
      # (transfer index, patch data) pairs.
      patches = [None] * diff_total
      error_messages = []
      warning_messages = []
      if sys.stdout.isatty():
        global diff_done
        diff_done = 0

      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call, which
      # already runs in a separate process (not affected much by the GIL -
      # Global Interpreter Lock). Using multiprocess also requires either a)
      # writing the diff input files in the main process before forking, or b)
      # reopening the image file (SparseImage) in the worker processes. Doing
      # neither of them further improves the performance.
      lock = threading.Lock()
      def diff_worker():
        while True:
          with lock:
            if not diff_queue:
              return
            xf_index, imgdiff, patch_index = diff_queue.pop()

          xf = self.transfers[xf_index]
          patch = xf.patch
          if not patch:
            src_ranges = xf.src_ranges
            tgt_ranges = xf.tgt_ranges

            src_file = common.MakeTempFile(prefix="src-")
            with open(src_file, "wb") as fd:
              self.src.WriteRangeDataToFd(src_ranges, fd)

            tgt_file = common.MakeTempFile(prefix="tgt-")
            with open(tgt_file, "wb") as fd:
              self.tgt.WriteRangeDataToFd(tgt_ranges, fd)

            message = []
            try:
              patch = compute_patch(src_file, tgt_file, imgdiff)
            except ValueError as e:
              message.append(
                  "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                      "imgdiff" if imgdiff else "bsdiff",
                      xf.tgt_name if xf.tgt_name == xf.src_name else
                          xf.tgt_name + " (from " + xf.src_name + ")",
                      xf.tgt_ranges, xf.src_ranges, e.message))
              # TODO(b/68016761): Better handle the holes in mke2fs created
              # images.
              if imgdiff:
                try:
                  patch = compute_patch(src_file, tgt_file, imgdiff=False)
                  message.append(
                      "Fell back and generated with bsdiff instead for %s" % (
                          xf.tgt_name,))
                  xf.style = "bsdiff"
                  with lock:
                    warning_messages.extend(message)
                    del message[:]
                except ValueError as e:
                  message.append(
                      "Also failed to generate with bsdiff for %s:\n%s" % (
                          xf.tgt_name, e.message))

            if message:
              with lock:
                error_messages.extend(message)

          with lock:
            patches[patch_index] = (xf_index, patch)
            if sys.stdout.isatty():
              global diff_done
              diff_done += 1
              progress = diff_done * 100 / diff_total
              # '\033[K' is to clear to EOL.
              print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
              sys.stdout.flush()

      # Spawn the worker threads and wait until they drain diff_queue.
      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()

      if sys.stdout.isatty():
        print('\n')

      if warning_messages:
        print('WARNING:')
        print('\n'.join(warning_messages))
        print('\n\n\n')

      if error_messages:
        print('ERROR:')
        print('\n'.join(error_messages))
        print('\n\n\n')
        sys.exit(1)
    else:
      patches = []

    # Concatenate the patches in transfer order, recording each transfer's
    # offset/length within the combined .patch.dat.
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch)

        if common.OPTIONS.verbose:
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
              xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
              xf.style,
              xf.tgt_name if xf.tgt_name == xf.src_name else (
                  xf.tgt_name + " (from " + xf.src_name + ")"),
              xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700867
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800868 def AssertSha1Good(self):
869 """Check the SHA-1 of the src & tgt blocks in the transfer list.
870
871 Double check the SHA-1 value to avoid the issue in b/71908713, where
872 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
873 environment. That specific problem has been fixed by protecting the
874 underlying generator function 'SparseImage._GetRangeData()' with lock.
875 """
876 for xf in self.transfers:
877 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
878 assert xf.tgt_sha1 == tgt_sha1
879 if xf.style == "diff":
880 src_sha1 = self.src.RangeSha1(xf.src_ranges)
881 assert xf.src_sha1 == src_sha1
882
Doug Zongkerfc44a512014-08-26 13:10:25 -0700883 def AssertSequenceGood(self):
884 # Simulate the sequences of transfers we will output, and check that:
885 # - we never read a block after writing it, and
886 # - we write every block we care about exactly once.
887
888 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800889 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700890
891 # Imagine processing the transfers in order.
892 for xf in self.transfers:
893 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700894
895 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800896 for _, sr in xf.use_stash:
897 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700898
Doug Zongker6ab2a502016-02-09 08:28:09 -0800899 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800900 # Source image could be larger. Don't check the blocks that are in the
901 # source image only. Since they are not in 'touched', and won't ever
902 # be touched.
903 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800904 assert touched[i] == 0
905
906 # Check that the output blocks for this transfer haven't yet
907 # been touched, and touch all the blocks written by this
908 # transfer.
909 for s, e in xf.tgt_ranges:
910 for i in range(s, e):
911 assert touched[i] == 0
912 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700913
914 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800915 for s, e in self.tgt.care_map:
916 for i in range(s, e):
917 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700918
Doug Zongker62338182014-09-08 08:29:55 -0700919 def ImproveVertexSequence(self):
920 print("Improving vertex order...")
921
922 # At this point our digraph is acyclic; we reversed any edges that
923 # were backwards in the heuristically-generated sequence. The
924 # previously-generated order is still acceptable, but we hope to
925 # find a better order that needs less memory for stashed data.
926 # Now we do a topological sort to generate a new vertex order,
927 # using a greedy algorithm to choose which vertex goes next
928 # whenever we have a choice.
929
930 # Make a copy of the edge set; this copy will get destroyed by the
931 # algorithm.
932 for xf in self.transfers:
933 xf.incoming = xf.goes_after.copy()
934 xf.outgoing = xf.goes_before.copy()
935
936 L = [] # the new vertex order
937
938 # S is the set of sources in the remaining graph; we always choose
939 # the one that leaves the least amount of stashed data after it's
940 # executed.
941 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
942 if not u.incoming]
943 heapq.heapify(S)
944
945 while S:
946 _, _, xf = heapq.heappop(S)
947 L.append(xf)
948 for u in xf.outgoing:
949 del u.incoming[xf]
950 if not u.incoming:
951 heapq.heappush(S, (u.NetStashChange(), u.order, u))
952
953 # if this fails then our graph had a cycle.
954 assert len(L) == len(self.transfers)
955
956 self.transfers = L
957 for i, xf in enumerate(L):
958 xf.order = i
959
Doug Zongkerfc44a512014-08-26 13:10:25 -0700960 def RemoveBackwardEdges(self):
961 print("Removing backward edges...")
962 in_order = 0
963 out_of_order = 0
964 lost_source = 0
965
966 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700967 lost = 0
968 size = xf.src_ranges.size()
969 for u in xf.goes_before:
970 # xf should go before u
971 if xf.order < u.order:
972 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700973 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700974 else:
975 # it doesn't, boo. trim the blocks that u writes from xf's
976 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700977 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700978 assert xf.src_ranges.overlaps(u.tgt_ranges)
979 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
980 xf.intact = False
981
982 if xf.style == "diff" and not xf.src_ranges:
983 # nothing left to diff from; treat as new data
984 xf.style = "new"
985
986 lost = size - xf.src_ranges.size()
987 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700988
989 print((" %d/%d dependencies (%.2f%%) were violated; "
990 "%d source blocks removed.") %
991 (out_of_order, in_order + out_of_order,
992 (out_of_order * 100.0 / (in_order + out_of_order))
993 if (in_order + out_of_order) else 0.0,
994 lost_source))
995
Doug Zongker62338182014-09-08 08:29:55 -0700996 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -0800997 """Reverse unsatisfying edges and compute pairs of stashed blocks.
998
999 For each transfer, make sure it properly stashes the blocks it touches and
1000 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1001 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1002 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1003 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1004 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1005 blocks will be written to the same stash slot in WriteTransfers().
1006 """
1007
Doug Zongker62338182014-09-08 08:29:55 -07001008 print("Reversing backward edges...")
1009 in_order = 0
1010 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001011 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001012 stash_size = 0
1013
1014 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001015 for u in xf.goes_before.copy():
1016 # xf should go before u
1017 if xf.order < u.order:
1018 # it does, hurray!
1019 in_order += 1
1020 else:
1021 # it doesn't, boo. modify u to stash the blocks that it
1022 # writes that xf wants to read, and then require u to go
1023 # before xf.
1024 out_of_order += 1
1025
1026 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1027 assert overlap
1028
Tao Bao3a2e3502016-12-28 09:14:39 -08001029 u.stash_before.append((stash_raw_id, overlap))
1030 xf.use_stash.append((stash_raw_id, overlap))
1031 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001032 stash_size += overlap.size()
1033
1034 # reverse the edge direction; now xf must go after u
1035 del xf.goes_before[u]
1036 del u.goes_after[xf]
1037 xf.goes_after[u] = None # value doesn't matter
1038 u.goes_before[xf] = None
1039
1040 print((" %d/%d dependencies (%.2f%%) were violated; "
1041 "%d source blocks stashed.") %
1042 (out_of_order, in_order + out_of_order,
1043 (out_of_order * 100.0 / (in_order + out_of_order))
1044 if (in_order + out_of_order) else 0.0,
1045 stash_size))
1046
  def FindVertexSequence(self):
    """Orders the transfers to minimize the weight of removed dependencies.

    Produces an ordering of self.transfers (recorded in each transfer's
    'order' field and in the list order itself) such that the total weight of
    left-pointing edges -- dependencies that will have to be broken -- is
    heuristically minimized. Consumes each transfer's 'incoming'/'outgoing'
    edge copies, which are deleted before returning.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left.  Left-pointing
    # edges will get removed to turn the digraph into a DAG.  In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    # Max-heap (via HeapItem) over the scores, so the "best" remaining
    # vertex can be found quickly.
    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Stale heap entries are only invalidated (cleared), never removed;
      # a fresh HeapItem with the updated score is pushed instead.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G: continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G: continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G: break

      # Find the "best" vertex to put next.  "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until a live (non-cleared) heap entry for a vertex still in G
      # is found.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1156
1157 def GenerateDigraph(self):
1158 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001159
1160 # Each item of source_ranges will be:
1161 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001162 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001163 source_ranges = []
1164 for b in self.transfers:
1165 for s, e in b.src_ranges:
1166 if e > len(source_ranges):
1167 source_ranges.extend([None] * (e-len(source_ranges)))
1168 for i in range(s, e):
1169 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001170 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001171 else:
Tao Bao33482282016-10-24 16:49:08 -07001172 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001173
Doug Zongkerfc44a512014-08-26 13:10:25 -07001174 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001175 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001176 for s, e in a.tgt_ranges:
1177 for i in range(s, e):
1178 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001179 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1180 if source_ranges[i] is not None:
1181 for j in source_ranges[i]:
1182 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001183
1184 for b in intersections:
1185 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001186
1187 # If the blocks written by A are read by B, then B needs to go before A.
1188 i = a.tgt_ranges.intersect(b.src_ranges)
1189 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001190 if b.src_name == "__ZERO":
1191 # the cost of removing source blocks for the __ZERO domain
1192 # is (nearly) zero.
1193 size = 0
1194 else:
1195 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001196 b.goes_before[a] = size
1197 a.goes_after[b] = size
1198
1199 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001200 """Parse the file_map to generate all the transfers."""
1201
Tianjie Xu41390d42017-11-22 11:35:18 -08001202 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1203 src_ranges, style, by_id):
1204 """Add one or multiple Transfer()s by splitting large files.
1205
1206 For BBOTA v3, we need to stash source blocks for resumable feature.
1207 However, with the growth of file size and the shrink of the cache
1208 partition source blocks are too large to be stashed. If a file occupies
1209 too many blocks, we split it into smaller pieces by getting multiple
1210 Transfer()s.
1211
1212 The downside is that after splitting, we may increase the package size
1213 since the split pieces don't align well. According to our experiments,
1214 1/8 of the cache size as the per-piece limit appears to be optimal.
1215 Compared to the fixed 1024-block limit, it reduces the overall package
1216 size by 30% for volantis, and 20% for angler and bullhead."""
1217
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001218 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001219 while (tgt_ranges.size() > max_blocks_per_transfer and
1220 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001221 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1222 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001223 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1224 src_first = src_ranges.first(max_blocks_per_transfer)
1225
Tao Bao183e56e2017-03-05 17:05:09 -08001226 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1227 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1228 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001229
1230 tgt_ranges = tgt_ranges.subtract(tgt_first)
1231 src_ranges = src_ranges.subtract(src_first)
1232 pieces += 1
1233
1234 # Handle remaining blocks.
1235 if tgt_ranges.size() or src_ranges.size():
1236 # Must be both non-empty.
1237 assert tgt_ranges.size() and src_ranges.size()
1238 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1239 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001240 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1241 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1242 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001243
Tianjie Xu41390d42017-11-22 11:35:18 -08001244 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1245 by_id):
1246 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001247
Tianjie Xu41390d42017-11-22 11:35:18 -08001248 This function will construct a list of zip archives, which will later be
1249 split by imgdiff to reduce the final patch size. For the other files,
1250 we will plainly split them based on a fixed chunk size with the potential
1251 patch size penalty.
1252 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001253
1254 assert style == "diff"
1255
1256 # Change nothing for small files.
1257 if (tgt_ranges.size() <= max_blocks_per_transfer and
1258 src_ranges.size() <= max_blocks_per_transfer):
1259 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1260 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1261 style, by_id)
1262 return
1263
1264 if tgt_name.split(".")[-1].lower() in ("apk", "jar", "zip"):
1265 split_enable = (not self.disable_imgdiff and src_ranges.monotonic and
1266 tgt_ranges.monotonic)
1267 if split_enable and (self.tgt.RangeSha1(tgt_ranges) !=
1268 self.src.RangeSha1(src_ranges)):
1269 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1270 return
1271
Tianjie Xu41390d42017-11-22 11:35:18 -08001272 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1273 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001274
Tao Bao08c85832016-09-19 22:26:30 -07001275 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1276 split=False):
1277 """Wrapper function for adding a Transfer()."""
1278
1279 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1280 # otherwise add the Transfer() as is.
1281 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001282 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1283 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1284 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001285 return
1286
1287 # Handle .odex files specially to analyze the block-wise difference. If
1288 # most of the blocks are identical with only few changes (e.g. header),
1289 # we will patch the changed blocks only. This avoids stashing unchanged
1290 # blocks while patching. We limit the analysis to files without size
1291 # changes only. This is to avoid sacrificing the OTA generation cost too
1292 # much.
1293 if (tgt_name.split(".")[-1].lower() == 'odex' and
1294 tgt_ranges.size() == src_ranges.size()):
1295
1296 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1297 # few blocks remain identical, we lose the opportunity to use imgdiff
1298 # that may have better compression ratio than bsdiff.
1299 crop_threshold = 0.5
1300
1301 tgt_skipped = RangeSet()
1302 src_skipped = RangeSet()
1303 tgt_size = tgt_ranges.size()
1304 tgt_changed = 0
1305 for src_block, tgt_block in zip(src_ranges.next_item(),
1306 tgt_ranges.next_item()):
1307 src_rs = RangeSet(str(src_block))
1308 tgt_rs = RangeSet(str(tgt_block))
1309 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1310 tgt_skipped = tgt_skipped.union(tgt_rs)
1311 src_skipped = src_skipped.union(src_rs)
1312 else:
1313 tgt_changed += tgt_rs.size()
1314
1315 # Terminate early if no clear sign of benefits.
1316 if tgt_changed > tgt_size * crop_threshold:
1317 break
1318
1319 if tgt_changed < tgt_size * crop_threshold:
1320 assert tgt_changed + tgt_skipped.size() == tgt_size
1321 print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
1322 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001323 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001324 "%s-skipped" % (tgt_name,),
1325 "%s-skipped" % (src_name,),
1326 tgt_skipped, src_skipped, style, by_id)
1327
1328 # Intentionally change the file extension to avoid being imgdiff'd as
1329 # the files are no longer in their original format.
1330 tgt_name = "%s-cropped" % (tgt_name,)
1331 src_name = "%s-cropped" % (src_name,)
1332 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1333 src_ranges = src_ranges.subtract(src_skipped)
1334
1335 # Possibly having no changed blocks.
1336 if not tgt_ranges:
1337 return
1338
1339 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001340 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001341 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1342
Tianjie Xu25366072017-09-08 17:19:02 -07001343 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1344 split_info):
1345 """Parse the split_info and return a list of info tuples.
1346
1347 Args:
1348 patch_size: total size of the patch file.
1349 tgt_ranges: Ranges of the target file within the original image.
1350 src_ranges: Ranges of the source file within the original image.
1351 split_info format:
1352 imgdiff version#
1353 count of pieces
1354 <patch_size_1> <tgt_size_1> <src_ranges_1>
1355 ...
1356 <patch_size_n> <tgt_size_n> <src_ranges_n>
1357
1358 Returns:
1359 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1360 """
1361
1362 version = int(split_info[0])
1363 assert version == 2
1364 count = int(split_info[1])
1365 assert len(split_info) - 2 == count
1366
1367 split_info_list = []
1368 patch_start = 0
1369 tgt_remain = copy.deepcopy(tgt_ranges)
1370 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1371 for line in split_info[2:]:
1372 info = line.split()
1373 assert len(info) == 3
1374 patch_length = int(info[0])
1375
1376 split_tgt_size = int(info[1])
1377 assert split_tgt_size % 4096 == 0
1378 assert split_tgt_size / 4096 <= tgt_remain.size()
1379 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1380 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1381
1382 # Find the split_src_ranges within the image file from its relative
1383 # position in file.
1384 split_src_indices = RangeSet.parse_raw(info[2])
1385 split_src_ranges = RangeSet()
1386 for r in split_src_indices:
1387 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1388 assert not split_src_ranges.overlaps(curr_range)
1389 split_src_ranges = split_src_ranges.union(curr_range)
1390
1391 split_info_list.append((patch_start, patch_length,
1392 split_tgt_ranges, split_src_ranges))
1393 patch_start += patch_length
1394
1395 # Check that the sizes of all the split pieces add up to the final file
1396 # size for patch and target.
1397 assert tgt_remain.size() == 0
1398 assert patch_start == patch_size
1399 return split_info_list
1400
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001401 def SplitLargeApks():
1402 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001403
1404 Example: Chrome.apk will be split into
1405 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1406 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1407 ...
1408
1409 After the split, the target pieces are continuous and block aligned; and
1410 the source pieces are mutually exclusive. During the split, we also
1411 generate and save the image patch between src-X & tgt-X. This patch will
1412 be valid because the block ranges of src-X & tgt-X will always stay the
1413 same afterwards; but there's a chance we don't use the patch if we
1414 convert the "diff" command into "new" or "move" later.
Tianjie Xu41390d42017-11-22 11:35:18 -08001415
1416 The split will be attempted by calling imgdiff, which expects the input
1417 files to be valid zip archives. If imgdiff fails for some reason (i.e.
1418 holes in the APK file), we will fall back to split the failed APKs into
1419 fixed size chunks.
Tianjie Xu25366072017-09-08 17:19:02 -07001420 """
1421
1422 while True:
1423 with transfer_lock:
1424 if not large_apks:
1425 return
1426 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1427
1428 src_file = common.MakeTempFile(prefix="src-")
1429 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001430 with open(src_file, "wb") as src_fd:
1431 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1432 with open(tgt_file, "wb") as tgt_fd:
1433 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001434
1435 patch_file = common.MakeTempFile(prefix="patch-")
1436 patch_info_file = common.MakeTempFile(prefix="split_info-")
1437 cmd = ["imgdiff", "-z",
1438 "--block-limit={}".format(max_blocks_per_transfer),
1439 "--split-info=" + patch_info_file,
1440 src_file, tgt_file, patch_file]
1441 p = common.Run(cmd, stdout=subprocess.PIPE)
1442 p.communicate()
Tianjie Xu25366072017-09-08 17:19:02 -07001443 if p.returncode != 0:
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001444 print("Failed to create patch between {} and {},"
1445 " falling back to bsdiff".format(src_name, tgt_name))
1446 with transfer_lock:
Tianjie Xu41390d42017-11-22 11:35:18 -08001447 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name,
1448 tgt_ranges, src_ranges,
1449 "diff", self.transfers)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001450 continue
Tianjie Xu25366072017-09-08 17:19:02 -07001451
1452 with open(patch_info_file) as patch_info:
1453 lines = patch_info.readlines()
1454
1455 patch_size_total = os.path.getsize(patch_file)
1456 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1457 tgt_ranges, src_ranges,
1458 lines)
1459 for index, (patch_start, patch_length, split_tgt_ranges,
1460 split_src_ranges) in enumerate(split_info_list):
1461 with open(patch_file) as f:
1462 f.seek(patch_start)
1463 patch_content = f.read(patch_length)
1464
1465 split_src_name = "{}-{}".format(src_name, index)
1466 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001467 split_large_apks.append((split_tgt_name,
1468 split_src_name,
1469 split_tgt_ranges,
1470 split_src_ranges,
1471 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001472
Tao Bao08c85832016-09-19 22:26:30 -07001473 print("Finding transfers...")
1474
Tianjie Xu25366072017-09-08 17:19:02 -07001475 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001476 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001477 cache_size = common.OPTIONS.cache_size
1478 split_threshold = 0.125
1479 max_blocks_per_transfer = int(cache_size * split_threshold /
1480 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001481 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001482 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001483 if tgt_fn == "__ZERO":
1484 # the special "__ZERO" domain is all the blocks not contained
1485 # in any file and that are filled with zeros. We have a
1486 # special transfer style for zero blocks.
1487 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001488 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1489 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001490 continue
1491
Tao Baoff777812015-05-12 11:42:31 -07001492 elif tgt_fn == "__COPY":
1493 # "__COPY" domain includes all the blocks not contained in any
1494 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001495 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001496 continue
1497
Doug Zongkerfc44a512014-08-26 13:10:25 -07001498 elif tgt_fn in self.src.file_map:
1499 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001500 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001501 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001502 continue
1503
1504 b = os.path.basename(tgt_fn)
1505 if b in self.src_basenames:
1506 # Look for an exact basename match in the source.
1507 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001508 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001509 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001510 continue
1511
1512 b = re.sub("[0-9]+", "#", b)
1513 if b in self.src_numpatterns:
1514 # Look for a 'number pattern' match (a basename match after
1515 # all runs of digits are replaced by "#"). (This is useful
1516 # for .so files that contain version numbers in the filename
1517 # that get bumped.)
1518 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001519 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001520 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001521 continue
1522
Tao Bao9a5caf22015-08-25 15:10:10 -07001523 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001524
Tianjie Xu25366072017-09-08 17:19:02 -07001525 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001526 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001527 for _ in range(self.threads)]
1528 for th in threads:
1529 th.start()
1530 while threads:
1531 threads.pop().join()
1532
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001533 # Sort the split transfers for large apks to generate a determinate package.
1534 split_large_apks.sort()
1535 for (tgt_name, src_name, tgt_ranges, src_ranges,
1536 patch) in split_large_apks:
1537 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1538 self.tgt.RangeSha1(tgt_ranges),
1539 self.src.RangeSha1(src_ranges),
1540 "diff", self.transfers)
1541 transfer_split.patch = patch
1542
Doug Zongkerfc44a512014-08-26 13:10:25 -07001543 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001544 for k in self.src.file_map.keys():
1545 b = os.path.basename(k)
1546 self.src_basenames[b] = k
1547 b = re.sub("[0-9]+", "#", b)
1548 self.src_numpatterns[b] = k
1549
1550 @staticmethod
1551 def AssertPartition(total, seq):
1552 """Assert that all the RangeSets in 'seq' form a partition of the
1553 'total' RangeSet (ie, they are nonintersecting and their union
1554 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001555
Doug Zongkerfc44a512014-08-26 13:10:25 -07001556 so_far = RangeSet()
1557 for i in seq:
1558 assert not so_far.overlaps(i)
1559 so_far = so_far.union(i)
1560 assert so_far == total