blob: c95512d273ff1fc1af5290b6b98579f559bddfe7 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Tao Bao3a2e3502016-12-28 09:14:39 -080024import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070025import re
26import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070029
Tao Bao3a2e3502016-12-28 09:14:39 -080030from collections import deque, OrderedDict
31from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Generate a binary patch that turns srcfile into tgtfile.

  Runs imgdiff (zip-aware mode) when imgdiff=True, otherwise bsdiff, and
  returns the raw patch bytes. Raises ValueError carrying the tool's
  combined stdout/stderr on a non-zero exit.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z']
  else:
    cmd = ['bsdiff']
  cmd += [srcfile, tgtfile, patchfile]

  # The command lines only contain temp filenames, so dumping them is not
  # useful for debugging; run quietly.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  diff_output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(diff_output)

  with open(patchfile, 'rb') as patch_fp:
    return patch_fp.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070055
Dan Albert8b72aef2015-03-23 19:13:21 -070056
class Image(object):
  """Abstract interface over a partition image used by BlockImageDiff."""

  def RangeSha1(self, ranges):
    """Return the hex SHA-1 of the data in the given RangeSet."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Return the data in the given RangeSet as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 over the whole image's cared-for data."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in the given RangeSet to the file object fd."""
    raise NotImplementedError
69
Dan Albert8b72aef2015-03-23 19:13:21 -070070
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    # Nothing to care about, nothing clobbered, nothing extended.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data, so every range hashes to the empty digest.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # clobbered_blocks is always empty here, so the flag is irrelevant.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
96
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070097
class DataImage(Image):
  """An image wrapped around a single string of data.

  The data must be a whole number of 4096-byte blocks, unless trim=True
  (drop the partial last block) or pad=True (zero-fill the partial last
  block) is given.
  """

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    # trim and pad are mutually exclusive ways to handle a partial block.
    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so the block count stays an int under Python 3 too.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Bug fix: store a RangeSet rather than the raw list. TotalSha1() below
    # passes this to RangeSet.subtract(), and EmptyImage exposes a RangeSet
    # for the same attribute; a plain list broke the padded case.
    self.clobbered_blocks = (
        RangeSet(data=clobbered_blocks) if clobbered_blocks else RangeSet())
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block as all-zero or not. The padded last block, if any,
    # is excluded here; it is covered by the "__COPY" entry instead.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yield the data chunk for each [start, end) block range in ranges."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Return the hex SHA-1 over the data in the given ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 over the image, optionally including the
    clobbered (padded) blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):
      fd.write(data)
181
Doug Zongkerfc44a512014-08-26 13:10:25 -0700182
class Transfer(object):
  """One unit of work: produce tgt_ranges of the target image, possibly by
  reading src_ranges of the source image."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # "Intact" means both range sets are monotonically increasing.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # OrderedDict keeps the output repeatable; with a plain dict it would
    # depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    # (raw_id, RangeSet) pairs of stashes this command creates / consumes.
    self.stash_before = []
    self.use_stash = []

    # Register in the shared by_id list; position doubles as the id.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Return the net change in stashed blocks caused by this command."""
    stashed = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stashed - freed

  def ConvertToNew(self):
    """Rewrite this command as a "new" that reads nothing from the source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
220
221
@functools.total_ordering
class HeapItem(object):
  """Adapter placing scored items on heapq's min-heap, popping max first.

  The comparison key is the negated score because heapq is a min-heap while
  callers want the maximum-score item. An entry can be invalidated in place
  with clear(); a cleared HeapItem is falsy, so stale heap entries can be
  detected and skipped instead of being removed from the middle of the heap.
  """

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    self.item = None

  def __bool__(self):
    # Bug fix: this previously returned "self.item is None", which made live
    # items falsy and cleared items truthy -- exactly inverted.
    return self.item is not None

  # Python 2 consults __nonzero__ (not __bool__) for truth testing.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
237
238
Doug Zongkerfc44a512014-08-26 13:10:25 -0700239# BlockImageDiff works on two image objects. An image object is
240# anything that provides the following attributes:
241#
242# blocksize: the size in bytes of a block, currently must be 4096.
243#
244# total_blocks: the total size of the partition/image, in blocks.
245#
246# care_map: a RangeSet containing which blocks (in the range [0,
247# total_blocks) we actually care about; i.e. which blocks contain
248# data.
249#
250# file_map: a dict that partitions the blocks contained in care_map
251# into smaller domains that are useful for doing diffs on.
252# (Typically a domain is a file, and the key in file_map is the
253# pathname.)
254#
Tao Baoff777812015-05-12 11:42:31 -0700255# clobbered_blocks: a RangeSet containing which blocks contain data
256# but may be altered by the FS. They need to be excluded when
257# verifying the partition integrity.
258#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# ReadRangeSet(): a function that takes a RangeSet and returns the
260# data contained in the image blocks of that RangeSet. The data
261# is returned as a list or tuple of strings; concatenating the
262# elements together should produce the requested data.
263# Implementations are free to break up the data into list/tuple
264# elements in any way that is convenient.
265#
Tao Bao183e56e2017-03-05 17:05:09 -0800266# RangeSha1(): a function that returns (as a hex string) the SHA-1
267# hash of all the data in the specified range.
268#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700269# TotalSha1(): a function that returns (as a hex string) the SHA-1
270# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700271# care_map minus clobbered_blocks, or including the clobbered
272# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700273#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700274# When creating a BlockImageDiff, the src image may be None, in which
275# case the list of transfers produced will never read from the
276# original image.
277
278class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700279 def __init__(self, tgt, src=None, threads=None, version=4,
280 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700281 if threads is None:
282 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700283 if threads == 0:
284 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700285 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700286 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700287 self.transfers = []
288 self.src_basenames = {}
289 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800290 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700291 self.touched_src_ranges = RangeSet()
292 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700293 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700294
Tao Bao8fad03e2017-03-01 14:36:26 -0800295 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700296
297 self.tgt = tgt
298 if src is None:
299 src = EmptyImage()
300 self.src = src
301
302 # The updater code that installs the patch always uses 4k blocks.
303 assert tgt.blocksize == 4096
304 assert src.blocksize == 4096
305
306 # The range sets in each filemap should comprise a partition of
307 # the care map.
308 self.AssertPartition(src.care_map, src.file_map.values())
309 self.AssertPartition(tgt.care_map, tgt.file_map.values())
310
  @property
  def max_stashed_size(self):
    # Peak number of stashed bytes needed while applying the update, as
    # computed by WriteTransfers(); stays 0 until that method has run.
    return self._max_stashed_size
314
  def Compute(self, prefix):
    """Compute the transfer list and write all output files.

    prefix is the path prefix for the output artifacts; WriteTransfers()
    emits prefix + ".transfer.list" and ComputePatches() emits the new/patch
    data files alongside it.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) a exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
346
  def WriteTransfers(self, prefix):
    """Render self.transfers as a BBOTA v3/v4 transfer list.

    Writes prefix + ".transfer.list": a 4-line header (version, total blocks
    written, 0 for the unused stash-slot count, max stashed blocks) followed
    by one command per line. Also records the peak stash usage in
    self._max_stashed_size and the hash of all source blocks read in
    self.touched_src_sha1.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit a "stash" command only the first time a given content hash is
      # stashed; later requests just bump the ref count.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # The source spec (src_str) takes one of these forms:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str = [str(size)]

      # Split the source into directly-read ranges and stashed pieces; each
      # stashed piece is referenced by hash plus its location (map_within)
      # inside the command's source window.
      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference dropped; schedule a "free" after this command.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op; emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700553
  def ReviseStashSize(self):
    """Keep the runtime stash usage under the cache-size limit.

    Simulates stash usage over self.transfers against the budget derived
    from cache_size * stash_threshold. Any command whose stash need would
    exceed the budget is converted to a "new" command (losing the diff but
    dropping the stash requirement). Returns the total number of target
    blocks that were converted to "new".
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          # Only a first stash of this content actually consumes space;
          # duplicates share the existing slot.
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700645
Doug Zongkerfc44a512014-08-26 13:10:25 -0700646 def ComputePatches(self, prefix):
647 print("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800648 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700649 patch_num = 0
650 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800651 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700652 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700653 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
654 print("%10d %10d (%6.2f%%) %7s %s %s" % (
655 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
656 str(xf.tgt_ranges)))
657
Doug Zongkerfc44a512014-08-26 13:10:25 -0700658 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800659 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700660 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
661 print("%10d %10d (%6.2f%%) %7s %s %s" % (
662 tgt_size, tgt_size, 100.0, xf.style,
663 xf.tgt_name, str(xf.tgt_ranges)))
664
Doug Zongkerfc44a512014-08-26 13:10:25 -0700665 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700666 # We can't compare src and tgt directly because they may have
667 # the same content but be broken up into blocks differently, eg:
668 #
669 # ["he", "llo"] vs ["h", "ello"]
670 #
671 # We want those to compare equal, ideally without having to
672 # actually concatenate the strings (these may be tens of
673 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800674 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700675 # These are identical; we don't need to generate a patch,
676 # just issue copy commands on the device.
677 xf.style = "move"
Tao Bao183e56e2017-03-05 17:05:09 -0800678 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700679 if xf.src_ranges != xf.tgt_ranges:
680 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
681 tgt_size, tgt_size, 100.0, xf.style,
682 xf.tgt_name if xf.tgt_name == xf.src_name else (
683 xf.tgt_name + " (from " + xf.src_name + ")"),
684 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700685 else:
686 # For files in zip format (eg, APKs, JARs, etc.) we would
687 # like to use imgdiff -z if possible (because it usually
688 # produces significantly smaller patches than bsdiff).
689 # This is permissible if:
690 #
Tao Bao293fd132016-06-11 12:19:23 -0700691 # - imgdiff is not disabled, and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700692 # - the source and target files are monotonic (ie, the
693 # data is stored with blocks in increasing order), and
694 # - we haven't removed any blocks from the source set.
695 #
696 # If these conditions are satisfied then appending all the
697 # blocks in the set together in order will produce a valid
698 # zip file (plus possibly extra zeros in the last block),
699 # which is what imgdiff needs to operate. (imgdiff is
700 # fine with extra zeros at the end of the file.)
Tao Bao293fd132016-06-11 12:19:23 -0700701 imgdiff = (not self.disable_imgdiff and xf.intact and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700702 xf.tgt_name.split(".")[-1].lower()
703 in ("apk", "jar", "zip"))
704 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800705 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700706 patch_num += 1
707
708 else:
709 assert False, "unknown style " + xf.style
710
Tao Bao183e56e2017-03-05 17:05:09 -0800711 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700712 if self.threads > 1:
713 print("Computing patches (using %d threads)..." % (self.threads,))
714 else:
715 print("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700716
Tao Bao183e56e2017-03-05 17:05:09 -0800717 diff_total = len(diff_queue)
718 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700719 error_messages = []
Tao Baob937ead2017-10-19 16:51:53 -0700720 warning_messages = []
Tao Bao33635b12017-03-12 13:02:51 -0700721 if sys.stdout.isatty():
722 global diff_done
723 diff_done = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700724
Tao Bao183e56e2017-03-05 17:05:09 -0800725 # Using multiprocessing doesn't give additional benefits, due to the
726 # pattern of the code. The diffing work is done by subprocess.call, which
727 # already runs in a separate process (not affected much by the GIL -
728 # Global Interpreter Lock). Using multiprocess also requires either a)
729 # writing the diff input files in the main process before forking, or b)
730 # reopening the image file (SparseImage) in the worker processes. Doing
731 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700732 lock = threading.Lock()
733 def diff_worker():
734 while True:
735 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800736 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700737 return
Tao Bao183e56e2017-03-05 17:05:09 -0800738 xf_index, imgdiff, patch_index = diff_queue.pop()
739
740 xf = self.transfers[xf_index]
741 src_ranges = xf.src_ranges
742 tgt_ranges = xf.tgt_ranges
743
744 # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
Doug Zongkerfc44a512014-08-26 13:10:25 -0700745 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800746 src_file = common.MakeTempFile(prefix="src-")
747 with open(src_file, "wb") as fd:
748 self.src.WriteRangeDataToFd(src_ranges, fd)
749
750 tgt_file = common.MakeTempFile(prefix="tgt-")
751 with open(tgt_file, "wb") as fd:
752 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
753
Tao Baob937ead2017-10-19 16:51:53 -0700754 message = []
Tao Bao183e56e2017-03-05 17:05:09 -0800755 try:
756 patch = compute_patch(src_file, tgt_file, imgdiff)
757 except ValueError as e:
Tao Baob937ead2017-10-19 16:51:53 -0700758 message.append(
759 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
760 "imgdiff" if imgdiff else "bsdiff",
761 xf.tgt_name if xf.tgt_name == xf.src_name else
762 xf.tgt_name + " (from " + xf.src_name + ")",
763 xf.tgt_ranges, xf.src_ranges, e.message))
764 # TODO(b/68016761): Better handle the holes in mke2fs created images.
765 if imgdiff:
766 try:
767 patch = compute_patch(src_file, tgt_file, imgdiff=False)
768 message.append(
769 "Fell back and generated with bsdiff instead for %s" % (
770 xf.tgt_name,))
771 with lock:
772 warning_messages.extend(message)
773 del message[:]
774 except ValueError as e:
775 message.append(
776 "Also failed to generate with bsdiff for %s:\n%s" % (
777 xf.tgt_name, e.message))
778
779 if message:
Tianjie Xub59c17f2016-10-28 17:55:53 -0700780 with lock:
Tao Baob937ead2017-10-19 16:51:53 -0700781 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800782
783 with lock:
784 patches[patch_index] = (xf_index, patch)
785 if sys.stdout.isatty():
Tao Bao33635b12017-03-12 13:02:51 -0700786 global diff_done
787 diff_done += 1
788 progress = diff_done * 100 / diff_total
Tao Bao183e56e2017-03-05 17:05:09 -0800789 # '\033[K' is to clear to EOL.
790 print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
791 sys.stdout.flush()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700792
793 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700794 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700795 for th in threads:
796 th.start()
797 while threads:
798 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800799
800 if sys.stdout.isatty():
801 print('\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700802
Tao Baob937ead2017-10-19 16:51:53 -0700803 if warning_messages:
804 print('WARNING:')
805 print('\n'.join(warning_messages))
806 print('\n\n\n')
807
Tianjie Xub59c17f2016-10-28 17:55:53 -0700808 if error_messages:
Tao Baob937ead2017-10-19 16:51:53 -0700809 print('ERROR:')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700810 print('\n'.join(error_messages))
Tao Baob937ead2017-10-19 16:51:53 -0700811 print('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700812 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700813 else:
814 patches = []
815
Tao Bao183e56e2017-03-05 17:05:09 -0800816 offset = 0
817 with open(prefix + ".patch.dat", "wb") as patch_fd:
818 for index, patch in patches:
819 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700820 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800821 xf.patch_start = offset
822 offset += xf.patch_len
823 patch_fd.write(patch)
824
825 if common.OPTIONS.verbose:
826 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
827 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
828 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
829 xf.style,
830 xf.tgt_name if xf.tgt_name == xf.src_name else (
831 xf.tgt_name + " (from " + xf.src_name + ")"),
832 xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700833
834 def AssertSequenceGood(self):
835 # Simulate the sequences of transfers we will output, and check that:
836 # - we never read a block after writing it, and
837 # - we write every block we care about exactly once.
838
839 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800840 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700841
842 # Imagine processing the transfers in order.
843 for xf in self.transfers:
844 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700845
846 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800847 for _, sr in xf.use_stash:
848 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700849
Doug Zongker6ab2a502016-02-09 08:28:09 -0800850 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800851 # Source image could be larger. Don't check the blocks that are in the
852 # source image only. Since they are not in 'touched', and won't ever
853 # be touched.
854 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800855 assert touched[i] == 0
856
857 # Check that the output blocks for this transfer haven't yet
858 # been touched, and touch all the blocks written by this
859 # transfer.
860 for s, e in xf.tgt_ranges:
861 for i in range(s, e):
862 assert touched[i] == 0
863 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700864
865 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800866 for s, e in self.tgt.care_map:
867 for i in range(s, e):
868 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700869
Doug Zongker62338182014-09-08 08:29:55 -0700870 def ImproveVertexSequence(self):
871 print("Improving vertex order...")
872
873 # At this point our digraph is acyclic; we reversed any edges that
874 # were backwards in the heuristically-generated sequence. The
875 # previously-generated order is still acceptable, but we hope to
876 # find a better order that needs less memory for stashed data.
877 # Now we do a topological sort to generate a new vertex order,
878 # using a greedy algorithm to choose which vertex goes next
879 # whenever we have a choice.
880
881 # Make a copy of the edge set; this copy will get destroyed by the
882 # algorithm.
883 for xf in self.transfers:
884 xf.incoming = xf.goes_after.copy()
885 xf.outgoing = xf.goes_before.copy()
886
887 L = [] # the new vertex order
888
889 # S is the set of sources in the remaining graph; we always choose
890 # the one that leaves the least amount of stashed data after it's
891 # executed.
892 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
893 if not u.incoming]
894 heapq.heapify(S)
895
896 while S:
897 _, _, xf = heapq.heappop(S)
898 L.append(xf)
899 for u in xf.outgoing:
900 del u.incoming[xf]
901 if not u.incoming:
902 heapq.heappush(S, (u.NetStashChange(), u.order, u))
903
904 # if this fails then our graph had a cycle.
905 assert len(L) == len(self.transfers)
906
907 self.transfers = L
908 for i, xf in enumerate(L):
909 xf.order = i
910
Doug Zongkerfc44a512014-08-26 13:10:25 -0700911 def RemoveBackwardEdges(self):
912 print("Removing backward edges...")
913 in_order = 0
914 out_of_order = 0
915 lost_source = 0
916
917 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700918 lost = 0
919 size = xf.src_ranges.size()
920 for u in xf.goes_before:
921 # xf should go before u
922 if xf.order < u.order:
923 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700924 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700925 else:
926 # it doesn't, boo. trim the blocks that u writes from xf's
927 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700928 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700929 assert xf.src_ranges.overlaps(u.tgt_ranges)
930 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
931 xf.intact = False
932
933 if xf.style == "diff" and not xf.src_ranges:
934 # nothing left to diff from; treat as new data
935 xf.style = "new"
936
937 lost = size - xf.src_ranges.size()
938 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700939
940 print((" %d/%d dependencies (%.2f%%) were violated; "
941 "%d source blocks removed.") %
942 (out_of_order, in_order + out_of_order,
943 (out_of_order * 100.0 / (in_order + out_of_order))
944 if (in_order + out_of_order) else 0.0,
945 lost_source))
946
Doug Zongker62338182014-09-08 08:29:55 -0700947 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -0800948 """Reverse unsatisfying edges and compute pairs of stashed blocks.
949
950 For each transfer, make sure it properly stashes the blocks it touches and
951 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
952 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
953 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
954 it is possible to have multiple pairs with different 'stash_raw_id's. Each
955 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
956 blocks will be written to the same stash slot in WriteTransfers().
957 """
958
Doug Zongker62338182014-09-08 08:29:55 -0700959 print("Reversing backward edges...")
960 in_order = 0
961 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -0800962 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -0700963 stash_size = 0
964
965 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700966 for u in xf.goes_before.copy():
967 # xf should go before u
968 if xf.order < u.order:
969 # it does, hurray!
970 in_order += 1
971 else:
972 # it doesn't, boo. modify u to stash the blocks that it
973 # writes that xf wants to read, and then require u to go
974 # before xf.
975 out_of_order += 1
976
977 overlap = xf.src_ranges.intersect(u.tgt_ranges)
978 assert overlap
979
Tao Bao3a2e3502016-12-28 09:14:39 -0800980 u.stash_before.append((stash_raw_id, overlap))
981 xf.use_stash.append((stash_raw_id, overlap))
982 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -0700983 stash_size += overlap.size()
984
985 # reverse the edge direction; now xf must go after u
986 del xf.goes_before[u]
987 del u.goes_after[xf]
988 xf.goes_after[u] = None # value doesn't matter
989 u.goes_before[xf] = None
990
991 print((" %d/%d dependencies (%.2f%%) were violated; "
992 "%d source blocks stashed.") %
993 (out_of_order, in_order + out_of_order,
994 (out_of_order * 100.0 / (in_order + out_of_order))
995 if (in_order + out_of_order) else 0.0,
996 stash_size))
997
Doug Zongkerfc44a512014-08-26 13:10:25 -0700998 def FindVertexSequence(self):
999 print("Finding vertex sequence...")
1000
1001 # This is based on "A Fast & Effective Heuristic for the Feedback
1002 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1003 # it as starting with the digraph G and moving all the vertices to
1004 # be on a horizontal line in some order, trying to minimize the
1005 # number of edges that end up pointing to the left. Left-pointing
1006 # edges will get removed to turn the digraph into a DAG. In this
1007 # case each edge has a weight which is the number of source blocks
1008 # we'll lose if that edge is removed; we try to minimize the total
1009 # weight rather than just the number of edges.
1010
1011 # Make a copy of the edge set; this copy will get destroyed by the
1012 # algorithm.
1013 for xf in self.transfers:
1014 xf.incoming = xf.goes_after.copy()
1015 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001016 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001017
1018 # We use an OrderedDict instead of just a set so that the output
1019 # is repeatable; otherwise it would depend on the hash values of
1020 # the transfer objects.
1021 G = OrderedDict()
1022 for xf in self.transfers:
1023 G[xf] = None
1024 s1 = deque() # the left side of the sequence, built from left to right
1025 s2 = deque() # the right side of the sequence, built from right to left
1026
Doug Zongker6ab2a502016-02-09 08:28:09 -08001027 heap = []
1028 for xf in self.transfers:
1029 xf.heap_item = HeapItem(xf)
1030 heap.append(xf.heap_item)
1031 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001032
Tao Bao33482282016-10-24 16:49:08 -07001033 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1034 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1035 # { key1: None, key2: None, ... }.
1036 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1037 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001038
1039 def adjust_score(iu, delta):
1040 iu.score += delta
1041 iu.heap_item.clear()
1042 iu.heap_item = HeapItem(iu)
1043 heapq.heappush(heap, iu.heap_item)
1044
1045 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001046 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001047 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001048 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001049 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001050 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001051 s2.appendleft(u)
1052 del G[u]
1053 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001054 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001055 if not iu.outgoing:
1056 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001057 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001058
1059 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001060 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001061 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001062 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001063 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001064 s1.append(u)
1065 del G[u]
1066 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001067 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001068 if not iu.incoming:
1069 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001070 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001071
Doug Zongker6ab2a502016-02-09 08:28:09 -08001072 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001073
1074 # Find the "best" vertex to put next. "Best" is the one that
1075 # maximizes the net difference in source blocks saved we get by
1076 # pretending it's a source rather than a sink.
1077
Doug Zongker6ab2a502016-02-09 08:28:09 -08001078 while True:
1079 u = heapq.heappop(heap)
1080 if u and u.item in G:
1081 u = u.item
1082 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001083
Doug Zongkerfc44a512014-08-26 13:10:25 -07001084 s1.append(u)
1085 del G[u]
1086 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001087 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001088 if not iu.incoming:
1089 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001090
Doug Zongkerfc44a512014-08-26 13:10:25 -07001091 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001092 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001093 if not iu.outgoing:
1094 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001095
1096 # Now record the sequence in the 'order' field of each transfer,
1097 # and by rearranging self.transfers to be in the chosen sequence.
1098
1099 new_transfers = []
1100 for x in itertools.chain(s1, s2):
1101 x.order = len(new_transfers)
1102 new_transfers.append(x)
1103 del x.incoming
1104 del x.outgoing
1105
1106 self.transfers = new_transfers
1107
1108 def GenerateDigraph(self):
1109 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001110
1111 # Each item of source_ranges will be:
1112 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001113 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001114 source_ranges = []
1115 for b in self.transfers:
1116 for s, e in b.src_ranges:
1117 if e > len(source_ranges):
1118 source_ranges.extend([None] * (e-len(source_ranges)))
1119 for i in range(s, e):
1120 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001121 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001122 else:
Tao Bao33482282016-10-24 16:49:08 -07001123 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001124
Doug Zongkerfc44a512014-08-26 13:10:25 -07001125 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001126 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001127 for s, e in a.tgt_ranges:
1128 for i in range(s, e):
1129 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001130 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1131 if source_ranges[i] is not None:
1132 for j in source_ranges[i]:
1133 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001134
1135 for b in intersections:
1136 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001137
1138 # If the blocks written by A are read by B, then B needs to go before A.
1139 i = a.tgt_ranges.intersect(b.src_ranges)
1140 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001141 if b.src_name == "__ZERO":
1142 # the cost of removing source blocks for the __ZERO domain
1143 # is (nearly) zero.
1144 size = 0
1145 else:
1146 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001147 b.goes_before[a] = size
1148 a.goes_after[b] = size
1149
  def FindTransfers(self):
    """Parse the file_map to generate all the transfers.

    Matches each target file to a source file by exact path, then exact
    basename, then digit-collapsed basename, adding "diff" transfers for
    matches and "new" transfers otherwise. The special __ZERO and __COPY
    domains become "zero" and "new" transfers respectively. Large files
    are split into multiple transfers so their source blocks fit in the
    stash (see AddSplitTransfers).
    """

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
                          style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""

      # Possibly split large files into smaller chunks.
      pieces = 0
      cache_size = common.OPTIONS.cache_size
      split_threshold = 0.125
      max_blocks_per_transfer = int(cache_size * split_threshold /
                                    self.tgt.blocksize)

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Peel off fixed-size pieces while both sides are still too big;
      # each piece gets a "-<n>" suffix on both names.
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
                 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
                 style, by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer()."""

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only few changes (e.g. header),
      # we will patch the changed blocks only. This avoids stashing unchanged
      # blocks while patching. We limit the analysis to files without size
      # changes only. This is to avoid sacrificing the OTA generation cost too
      # much.
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff
        # that may have better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        # Compare block by block, accumulating the identical blocks into
        # the *_skipped sets.
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if no clear sign of benefits.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
                tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd as
          # the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    print("Finding transfers...")

    empty = RangeSet()
    for tgt_fn, tgt_ranges in self.tgt.file_map.items():
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros. We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, True)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#"). (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      # No match anywhere in the source: ship the file as new data.
      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001322
1323 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001324 for k in self.src.file_map.keys():
1325 b = os.path.basename(k)
1326 self.src_basenames[b] = k
1327 b = re.sub("[0-9]+", "#", b)
1328 self.src_numpatterns[b] = k
1329
1330 @staticmethod
1331 def AssertPartition(total, seq):
1332 """Assert that all the RangeSets in 'seq' form a partition of the
1333 'total' RangeSet (ie, they are nonintersecting and their union
1334 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001335
Doug Zongkerfc44a512014-08-26 13:10:25 -07001336 so_far = RangeSet()
1337 for i in seq:
1338 assert not so_far.overlaps(i)
1339 so_far = so_far.union(i)
1340 assert so_far == total