# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

from collections import deque, OrderedDict
from hashlib import sha1
import common
import heapq
import itertools
import multiprocessing
import os
import re
import subprocess
import tempfile
import threading

from rangelib import RangeSet


__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]


def compute_patch(src, tgt, imgdiff=False):
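  """Return a patch that turns the data in src into the data in tgt.

  src and tgt are lists (or tuples) of strings whose concatenations are
  the source and target data.  Runs imgdiff when imgdiff=True and bsdiff
  otherwise, and returns the patch contents as a string.
  """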
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)

    try:
      os.unlink(patchfile)
    except OSError:
      pass

    if imgdiff:
      p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                          stdout=open("/dev/null", "a"),
                          stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    try:
      os.unlink(srcfile)
      os.unlink(tgtfile)
      os.unlink(patchfile)
    except OSError:
      pass


class Image(object):
  def ReadRangeSet(self, ranges):
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    raise NotImplementedError


class EmptyImage(Image):
  """A zero-length image."""
  blocksize = 4096
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  total_blocks = 0
  file_map = {}

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()


class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
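    """Wrap data (a string) as an image.

    If len(data) is not a multiple of blocksize, exactly one of trim or
    pad must be set: trim drops the partial block at the end, while pad
    zero-fills it (and records the padded block in clobbered_blocks).
    """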
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Otherwise the last block may get skipped if unchanged
    # for an incremental, but would fail the post-install verification if it
    # has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      self.clobbered_blocks = RangeSet(
          data=(self.total_blocks-1, self.total_blocks))
    else:
      self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    self.file_map = {"__ZERO": RangeSet(zero_blocks),
                     "__NONZERO": RangeSet(nonzero_blocks)}

    if self.clobbered_blocks:
      self.file_map["__COPY"] = self.clobbered_blocks

  def ReadRangeSet(self, ranges):
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # ReadRangeSet() returns a list of strings; join them before hashing.
      return sha1("".join(self.ReadRangeSet(ranges))).hexdigest()
    else:
      return sha1(self.data).hexdigest()


class Transfer(object):
  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    return (sum(sr.size() for (_, sr) in self.stash_before) -
            sum(sr.size() for (_, sr) in self.use_stash))

  def ConvertToNew(self):
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
            " to " + str(self.tgt_ranges) + ">")


# BlockImageDiff works on two image objects.  An image object is
# anything that provides the following attributes:
#
#    blocksize: the size in bytes of a block, currently must be 4096.
#
#    total_blocks: the total size of the partition/image, in blocks.
#
#    care_map: a RangeSet containing which blocks (in the range [0,
#      total_blocks) we actually care about; i.e. which blocks contain
#      data.
#
#    file_map: a dict that partitions the blocks contained in care_map
#      into smaller domains that are useful for doing diffs on.
#      (Typically a domain is a file, and the key in file_map is the
#      pathname.)
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS.  They need to be excluded when
#      verifying the partition integrity.
#
#    extended: a RangeSet of blocks in the care_map for which an explicit
#      zero is emitted in the target (the bug 20881595 workaround in
#      WriteTransfers() below); may be empty.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet.  The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (ie, all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.
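#
# A minimal sketch of a conforming image object (hypothetical, shown for
# illustration only; EmptyImage and DataImage above are the real
# implementations used here):
#
#   class RamImage(object):
#     blocksize = 4096
#     total_blocks = 8
#     care_map = RangeSet(data=(0, 8))
#     clobbered_blocks = RangeSet()
#     extended = RangeSet()
#
#     def __init__(self, data):     # data is 8 blocks' worth of raw bytes
#       self.data = data
#       self.file_map = {"/some/file": self.care_map}
#
#     def ReadRangeSet(self, ranges):
#       return [self.data[s*4096:e*4096] for (s, e) in ranges]
#
#     def TotalSha1(self, include_clobbered_blocks=False):
#       return sha1(self.data).hexdigest()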

class BlockImageDiff(object):
  def __init__(self, tgt, src=None, threads=None, version=3):
    if threads is None:
      threads = multiprocessing.cpu_count() // 2
      if threads == 0:
        threads = 1
    self.threads = threads
    self.version = version
    self.transfers = []
    self.src_basenames = {}
    self.src_numpatterns = {}

    assert version in (1, 2, 3)

    self.tgt = tgt
    if src is None:
      src = EmptyImage()
    self.src = src

    # The updater code that installs the patch always uses 4k blocks.
    assert tgt.blocksize == 4096
    assert src.blocksize == 4096

    # The range sets in each filemap should comprise a partition of
    # the care map.
    self.AssertPartition(src.care_map, src.file_map.values())
    self.AssertPartition(tgt.care_map, tgt.file_map.values())

  def Compute(self, prefix):
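    """Compute the list of transfers and write the <prefix>.transfer.list,
    <prefix>.new.dat and <prefix>.patch.dat files."""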
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

  def HashBlocks(self, source, ranges):  # pylint: disable=no-self-use
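    """Return the hex SHA-1 of the data in the given ranges of source."""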
    data = source.ReadRangeSet(ranges)
    ctx = sha1()

    for p in data:
      ctx.update(p)

    return ctx.hexdigest()

  def WriteTransfers(self, prefix):
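    """Write the ordered transfer list for this diff to
    <prefix>.transfer.list, tracking the maximum stash space used."""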
    out = []

    total = 0

    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      if self.version < 2:
        assert not xf.stash_before
        assert not xf.use_stash

      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        stashed_blocks += sr.size()
        if self.version == 2:
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw()
      elif self.version >= 2:

        #   <# blocks> <src ranges>
        #     OR
        #   <# blocks> <src ranges> <src locs> <stash refs...>
        #     OR
        #   <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          stashed_blocks -= sr.size()
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Otherwise it may take up extra space and
            # lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            stashes[sh] -= 1
            if stashes[sh] == 0:
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # Take into account automatic stashing of overlapping blocks.
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # Take into account automatic stashing of overlapping blocks.
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'" % xf.style)

      if free_string:
        out.append("".join(free_string))

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image and b) will not be touched by dm-verity.
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
    if new_dontcare:
      out.append("erase %s\n" % (new_dontcare.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))   # format version number
    out.insert(1, str(total) + "\n")
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, max_stashed_size, max_allowed,
                  max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, max_stashed_size))

  def ReviseStashSize(self):
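    """Keep the runtime stash size within the limit derived from
    cache_size * stash_threshold by converting commands that would
    overflow the stash into "new" commands."""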
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands, computing the required stash size on
    # the fly. If a command requires more stash space than is available, we
    # delete the stash by replacing the command that uses it with a "new"
    # command instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d  %9s  %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d  %9s  %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))
          new_blocks += sr.size()

        cmd.ConvertToNew()

    print("  Total %d blocks are packed as new blocks due to insufficient "
          "cache size." % (new_blocks,))

  def ComputePatches(self, prefix):
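    """Turn "diff" transfers into "move"/"bsdiff"/"imgdiff" and write the
    <prefix>.new.dat and <prefix>.patch.dat files."""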
    print("Reticulating splines...")
    diff_q = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for xf in self.transfers:
        if xf.style == "zero":
          pass
        elif xf.style == "new":
          for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
            new_f.write(piece)
        elif xf.style == "diff":
          src = self.src.ReadRangeSet(xf.src_ranges)
          tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)

          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #    ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).

          src_sha1 = sha1()
          for p in src:
            src_sha1.update(p)
          tgt_sha1 = sha1()
          tgt_size = 0
          for p in tgt:
            tgt_sha1.update(p)
            tgt_size += len(p)

          if src_sha1.digest() == tgt_sha1.digest():
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            #  - the source and target files are monotonic (ie, the
            #    data is stored with blocks in increasing order), and
            #  - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate.  (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_q.append((tgt_size, src, tgt, xf, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_q:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")
      diff_q.sort()

      patches = [None] * patch_num

      # TODO: Rewrite with multiprocessing.ThreadPool?
      lock = threading.Lock()
      def diff_worker():
        while True:
          with lock:
            if not diff_q:
              return
            tgt_size, src, tgt, xf, patchnum = diff_q.pop()
          patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
          size = len(patch)
          with lock:
            patches[patchnum] = (patch, xf)
            print("%10d %10d (%6.2f%%) %7s %s" % (
                size, tgt_size, size * 100.0 / tgt_size, xf.style,
                xf.tgt_name if xf.tgt_name == xf.src_name else (
                    xf.tgt_name + " (from " + xf.src_name + ")")))

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()
    else:
      patches = []

    p = 0
    with open(prefix + ".patch.dat", "wb") as patch_f:
      for patch, xf in patches:
        xf.patch_start = p
        xf.patch_len = len(patch)
        patch_f.write(patch)
        p += len(patch)

  def AssertSequenceGood(self):
    # Simulate the sequences of transfers we will output, and check that:
    # - we never read a block after writing it, and
    # - we write every block we care about exactly once.

    # Start with no blocks having been touched yet.
    touched = RangeSet()

    # Imagine processing the transfers in order.
    for xf in self.transfers:
      # Check that the input blocks for this transfer haven't yet been touched.

      x = xf.src_ranges
      if self.version >= 2:
        for _, sr in xf.use_stash:
          x = x.subtract(sr)

      assert not touched.overlaps(x)
      # Check that the output blocks for this transfer haven't yet been touched.
      assert not touched.overlaps(xf.tgt_ranges)
      # Touch all the blocks written by this transfer.
      touched = touched.union(xf.tgt_ranges)

    # Check that we've written every target block.
    assert touched == self.tgt.care_map

  def ImproveVertexSequence(self):
    print("Improving vertex order...")

    # At this point our digraph is acyclic; we reversed any edges that
    # were backwards in the heuristically-generated sequence.  The
    # previously-generated order is still acceptable, but we hope to
    # find a better order that needs less memory for stashed data.
    # Now we do a topological sort to generate a new vertex order,
    # using a greedy algorithm to choose which vertex goes next
    # whenever we have a choice.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    L = []   # the new vertex order

    # S is the set of sources in the remaining graph; we always choose
    # the one that leaves the least amount of stashed data after it's
    # executed.
    S = [(u.NetStashChange(), u.order, u) for u in self.transfers
         if not u.incoming]
    heapq.heapify(S)

    while S:
      _, _, xf = heapq.heappop(S)
      L.append(xf)
      for u in xf.outgoing:
        del u.incoming[xf]
        if not u.incoming:
          heapq.heappush(S, (u.NetStashChange(), u.order, u))

    # If this fails then our graph had a cycle.
    assert len(L) == len(self.transfers)

    self.transfers = L
    for i, xf in enumerate(L):
      xf.order = i

  def RemoveBackwardEdges(self):
    print("Removing backward edges...")
    in_order = 0
    out_of_order = 0
    lost_source = 0

    for xf in self.transfers:
      lost = 0
      size = xf.src_ranges.size()
      for u in xf.goes_before:
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo.  trim the blocks that u writes from xf's
          # source, so that xf can go after u.
          out_of_order += 1
          assert xf.src_ranges.overlaps(u.tgt_ranges)
          xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
          xf.intact = False

      if xf.style == "diff" and not xf.src_ranges:
        # nothing left to diff from; treat as new data
        xf.style = "new"

      lost = size - xf.src_ranges.size()
      lost_source += lost

    print(("  %d/%d dependencies (%.2f%%) were violated; "
           "%d source blocks removed.") %
          (out_of_order, in_order + out_of_order,
           (out_of_order * 100.0 / (in_order + out_of_order))
           if (in_order + out_of_order) else 0.0,
           lost_source))

  def ReverseBackwardEdges(self):
    print("Reversing backward edges...")
    in_order = 0
    out_of_order = 0
    stashes = 0
    stash_size = 0

    for xf in self.transfers:
      for u in xf.goes_before.copy():
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo.  modify u to stash the blocks that it
          # writes that xf wants to read, and then require u to go
          # before xf.
          out_of_order += 1

          overlap = xf.src_ranges.intersect(u.tgt_ranges)
          assert overlap

          u.stash_before.append((stashes, overlap))
          xf.use_stash.append((stashes, overlap))
          stashes += 1
          stash_size += overlap.size()

          # reverse the edge direction; now xf must go after u
          del xf.goes_before[u]
          del u.goes_after[xf]
          xf.goes_after[u] = None    # value doesn't matter
          u.goes_before[xf] = None

    print(("  %d/%d dependencies (%.2f%%) were violated; "
           "%d source blocks stashed.") %
          (out_of_order, in_order + out_of_order,
           (out_of_order * 100.0 / (in_order + out_of_order))
           if (in_order + out_of_order) else 0.0,
           stash_size))

  def FindVertexSequence(self):
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left.  Left-pointing
    # edges will get removed to turn the digraph into a DAG.  In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    while G:

      # Put all sinks at the end of the sequence.
      while True:
        sinks = [u for u in G if not u.outgoing]
        if not sinks:
          break
        for u in sinks:
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            del iu.outgoing[u]

      # Put all the sources at the beginning of the sequence.
      while True:
        sources = [u for u in G if not u.incoming]
        if not sources:
          break
        for u in sources:
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            del iu.incoming[u]

      if not G:
        break

      # Find the "best" vertex to put next.  "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      max_d = None
      best_u = None
      for u in G:
        d = sum(u.outgoing.values()) - sum(u.incoming.values())
        if best_u is None or d > max_d:
          max_d = d
          best_u = u

      u = best_u
      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        del iu.incoming[u]
      for iu in u.incoming:
        del iu.outgoing[u]

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers

  def GenerateDigraph(self):
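    """Build the ordering dependencies among transfers: if the blocks
    written by transfer a are read by transfer b, then b must go
    before a."""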
    print("Generating digraph...")
    for a in self.transfers:
      for b in self.transfers:
        if a is b:
          continue

        # If the blocks written by A are read by B, then B needs to go before A.
        i = a.tgt_ranges.intersect(b.src_ranges)
        if i:
          if b.src_name == "__ZERO":
            # the cost of removing source blocks for the __ZERO domain
            # is (nearly) zero.
            size = 0
          else:
            size = i.size()
          b.goes_before[a] = size
          a.goes_after[b] = size

  def FindTransfers(self):
    """Parse the file_map to generate all the transfers."""

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer().

      For BBOTA v3, we need to stash source blocks to support the resumable
      feature. However, as files grow and the cache partition shrinks, the
      source blocks may become too large to stash. If a file occupies too
      many blocks (more than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it into
      smaller pieces, generating multiple Transfer()s.

      The downside is that after splitting, we can no longer use imgdiff,
      only bsdiff."""

      MAX_BLOCKS_PER_DIFF_TRANSFER = 1024

      # We care about diff transfers only.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      # Change nothing for small files.
      if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
          src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      pieces = 0
      while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
             src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
        src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
        Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
                 by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
                 by_id)

    empty = RangeSet()
    for tgt_fn, tgt_ranges in self.tgt.file_map.items():
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros.  We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#").  (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

  def AbbreviateSourceNames(self):
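    """Index the source filenames by basename and by 'number pattern'
    (basename with runs of digits replaced by "#") for the fuzzy matching
    done in FindTransfers()."""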
    for k in self.src.file_map.keys():
      b = os.path.basename(k)
      self.src_basenames[b] = k
      b = re.sub("[0-9]+", "#", b)
      self.src_numpatterns[b] = k

  @staticmethod
  def AssertPartition(total, seq):
    """Assert that all the RangeSets in 'seq' form a partition of the
    'total' RangeSet (ie, they are nonintersecting and their union
    equals 'total')."""
    so_far = RangeSet()
    for i in seq:
      assert not so_far.overlaps(i)
      so_far = so_far.union(i)
    assert so_far == total