# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Tao Bao8dcf7382015-05-21 14:09:49 -070019import common
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070024import re
25import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import threading
27import tempfile
28
Dan Albert8b72aef2015-03-23 19:13:21 -070029from rangelib import RangeSet
30
Doug Zongkerfc44a512014-08-26 13:10:25 -070031
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070032__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
33
Dan Albert8b72aef2015-03-23 19:13:21 -070034
Doug Zongkerfc44a512014-08-26 13:10:25 -070035def compute_patch(src, tgt, imgdiff=False):
36 srcfd, srcfile = tempfile.mkstemp(prefix="src-")
37 tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
38 patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
39 os.close(patchfd)
40
41 try:
42 with os.fdopen(srcfd, "wb") as f_src:
43 for p in src:
44 f_src.write(p)
45
46 with os.fdopen(tgtfd, "wb") as f_tgt:
47 for p in tgt:
48 f_tgt.write(p)
49 try:
50 os.unlink(patchfile)
51 except OSError:
52 pass
53 if imgdiff:
54 p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
55 stdout=open("/dev/null", "a"),
56 stderr=subprocess.STDOUT)
57 else:
58 p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])
59
60 if p:
61 raise ValueError("diff failed: " + str(p))
62
63 with open(patchfile, "rb") as f:
64 return f.read()
65 finally:
66 try:
67 os.unlink(srcfile)
68 os.unlink(tgtfile)
69 os.unlink(patchfile)
70 except OSError:
71 pass
72
Dan Albert8b72aef2015-03-23 19:13:21 -070073
74class Image(object):
75 def ReadRangeSet(self, ranges):
76 raise NotImplementedError
77
Tao Bao68658c02015-06-01 13:40:49 -070078 def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert8b72aef2015-03-23 19:13:21 -070079 raise NotImplementedError
80
81
82class EmptyImage(Image):
Doug Zongkerfc44a512014-08-26 13:10:25 -070083 """A zero-length image."""
84 blocksize = 4096
85 care_map = RangeSet()
Tao Baoff777812015-05-12 11:42:31 -070086 clobbered_blocks = RangeSet()
Tao Baoe9b61912015-07-09 17:37:49 -070087 extended = RangeSet()
Doug Zongkerfc44a512014-08-26 13:10:25 -070088 total_blocks = 0
89 file_map = {}
90 def ReadRangeSet(self, ranges):
91 return ()
Tao Bao68658c02015-06-01 13:40:49 -070092 def TotalSha1(self, include_clobbered_blocks=False):
93 # EmptyImage always carries empty clobbered_blocks, so
94 # include_clobbered_blocks can be ignored.
95 assert self.clobbered_blocks.size() == 0
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070096 return sha1().hexdigest()
97
98
Dan Albert8b72aef2015-03-23 19:13:21 -070099class DataImage(Image):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700100 """An image wrapped around a single string of data."""
101
102 def __init__(self, data, trim=False, pad=False):
103 self.data = data
104 self.blocksize = 4096
105
106 assert not (trim and pad)
107
108 partial = len(self.data) % self.blocksize
Tao Bao7589e962015-09-05 20:35:32 -0700109 padded = False
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700110 if partial > 0:
111 if trim:
112 self.data = self.data[:-partial]
113 elif pad:
114 self.data += '\0' * (self.blocksize - partial)
Tao Bao7589e962015-09-05 20:35:32 -0700115 padded = True
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700116 else:
117 raise ValueError(("data for DataImage must be multiple of %d bytes "
118 "unless trim or pad is specified") %
119 (self.blocksize,))
120
121 assert len(self.data) % self.blocksize == 0
122
123 self.total_blocks = len(self.data) / self.blocksize
124 self.care_map = RangeSet(data=(0, self.total_blocks))
Tao Bao7589e962015-09-05 20:35:32 -0700125 # When the last block is padded, we always write the whole block even for
126 # incremental OTAs. Because otherwise the last block may get skipped if
127 # unchanged for an incremental, but would fail the post-install
128 # verification if it has non-zero contents in the padding bytes.
129 # Bug: 23828506
130 if padded:
Tao Bao42206c32015-09-08 13:39:40 -0700131 clobbered_blocks = [self.total_blocks-1, self.total_blocks]
Tao Bao7589e962015-09-05 20:35:32 -0700132 else:
Tao Bao42206c32015-09-08 13:39:40 -0700133 clobbered_blocks = []
134 self.clobbered_blocks = clobbered_blocks
Tao Baoe9b61912015-07-09 17:37:49 -0700135 self.extended = RangeSet()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700136
137 zero_blocks = []
138 nonzero_blocks = []
139 reference = '\0' * self.blocksize
140
Tao Bao7589e962015-09-05 20:35:32 -0700141 for i in range(self.total_blocks-1 if padded else self.total_blocks):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700142 d = self.data[i*self.blocksize : (i+1)*self.blocksize]
143 if d == reference:
144 zero_blocks.append(i)
145 zero_blocks.append(i+1)
146 else:
147 nonzero_blocks.append(i)
148 nonzero_blocks.append(i+1)
149
Tao Bao42206c32015-09-08 13:39:40 -0700150 assert zero_blocks or nonzero_blocks or clobbered_blocks
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700151
Tao Bao42206c32015-09-08 13:39:40 -0700152 self.file_map = dict()
153 if zero_blocks:
154 self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
155 if nonzero_blocks:
156 self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
157 if clobbered_blocks:
158 self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)
Tao Bao7589e962015-09-05 20:35:32 -0700159
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700160 def ReadRangeSet(self, ranges):
161 return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]
162
Tao Bao68658c02015-06-01 13:40:49 -0700163 def TotalSha1(self, include_clobbered_blocks=False):
Tao Bao7589e962015-09-05 20:35:32 -0700164 if not include_clobbered_blocks:
165 ranges = self.care_map.subtract(self.clobbered_blocks)
166 return sha1(self.ReadRangeSet(ranges)).hexdigest()
167 else:
168 return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700169
Doug Zongkerfc44a512014-08-26 13:10:25 -0700170
171class Transfer(object):
172 def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
173 self.tgt_name = tgt_name
174 self.src_name = src_name
175 self.tgt_ranges = tgt_ranges
176 self.src_ranges = src_ranges
177 self.style = style
178 self.intact = (getattr(tgt_ranges, "monotonic", False) and
179 getattr(src_ranges, "monotonic", False))
Tao Baob8c87172015-03-19 19:42:12 -0700180
181 # We use OrderedDict rather than dict so that the output is repeatable;
182 # otherwise it would depend on the hash values of the Transfer objects.
183 self.goes_before = OrderedDict()
184 self.goes_after = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700185
Doug Zongker62338182014-09-08 08:29:55 -0700186 self.stash_before = []
187 self.use_stash = []
188
Doug Zongkerfc44a512014-08-26 13:10:25 -0700189 self.id = len(by_id)
190 by_id.append(self)
191
Doug Zongker62338182014-09-08 08:29:55 -0700192 def NetStashChange(self):
193 return (sum(sr.size() for (_, sr) in self.stash_before) -
194 sum(sr.size() for (_, sr) in self.use_stash))
195
Tao Bao82c47982015-08-17 09:45:13 -0700196 def ConvertToNew(self):
197 assert self.style != "new"
198 self.use_stash = []
199 self.style = "new"
200 self.src_ranges = RangeSet()
201
Doug Zongkerfc44a512014-08-26 13:10:25 -0700202 def __str__(self):
203 return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
204 " to " + str(self.tgt_ranges) + ">")
205
206
# BlockImageDiff works on two image objects.  An image object is
# anything that provides the following attributes:
#
#    blocksize: the size in bytes of a block, currently must be 4096.
#
#    total_blocks: the total size of the partition/image, in blocks.
#
#    care_map: a RangeSet containing which blocks (in the range [0,
#      total_blocks) we actually care about; i.e. which blocks contain
#      data.
#
#    file_map: a dict that partitions the blocks contained in care_map
#      into smaller domains that are useful for doing diffs on.
#      (Typically a domain is a file, and the key in file_map is the
#      pathname.)
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS. They need to be excluded when
#      verifying the partition integrity.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet.  The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (ie, all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.
242
243class BlockImageDiff(object):
Tao Baoeba409c2015-10-21 13:30:43 -0700244 def __init__(self, tgt, src=None, threads=None, version=4):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700245 if threads is None:
246 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700247 if threads == 0:
248 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700249 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700250 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700251 self.transfers = []
252 self.src_basenames = {}
253 self.src_numpatterns = {}
Tao Baod8d14be2016-02-04 14:26:02 -0800254 self._max_stashed_size = 0
Doug Zongker62338182014-09-08 08:29:55 -0700255
Tao Baoeba409c2015-10-21 13:30:43 -0700256 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700257
258 self.tgt = tgt
259 if src is None:
260 src = EmptyImage()
261 self.src = src
262
263 # The updater code that installs the patch always uses 4k blocks.
264 assert tgt.blocksize == 4096
265 assert src.blocksize == 4096
266
267 # The range sets in each filemap should comprise a partition of
268 # the care map.
269 self.AssertPartition(src.care_map, src.file_map.values())
270 self.AssertPartition(tgt.care_map, tgt.file_map.values())
271
Tao Baod8d14be2016-02-04 14:26:02 -0800272 @property
273 def max_stashed_size(self):
274 return self._max_stashed_size
275
Doug Zongkerfc44a512014-08-26 13:10:25 -0700276 def Compute(self, prefix):
277 # When looking for a source file to use as the diff input for a
278 # target file, we try:
279 # 1) an exact path match if available, otherwise
280 # 2) a exact basename match if available, otherwise
281 # 3) a basename match after all runs of digits are replaced by
282 # "#" if available, otherwise
283 # 4) we have no source for this target.
284 self.AbbreviateSourceNames()
285 self.FindTransfers()
286
287 # Find the ordering dependencies among transfers (this is O(n^2)
288 # in the number of transfers).
289 self.GenerateDigraph()
290 # Find a sequence of transfers that satisfies as many ordering
291 # dependencies as possible (heuristically).
292 self.FindVertexSequence()
293 # Fix up the ordering dependencies that the sequence didn't
294 # satisfy.
Doug Zongker62338182014-09-08 08:29:55 -0700295 if self.version == 1:
296 self.RemoveBackwardEdges()
297 else:
298 self.ReverseBackwardEdges()
299 self.ImproveVertexSequence()
300
Tao Bao82c47982015-08-17 09:45:13 -0700301 # Ensure the runtime stash size is under the limit.
302 if self.version >= 2 and common.OPTIONS.cache_size is not None:
303 self.ReviseStashSize()
304
Doug Zongkerfc44a512014-08-26 13:10:25 -0700305 # Double-check our work.
306 self.AssertSequenceGood()
307
308 self.ComputePatches(prefix)
309 self.WriteTransfers(prefix)
310
Dan Albert8b72aef2015-03-23 19:13:21 -0700311 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000312 data = source.ReadRangeSet(ranges)
313 ctx = sha1()
314
315 for p in data:
316 ctx.update(p)
317
318 return ctx.hexdigest()
319
Doug Zongkerfc44a512014-08-26 13:10:25 -0700320 def WriteTransfers(self, prefix):
321 out = []
322
Doug Zongkerfc44a512014-08-26 13:10:25 -0700323 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700324
Doug Zongker62338182014-09-08 08:29:55 -0700325 stashes = {}
326 stashed_blocks = 0
327 max_stashed_blocks = 0
328
329 free_stash_ids = []
330 next_stash_id = 0
331
Doug Zongkerfc44a512014-08-26 13:10:25 -0700332 for xf in self.transfers:
333
Doug Zongker62338182014-09-08 08:29:55 -0700334 if self.version < 2:
335 assert not xf.stash_before
336 assert not xf.use_stash
337
338 for s, sr in xf.stash_before:
339 assert s not in stashes
340 if free_stash_ids:
341 sid = heapq.heappop(free_stash_ids)
342 else:
343 sid = next_stash_id
344 next_stash_id += 1
345 stashes[s] = sid
Sami Tolvanendd67a292014-12-09 16:40:34 +0000346 if self.version == 2:
caozhiyuan21b37d82015-10-21 15:14:03 +0800347 stashed_blocks += sr.size()
Sami Tolvanendd67a292014-12-09 16:40:34 +0000348 out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
349 else:
350 sh = self.HashBlocks(self.src, sr)
351 if sh in stashes:
352 stashes[sh] += 1
353 else:
354 stashes[sh] = 1
caozhiyuan21b37d82015-10-21 15:14:03 +0800355 stashed_blocks += sr.size()
Sami Tolvanendd67a292014-12-09 16:40:34 +0000356 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700357
358 if stashed_blocks > max_stashed_blocks:
359 max_stashed_blocks = stashed_blocks
360
Jesse Zhao7b985f62015-03-02 16:53:08 -0800361 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800362 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800363
Doug Zongker62338182014-09-08 08:29:55 -0700364 if self.version == 1:
Tao Bao4fcb77e2015-10-21 13:36:01 -0700365 src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
Sami Tolvanendd67a292014-12-09 16:40:34 +0000366 elif self.version >= 2:
Doug Zongker62338182014-09-08 08:29:55 -0700367
368 # <# blocks> <src ranges>
369 # OR
370 # <# blocks> <src ranges> <src locs> <stash refs...>
371 # OR
372 # <# blocks> - <stash refs...>
373
374 size = xf.src_ranges.size()
Dan Albert8b72aef2015-03-23 19:13:21 -0700375 src_str = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700376
377 unstashed_src_ranges = xf.src_ranges
378 mapped_stashes = []
379 for s, sr in xf.use_stash:
380 sid = stashes.pop(s)
Doug Zongker62338182014-09-08 08:29:55 -0700381 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
Sami Tolvanendd67a292014-12-09 16:40:34 +0000382 sh = self.HashBlocks(self.src, sr)
Doug Zongker62338182014-09-08 08:29:55 -0700383 sr = xf.src_ranges.map_within(sr)
384 mapped_stashes.append(sr)
Sami Tolvanendd67a292014-12-09 16:40:34 +0000385 if self.version == 2:
Dan Albert8b72aef2015-03-23 19:13:21 -0700386 src_str.append("%d:%s" % (sid, sr.to_string_raw()))
Tao Baobb625d22015-08-13 14:44:15 -0700387 # A stash will be used only once. We need to free the stash
388 # immediately after the use, instead of waiting for the automatic
389 # clean-up at the end. Because otherwise it may take up extra space
390 # and lead to OTA failures.
391 # Bug: 23119955
392 free_string.append("free %d\n" % (sid,))
caozhiyuan21b37d82015-10-21 15:14:03 +0800393 free_size += sr.size()
Sami Tolvanendd67a292014-12-09 16:40:34 +0000394 else:
395 assert sh in stashes
Dan Albert8b72aef2015-03-23 19:13:21 -0700396 src_str.append("%s:%s" % (sh, sr.to_string_raw()))
Sami Tolvanendd67a292014-12-09 16:40:34 +0000397 stashes[sh] -= 1
398 if stashes[sh] == 0:
caozhiyuan21b37d82015-10-21 15:14:03 +0800399 free_size += sr.size()
Sami Tolvanendd67a292014-12-09 16:40:34 +0000400 free_string.append("free %s\n" % (sh))
401 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700402 heapq.heappush(free_stash_ids, sid)
403
404 if unstashed_src_ranges:
Dan Albert8b72aef2015-03-23 19:13:21 -0700405 src_str.insert(1, unstashed_src_ranges.to_string_raw())
Doug Zongker62338182014-09-08 08:29:55 -0700406 if xf.use_stash:
407 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Dan Albert8b72aef2015-03-23 19:13:21 -0700408 src_str.insert(2, mapped_unstashed.to_string_raw())
Doug Zongker62338182014-09-08 08:29:55 -0700409 mapped_stashes.append(mapped_unstashed)
410 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
411 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700412 src_str.insert(1, "-")
Doug Zongker62338182014-09-08 08:29:55 -0700413 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
414
Dan Albert8b72aef2015-03-23 19:13:21 -0700415 src_str = " ".join(src_str)
Doug Zongker62338182014-09-08 08:29:55 -0700416
Sami Tolvanendd67a292014-12-09 16:40:34 +0000417 # all versions:
Doug Zongker62338182014-09-08 08:29:55 -0700418 # zero <rangeset>
419 # new <rangeset>
420 # erase <rangeset>
421 #
422 # version 1:
423 # bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
424 # imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
425 # move <src rangeset> <tgt rangeset>
426 #
427 # version 2:
Dan Albert8b72aef2015-03-23 19:13:21 -0700428 # bsdiff patchstart patchlen <tgt rangeset> <src_str>
429 # imgdiff patchstart patchlen <tgt rangeset> <src_str>
430 # move <tgt rangeset> <src_str>
Sami Tolvanendd67a292014-12-09 16:40:34 +0000431 #
432 # version 3:
Dan Albert8b72aef2015-03-23 19:13:21 -0700433 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
434 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
435 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700436
437 tgt_size = xf.tgt_ranges.size()
438
439 if xf.style == "new":
440 assert xf.tgt_ranges
441 out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
442 total += tgt_size
443 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700444 assert xf.tgt_ranges
445 assert xf.src_ranges.size() == tgt_size
446 if xf.src_ranges != xf.tgt_ranges:
Doug Zongker62338182014-09-08 08:29:55 -0700447 if self.version == 1:
448 out.append("%s %s %s\n" % (
449 xf.style,
450 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
451 elif self.version == 2:
452 out.append("%s %s %s\n" % (
453 xf.style,
Dan Albert8b72aef2015-03-23 19:13:21 -0700454 xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanendd67a292014-12-09 16:40:34 +0000455 elif self.version >= 3:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100456 # take into account automatic stashing of overlapping blocks
457 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700458 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100459 if temp_stash_usage > max_stashed_blocks:
460 max_stashed_blocks = temp_stash_usage
461
Sami Tolvanendd67a292014-12-09 16:40:34 +0000462 out.append("%s %s %s %s\n" % (
463 xf.style,
464 self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert8b72aef2015-03-23 19:13:21 -0700465 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700466 total += tgt_size
467 elif xf.style in ("bsdiff", "imgdiff"):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700468 assert xf.tgt_ranges
469 assert xf.src_ranges
Doug Zongker62338182014-09-08 08:29:55 -0700470 if self.version == 1:
471 out.append("%s %d %d %s %s\n" % (
472 xf.style, xf.patch_start, xf.patch_len,
473 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
474 elif self.version == 2:
475 out.append("%s %d %d %s %s\n" % (
476 xf.style, xf.patch_start, xf.patch_len,
Dan Albert8b72aef2015-03-23 19:13:21 -0700477 xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanendd67a292014-12-09 16:40:34 +0000478 elif self.version >= 3:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100479 # take into account automatic stashing of overlapping blocks
480 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700481 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100482 if temp_stash_usage > max_stashed_blocks:
483 max_stashed_blocks = temp_stash_usage
484
Sami Tolvanendd67a292014-12-09 16:40:34 +0000485 out.append("%s %d %d %s %s %s %s\n" % (
486 xf.style,
487 xf.patch_start, xf.patch_len,
488 self.HashBlocks(self.src, xf.src_ranges),
489 self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert8b72aef2015-03-23 19:13:21 -0700490 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700491 total += tgt_size
492 elif xf.style == "zero":
493 assert xf.tgt_ranges
494 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
495 if to_zero:
496 out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
497 total += to_zero.size()
498 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700499 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700500
Sami Tolvanendd67a292014-12-09 16:40:34 +0000501 if free_string:
502 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800503 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000504
Tao Bao575d68a2015-08-07 19:49:45 -0700505 if self.version >= 2 and common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700506 # Sanity check: abort if we're going to need more stash space than
507 # the allowed size (cache_size * threshold). There are two purposes
508 # of having a threshold here. a) Part of the cache may have been
509 # occupied by some recovery logs. b) It will buy us some time to deal
510 # with the oversize issue.
511 cache_size = common.OPTIONS.cache_size
512 stash_threshold = common.OPTIONS.stash_threshold
513 max_allowed = cache_size * stash_threshold
514 assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
515 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
516 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
517 self.tgt.blocksize, max_allowed, cache_size,
518 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700519
Tao Baoe9b61912015-07-09 17:37:49 -0700520 # Zero out extended blocks as a workaround for bug 20881595.
521 if self.tgt.extended:
522 out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
Tao Baob32d56e2015-09-09 11:55:01 -0700523 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700524
525 # We erase all the blocks on the partition that a) don't contain useful
526 # data in the new image and b) will not be touched by dm-verity.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700527 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700528 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
529 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
530 if new_dontcare:
531 out.append("erase %s\n" % (new_dontcare.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700532
533 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700534 out.insert(1, "%d\n" % (total,))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700535 if self.version >= 2:
536 # version 2 only: after the total block count, we give the number
537 # of stash slots needed, and the maximum size needed (in blocks)
538 out.insert(2, str(next_stash_id) + "\n")
539 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700540
541 with open(prefix + ".transfer.list", "wb") as f:
542 for i in out:
543 f.write(i)
544
Doug Zongker62338182014-09-08 08:29:55 -0700545 if self.version >= 2:
Tao Baod8d14be2016-02-04 14:26:02 -0800546 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
Tao Bao575d68a2015-08-07 19:49:45 -0700547 OPTIONS = common.OPTIONS
548 if OPTIONS.cache_size is not None:
549 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
550 print("max stashed blocks: %d (%d bytes), "
551 "limit: %d bytes (%.2f%%)\n" % (
Tao Baod8d14be2016-02-04 14:26:02 -0800552 max_stashed_blocks, self._max_stashed_size, max_allowed,
553 self._max_stashed_size * 100.0 / max_allowed))
Tao Bao575d68a2015-08-07 19:49:45 -0700554 else:
555 print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
Tao Baod8d14be2016-02-04 14:26:02 -0800556 max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700557
Tao Bao82c47982015-08-17 09:45:13 -0700558 def ReviseStashSize(self):
559 print("Revising stash size...")
560 stashes = {}
561
562 # Create the map between a stash and its def/use points. For example, for a
563 # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
564 for xf in self.transfers:
565 # Command xf defines (stores) all the stashes in stash_before.
566 for idx, sr in xf.stash_before:
567 stashes[idx] = (sr, xf)
568
569 # Record all the stashes command xf uses.
570 for idx, _ in xf.use_stash:
571 stashes[idx] += (xf,)
572
573 # Compute the maximum blocks available for stash based on /cache size and
574 # the threshold.
575 cache_size = common.OPTIONS.cache_size
576 stash_threshold = common.OPTIONS.stash_threshold
577 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
578
579 stashed_blocks = 0
Tao Bao9a5caf22015-08-25 15:10:10 -0700580 new_blocks = 0
Tao Bao82c47982015-08-17 09:45:13 -0700581
582 # Now go through all the commands. Compute the required stash size on the
583 # fly. If a command requires excess stash than available, it deletes the
584 # stash by replacing the command that uses the stash with a "new" command
585 # instead.
586 for xf in self.transfers:
587 replaced_cmds = []
588
589 # xf.stash_before generates explicit stash commands.
590 for idx, sr in xf.stash_before:
591 if stashed_blocks + sr.size() > max_allowed:
592 # We cannot stash this one for a later command. Find out the command
593 # that will use this stash and replace the command with "new".
594 use_cmd = stashes[idx][2]
595 replaced_cmds.append(use_cmd)
Tao Bao9a5caf22015-08-25 15:10:10 -0700596 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao82c47982015-08-17 09:45:13 -0700597 else:
598 stashed_blocks += sr.size()
599
600 # xf.use_stash generates free commands.
601 for _, sr in xf.use_stash:
602 stashed_blocks -= sr.size()
603
604 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
605 # ComputePatches(), they both have the style of "diff".
606 if xf.style == "diff" and self.version >= 3:
607 assert xf.tgt_ranges and xf.src_ranges
608 if xf.src_ranges.overlaps(xf.tgt_ranges):
609 if stashed_blocks + xf.src_ranges.size() > max_allowed:
610 replaced_cmds.append(xf)
Tao Bao9a5caf22015-08-25 15:10:10 -0700611 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao82c47982015-08-17 09:45:13 -0700612
613 # Replace the commands in replaced_cmds with "new"s.
614 for cmd in replaced_cmds:
615 # It no longer uses any commands in "use_stash". Remove the def points
616 # for all those stashes.
617 for idx, sr in cmd.use_stash:
618 def_cmd = stashes[idx][1]
619 assert (idx, sr) in def_cmd.stash_before
620 def_cmd.stash_before.remove((idx, sr))
621
Tianjie Xuebe39a02016-01-14 14:12:26 -0800622 # Add up blocks that violates space limit and print total number to
623 # screen later.
624 new_blocks += cmd.tgt_ranges.size()
Tao Bao82c47982015-08-17 09:45:13 -0700625 cmd.ConvertToNew()
626
Tianjie Xuebe39a02016-01-14 14:12:26 -0800627 num_of_bytes = new_blocks * self.tgt.blocksize
628 print(" Total %d blocks (%d bytes) are packed as new blocks due to "
629 "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao9a5caf22015-08-25 15:10:10 -0700630
Doug Zongkerfc44a512014-08-26 13:10:25 -0700631 def ComputePatches(self, prefix):
632 print("Reticulating splines...")
633 diff_q = []
634 patch_num = 0
635 with open(prefix + ".new.dat", "wb") as new_f:
636 for xf in self.transfers:
637 if xf.style == "zero":
638 pass
639 elif xf.style == "new":
640 for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
641 new_f.write(piece)
642 elif xf.style == "diff":
643 src = self.src.ReadRangeSet(xf.src_ranges)
644 tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
645
646 # We can't compare src and tgt directly because they may have
647 # the same content but be broken up into blocks differently, eg:
648 #
649 # ["he", "llo"] vs ["h", "ello"]
650 #
651 # We want those to compare equal, ideally without having to
652 # actually concatenate the strings (these may be tens of
653 # megabytes).
654
655 src_sha1 = sha1()
656 for p in src:
657 src_sha1.update(p)
658 tgt_sha1 = sha1()
659 tgt_size = 0
660 for p in tgt:
661 tgt_sha1.update(p)
662 tgt_size += len(p)
663
664 if src_sha1.digest() == tgt_sha1.digest():
665 # These are identical; we don't need to generate a patch,
666 # just issue copy commands on the device.
667 xf.style = "move"
668 else:
669 # For files in zip format (eg, APKs, JARs, etc.) we would
670 # like to use imgdiff -z if possible (because it usually
671 # produces significantly smaller patches than bsdiff).
672 # This is permissible if:
673 #
674 # - the source and target files are monotonic (ie, the
675 # data is stored with blocks in increasing order), and
676 # - we haven't removed any blocks from the source set.
677 #
678 # If these conditions are satisfied then appending all the
679 # blocks in the set together in order will produce a valid
680 # zip file (plus possibly extra zeros in the last block),
681 # which is what imgdiff needs to operate. (imgdiff is
682 # fine with extra zeros at the end of the file.)
683 imgdiff = (xf.intact and
684 xf.tgt_name.split(".")[-1].lower()
685 in ("apk", "jar", "zip"))
686 xf.style = "imgdiff" if imgdiff else "bsdiff"
687 diff_q.append((tgt_size, src, tgt, xf, patch_num))
688 patch_num += 1
689
690 else:
691 assert False, "unknown style " + xf.style
692
693 if diff_q:
694 if self.threads > 1:
695 print("Computing patches (using %d threads)..." % (self.threads,))
696 else:
697 print("Computing patches...")
698 diff_q.sort()
699
700 patches = [None] * patch_num
701
Dan Albert8b72aef2015-03-23 19:13:21 -0700702 # TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongkerfc44a512014-08-26 13:10:25 -0700703 lock = threading.Lock()
704 def diff_worker():
705 while True:
706 with lock:
Dan Albert8b72aef2015-03-23 19:13:21 -0700707 if not diff_q:
708 return
Doug Zongkerfc44a512014-08-26 13:10:25 -0700709 tgt_size, src, tgt, xf, patchnum = diff_q.pop()
710 patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
711 size = len(patch)
712 with lock:
713 patches[patchnum] = (patch, xf)
714 print("%10d %10d (%6.2f%%) %7s %s" % (
715 size, tgt_size, size * 100.0 / tgt_size, xf.style,
716 xf.tgt_name if xf.tgt_name == xf.src_name else (
717 xf.tgt_name + " (from " + xf.src_name + ")")))
718
719 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700720 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700721 for th in threads:
722 th.start()
723 while threads:
724 threads.pop().join()
725 else:
726 patches = []
727
728 p = 0
729 with open(prefix + ".patch.dat", "wb") as patch_f:
730 for patch, xf in patches:
731 xf.patch_start = p
732 xf.patch_len = len(patch)
733 patch_f.write(patch)
734 p += len(patch)
735
736 def AssertSequenceGood(self):
737 # Simulate the sequences of transfers we will output, and check that:
738 # - we never read a block after writing it, and
739 # - we write every block we care about exactly once.
740
741 # Start with no blocks having been touched yet.
742 touched = RangeSet()
743
744 # Imagine processing the transfers in order.
745 for xf in self.transfers:
746 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700747
748 x = xf.src_ranges
749 if self.version >= 2:
750 for _, sr in xf.use_stash:
751 x = x.subtract(sr)
752
753 assert not touched.overlaps(x)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700754 # Check that the output blocks for this transfer haven't yet been touched.
755 assert not touched.overlaps(xf.tgt_ranges)
756 # Touch all the blocks written by this transfer.
757 touched = touched.union(xf.tgt_ranges)
758
759 # Check that we've written every target block.
760 assert touched == self.tgt.care_map
761
Doug Zongker62338182014-09-08 08:29:55 -0700762 def ImproveVertexSequence(self):
763 print("Improving vertex order...")
764
765 # At this point our digraph is acyclic; we reversed any edges that
766 # were backwards in the heuristically-generated sequence. The
767 # previously-generated order is still acceptable, but we hope to
768 # find a better order that needs less memory for stashed data.
769 # Now we do a topological sort to generate a new vertex order,
770 # using a greedy algorithm to choose which vertex goes next
771 # whenever we have a choice.
772
773 # Make a copy of the edge set; this copy will get destroyed by the
774 # algorithm.
775 for xf in self.transfers:
776 xf.incoming = xf.goes_after.copy()
777 xf.outgoing = xf.goes_before.copy()
778
779 L = [] # the new vertex order
780
781 # S is the set of sources in the remaining graph; we always choose
782 # the one that leaves the least amount of stashed data after it's
783 # executed.
784 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
785 if not u.incoming]
786 heapq.heapify(S)
787
788 while S:
789 _, _, xf = heapq.heappop(S)
790 L.append(xf)
791 for u in xf.outgoing:
792 del u.incoming[xf]
793 if not u.incoming:
794 heapq.heappush(S, (u.NetStashChange(), u.order, u))
795
796 # if this fails then our graph had a cycle.
797 assert len(L) == len(self.transfers)
798
799 self.transfers = L
800 for i, xf in enumerate(L):
801 xf.order = i
802
Doug Zongkerfc44a512014-08-26 13:10:25 -0700803 def RemoveBackwardEdges(self):
804 print("Removing backward edges...")
805 in_order = 0
806 out_of_order = 0
807 lost_source = 0
808
809 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700810 lost = 0
811 size = xf.src_ranges.size()
812 for u in xf.goes_before:
813 # xf should go before u
814 if xf.order < u.order:
815 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700816 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700817 else:
818 # it doesn't, boo. trim the blocks that u writes from xf's
819 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700820 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700821 assert xf.src_ranges.overlaps(u.tgt_ranges)
822 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
823 xf.intact = False
824
825 if xf.style == "diff" and not xf.src_ranges:
826 # nothing left to diff from; treat as new data
827 xf.style = "new"
828
829 lost = size - xf.src_ranges.size()
830 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700831
832 print((" %d/%d dependencies (%.2f%%) were violated; "
833 "%d source blocks removed.") %
834 (out_of_order, in_order + out_of_order,
835 (out_of_order * 100.0 / (in_order + out_of_order))
836 if (in_order + out_of_order) else 0.0,
837 lost_source))
838
Doug Zongker62338182014-09-08 08:29:55 -0700839 def ReverseBackwardEdges(self):
840 print("Reversing backward edges...")
841 in_order = 0
842 out_of_order = 0
843 stashes = 0
844 stash_size = 0
845
846 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700847 for u in xf.goes_before.copy():
848 # xf should go before u
849 if xf.order < u.order:
850 # it does, hurray!
851 in_order += 1
852 else:
853 # it doesn't, boo. modify u to stash the blocks that it
854 # writes that xf wants to read, and then require u to go
855 # before xf.
856 out_of_order += 1
857
858 overlap = xf.src_ranges.intersect(u.tgt_ranges)
859 assert overlap
860
861 u.stash_before.append((stashes, overlap))
862 xf.use_stash.append((stashes, overlap))
863 stashes += 1
864 stash_size += overlap.size()
865
866 # reverse the edge direction; now xf must go after u
867 del xf.goes_before[u]
868 del u.goes_after[xf]
869 xf.goes_after[u] = None # value doesn't matter
870 u.goes_before[xf] = None
871
872 print((" %d/%d dependencies (%.2f%%) were violated; "
873 "%d source blocks stashed.") %
874 (out_of_order, in_order + out_of_order,
875 (out_of_order * 100.0 / (in_order + out_of_order))
876 if (in_order + out_of_order) else 0.0,
877 stash_size))
878
Doug Zongkerfc44a512014-08-26 13:10:25 -0700879 def FindVertexSequence(self):
880 print("Finding vertex sequence...")
881
882 # This is based on "A Fast & Effective Heuristic for the Feedback
883 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
884 # it as starting with the digraph G and moving all the vertices to
885 # be on a horizontal line in some order, trying to minimize the
886 # number of edges that end up pointing to the left. Left-pointing
887 # edges will get removed to turn the digraph into a DAG. In this
888 # case each edge has a weight which is the number of source blocks
889 # we'll lose if that edge is removed; we try to minimize the total
890 # weight rather than just the number of edges.
891
892 # Make a copy of the edge set; this copy will get destroyed by the
893 # algorithm.
894 for xf in self.transfers:
895 xf.incoming = xf.goes_after.copy()
896 xf.outgoing = xf.goes_before.copy()
897
898 # We use an OrderedDict instead of just a set so that the output
899 # is repeatable; otherwise it would depend on the hash values of
900 # the transfer objects.
901 G = OrderedDict()
902 for xf in self.transfers:
903 G[xf] = None
904 s1 = deque() # the left side of the sequence, built from left to right
905 s2 = deque() # the right side of the sequence, built from right to left
906
907 while G:
908
909 # Put all sinks at the end of the sequence.
910 while True:
911 sinks = [u for u in G if not u.outgoing]
Dan Albert8b72aef2015-03-23 19:13:21 -0700912 if not sinks:
913 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700914 for u in sinks:
915 s2.appendleft(u)
916 del G[u]
917 for iu in u.incoming:
918 del iu.outgoing[u]
919
920 # Put all the sources at the beginning of the sequence.
921 while True:
922 sources = [u for u in G if not u.incoming]
Dan Albert8b72aef2015-03-23 19:13:21 -0700923 if not sources:
924 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700925 for u in sources:
926 s1.append(u)
927 del G[u]
928 for iu in u.outgoing:
929 del iu.incoming[u]
930
Dan Albert8b72aef2015-03-23 19:13:21 -0700931 if not G:
932 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700933
934 # Find the "best" vertex to put next. "Best" is the one that
935 # maximizes the net difference in source blocks saved we get by
936 # pretending it's a source rather than a sink.
937
938 max_d = None
939 best_u = None
940 for u in G:
941 d = sum(u.outgoing.values()) - sum(u.incoming.values())
942 if best_u is None or d > max_d:
943 max_d = d
944 best_u = u
945
946 u = best_u
947 s1.append(u)
948 del G[u]
949 for iu in u.outgoing:
950 del iu.incoming[u]
951 for iu in u.incoming:
952 del iu.outgoing[u]
953
954 # Now record the sequence in the 'order' field of each transfer,
955 # and by rearranging self.transfers to be in the chosen sequence.
956
957 new_transfers = []
958 for x in itertools.chain(s1, s2):
959 x.order = len(new_transfers)
960 new_transfers.append(x)
961 del x.incoming
962 del x.outgoing
963
964 self.transfers = new_transfers
965
966 def GenerateDigraph(self):
967 print("Generating digraph...")
968 for a in self.transfers:
969 for b in self.transfers:
Dan Albert8b72aef2015-03-23 19:13:21 -0700970 if a is b:
971 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700972
973 # If the blocks written by A are read by B, then B needs to go before A.
974 i = a.tgt_ranges.intersect(b.src_ranges)
975 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700976 if b.src_name == "__ZERO":
977 # the cost of removing source blocks for the __ZERO domain
978 # is (nearly) zero.
979 size = 0
980 else:
981 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700982 b.goes_before[a] = size
983 a.goes_after[b] = size
984
985 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -0700986 """Parse the file_map to generate all the transfers."""
987
988 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
989 split=False):
990 """Wrapper function for adding a Transfer().
991
992 For BBOTA v3, we need to stash source blocks for resumable feature.
993 However, with the growth of file size and the shrink of the cache
994 partition source blocks are too large to be stashed. If a file occupies
995 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
996 into smaller pieces by getting multiple Transfer()s.
997
Tianjie Xubb86e1d2016-01-13 16:14:10 -0800998 The downside is that after splitting, we may increase the package size
999 since the split pieces don't align well. According to our experiments,
1000 1/8 of the cache size as the per-piece limit appears to be optimal.
1001 Compared to the fixed 1024-block limit, it reduces the overall package
1002 size by 30% volantis, and 20% for angler and bullhead."""
Tao Bao9a5caf22015-08-25 15:10:10 -07001003
1004 # We care about diff transfers only.
1005 if style != "diff" or not split:
1006 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1007 return
1008
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001009 pieces = 0
1010 cache_size = common.OPTIONS.cache_size
1011 split_threshold = 0.125
1012 max_blocks_per_transfer = int(cache_size * split_threshold /
1013 self.tgt.blocksize)
1014
Tao Bao9a5caf22015-08-25 15:10:10 -07001015 # Change nothing for small files.
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001016 if (tgt_ranges.size() <= max_blocks_per_transfer and
1017 src_ranges.size() <= max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001018 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1019 return
1020
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001021 while (tgt_ranges.size() > max_blocks_per_transfer and
1022 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001023 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1024 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001025 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1026 src_first = src_ranges.first(max_blocks_per_transfer)
1027
Tao Bao9a5caf22015-08-25 15:10:10 -07001028 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
1029 by_id)
1030
1031 tgt_ranges = tgt_ranges.subtract(tgt_first)
1032 src_ranges = src_ranges.subtract(src_first)
1033 pieces += 1
1034
1035 # Handle remaining blocks.
1036 if tgt_ranges.size() or src_ranges.size():
1037 # Must be both non-empty.
1038 assert tgt_ranges.size() and src_ranges.size()
1039 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1040 src_split_name = "%s-%d" % (src_name, pieces)
1041 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1042 by_id)
1043
Doug Zongkerfc44a512014-08-26 13:10:25 -07001044 empty = RangeSet()
1045 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1046 if tgt_fn == "__ZERO":
1047 # the special "__ZERO" domain is all the blocks not contained
1048 # in any file and that are filled with zeros. We have a
1049 # special transfer style for zero blocks.
1050 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001051 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1052 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001053 continue
1054
Tao Baoff777812015-05-12 11:42:31 -07001055 elif tgt_fn == "__COPY":
1056 # "__COPY" domain includes all the blocks not contained in any
1057 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001058 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001059 continue
1060
Doug Zongkerfc44a512014-08-26 13:10:25 -07001061 elif tgt_fn in self.src.file_map:
1062 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001063 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1064 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001065 continue
1066
1067 b = os.path.basename(tgt_fn)
1068 if b in self.src_basenames:
1069 # Look for an exact basename match in the source.
1070 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001071 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1072 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001073 continue
1074
1075 b = re.sub("[0-9]+", "#", b)
1076 if b in self.src_numpatterns:
1077 # Look for a 'number pattern' match (a basename match after
1078 # all runs of digits are replaced by "#"). (This is useful
1079 # for .so files that contain version numbers in the filename
1080 # that get bumped.)
1081 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001082 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1083 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001084 continue
1085
Tao Bao9a5caf22015-08-25 15:10:10 -07001086 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001087
1088 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001089 for k in self.src.file_map.keys():
1090 b = os.path.basename(k)
1091 self.src_basenames[b] = k
1092 b = re.sub("[0-9]+", "#", b)
1093 self.src_numpatterns[b] = k
1094
1095 @staticmethod
1096 def AssertPartition(total, seq):
1097 """Assert that all the RangeSets in 'seq' form a partition of the
1098 'total' RangeSet (ie, they are nonintersecting and their union
1099 equals 'total')."""
1100 so_far = RangeSet()
1101 for i in seq:
1102 assert not so_far.overlaps(i)
1103 so_far = so_far.union(i)
1104 assert so_far == total