blob: a7a4098f76ff603aca9dac8894460bbd5719a305 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import array
Tao Bao8dcf7382015-05-21 14:09:49 -070020import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080021import functools
Doug Zongker62338182014-09-08 08:29:55 -070022import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import itertools
24import multiprocessing
25import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongker6ab2a502016-02-09 08:28:09 -080029import time
Doug Zongkerfc44a512014-08-26 13:10:25 -070030import tempfile
31
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that transforms src into tgt.

  Args:
    src: an iterable of strings; concatenated, the source data.
    tgt: an iterable of strings; concatenated, the target data.
    imgdiff: if True, run "imgdiff -z" (suitable for zip-formatted data);
        otherwise run "bsdiff".

  Returns:
    The patch contents as a string.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)

    # The diff tool creates the patch file itself; remove the placeholder
    # that mkstemp made so the tool starts from a clean slate.
    try:
      os.unlink(patchfile)
    except OSError:
      pass

    if imgdiff:
      # Use os.devnull (portable) and close the sink when done; the
      # original opened "/dev/null" without ever closing it, leaking a
      # file object per call.
      with open(os.devnull, "a") as devnull:
        ret = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                              stdout=devnull,
                              stderr=subprocess.STDOUT)
    else:
      ret = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if ret:
      raise ValueError("diff failed: " + str(ret))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Unlink each temp file independently so that one failure does not
    # leave the remaining files behind.
    for path in (srcfile, tgtfile, patchfile):
      try:
        os.unlink(path)
      except OSError:
        pass
75
Dan Albert8b72aef2015-03-23 19:13:21 -070076
class Image(object):
  """Abstract interface that BlockImageDiff expects of an image object."""

  def ReadRangeSet(self, ranges):
    """Return the data in `ranges` as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of all the data in the image."""
    raise NotImplementedError
83
84
class EmptyImage(Image):
  """A zero-length image."""

  blocksize = 4096
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  total_blocks = 0
  file_map = {}

  def ReadRangeSet(self, ranges):
    """There is no data to read; always return an empty tuple."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of no data at all.

    EmptyImage always carries empty clobbered_blocks, so
    include_clobbered_blocks can be ignored.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
100
101
class DataImage(Image):
  """An image wrapped around a single string of data.

  The data may optionally be trimmed or zero-padded to a multiple of the
  4096-byte block size.
  """

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    # trim and pad are mutually exclusive ways to handle a partial
    # last block.
    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use explicit floor division so the result stays an int under
    # "from __future__ import division" / Python 3 as well.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Store a RangeSet (not the raw list): TotalSha1() below and external
    # callers do RangeSet operations on it, and EmptyImage exposes a
    # RangeSet for the same attribute.
    self.clobbered_blocks = RangeSet(data=clobbered_blocks)
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # A padded last block is excluded here; it is covered by "__COPY".
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def ReadRangeSet(self, ranges):
    """Return the data in `ranges` as a list of per-range strings."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image data.

    By default the clobbered blocks are excluded; pass
    include_clobbered_blocks=True to hash everything.
    """
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # sha1() requires a single buffer; the original passed the list
      # returned by ReadRangeSet() directly, which raises TypeError.
      return sha1("".join(self.ReadRangeSet(ranges))).hexdigest()
    else:
      return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700172
Doug Zongkerfc44a512014-08-26 13:10:25 -0700173
class Transfer(object):
  """One update command: produce tgt_ranges from src_ranges via `style`."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # imgdiff is only usable when both sides store their blocks in
    # increasing order; remember that here.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register this transfer: its id is its index in by_id.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Net number of blocks this command adds to the stash."""
    stored = sum(sr.size() for _, sr in self.stash_before)
    freed = sum(sr.size() for _, sr in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Turn this command into a "new" command that reads no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%d: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
208
209
@functools.total_ordering
class HeapItem(object):
  """Wrapper that adapts a scored item to Python's min-heap.

  heapq implements a min-heap, while callers want the item with the
  maximum score first, so the score is negated on the way in. Entries
  are removed lazily: clear() marks an entry dead, and truth-testing
  tells whether the entry is still live.
  """
  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    """Mark this heap entry as removed (lazy deletion)."""
    self.item = None

  def __bool__(self):
    # Bug fix: a live entry (one still holding an item) must be truthy.
    # The original test was inverted ("self.item is None"), which made
    # cleared entries look live and live entries look cleared.
    return self.item is not None

  # Python 2 uses __nonzero__ for truth testing; alias it so the fixed
  # semantics apply on both Python 2 and Python 3.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
225
226
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# BlockImageDiff works on two image objects. An image object is
228# anything that provides the following attributes:
229#
230# blocksize: the size in bytes of a block, currently must be 4096.
231#
232# total_blocks: the total size of the partition/image, in blocks.
233#
234# care_map: a RangeSet containing which blocks (in the range [0,
235# total_blocks) we actually care about; i.e. which blocks contain
236# data.
237#
238# file_map: a dict that partitions the blocks contained in care_map
239# into smaller domains that are useful for doing diffs on.
240# (Typically a domain is a file, and the key in file_map is the
241# pathname.)
242#
Tao Baoff777812015-05-12 11:42:31 -0700243# clobbered_blocks: a RangeSet containing which blocks contain data
244# but may be altered by the FS. They need to be excluded when
245# verifying the partition integrity.
246#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700247# ReadRangeSet(): a function that takes a RangeSet and returns the
248# data contained in the image blocks of that RangeSet. The data
249# is returned as a list or tuple of strings; concatenating the
250# elements together should produce the requested data.
251# Implementations are free to break up the data into list/tuple
252# elements in any way that is convenient.
253#
# TotalSha1(): a function that returns (as a hex string) the SHA-1
#   hash of all the data in the image (i.e., all the blocks in the
#   care_map minus clobbered_blocks, or including the clobbered
#   blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700258#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# When creating a BlockImageDiff, the src image may be None, in which
260# case the list of transfers produced will never read from the
261# original image.
262
263class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700264 def __init__(self, tgt, src=None, threads=None, version=4,
265 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700266 if threads is None:
267 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700268 if threads == 0:
269 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700270 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700271 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700272 self.transfers = []
273 self.src_basenames = {}
274 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800275 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700276 self.touched_src_ranges = RangeSet()
277 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700278 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700279
Tao Baoeba409c2015-10-21 13:30:43 -0700280 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700281
282 self.tgt = tgt
283 if src is None:
284 src = EmptyImage()
285 self.src = src
286
287 # The updater code that installs the patch always uses 4k blocks.
288 assert tgt.blocksize == 4096
289 assert src.blocksize == 4096
290
291 # The range sets in each filemap should comprise a partition of
292 # the care map.
293 self.AssertPartition(src.care_map, src.file_map.values())
294 self.AssertPartition(tgt.care_map, tgt.file_map.values())
295
Tao Baob4cfca52016-02-04 14:26:02 -0800296 @property
297 def max_stashed_size(self):
298 return self._max_stashed_size
299
Doug Zongkerfc44a512014-08-26 13:10:25 -0700300 def Compute(self, prefix):
301 # When looking for a source file to use as the diff input for a
302 # target file, we try:
303 # 1) an exact path match if available, otherwise
304 # 2) a exact basename match if available, otherwise
305 # 3) a basename match after all runs of digits are replaced by
306 # "#" if available, otherwise
307 # 4) we have no source for this target.
308 self.AbbreviateSourceNames()
309 self.FindTransfers()
310
311 # Find the ordering dependencies among transfers (this is O(n^2)
312 # in the number of transfers).
313 self.GenerateDigraph()
314 # Find a sequence of transfers that satisfies as many ordering
315 # dependencies as possible (heuristically).
316 self.FindVertexSequence()
317 # Fix up the ordering dependencies that the sequence didn't
318 # satisfy.
Doug Zongker62338182014-09-08 08:29:55 -0700319 if self.version == 1:
320 self.RemoveBackwardEdges()
321 else:
322 self.ReverseBackwardEdges()
323 self.ImproveVertexSequence()
324
Tao Bao82c47982015-08-17 09:45:13 -0700325 # Ensure the runtime stash size is under the limit.
326 if self.version >= 2 and common.OPTIONS.cache_size is not None:
327 self.ReviseStashSize()
328
Doug Zongkerfc44a512014-08-26 13:10:25 -0700329 # Double-check our work.
330 self.AssertSequenceGood()
331
332 self.ComputePatches(prefix)
333 self.WriteTransfers(prefix)
334
Dan Albert8b72aef2015-03-23 19:13:21 -0700335 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000336 data = source.ReadRangeSet(ranges)
337 ctx = sha1()
338
339 for p in data:
340 ctx.update(p)
341
342 return ctx.hexdigest()
343
  def WriteTransfers(self, prefix):
    """Serialize the computed transfers into "<prefix>.transfer.list".

    Emits the versioned transfer-list format read by the updater: a
    header (format version, total blocks written, and -- for version >= 2
    -- the number of stash slots and the maximum stash size in blocks),
    followed by one command per line. Along the way this tracks peak
    stash usage in max_stashed_blocks and, for version >= 3, records the
    SHA-1 of every source block the update reads (touched_src_sha1).
    """
    out = []

    total = 0

    # For version 2, stashes maps a stash's key to its assigned slot id.
    # For version >= 3 it instead maps the stash's content hash to a
    # reference count (stashes are deduplicated by hash).
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # Released slot ids are kept in a min-heap so the smallest free id
    # is always reused first.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      if self.version < 2:
        assert not xf.stash_before
        assert not xf.use_stash

      # Emit the stash commands this transfer requires before it runs.
      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        if self.version == 2:
          stashed_blocks += sr.size()
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            stashed_blocks += sr.size()
            self.touched_src_ranges = self.touched_src_ranges.union(sr)
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
      elif self.version >= 2:

        # <# blocks> <src ranges>
        #   OR
        # <# blocks> <src ranges> <src locs> <stash refs...>
        #   OR
        # <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
            free_size += sr.size()
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            stashes[sh] -= 1
            if stashes[sh] == 0:
              free_size += sr.size()
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source already equals its target writes nothing.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            self.touched_src_ranges = self.touched_src_ranges.union(
                xf.src_ranges)

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    if self.version >= 3:
      self.touched_src_sha1 = self.HashBlocks(
          self.src, self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, self._max_stashed_size, max_allowed,
                  self._max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700602
  def ReviseStashSize(self):
    """Convert commands to "new" where stash usage would exceed the limit.

    Walks the transfers in execution order, simulating stash usage.
    Whenever a stash -- either an explicit stash_before, or the implicit
    stash a v3 "diff" needs when its source and target ranges overlap --
    would push usage past cache_size * stash_threshold, the command that
    consumes that stash is rewritten as a "new" command (which reads no
    source data and therefore needs no stash).
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao9a5caf22015-08-25 15:10:10 -0700675
  def ComputePatches(self, prefix):
    """Compute the data streams for all transfers and write them to disk.

    Writes the literal data for "new" transfers to <prefix>.new.dat and the
    binary patches for "diff" transfers to <prefix>.patch.dat, recording each
    patch's offset/length on the transfer (xf.patch_start, xf.patch_len).
    A "diff" transfer whose source and target contents turn out identical is
    downgraded to a "move"; otherwise it becomes "imgdiff" or "bsdiff".
    "zero" transfers produce no data.

    Args:
      prefix: path prefix for the two output files.
    """
    print("Reticulating splines...")
    # Queue of (tgt_size, src, tgt, xf, patch_num) tuples still to be diffed.
    diff_q = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for xf in self.transfers:
        if xf.style == "zero":
          pass
        elif xf.style == "new":
          # Raw target data, appended in transfer order.
          for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
            new_f.write(piece)
        elif xf.style == "diff":
          src = self.src.ReadRangeSet(xf.src_ranges)
          tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)

          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          # ["he", "llo"] vs ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).

          src_sha1 = sha1()
          for p in src:
            src_sha1.update(p)
          tgt_sha1 = sha1()
          tgt_size = 0
          for p in tgt:
            tgt_sha1.update(p)
            tgt_size += len(p)

          if src_sha1.digest() == tgt_sha1.digest():
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            #  - imgdiff is not disabled, and
            #  - the source and target files are monotonic (ie, the
            #    data is stored with blocks in increasing order), and
            #  - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate. (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (not self.disable_imgdiff and xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_q.append((tgt_size, src, tgt, xf, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_q:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")
      # Sorted ascending by tgt_size; workers pop() from the end, so the
      # largest (slowest) diffs are started first.
      diff_q.sort()

      patches = [None] * patch_num

      # TODO: Rewrite with multiprocessing.ThreadPool?
      lock = threading.Lock()
      def diff_worker():
        # Each worker repeatedly takes one queue entry under the lock,
        # computes the patch outside the lock, then records the result.
        while True:
          with lock:
            if not diff_q:
              return
            tgt_size, src, tgt, xf, patchnum = diff_q.pop()
          patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
          size = len(patch)
          with lock:
            patches[patchnum] = (patch, xf)
            print("%10d %10d (%6.2f%%) %7s %s" % (
                size, tgt_size, size * 100.0 / tgt_size, xf.style,
                xf.tgt_name if xf.tgt_name == xf.src_name else (
                    xf.tgt_name + " (from " + xf.src_name + ")")))

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()
    else:
      patches = []

    # Concatenate all patches into <prefix>.patch.dat, recording each
    # transfer's offset/length within that stream.
    p = 0
    with open(prefix + ".patch.dat", "wb") as patch_f:
      for patch, xf in patches:
        xf.patch_start = p
        xf.patch_len = len(patch)
        patch_f.write(patch)
        p += len(patch)
781
782 def AssertSequenceGood(self):
783 # Simulate the sequences of transfers we will output, and check that:
784 # - we never read a block after writing it, and
785 # - we write every block we care about exactly once.
786
787 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800788 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700789
790 # Imagine processing the transfers in order.
791 for xf in self.transfers:
792 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700793
794 x = xf.src_ranges
795 if self.version >= 2:
796 for _, sr in xf.use_stash:
797 x = x.subtract(sr)
798
Doug Zongker6ab2a502016-02-09 08:28:09 -0800799 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800800 # Source image could be larger. Don't check the blocks that are in the
801 # source image only. Since they are not in 'touched', and won't ever
802 # be touched.
803 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800804 assert touched[i] == 0
805
806 # Check that the output blocks for this transfer haven't yet
807 # been touched, and touch all the blocks written by this
808 # transfer.
809 for s, e in xf.tgt_ranges:
810 for i in range(s, e):
811 assert touched[i] == 0
812 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700813
814 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800815 for s, e in self.tgt.care_map:
816 for i in range(s, e):
817 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700818
Doug Zongker62338182014-09-08 08:29:55 -0700819 def ImproveVertexSequence(self):
820 print("Improving vertex order...")
821
822 # At this point our digraph is acyclic; we reversed any edges that
823 # were backwards in the heuristically-generated sequence. The
824 # previously-generated order is still acceptable, but we hope to
825 # find a better order that needs less memory for stashed data.
826 # Now we do a topological sort to generate a new vertex order,
827 # using a greedy algorithm to choose which vertex goes next
828 # whenever we have a choice.
829
830 # Make a copy of the edge set; this copy will get destroyed by the
831 # algorithm.
832 for xf in self.transfers:
833 xf.incoming = xf.goes_after.copy()
834 xf.outgoing = xf.goes_before.copy()
835
836 L = [] # the new vertex order
837
838 # S is the set of sources in the remaining graph; we always choose
839 # the one that leaves the least amount of stashed data after it's
840 # executed.
841 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
842 if not u.incoming]
843 heapq.heapify(S)
844
845 while S:
846 _, _, xf = heapq.heappop(S)
847 L.append(xf)
848 for u in xf.outgoing:
849 del u.incoming[xf]
850 if not u.incoming:
851 heapq.heappush(S, (u.NetStashChange(), u.order, u))
852
853 # if this fails then our graph had a cycle.
854 assert len(L) == len(self.transfers)
855
856 self.transfers = L
857 for i, xf in enumerate(L):
858 xf.order = i
859
Doug Zongkerfc44a512014-08-26 13:10:25 -0700860 def RemoveBackwardEdges(self):
861 print("Removing backward edges...")
862 in_order = 0
863 out_of_order = 0
864 lost_source = 0
865
866 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700867 lost = 0
868 size = xf.src_ranges.size()
869 for u in xf.goes_before:
870 # xf should go before u
871 if xf.order < u.order:
872 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700873 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700874 else:
875 # it doesn't, boo. trim the blocks that u writes from xf's
876 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700877 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700878 assert xf.src_ranges.overlaps(u.tgt_ranges)
879 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
880 xf.intact = False
881
882 if xf.style == "diff" and not xf.src_ranges:
883 # nothing left to diff from; treat as new data
884 xf.style = "new"
885
886 lost = size - xf.src_ranges.size()
887 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700888
889 print((" %d/%d dependencies (%.2f%%) were violated; "
890 "%d source blocks removed.") %
891 (out_of_order, in_order + out_of_order,
892 (out_of_order * 100.0 / (in_order + out_of_order))
893 if (in_order + out_of_order) else 0.0,
894 lost_source))
895
Doug Zongker62338182014-09-08 08:29:55 -0700896 def ReverseBackwardEdges(self):
897 print("Reversing backward edges...")
898 in_order = 0
899 out_of_order = 0
900 stashes = 0
901 stash_size = 0
902
903 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700904 for u in xf.goes_before.copy():
905 # xf should go before u
906 if xf.order < u.order:
907 # it does, hurray!
908 in_order += 1
909 else:
910 # it doesn't, boo. modify u to stash the blocks that it
911 # writes that xf wants to read, and then require u to go
912 # before xf.
913 out_of_order += 1
914
915 overlap = xf.src_ranges.intersect(u.tgt_ranges)
916 assert overlap
917
918 u.stash_before.append((stashes, overlap))
919 xf.use_stash.append((stashes, overlap))
920 stashes += 1
921 stash_size += overlap.size()
922
923 # reverse the edge direction; now xf must go after u
924 del xf.goes_before[u]
925 del u.goes_after[xf]
926 xf.goes_after[u] = None # value doesn't matter
927 u.goes_before[xf] = None
928
929 print((" %d/%d dependencies (%.2f%%) were violated; "
930 "%d source blocks stashed.") %
931 (out_of_order, in_order + out_of_order,
932 (out_of_order * 100.0 / (in_order + out_of_order))
933 if (in_order + out_of_order) else 0.0,
934 stash_size))
935
  def FindVertexSequence(self):
    """Order the transfers to minimize the weight of backward edges.

    Computes a linear order of self.transfers (recorded both in each
    transfer's 'order' field and in the list order of self.transfers) that
    heuristically minimizes the total weight of left-pointing edges, i.e.
    the number of source blocks that later passes will have to drop or
    stash. Consumes the goes_before/goes_after copies made below.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = (weight of outgoing edges) - (weight of incoming edges);
      # higher means the vertex behaves more like a source.
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    # Max-heap of HeapItem wrappers keyed on score. Entries are lazily
    # invalidated: adjust_score() clear()s the stale wrapper and pushes a
    # fresh one, and the pop loop below skips cleared/removed entries.
    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    sinks = set(u for u in G if not u.outgoing)
    sources = set(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Update iu's score and replace its heap entry (old one is cleared,
      # not removed, so popping must tolerate dead entries).
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = set()
        for u in sinks:
          if u not in G: continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing: new_sinks.add(iu)
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = set()
        for u in sources:
          if u not in G: continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming: new_sources.add(iu)
        sources = new_sources

      if not G: break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      while True:
        u = heapq.heappop(heap)
        # Skip cleared (falsy) heap entries and vertices already removed
        # from G; NOTE(review): relies on HeapItem being falsy once
        # clear()ed — confirm against the HeapItem definition.
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming: sources.add(iu)

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing: sinks.add(iu)

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1038
1039 def GenerateDigraph(self):
1040 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001041
1042 # Each item of source_ranges will be:
1043 # - None, if that block is not used as a source,
1044 # - a transfer, if one transfer uses it as a source, or
1045 # - a set of transfers.
1046 source_ranges = []
1047 for b in self.transfers:
1048 for s, e in b.src_ranges:
1049 if e > len(source_ranges):
1050 source_ranges.extend([None] * (e-len(source_ranges)))
1051 for i in range(s, e):
1052 if source_ranges[i] is None:
1053 source_ranges[i] = b
1054 else:
1055 if not isinstance(source_ranges[i], set):
1056 source_ranges[i] = set([source_ranges[i]])
1057 source_ranges[i].add(b)
1058
Doug Zongkerfc44a512014-08-26 13:10:25 -07001059 for a in self.transfers:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001060 intersections = set()
1061 for s, e in a.tgt_ranges:
1062 for i in range(s, e):
1063 if i >= len(source_ranges): break
1064 b = source_ranges[i]
1065 if b is not None:
1066 if isinstance(b, set):
1067 intersections.update(b)
1068 else:
1069 intersections.add(b)
1070
1071 for b in intersections:
1072 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001073
1074 # If the blocks written by A are read by B, then B needs to go before A.
1075 i = a.tgt_ranges.intersect(b.src_ranges)
1076 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001077 if b.src_name == "__ZERO":
1078 # the cost of removing source blocks for the __ZERO domain
1079 # is (nearly) zero.
1080 size = 0
1081 else:
1082 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001083 b.goes_before[a] = size
1084 a.goes_after[b] = size
1085
1086 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001087 """Parse the file_map to generate all the transfers."""
1088
1089 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1090 split=False):
1091 """Wrapper function for adding a Transfer().
1092
1093 For BBOTA v3, we need to stash source blocks for resumable feature.
1094 However, with the growth of file size and the shrink of the cache
1095 partition source blocks are too large to be stashed. If a file occupies
1096 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
1097 into smaller pieces by getting multiple Transfer()s.
1098
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001099 The downside is that after splitting, we may increase the package size
1100 since the split pieces don't align well. According to our experiments,
1101 1/8 of the cache size as the per-piece limit appears to be optimal.
1102 Compared to the fixed 1024-block limit, it reduces the overall package
1103 size by 30% volantis, and 20% for angler and bullhead."""
Tao Bao9a5caf22015-08-25 15:10:10 -07001104
1105 # We care about diff transfers only.
1106 if style != "diff" or not split:
1107 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1108 return
1109
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001110 pieces = 0
1111 cache_size = common.OPTIONS.cache_size
1112 split_threshold = 0.125
1113 max_blocks_per_transfer = int(cache_size * split_threshold /
1114 self.tgt.blocksize)
1115
Tao Bao9a5caf22015-08-25 15:10:10 -07001116 # Change nothing for small files.
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001117 if (tgt_ranges.size() <= max_blocks_per_transfer and
1118 src_ranges.size() <= max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001119 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1120 return
1121
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001122 while (tgt_ranges.size() > max_blocks_per_transfer and
1123 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001124 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1125 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001126 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1127 src_first = src_ranges.first(max_blocks_per_transfer)
1128
Tao Bao9a5caf22015-08-25 15:10:10 -07001129 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
1130 by_id)
1131
1132 tgt_ranges = tgt_ranges.subtract(tgt_first)
1133 src_ranges = src_ranges.subtract(src_first)
1134 pieces += 1
1135
1136 # Handle remaining blocks.
1137 if tgt_ranges.size() or src_ranges.size():
1138 # Must be both non-empty.
1139 assert tgt_ranges.size() and src_ranges.size()
1140 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1141 src_split_name = "%s-%d" % (src_name, pieces)
1142 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1143 by_id)
1144
Doug Zongkerfc44a512014-08-26 13:10:25 -07001145 empty = RangeSet()
1146 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1147 if tgt_fn == "__ZERO":
1148 # the special "__ZERO" domain is all the blocks not contained
1149 # in any file and that are filled with zeros. We have a
1150 # special transfer style for zero blocks.
1151 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001152 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1153 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001154 continue
1155
Tao Baoff777812015-05-12 11:42:31 -07001156 elif tgt_fn == "__COPY":
1157 # "__COPY" domain includes all the blocks not contained in any
1158 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001159 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001160 continue
1161
Doug Zongkerfc44a512014-08-26 13:10:25 -07001162 elif tgt_fn in self.src.file_map:
1163 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001164 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1165 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001166 continue
1167
1168 b = os.path.basename(tgt_fn)
1169 if b in self.src_basenames:
1170 # Look for an exact basename match in the source.
1171 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001172 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1173 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001174 continue
1175
1176 b = re.sub("[0-9]+", "#", b)
1177 if b in self.src_numpatterns:
1178 # Look for a 'number pattern' match (a basename match after
1179 # all runs of digits are replaced by "#"). (This is useful
1180 # for .so files that contain version numbers in the filename
1181 # that get bumped.)
1182 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001183 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1184 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001185 continue
1186
Tao Bao9a5caf22015-08-25 15:10:10 -07001187 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001188
1189 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001190 for k in self.src.file_map.keys():
1191 b = os.path.basename(k)
1192 self.src_basenames[b] = k
1193 b = re.sub("[0-9]+", "#", b)
1194 self.src_numpatterns[b] = k
1195
1196 @staticmethod
1197 def AssertPartition(total, seq):
1198 """Assert that all the RangeSets in 'seq' form a partition of the
1199 'total' RangeSet (ie, they are nonintersecting and their union
1200 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001201
Doug Zongkerfc44a512014-08-26 13:10:25 -07001202 so_far = RangeSet()
1203 for i in seq:
1204 assert not so_far.overlaps(i)
1205 so_far = so_far.union(i)
1206 assert so_far == total