blob: eee7e8d992c6dde72aa00c8494907a1c3158f4b6 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import array
Tao Bao8dcf7382015-05-21 14:09:49 -070020import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080021import functools
Doug Zongker62338182014-09-08 08:29:55 -070022import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import itertools
24import multiprocessing
25import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongker6ab2a502016-02-09 08:28:09 -080029import time
Doug Zongkerfc44a512014-08-26 13:10:25 -070030import tempfile
31
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
# Public API of this module; Image, Transfer and HeapItem are
# implementation details and deliberately not exported.
__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that transforms the src data into the tgt data.

  Args:
    src: iterable of byte strings holding the source data.
    tgt: iterable of byte strings holding the target data.
    imgdiff: if True, run "imgdiff -z" (better for zip-format data such as
        APKs/JARs); otherwise run "bsdiff".

  Returns:
    The raw patch contents as a single byte string.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)

    # The diff tools expect to create the patch file themselves.
    try:
      os.unlink(patchfile)
    except OSError:
      pass

    if imgdiff:
      # Discard imgdiff's stdout/stderr chatter; only the exit status and
      # the patch file matter. Use os.devnull (portable) instead of the
      # hard-coded "/dev/null", and close the handle instead of leaking it.
      with open(os.devnull, "a") as devnull:
        p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                            stdout=devnull,
                            stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Best-effort cleanup of the temporary files.
    try:
      os.unlink(srcfile)
      os.unlink(tgtfile)
      os.unlink(patchfile)
    except OSError:
      pass
Dan Albert8b72aef2015-03-23 19:13:21 -070076
class Image(object):
  """Abstract interface implemented by EmptyImage and DataImage."""

  def ReadRangeSet(self, ranges):
    """Return the data stored in the given block ranges; must be overridden."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image's data; must be overridden."""
    raise NotImplementedError
84
class EmptyImage(Image):
  """An image that contains no blocks at all."""

  # Every attribute describes a zero-block partition.
  blocksize = 4096
  total_blocks = 0
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  file_map = {}

  def ReadRangeSet(self, ranges):
    """There is never any data to read."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of zero bytes of data.

    clobbered_blocks is always empty here, so the
    include_clobbered_blocks flag cannot change the result.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
100
101
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap *data* as an image.

    Args:
      data: the raw image contents (a byte string).
      trim: if True, drop a trailing partial block.
      pad: if True, zero-fill a trailing partial block (the padded last
          block is then recorded in clobbered_blocks; see Bug 23828506).

    Raises:
      ValueError: if the data is not a multiple of the block size and
          neither trim nor pad was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int on Python 3 as well;
    # "/" would produce a float there, which breaks range() below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # NOTE(review): this stores a plain list while EmptyImage uses a
    # RangeSet — confirm that care_map.subtract() below accepts it.
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block (excluding a padded last block) as zero/nonzero.
    # The lists are built as flat (start, end) pairs for RangeSet(data=...).
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def ReadRangeSet(self, ranges):
    """Return one data chunk per (start, end) block range in *ranges*."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image contents.

    Clobbered blocks are excluded by default; pass
    include_clobbered_blocks=True to hash the raw data as-is.
    """
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # Bug fix: feed the chunks into one hash context. The original code
      # passed the *list* returned by ReadRangeSet() directly to sha1(),
      # which raises TypeError (sha1() requires a byte string, not a list).
      ctx = sha1()
      for chunk in self.ReadRangeSet(ranges):
        ctx.update(chunk)
      return ctx.hexdigest()
    else:
      return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700172
Doug Zongkerfc44a512014-08-26 13:10:25 -0700173
class Transfer(object):
  """One command in the transfer list: produce tgt_ranges from src_ranges."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # Both range sets must be monotonic for the transfer to be "intact",
    # i.e. concatenating the blocks in order reproduces the original data.
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register this transfer in the caller-supplied id registry.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Return blocks this command stashes minus blocks it releases."""
    stored = sum(sr.size() for _, sr in self.stash_before)
    released = sum(sr.size() for _, sr in self.use_stash)
    return stored - released

  def ConvertToNew(self):
    """Rewrite this command as a plain "new" that reads no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
208
209
@functools.total_ordering
class HeapItem(object):
  """Wrapper that orders items by descending score inside heapq.

  heapq implements a min-heap, so the score is stored negated: popping the
  smallest wrapper yields the highest-scoring item. Entries are removed
  lazily — clear() marks them deleted, and consumers skip falsy wrappers.
  """

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    """Mark this heap entry as deleted (lazy removal)."""
    self.item = None

  def __bool__(self):
    # Bug fix: a wrapper must be truthy while it still holds an item and
    # falsy after clear(); the original test was inverted ("is None"),
    # which would make consumers pick cleared entries and skip live ones.
    return self.item is not None

  # Python 2 uses __nonzero__ for truth testing, while Python 3 uses
  # __bool__; alias it so the fix applies on both.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
225
226
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# BlockImageDiff works on two image objects. An image object is
228# anything that provides the following attributes:
229#
230# blocksize: the size in bytes of a block, currently must be 4096.
231#
232# total_blocks: the total size of the partition/image, in blocks.
233#
234# care_map: a RangeSet containing which blocks (in the range [0,
235# total_blocks) we actually care about; i.e. which blocks contain
236# data.
237#
238# file_map: a dict that partitions the blocks contained in care_map
239# into smaller domains that are useful for doing diffs on.
240# (Typically a domain is a file, and the key in file_map is the
241# pathname.)
242#
Tao Baoff777812015-05-12 11:42:31 -0700243# clobbered_blocks: a RangeSet containing which blocks contain data
244# but may be altered by the FS. They need to be excluded when
245# verifying the partition integrity.
246#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700247# ReadRangeSet(): a function that takes a RangeSet and returns the
248# data contained in the image blocks of that RangeSet. The data
249# is returned as a list or tuple of strings; concatenating the
250# elements together should produce the requested data.
251# Implementations are free to break up the data into list/tuple
252# elements in any way that is convenient.
253#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700254# TotalSha1(): a function that returns (as a hex string) the SHA-1
255# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700256# care_map minus clobbered_blocks, or including the clobbered
257# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700258#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# When creating a BlockImageDiff, the src image may be None, in which
260# case the list of transfers produced will never read from the
261# original image.
262
263class BlockImageDiff(object):
Tao Baoeba409c2015-10-21 13:30:43 -0700264 def __init__(self, tgt, src=None, threads=None, version=4):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700265 if threads is None:
266 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700267 if threads == 0:
268 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700269 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700270 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700271 self.transfers = []
272 self.src_basenames = {}
273 self.src_numpatterns = {}
Doug Zongker62338182014-09-08 08:29:55 -0700274
Tao Baoeba409c2015-10-21 13:30:43 -0700275 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700276
277 self.tgt = tgt
278 if src is None:
279 src = EmptyImage()
280 self.src = src
281
282 # The updater code that installs the patch always uses 4k blocks.
283 assert tgt.blocksize == 4096
284 assert src.blocksize == 4096
285
286 # The range sets in each filemap should comprise a partition of
287 # the care map.
288 self.AssertPartition(src.care_map, src.file_map.values())
289 self.AssertPartition(tgt.care_map, tgt.file_map.values())
290
  def Compute(self, prefix):
    """Run the whole diff pipeline and write the output files.

    Produces prefix+".new.dat" and prefix+".patch.dat" (ComputePatches)
    and prefix+".transfer.list" (WriteTransfers). The steps below must
    run in exactly this order.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
325
Dan Albert8b72aef2015-03-23 19:13:21 -0700326 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000327 data = source.ReadRangeSet(ranges)
328 ctx = sha1()
329
330 for p in data:
331 ctx.update(p)
332
333 return ctx.hexdigest()
334
  def WriteTransfers(self, prefix):
    """Write the transfer list for this diff to <prefix>.transfer.list.

    The file begins with the format version and the total count of target
    blocks written; for version >= 2 the stash-slot count and the maximum
    stash size (in blocks) follow. After the header comes one command per
    line.
    """
    out = []

    # Total number of target blocks written by the commands emitted so far.
    total = 0

    # For version 2 this maps a stash index to its slot id. For version
    # >= 3 it additionally holds reference counts keyed by the stashed
    # blocks' SHA-1 (both kinds of key coexist in the one dict).
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # Freed slot ids are recycled through a min-heap so the smallest free
    # id is always reused first.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      if self.version < 2:
        # Stashing only exists from format version 2 on.
        assert not xf.stash_before
        assert not xf.use_stash

      # Emit "stash" commands for everything this transfer must save
      # before it executes.
      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        if self.version == 2:
          stashed_blocks += sr.size()
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          # Version >= 3 addresses stashes by content hash, so identical
          # contents share one stash via reference counting.
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            stashed_blocks += sr.size()
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
      elif self.version >= 2:

        # <# blocks> <src ranges>
        #    OR
        # <# blocks> <src ranges> <src locs> <stash refs...>
        #    OR
        # <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          # Express the stashed ranges relative to the full source span.
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
            free_size += sr.size()
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            stashes[sh] -= 1
            if stashes[sh] == 0:
              # Last reference: release the stash right away.
              free_size += sr.size()
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target needs no command and
        # contributes nothing to the written-block total.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        # Only blocks not already zero in the source need zeroing.
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image and b) will not be touched by dm-verity.
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
    if new_dontcare:
      out.append("erase %s\n" % (new_dontcare.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    # NOTE(review): the file is opened in binary mode while "out" holds str
    # objects — fine on Python 2, which this file targets.
    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, max_stashed_size, max_allowed,
                  max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700572
  def ReviseStashSize(self):
    """Keep runtime stash usage within the /cache budget.

    Walks the transfers in execution order tracking how many blocks would
    be stashed at each point; any command whose stash demand would exceed
    cache_size * stash_threshold (in blocks) is converted to a plain
    "new" command, which needs no stash at all.
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao9a5caf22015-08-25 15:10:10 -0700645
Doug Zongkerfc44a512014-08-26 13:10:25 -0700646 def ComputePatches(self, prefix):
647 print("Reticulating splines...")
648 diff_q = []
649 patch_num = 0
650 with open(prefix + ".new.dat", "wb") as new_f:
651 for xf in self.transfers:
652 if xf.style == "zero":
653 pass
654 elif xf.style == "new":
655 for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
656 new_f.write(piece)
657 elif xf.style == "diff":
658 src = self.src.ReadRangeSet(xf.src_ranges)
659 tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
660
661 # We can't compare src and tgt directly because they may have
662 # the same content but be broken up into blocks differently, eg:
663 #
664 # ["he", "llo"] vs ["h", "ello"]
665 #
666 # We want those to compare equal, ideally without having to
667 # actually concatenate the strings (these may be tens of
668 # megabytes).
669
670 src_sha1 = sha1()
671 for p in src:
672 src_sha1.update(p)
673 tgt_sha1 = sha1()
674 tgt_size = 0
675 for p in tgt:
676 tgt_sha1.update(p)
677 tgt_size += len(p)
678
679 if src_sha1.digest() == tgt_sha1.digest():
680 # These are identical; we don't need to generate a patch,
681 # just issue copy commands on the device.
682 xf.style = "move"
683 else:
684 # For files in zip format (eg, APKs, JARs, etc.) we would
685 # like to use imgdiff -z if possible (because it usually
686 # produces significantly smaller patches than bsdiff).
687 # This is permissible if:
688 #
689 # - the source and target files are monotonic (ie, the
690 # data is stored with blocks in increasing order), and
691 # - we haven't removed any blocks from the source set.
692 #
693 # If these conditions are satisfied then appending all the
694 # blocks in the set together in order will produce a valid
695 # zip file (plus possibly extra zeros in the last block),
696 # which is what imgdiff needs to operate. (imgdiff is
697 # fine with extra zeros at the end of the file.)
698 imgdiff = (xf.intact and
699 xf.tgt_name.split(".")[-1].lower()
700 in ("apk", "jar", "zip"))
701 xf.style = "imgdiff" if imgdiff else "bsdiff"
702 diff_q.append((tgt_size, src, tgt, xf, patch_num))
703 patch_num += 1
704
705 else:
706 assert False, "unknown style " + xf.style
707
708 if diff_q:
709 if self.threads > 1:
710 print("Computing patches (using %d threads)..." % (self.threads,))
711 else:
712 print("Computing patches...")
713 diff_q.sort()
714
715 patches = [None] * patch_num
716
Dan Albert8b72aef2015-03-23 19:13:21 -0700717 # TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongkerfc44a512014-08-26 13:10:25 -0700718 lock = threading.Lock()
719 def diff_worker():
720 while True:
721 with lock:
Dan Albert8b72aef2015-03-23 19:13:21 -0700722 if not diff_q:
723 return
Doug Zongkerfc44a512014-08-26 13:10:25 -0700724 tgt_size, src, tgt, xf, patchnum = diff_q.pop()
725 patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
726 size = len(patch)
727 with lock:
728 patches[patchnum] = (patch, xf)
729 print("%10d %10d (%6.2f%%) %7s %s" % (
730 size, tgt_size, size * 100.0 / tgt_size, xf.style,
731 xf.tgt_name if xf.tgt_name == xf.src_name else (
732 xf.tgt_name + " (from " + xf.src_name + ")")))
733
734 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700735 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700736 for th in threads:
737 th.start()
738 while threads:
739 threads.pop().join()
740 else:
741 patches = []
742
743 p = 0
744 with open(prefix + ".patch.dat", "wb") as patch_f:
745 for patch, xf in patches:
746 xf.patch_start = p
747 xf.patch_len = len(patch)
748 patch_f.write(patch)
749 p += len(patch)
750
751 def AssertSequenceGood(self):
752 # Simulate the sequences of transfers we will output, and check that:
753 # - we never read a block after writing it, and
754 # - we write every block we care about exactly once.
755
756 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800757 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700758
759 # Imagine processing the transfers in order.
760 for xf in self.transfers:
761 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700762
763 x = xf.src_ranges
764 if self.version >= 2:
765 for _, sr in xf.use_stash:
766 x = x.subtract(sr)
767
Doug Zongker6ab2a502016-02-09 08:28:09 -0800768 for s, e in x:
769 for i in range(s, e):
770 assert touched[i] == 0
771
772 # Check that the output blocks for this transfer haven't yet
773 # been touched, and touch all the blocks written by this
774 # transfer.
775 for s, e in xf.tgt_ranges:
776 for i in range(s, e):
777 assert touched[i] == 0
778 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700779
780 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800781 for s, e in self.tgt.care_map:
782 for i in range(s, e):
783 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700784
Doug Zongker62338182014-09-08 08:29:55 -0700785 def ImproveVertexSequence(self):
786 print("Improving vertex order...")
787
788 # At this point our digraph is acyclic; we reversed any edges that
789 # were backwards in the heuristically-generated sequence. The
790 # previously-generated order is still acceptable, but we hope to
791 # find a better order that needs less memory for stashed data.
792 # Now we do a topological sort to generate a new vertex order,
793 # using a greedy algorithm to choose which vertex goes next
794 # whenever we have a choice.
795
796 # Make a copy of the edge set; this copy will get destroyed by the
797 # algorithm.
798 for xf in self.transfers:
799 xf.incoming = xf.goes_after.copy()
800 xf.outgoing = xf.goes_before.copy()
801
802 L = [] # the new vertex order
803
804 # S is the set of sources in the remaining graph; we always choose
805 # the one that leaves the least amount of stashed data after it's
806 # executed.
807 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
808 if not u.incoming]
809 heapq.heapify(S)
810
811 while S:
812 _, _, xf = heapq.heappop(S)
813 L.append(xf)
814 for u in xf.outgoing:
815 del u.incoming[xf]
816 if not u.incoming:
817 heapq.heappush(S, (u.NetStashChange(), u.order, u))
818
819 # if this fails then our graph had a cycle.
820 assert len(L) == len(self.transfers)
821
822 self.transfers = L
823 for i, xf in enumerate(L):
824 xf.order = i
825
Doug Zongkerfc44a512014-08-26 13:10:25 -0700826 def RemoveBackwardEdges(self):
827 print("Removing backward edges...")
828 in_order = 0
829 out_of_order = 0
830 lost_source = 0
831
832 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700833 lost = 0
834 size = xf.src_ranges.size()
835 for u in xf.goes_before:
836 # xf should go before u
837 if xf.order < u.order:
838 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700839 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700840 else:
841 # it doesn't, boo. trim the blocks that u writes from xf's
842 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700843 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700844 assert xf.src_ranges.overlaps(u.tgt_ranges)
845 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
846 xf.intact = False
847
848 if xf.style == "diff" and not xf.src_ranges:
849 # nothing left to diff from; treat as new data
850 xf.style = "new"
851
852 lost = size - xf.src_ranges.size()
853 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700854
855 print((" %d/%d dependencies (%.2f%%) were violated; "
856 "%d source blocks removed.") %
857 (out_of_order, in_order + out_of_order,
858 (out_of_order * 100.0 / (in_order + out_of_order))
859 if (in_order + out_of_order) else 0.0,
860 lost_source))
861
Doug Zongker62338182014-09-08 08:29:55 -0700862 def ReverseBackwardEdges(self):
863 print("Reversing backward edges...")
864 in_order = 0
865 out_of_order = 0
866 stashes = 0
867 stash_size = 0
868
869 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700870 for u in xf.goes_before.copy():
871 # xf should go before u
872 if xf.order < u.order:
873 # it does, hurray!
874 in_order += 1
875 else:
876 # it doesn't, boo. modify u to stash the blocks that it
877 # writes that xf wants to read, and then require u to go
878 # before xf.
879 out_of_order += 1
880
881 overlap = xf.src_ranges.intersect(u.tgt_ranges)
882 assert overlap
883
884 u.stash_before.append((stashes, overlap))
885 xf.use_stash.append((stashes, overlap))
886 stashes += 1
887 stash_size += overlap.size()
888
889 # reverse the edge direction; now xf must go after u
890 del xf.goes_before[u]
891 del u.goes_after[xf]
892 xf.goes_after[u] = None # value doesn't matter
893 u.goes_before[xf] = None
894
895 print((" %d/%d dependencies (%.2f%%) were violated; "
896 "%d source blocks stashed.") %
897 (out_of_order, in_order + out_of_order,
898 (out_of_order * 100.0 / (in_order + out_of_order))
899 if (in_order + out_of_order) else 0.0,
900 stash_size))
901
Doug Zongkerfc44a512014-08-26 13:10:25 -0700902 def FindVertexSequence(self):
903 print("Finding vertex sequence...")
904
905 # This is based on "A Fast & Effective Heuristic for the Feedback
906 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
907 # it as starting with the digraph G and moving all the vertices to
908 # be on a horizontal line in some order, trying to minimize the
909 # number of edges that end up pointing to the left. Left-pointing
910 # edges will get removed to turn the digraph into a DAG. In this
911 # case each edge has a weight which is the number of source blocks
912 # we'll lose if that edge is removed; we try to minimize the total
913 # weight rather than just the number of edges.
914
915 # Make a copy of the edge set; this copy will get destroyed by the
916 # algorithm.
917 for xf in self.transfers:
918 xf.incoming = xf.goes_after.copy()
919 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -0800920 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -0700921
922 # We use an OrderedDict instead of just a set so that the output
923 # is repeatable; otherwise it would depend on the hash values of
924 # the transfer objects.
925 G = OrderedDict()
926 for xf in self.transfers:
927 G[xf] = None
928 s1 = deque() # the left side of the sequence, built from left to right
929 s2 = deque() # the right side of the sequence, built from right to left
930
Doug Zongker6ab2a502016-02-09 08:28:09 -0800931 heap = []
932 for xf in self.transfers:
933 xf.heap_item = HeapItem(xf)
934 heap.append(xf.heap_item)
935 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700936
Doug Zongker6ab2a502016-02-09 08:28:09 -0800937 sinks = set(u for u in G if not u.outgoing)
938 sources = set(u for u in G if not u.incoming)
939
940 def adjust_score(iu, delta):
941 iu.score += delta
942 iu.heap_item.clear()
943 iu.heap_item = HeapItem(iu)
944 heapq.heappush(heap, iu.heap_item)
945
946 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700947 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800948 while sinks:
949 new_sinks = set()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700950 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800951 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700952 s2.appendleft(u)
953 del G[u]
954 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800955 adjust_score(iu, -iu.outgoing.pop(u))
956 if not iu.outgoing: new_sinks.add(iu)
957 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -0700958
959 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800960 while sources:
961 new_sources = set()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700962 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800963 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700964 s1.append(u)
965 del G[u]
966 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800967 adjust_score(iu, +iu.incoming.pop(u))
968 if not iu.incoming: new_sources.add(iu)
969 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -0700970
Doug Zongker6ab2a502016-02-09 08:28:09 -0800971 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700972
973 # Find the "best" vertex to put next. "Best" is the one that
974 # maximizes the net difference in source blocks saved we get by
975 # pretending it's a source rather than a sink.
976
Doug Zongker6ab2a502016-02-09 08:28:09 -0800977 while True:
978 u = heapq.heappop(heap)
979 if u and u.item in G:
980 u = u.item
981 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700982
Doug Zongkerfc44a512014-08-26 13:10:25 -0700983 s1.append(u)
984 del G[u]
985 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800986 adjust_score(iu, +iu.incoming.pop(u))
987 if not iu.incoming: sources.add(iu)
988
Doug Zongkerfc44a512014-08-26 13:10:25 -0700989 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800990 adjust_score(iu, -iu.outgoing.pop(u))
991 if not iu.outgoing: sinks.add(iu)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700992
993 # Now record the sequence in the 'order' field of each transfer,
994 # and by rearranging self.transfers to be in the chosen sequence.
995
996 new_transfers = []
997 for x in itertools.chain(s1, s2):
998 x.order = len(new_transfers)
999 new_transfers.append(x)
1000 del x.incoming
1001 del x.outgoing
1002
1003 self.transfers = new_transfers
1004
1005 def GenerateDigraph(self):
1006 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001007
1008 # Each item of source_ranges will be:
1009 # - None, if that block is not used as a source,
1010 # - a transfer, if one transfer uses it as a source, or
1011 # - a set of transfers.
1012 source_ranges = []
1013 for b in self.transfers:
1014 for s, e in b.src_ranges:
1015 if e > len(source_ranges):
1016 source_ranges.extend([None] * (e-len(source_ranges)))
1017 for i in range(s, e):
1018 if source_ranges[i] is None:
1019 source_ranges[i] = b
1020 else:
1021 if not isinstance(source_ranges[i], set):
1022 source_ranges[i] = set([source_ranges[i]])
1023 source_ranges[i].add(b)
1024
Doug Zongkerfc44a512014-08-26 13:10:25 -07001025 for a in self.transfers:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001026 intersections = set()
1027 for s, e in a.tgt_ranges:
1028 for i in range(s, e):
1029 if i >= len(source_ranges): break
1030 b = source_ranges[i]
1031 if b is not None:
1032 if isinstance(b, set):
1033 intersections.update(b)
1034 else:
1035 intersections.add(b)
1036
1037 for b in intersections:
1038 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001039
1040 # If the blocks written by A are read by B, then B needs to go before A.
1041 i = a.tgt_ranges.intersect(b.src_ranges)
1042 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001043 if b.src_name == "__ZERO":
1044 # the cost of removing source blocks for the __ZERO domain
1045 # is (nearly) zero.
1046 size = 0
1047 else:
1048 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001049 b.goes_before[a] = size
1050 a.goes_after[b] = size
1051
1052 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001053 """Parse the file_map to generate all the transfers."""
1054
1055 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1056 split=False):
1057 """Wrapper function for adding a Transfer().
1058
1059 For BBOTA v3, we need to stash source blocks for resumable feature.
1060 However, with the growth of file size and the shrink of the cache
1061 partition source blocks are too large to be stashed. If a file occupies
1062 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
1063 into smaller pieces by getting multiple Transfer()s.
1064
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001065 The downside is that after splitting, we may increase the package size
1066 since the split pieces don't align well. According to our experiments,
1067 1/8 of the cache size as the per-piece limit appears to be optimal.
1068 Compared to the fixed 1024-block limit, it reduces the overall package
1069 size by 30% volantis, and 20% for angler and bullhead."""
Tao Bao9a5caf22015-08-25 15:10:10 -07001070
1071 # We care about diff transfers only.
1072 if style != "diff" or not split:
1073 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1074 return
1075
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001076 pieces = 0
1077 cache_size = common.OPTIONS.cache_size
1078 split_threshold = 0.125
1079 max_blocks_per_transfer = int(cache_size * split_threshold /
1080 self.tgt.blocksize)
1081
Tao Bao9a5caf22015-08-25 15:10:10 -07001082 # Change nothing for small files.
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001083 if (tgt_ranges.size() <= max_blocks_per_transfer and
1084 src_ranges.size() <= max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001085 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1086 return
1087
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001088 while (tgt_ranges.size() > max_blocks_per_transfer and
1089 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001090 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1091 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001092 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1093 src_first = src_ranges.first(max_blocks_per_transfer)
1094
Tao Bao9a5caf22015-08-25 15:10:10 -07001095 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
1096 by_id)
1097
1098 tgt_ranges = tgt_ranges.subtract(tgt_first)
1099 src_ranges = src_ranges.subtract(src_first)
1100 pieces += 1
1101
1102 # Handle remaining blocks.
1103 if tgt_ranges.size() or src_ranges.size():
1104 # Must be both non-empty.
1105 assert tgt_ranges.size() and src_ranges.size()
1106 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1107 src_split_name = "%s-%d" % (src_name, pieces)
1108 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1109 by_id)
1110
Doug Zongkerfc44a512014-08-26 13:10:25 -07001111 empty = RangeSet()
1112 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1113 if tgt_fn == "__ZERO":
1114 # the special "__ZERO" domain is all the blocks not contained
1115 # in any file and that are filled with zeros. We have a
1116 # special transfer style for zero blocks.
1117 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001118 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1119 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001120 continue
1121
Tao Baoff777812015-05-12 11:42:31 -07001122 elif tgt_fn == "__COPY":
1123 # "__COPY" domain includes all the blocks not contained in any
1124 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001125 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001126 continue
1127
Doug Zongkerfc44a512014-08-26 13:10:25 -07001128 elif tgt_fn in self.src.file_map:
1129 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001130 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1131 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001132 continue
1133
1134 b = os.path.basename(tgt_fn)
1135 if b in self.src_basenames:
1136 # Look for an exact basename match in the source.
1137 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001138 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1139 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001140 continue
1141
1142 b = re.sub("[0-9]+", "#", b)
1143 if b in self.src_numpatterns:
1144 # Look for a 'number pattern' match (a basename match after
1145 # all runs of digits are replaced by "#"). (This is useful
1146 # for .so files that contain version numbers in the filename
1147 # that get bumped.)
1148 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001149 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1150 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001151 continue
1152
Tao Bao9a5caf22015-08-25 15:10:10 -07001153 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001154
1155 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001156 for k in self.src.file_map.keys():
1157 b = os.path.basename(k)
1158 self.src_basenames[b] = k
1159 b = re.sub("[0-9]+", "#", b)
1160 self.src_numpatterns[b] = k
1161
1162 @staticmethod
1163 def AssertPartition(total, seq):
1164 """Assert that all the RangeSets in 'seq' form a partition of the
1165 'total' RangeSet (ie, they are nonintersecting and their union
1166 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001167
Doug Zongkerfc44a512014-08-26 13:10:25 -07001168 so_far = RangeSet()
1169 for i in seq:
1170 assert not so_far.overlaps(i)
1171 so_far = so_far.union(i)
1172 assert so_far == total