Blame - tools/releasetools/blockimgdiff.py - android_build

blob: 1225b61a46a04b82bcccf53cd1503677188d4a1b [file] [log] [blame]

Doug Zongker	424296a	2014-09-02 08:53:09 -0700	[diff] [blame]	1	# Copyright (C) 2014 The Android Open Source Project
				2	#
				3	# Licensed under the Apache License, Version 2.0 (the "License");
				4	# you may not use this file except in compliance with the License.
				5	# You may obtain a copy of the License at
				6	#
				7	# http://www.apache.org/licenses/LICENSE-2.0
				8	#
				9	# Unless required by applicable law or agreed to in writing, software
				10	# distributed under the License is distributed on an "AS IS" BASIS,
				11	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	# See the License for the specific language governing permissions and
				13	# limitations under the License.
				14
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	15	from __future__ import print_function
				16
				17	from collections import deque, OrderedDict
				18	from hashlib import sha1
Tao Bao	8dcf738	2015-05-21 14:09:49 -0700	[diff] [blame]	19	import common
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	20	import heapq
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	21	import itertools
				22	import multiprocessing
				23	import os
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	24	import re
				25	import subprocess
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	26	import threading
				27	import tempfile
				28
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	29	from rangelib import RangeSet
				30
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	31
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	32	__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
				33
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	34
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	35	def compute_patch(src, tgt, imgdiff=False):
				36	srcfd, srcfile = tempfile.mkstemp(prefix="src-")
				37	tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
				38	patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
				39	os.close(patchfd)
				40
				41	try:
				42	with os.fdopen(srcfd, "wb") as f_src:
				43	for p in src:
				44	f_src.write(p)
				45
				46	with os.fdopen(tgtfd, "wb") as f_tgt:
				47	for p in tgt:
				48	f_tgt.write(p)
				49	try:
				50	os.unlink(patchfile)
				51	except OSError:
				52	pass
				53	if imgdiff:
				54	p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
				55	stdout=open("/dev/null", "a"),
				56	stderr=subprocess.STDOUT)
				57	else:
				58	p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])
				59
				60	if p:
				61	raise ValueError("diff failed: " + str(p))
				62
				63	with open(patchfile, "rb") as f:
				64	return f.read()
				65	finally:
				66	try:
				67	os.unlink(srcfile)
				68	os.unlink(tgtfile)
				69	os.unlink(patchfile)
				70	except OSError:
				71	pass
				72
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	73
				74	class Image(object):
				75	def ReadRangeSet(self, ranges):
				76	raise NotImplementedError
				77
Tao Bao	68658c0	2015-06-01 13:40:49 -0700	[diff] [blame]	78	def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	79	raise NotImplementedError
				80
				81
				82	class EmptyImage(Image):
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	83	"""A zero-length image."""
				84	blocksize = 4096
				85	care_map = RangeSet()
Tao Bao	ff77781	2015-05-12 11:42:31 -0700	[diff] [blame]	86	clobbered_blocks = RangeSet()
Tao Bao	e9b6191	2015-07-09 17:37:49 -0700	[diff] [blame]	87	extended = RangeSet()
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	88	total_blocks = 0
				89	file_map = {}
				90	def ReadRangeSet(self, ranges):
				91	return ()
Tao Bao	68658c0	2015-06-01 13:40:49 -0700	[diff] [blame]	92	def TotalSha1(self, include_clobbered_blocks=False):
				93	# EmptyImage always carries empty clobbered_blocks, so
				94	# include_clobbered_blocks can be ignored.
				95	assert self.clobbered_blocks.size() == 0
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	96	return sha1().hexdigest()
				97
				98
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	99	class DataImage(Image):
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	100	"""An image wrapped around a single string of data."""
				101
				102	def __init__(self, data, trim=False, pad=False):
				103	self.data = data
				104	self.blocksize = 4096
				105
				106	assert not (trim and pad)
				107
				108	partial = len(self.data) % self.blocksize
Tao Bao	7589e96	2015-09-05 20:35:32 -0700	[diff] [blame]	109	padded = False
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	110	if partial > 0:
				111	if trim:
				112	self.data = self.data[:-partial]
				113	elif pad:
				114	self.data += '\0' * (self.blocksize - partial)
Tao Bao	7589e96	2015-09-05 20:35:32 -0700	[diff] [blame]	115	padded = True
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	116	else:
				117	raise ValueError(("data for DataImage must be multiple of %d bytes "
				118	"unless trim or pad is specified") %
				119	(self.blocksize,))
				120
				121	assert len(self.data) % self.blocksize == 0
				122
				123	self.total_blocks = len(self.data) / self.blocksize
				124	self.care_map = RangeSet(data=(0, self.total_blocks))
Tao Bao	7589e96	2015-09-05 20:35:32 -0700	[diff] [blame]	125	# When the last block is padded, we always write the whole block even for
				126	# incremental OTAs. Because otherwise the last block may get skipped if
				127	# unchanged for an incremental, but would fail the post-install
				128	# verification if it has non-zero contents in the padding bytes.
				129	# Bug: 23828506
				130	if padded:
				131	self.clobbered_blocks = RangeSet(
				132	data=(self.total_blocks-1, self.total_blocks))
				133	else:
				134	self.clobbered_blocks = RangeSet()
Tao Bao	e9b6191	2015-07-09 17:37:49 -0700	[diff] [blame]	135	self.extended = RangeSet()
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	136
				137	zero_blocks = []
				138	nonzero_blocks = []
				139	reference = '\0' * self.blocksize
				140
Tao Bao	7589e96	2015-09-05 20:35:32 -0700	[diff] [blame]	141	for i in range(self.total_blocks-1 if padded else self.total_blocks):
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	142	d = self.data[iself.blocksize : (i+1)self.blocksize]
				143	if d == reference:
				144	zero_blocks.append(i)
				145	zero_blocks.append(i+1)
				146	else:
				147	nonzero_blocks.append(i)
				148	nonzero_blocks.append(i+1)
				149
				150	self.file_map = {"__ZERO": RangeSet(zero_blocks),
				151	"__NONZERO": RangeSet(nonzero_blocks)}
				152
Tao Bao	7589e96	2015-09-05 20:35:32 -0700	[diff] [blame]	153	if self.clobbered_blocks:
				154	self.file_map["__COPY"] = self.clobbered_blocks
				155
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	156	def ReadRangeSet(self, ranges):
				157	return [self.data[sself.blocksize:eself.blocksize] for (s, e) in ranges]
				158
Tao Bao	68658c0	2015-06-01 13:40:49 -0700	[diff] [blame]	159	def TotalSha1(self, include_clobbered_blocks=False):
Tao Bao	7589e96	2015-09-05 20:35:32 -0700	[diff] [blame]	160	if not include_clobbered_blocks:
				161	ranges = self.care_map.subtract(self.clobbered_blocks)
				162	return sha1(self.ReadRangeSet(ranges)).hexdigest()
				163	else:
				164	return sha1(self.data).hexdigest()
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	165
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	166
				167	class Transfer(object):
				168	def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
				169	self.tgt_name = tgt_name
				170	self.src_name = src_name
				171	self.tgt_ranges = tgt_ranges
				172	self.src_ranges = src_ranges
				173	self.style = style
				174	self.intact = (getattr(tgt_ranges, "monotonic", False) and
				175	getattr(src_ranges, "monotonic", False))
Tao Bao	b8c8717	2015-03-19 19:42:12 -0700	[diff] [blame]	176
				177	# We use OrderedDict rather than dict so that the output is repeatable;
				178	# otherwise it would depend on the hash values of the Transfer objects.
				179	self.goes_before = OrderedDict()
				180	self.goes_after = OrderedDict()
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	181
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	182	self.stash_before = []
				183	self.use_stash = []
				184
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	185	self.id = len(by_id)
				186	by_id.append(self)
				187
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	188	def NetStashChange(self):
				189	return (sum(sr.size() for (_, sr) in self.stash_before) -
				190	sum(sr.size() for (_, sr) in self.use_stash))
				191
Tao Bao	82c4798	2015-08-17 09:45:13 -0700	[diff] [blame]	192	def ConvertToNew(self):
				193	assert self.style != "new"
				194	self.use_stash = []
				195	self.style = "new"
				196	self.src_ranges = RangeSet()
				197
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	198	def __str__(self):
				199	return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
				200	" to " + str(self.tgt_ranges) + ">")
				201
				202
				203	# BlockImageDiff works on two image objects. An image object is
				204	# anything that provides the following attributes:
				205	#
				206	# blocksize: the size in bytes of a block, currently must be 4096.
				207	#
				208	# total_blocks: the total size of the partition/image, in blocks.
				209	#
				210	# care_map: a RangeSet containing which blocks (in the range [0,
				211	# total_blocks) we actually care about; i.e. which blocks contain
				212	# data.
				213	#
				214	# file_map: a dict that partitions the blocks contained in care_map
				215	# into smaller domains that are useful for doing diffs on.
				216	# (Typically a domain is a file, and the key in file_map is the
				217	# pathname.)
				218	#
Tao Bao	ff77781	2015-05-12 11:42:31 -0700	[diff] [blame]	219	# clobbered_blocks: a RangeSet containing which blocks contain data
				220	# but may be altered by the FS. They need to be excluded when
				221	# verifying the partition integrity.
				222	#
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	223	# ReadRangeSet(): a function that takes a RangeSet and returns the
				224	# data contained in the image blocks of that RangeSet. The data
				225	# is returned as a list or tuple of strings; concatenating the
				226	# elements together should produce the requested data.
				227	# Implementations are free to break up the data into list/tuple
				228	# elements in any way that is convenient.
				229	#
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	230	# TotalSha1(): a function that returns (as a hex string) the SHA-1
				231	# hash of all the data in the image (ie, all the blocks in the
Tao Bao	68658c0	2015-06-01 13:40:49 -0700	[diff] [blame]	232	# care_map minus clobbered_blocks, or including the clobbered
				233	# blocks if include_clobbered_blocks is True).
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	234	#
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	235	# When creating a BlockImageDiff, the src image may be None, in which
				236	# case the list of transfers produced will never read from the
				237	# original image.
				238
				239	class BlockImageDiff(object):
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	240	def __init__(self, tgt, src=None, threads=None, version=3):
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	241	if threads is None:
				242	threads = multiprocessing.cpu_count() // 2
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	243	if threads == 0:
				244	threads = 1
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	245	self.threads = threads
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	246	self.version = version
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	247	self.transfers = []
				248	self.src_basenames = {}
				249	self.src_numpatterns = {}
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	250
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	251	assert version in (1, 2, 3)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	252
				253	self.tgt = tgt
				254	if src is None:
				255	src = EmptyImage()
				256	self.src = src
				257
				258	# The updater code that installs the patch always uses 4k blocks.
				259	assert tgt.blocksize == 4096
				260	assert src.blocksize == 4096
				261
				262	# The range sets in each filemap should comprise a partition of
				263	# the care map.
				264	self.AssertPartition(src.care_map, src.file_map.values())
				265	self.AssertPartition(tgt.care_map, tgt.file_map.values())
				266
				267	def Compute(self, prefix):
				268	# When looking for a source file to use as the diff input for a
				269	# target file, we try:
				270	# 1) an exact path match if available, otherwise
				271	# 2) a exact basename match if available, otherwise
				272	# 3) a basename match after all runs of digits are replaced by
				273	# "#" if available, otherwise
				274	# 4) we have no source for this target.
				275	self.AbbreviateSourceNames()
				276	self.FindTransfers()
				277
				278	# Find the ordering dependencies among transfers (this is O(n^2)
				279	# in the number of transfers).
				280	self.GenerateDigraph()
				281	# Find a sequence of transfers that satisfies as many ordering
				282	# dependencies as possible (heuristically).
				283	self.FindVertexSequence()
				284	# Fix up the ordering dependencies that the sequence didn't
				285	# satisfy.
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	286	if self.version == 1:
				287	self.RemoveBackwardEdges()
				288	else:
				289	self.ReverseBackwardEdges()
				290	self.ImproveVertexSequence()
				291
Tao Bao	82c4798	2015-08-17 09:45:13 -0700	[diff] [blame]	292	# Ensure the runtime stash size is under the limit.
				293	if self.version >= 2 and common.OPTIONS.cache_size is not None:
				294	self.ReviseStashSize()
				295
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	296	# Double-check our work.
				297	self.AssertSequenceGood()
				298
				299	self.ComputePatches(prefix)
				300	self.WriteTransfers(prefix)
				301
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	302	def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	303	data = source.ReadRangeSet(ranges)
				304	ctx = sha1()
				305
				306	for p in data:
				307	ctx.update(p)
				308
				309	return ctx.hexdigest()
				310
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	311	def WriteTransfers(self, prefix):
				312	out = []
				313
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	314	total = 0
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	315
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	316	stashes = {}
				317	stashed_blocks = 0
				318	max_stashed_blocks = 0
				319
				320	free_stash_ids = []
				321	next_stash_id = 0
				322
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	323	for xf in self.transfers:
				324
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	325	if self.version < 2:
				326	assert not xf.stash_before
				327	assert not xf.use_stash
				328
				329	for s, sr in xf.stash_before:
				330	assert s not in stashes
				331	if free_stash_ids:
				332	sid = heapq.heappop(free_stash_ids)
				333	else:
				334	sid = next_stash_id
				335	next_stash_id += 1
				336	stashes[s] = sid
				337	stashed_blocks += sr.size()
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	338	if self.version == 2:
				339	out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
				340	else:
				341	sh = self.HashBlocks(self.src, sr)
				342	if sh in stashes:
				343	stashes[sh] += 1
				344	else:
				345	stashes[sh] = 1
				346	out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	347
				348	if stashed_blocks > max_stashed_blocks:
				349	max_stashed_blocks = stashed_blocks
				350
Jesse Zhao	7b985f6	2015-03-02 16:53:08 -0800	[diff] [blame]	351	free_string = []
				352
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	353	if self.version == 1:
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	354	src_str = xf.src_ranges.to_string_raw()
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	355	elif self.version >= 2:
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	356
				357	# <# blocks> <src ranges>
				358	# OR
				359	# <# blocks> <src ranges> <src locs> <stash refs...>
				360	# OR
				361	# <# blocks> - <stash refs...>
				362
				363	size = xf.src_ranges.size()
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	364	src_str = [str(size)]
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	365
				366	unstashed_src_ranges = xf.src_ranges
				367	mapped_stashes = []
				368	for s, sr in xf.use_stash:
				369	sid = stashes.pop(s)
				370	stashed_blocks -= sr.size()
				371	unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	372	sh = self.HashBlocks(self.src, sr)
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	373	sr = xf.src_ranges.map_within(sr)
				374	mapped_stashes.append(sr)
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	375	if self.version == 2:
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	376	src_str.append("%d:%s" % (sid, sr.to_string_raw()))
Tao Bao	bb625d2	2015-08-13 14:44:15 -0700	[diff] [blame]	377	# A stash will be used only once. We need to free the stash
				378	# immediately after the use, instead of waiting for the automatic
				379	# clean-up at the end. Because otherwise it may take up extra space
				380	# and lead to OTA failures.
				381	# Bug: 23119955
				382	free_string.append("free %d\n" % (sid,))
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	383	else:
				384	assert sh in stashes
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	385	src_str.append("%s:%s" % (sh, sr.to_string_raw()))
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	386	stashes[sh] -= 1
				387	if stashes[sh] == 0:
				388	free_string.append("free %s\n" % (sh))
				389	stashes.pop(sh)
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	390	heapq.heappush(free_stash_ids, sid)
				391
				392	if unstashed_src_ranges:
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	393	src_str.insert(1, unstashed_src_ranges.to_string_raw())
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	394	if xf.use_stash:
				395	mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	396	src_str.insert(2, mapped_unstashed.to_string_raw())
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	397	mapped_stashes.append(mapped_unstashed)
				398	self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
				399	else:
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	400	src_str.insert(1, "-")
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	401	self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
				402
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	403	src_str = " ".join(src_str)
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	404
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	405	# all versions:
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	406	# zero <rangeset>
				407	# new <rangeset>
				408	# erase <rangeset>
				409	#
				410	# version 1:
				411	# bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
				412	# imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
				413	# move <src rangeset> <tgt rangeset>
				414	#
				415	# version 2:
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	416	# bsdiff patchstart patchlen <tgt rangeset> <src_str>
				417	# imgdiff patchstart patchlen <tgt rangeset> <src_str>
				418	# move <tgt rangeset> <src_str>
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	419	#
				420	# version 3:
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	421	# bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
				422	# imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
				423	# move hash <tgt rangeset> <src_str>
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	424
				425	tgt_size = xf.tgt_ranges.size()
				426
				427	if xf.style == "new":
				428	assert xf.tgt_ranges
				429	out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
				430	total += tgt_size
				431	elif xf.style == "move":
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	432	assert xf.tgt_ranges
				433	assert xf.src_ranges.size() == tgt_size
				434	if xf.src_ranges != xf.tgt_ranges:
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	435	if self.version == 1:
				436	out.append("%s %s %s\n" % (
				437	xf.style,
				438	xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
				439	elif self.version == 2:
				440	out.append("%s %s %s\n" % (
				441	xf.style,
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	442	xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	443	elif self.version >= 3:
Sami Tolvanen	29f529f	2015-04-17 16:28:08 +0100	[diff] [blame]	444	# take into account automatic stashing of overlapping blocks
				445	if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Bao	e9b6191	2015-07-09 17:37:49 -0700	[diff] [blame]	446	temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen	29f529f	2015-04-17 16:28:08 +0100	[diff] [blame]	447	if temp_stash_usage > max_stashed_blocks:
				448	max_stashed_blocks = temp_stash_usage
				449
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	450	out.append("%s %s %s %s\n" % (
				451	xf.style,
				452	self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	453	xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	454	total += tgt_size
				455	elif xf.style in ("bsdiff", "imgdiff"):
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	456	assert xf.tgt_ranges
				457	assert xf.src_ranges
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	458	if self.version == 1:
				459	out.append("%s %d %d %s %s\n" % (
				460	xf.style, xf.patch_start, xf.patch_len,
				461	xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
				462	elif self.version == 2:
				463	out.append("%s %d %d %s %s\n" % (
				464	xf.style, xf.patch_start, xf.patch_len,
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	465	xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	466	elif self.version >= 3:
Sami Tolvanen	29f529f	2015-04-17 16:28:08 +0100	[diff] [blame]	467	# take into account automatic stashing of overlapping blocks
				468	if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Bao	e9b6191	2015-07-09 17:37:49 -0700	[diff] [blame]	469	temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen	29f529f	2015-04-17 16:28:08 +0100	[diff] [blame]	470	if temp_stash_usage > max_stashed_blocks:
				471	max_stashed_blocks = temp_stash_usage
				472
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	473	out.append("%s %d %d %s %s %s %s\n" % (
				474	xf.style,
				475	xf.patch_start, xf.patch_len,
				476	self.HashBlocks(self.src, xf.src_ranges),
				477	self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	478	xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	479	total += tgt_size
				480	elif xf.style == "zero":
				481	assert xf.tgt_ranges
				482	to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
				483	if to_zero:
				484	out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
				485	total += to_zero.size()
				486	else:
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	487	raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	488
Sami Tolvanen	dd67a29	2014-12-09 16:40:34 +0000	[diff] [blame]	489	if free_string:
				490	out.append("".join(free_string))
				491
Tao Bao	575d68a	2015-08-07 19:49:45 -0700	[diff] [blame]	492	if self.version >= 2 and common.OPTIONS.cache_size is not None:
Tao Bao	8dcf738	2015-05-21 14:09:49 -0700	[diff] [blame]	493	# Sanity check: abort if we're going to need more stash space than
				494	# the allowed size (cache_size * threshold). There are two purposes
				495	# of having a threshold here. a) Part of the cache may have been
				496	# occupied by some recovery logs. b) It will buy us some time to deal
				497	# with the oversize issue.
				498	cache_size = common.OPTIONS.cache_size
				499	stash_threshold = common.OPTIONS.stash_threshold
				500	max_allowed = cache_size * stash_threshold
				501	assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
				502	'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
				503	max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
				504	self.tgt.blocksize, max_allowed, cache_size,
				505	stash_threshold)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	506
Tao Bao	e9b6191	2015-07-09 17:37:49 -0700	[diff] [blame]	507	# Zero out extended blocks as a workaround for bug 20881595.
				508	if self.tgt.extended:
				509	out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
				510
				511	# We erase all the blocks on the partition that a) don't contain useful
				512	# data in the new image and b) will not be touched by dm-verity.
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	513	all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Bao	e9b6191	2015-07-09 17:37:49 -0700	[diff] [blame]	514	all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
				515	new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
				516	if new_dontcare:
				517	out.append("erase %s\n" % (new_dontcare.to_string_raw(),))
Doug Zongker	e985f6f	2014-09-09 12:38:47 -0700	[diff] [blame]	518
				519	out.insert(0, "%d\n" % (self.version,)) # format version number
				520	out.insert(1, str(total) + "\n")
				521	if self.version >= 2:
				522	# version 2 only: after the total block count, we give the number
				523	# of stash slots needed, and the maximum size needed (in blocks)
				524	out.insert(2, str(next_stash_id) + "\n")
				525	out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	526
				527	with open(prefix + ".transfer.list", "wb") as f:
				528	for i in out:
				529	f.write(i)
				530
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	531	if self.version >= 2:
Tao Bao	8dcf738	2015-05-21 14:09:49 -0700	[diff] [blame]	532	max_stashed_size = max_stashed_blocks * self.tgt.blocksize
Tao Bao	575d68a	2015-08-07 19:49:45 -0700	[diff] [blame]	533	OPTIONS = common.OPTIONS
				534	if OPTIONS.cache_size is not None:
				535	max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
				536	print("max stashed blocks: %d (%d bytes), "
				537	"limit: %d bytes (%.2f%%)\n" % (
				538	max_stashed_blocks, max_stashed_size, max_allowed,
				539	max_stashed_size * 100.0 / max_allowed))
				540	else:
				541	print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
				542	max_stashed_blocks, max_stashed_size))
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	543
Tao Bao	82c4798	2015-08-17 09:45:13 -0700	[diff] [blame]	544	def ReviseStashSize(self):
				545	print("Revising stash size...")
				546	stashes = {}
				547
				548	# Create the map between a stash and its def/use points. For example, for a
				549	# given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
				550	for xf in self.transfers:
				551	# Command xf defines (stores) all the stashes in stash_before.
				552	for idx, sr in xf.stash_before:
				553	stashes[idx] = (sr, xf)
				554
				555	# Record all the stashes command xf uses.
				556	for idx, _ in xf.use_stash:
				557	stashes[idx] += (xf,)
				558
				559	# Compute the maximum blocks available for stash based on /cache size and
				560	# the threshold.
				561	cache_size = common.OPTIONS.cache_size
				562	stash_threshold = common.OPTIONS.stash_threshold
				563	max_allowed = cache_size * stash_threshold / self.tgt.blocksize
				564
				565	stashed_blocks = 0
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	566	new_blocks = 0
Tao Bao	82c4798	2015-08-17 09:45:13 -0700	[diff] [blame]	567
				568	# Now go through all the commands. Compute the required stash size on the
				569	# fly. If a command requires excess stash than available, it deletes the
				570	# stash by replacing the command that uses the stash with a "new" command
				571	# instead.
				572	for xf in self.transfers:
				573	replaced_cmds = []
				574
				575	# xf.stash_before generates explicit stash commands.
				576	for idx, sr in xf.stash_before:
				577	if stashed_blocks + sr.size() > max_allowed:
				578	# We cannot stash this one for a later command. Find out the command
				579	# that will use this stash and replace the command with "new".
				580	use_cmd = stashes[idx][2]
				581	replaced_cmds.append(use_cmd)
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	582	print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao	82c4798	2015-08-17 09:45:13 -0700	[diff] [blame]	583	else:
				584	stashed_blocks += sr.size()
				585
				586	# xf.use_stash generates free commands.
				587	for _, sr in xf.use_stash:
				588	stashed_blocks -= sr.size()
				589
				590	# "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
				591	# ComputePatches(), they both have the style of "diff".
				592	if xf.style == "diff" and self.version >= 3:
				593	assert xf.tgt_ranges and xf.src_ranges
				594	if xf.src_ranges.overlaps(xf.tgt_ranges):
				595	if stashed_blocks + xf.src_ranges.size() > max_allowed:
				596	replaced_cmds.append(xf)
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	597	print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao	82c4798	2015-08-17 09:45:13 -0700	[diff] [blame]	598
				599	# Replace the commands in replaced_cmds with "new"s.
				600	for cmd in replaced_cmds:
				601	# It no longer uses any commands in "use_stash". Remove the def points
				602	# for all those stashes.
				603	for idx, sr in cmd.use_stash:
				604	def_cmd = stashes[idx][1]
				605	assert (idx, sr) in def_cmd.stash_before
				606	def_cmd.stash_before.remove((idx, sr))
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	607	new_blocks += sr.size()
Tao Bao	82c4798	2015-08-17 09:45:13 -0700	[diff] [blame]	608
				609	cmd.ConvertToNew()
				610
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	611	print(" Total %d blocks are packed as new blocks due to insufficient "
				612	"cache size." % (new_blocks,))
				613
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	614	def ComputePatches(self, prefix):
				615	print("Reticulating splines...")
				616	diff_q = []
				617	patch_num = 0
				618	with open(prefix + ".new.dat", "wb") as new_f:
				619	for xf in self.transfers:
				620	if xf.style == "zero":
				621	pass
				622	elif xf.style == "new":
				623	for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
				624	new_f.write(piece)
				625	elif xf.style == "diff":
				626	src = self.src.ReadRangeSet(xf.src_ranges)
				627	tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
				628
				629	# We can't compare src and tgt directly because they may have
				630	# the same content but be broken up into blocks differently, eg:
				631	#
				632	# ["he", "llo"] vs ["h", "ello"]
				633	#
				634	# We want those to compare equal, ideally without having to
				635	# actually concatenate the strings (these may be tens of
				636	# megabytes).
				637
				638	src_sha1 = sha1()
				639	for p in src:
				640	src_sha1.update(p)
				641	tgt_sha1 = sha1()
				642	tgt_size = 0
				643	for p in tgt:
				644	tgt_sha1.update(p)
				645	tgt_size += len(p)
				646
				647	if src_sha1.digest() == tgt_sha1.digest():
				648	# These are identical; we don't need to generate a patch,
				649	# just issue copy commands on the device.
				650	xf.style = "move"
				651	else:
				652	# For files in zip format (eg, APKs, JARs, etc.) we would
				653	# like to use imgdiff -z if possible (because it usually
				654	# produces significantly smaller patches than bsdiff).
				655	# This is permissible if:
				656	#
				657	# - the source and target files are monotonic (ie, the
				658	# data is stored with blocks in increasing order), and
				659	# - we haven't removed any blocks from the source set.
				660	#
				661	# If these conditions are satisfied then appending all the
				662	# blocks in the set together in order will produce a valid
				663	# zip file (plus possibly extra zeros in the last block),
				664	# which is what imgdiff needs to operate. (imgdiff is
				665	# fine with extra zeros at the end of the file.)
				666	imgdiff = (xf.intact and
				667	xf.tgt_name.split(".")[-1].lower()
				668	in ("apk", "jar", "zip"))
				669	xf.style = "imgdiff" if imgdiff else "bsdiff"
				670	diff_q.append((tgt_size, src, tgt, xf, patch_num))
				671	patch_num += 1
				672
				673	else:
				674	assert False, "unknown style " + xf.style
				675
				676	if diff_q:
				677	if self.threads > 1:
				678	print("Computing patches (using %d threads)..." % (self.threads,))
				679	else:
				680	print("Computing patches...")
				681	diff_q.sort()
				682
				683	patches = [None] * patch_num
				684
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	685	# TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	686	lock = threading.Lock()
				687	def diff_worker():
				688	while True:
				689	with lock:
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	690	if not diff_q:
				691	return
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	692	tgt_size, src, tgt, xf, patchnum = diff_q.pop()
				693	patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
				694	size = len(patch)
				695	with lock:
				696	patches[patchnum] = (patch, xf)
				697	print("%10d %10d (%6.2f%%) %7s %s" % (
				698	size, tgt_size, size * 100.0 / tgt_size, xf.style,
				699	xf.tgt_name if xf.tgt_name == xf.src_name else (
				700	xf.tgt_name + " (from " + xf.src_name + ")")))
				701
				702	threads = [threading.Thread(target=diff_worker)
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	703	for _ in range(self.threads)]
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	704	for th in threads:
				705	th.start()
				706	while threads:
				707	threads.pop().join()
				708	else:
				709	patches = []
				710
				711	p = 0
				712	with open(prefix + ".patch.dat", "wb") as patch_f:
				713	for patch, xf in patches:
				714	xf.patch_start = p
				715	xf.patch_len = len(patch)
				716	patch_f.write(patch)
				717	p += len(patch)
				718
				719	def AssertSequenceGood(self):
				720	# Simulate the sequences of transfers we will output, and check that:
				721	# - we never read a block after writing it, and
				722	# - we write every block we care about exactly once.
				723
				724	# Start with no blocks having been touched yet.
				725	touched = RangeSet()
				726
				727	# Imagine processing the transfers in order.
				728	for xf in self.transfers:
				729	# Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	730
				731	x = xf.src_ranges
				732	if self.version >= 2:
				733	for _, sr in xf.use_stash:
				734	x = x.subtract(sr)
				735
				736	assert not touched.overlaps(x)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	737	# Check that the output blocks for this transfer haven't yet been touched.
				738	assert not touched.overlaps(xf.tgt_ranges)
				739	# Touch all the blocks written by this transfer.
				740	touched = touched.union(xf.tgt_ranges)
				741
				742	# Check that we've written every target block.
				743	assert touched == self.tgt.care_map
				744
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	745	def ImproveVertexSequence(self):
				746	print("Improving vertex order...")
				747
				748	# At this point our digraph is acyclic; we reversed any edges that
				749	# were backwards in the heuristically-generated sequence. The
				750	# previously-generated order is still acceptable, but we hope to
				751	# find a better order that needs less memory for stashed data.
				752	# Now we do a topological sort to generate a new vertex order,
				753	# using a greedy algorithm to choose which vertex goes next
				754	# whenever we have a choice.
				755
				756	# Make a copy of the edge set; this copy will get destroyed by the
				757	# algorithm.
				758	for xf in self.transfers:
				759	xf.incoming = xf.goes_after.copy()
				760	xf.outgoing = xf.goes_before.copy()
				761
				762	L = [] # the new vertex order
				763
				764	# S is the set of sources in the remaining graph; we always choose
				765	# the one that leaves the least amount of stashed data after it's
				766	# executed.
				767	S = [(u.NetStashChange(), u.order, u) for u in self.transfers
				768	if not u.incoming]
				769	heapq.heapify(S)
				770
				771	while S:
				772	_, _, xf = heapq.heappop(S)
				773	L.append(xf)
				774	for u in xf.outgoing:
				775	del u.incoming[xf]
				776	if not u.incoming:
				777	heapq.heappush(S, (u.NetStashChange(), u.order, u))
				778
				779	# if this fails then our graph had a cycle.
				780	assert len(L) == len(self.transfers)
				781
				782	self.transfers = L
				783	for i, xf in enumerate(L):
				784	xf.order = i
				785
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	786	def RemoveBackwardEdges(self):
				787	print("Removing backward edges...")
				788	in_order = 0
				789	out_of_order = 0
				790	lost_source = 0
				791
				792	for xf in self.transfers:
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	793	lost = 0
				794	size = xf.src_ranges.size()
				795	for u in xf.goes_before:
				796	# xf should go before u
				797	if xf.order < u.order:
				798	# it does, hurray!
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	799	in_order += 1
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	800	else:
				801	# it doesn't, boo. trim the blocks that u writes from xf's
				802	# source, so that xf can go after u.
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	803	out_of_order += 1
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	804	assert xf.src_ranges.overlaps(u.tgt_ranges)
				805	xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
				806	xf.intact = False
				807
				808	if xf.style == "diff" and not xf.src_ranges:
				809	# nothing left to diff from; treat as new data
				810	xf.style = "new"
				811
				812	lost = size - xf.src_ranges.size()
				813	lost_source += lost
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	814
				815	print((" %d/%d dependencies (%.2f%%) were violated; "
				816	"%d source blocks removed.") %
				817	(out_of_order, in_order + out_of_order,
				818	(out_of_order * 100.0 / (in_order + out_of_order))
				819	if (in_order + out_of_order) else 0.0,
				820	lost_source))
				821
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	822	def ReverseBackwardEdges(self):
				823	print("Reversing backward edges...")
				824	in_order = 0
				825	out_of_order = 0
				826	stashes = 0
				827	stash_size = 0
				828
				829	for xf in self.transfers:
Doug Zongker	6233818	2014-09-08 08:29:55 -0700	[diff] [blame]	830	for u in xf.goes_before.copy():
				831	# xf should go before u
				832	if xf.order < u.order:
				833	# it does, hurray!
				834	in_order += 1
				835	else:
				836	# it doesn't, boo. modify u to stash the blocks that it
				837	# writes that xf wants to read, and then require u to go
				838	# before xf.
				839	out_of_order += 1
				840
				841	overlap = xf.src_ranges.intersect(u.tgt_ranges)
				842	assert overlap
				843
				844	u.stash_before.append((stashes, overlap))
				845	xf.use_stash.append((stashes, overlap))
				846	stashes += 1
				847	stash_size += overlap.size()
				848
				849	# reverse the edge direction; now xf must go after u
				850	del xf.goes_before[u]
				851	del u.goes_after[xf]
				852	xf.goes_after[u] = None # value doesn't matter
				853	u.goes_before[xf] = None
				854
				855	print((" %d/%d dependencies (%.2f%%) were violated; "
				856	"%d source blocks stashed.") %
				857	(out_of_order, in_order + out_of_order,
				858	(out_of_order * 100.0 / (in_order + out_of_order))
				859	if (in_order + out_of_order) else 0.0,
				860	stash_size))
				861
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	862	def FindVertexSequence(self):
				863	print("Finding vertex sequence...")
				864
				865	# This is based on "A Fast & Effective Heuristic for the Feedback
				866	# Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
				867	# it as starting with the digraph G and moving all the vertices to
				868	# be on a horizontal line in some order, trying to minimize the
				869	# number of edges that end up pointing to the left. Left-pointing
				870	# edges will get removed to turn the digraph into a DAG. In this
				871	# case each edge has a weight which is the number of source blocks
				872	# we'll lose if that edge is removed; we try to minimize the total
				873	# weight rather than just the number of edges.
				874
				875	# Make a copy of the edge set; this copy will get destroyed by the
				876	# algorithm.
				877	for xf in self.transfers:
				878	xf.incoming = xf.goes_after.copy()
				879	xf.outgoing = xf.goes_before.copy()
				880
				881	# We use an OrderedDict instead of just a set so that the output
				882	# is repeatable; otherwise it would depend on the hash values of
				883	# the transfer objects.
				884	G = OrderedDict()
				885	for xf in self.transfers:
				886	G[xf] = None
				887	s1 = deque() # the left side of the sequence, built from left to right
				888	s2 = deque() # the right side of the sequence, built from right to left
				889
				890	while G:
				891
				892	# Put all sinks at the end of the sequence.
				893	while True:
				894	sinks = [u for u in G if not u.outgoing]
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	895	if not sinks:
				896	break
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	897	for u in sinks:
				898	s2.appendleft(u)
				899	del G[u]
				900	for iu in u.incoming:
				901	del iu.outgoing[u]
				902
				903	# Put all the sources at the beginning of the sequence.
				904	while True:
				905	sources = [u for u in G if not u.incoming]
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	906	if not sources:
				907	break
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	908	for u in sources:
				909	s1.append(u)
				910	del G[u]
				911	for iu in u.outgoing:
				912	del iu.incoming[u]
				913
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	914	if not G:
				915	break
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	916
				917	# Find the "best" vertex to put next. "Best" is the one that
				918	# maximizes the net difference in source blocks saved we get by
				919	# pretending it's a source rather than a sink.
				920
				921	max_d = None
				922	best_u = None
				923	for u in G:
				924	d = sum(u.outgoing.values()) - sum(u.incoming.values())
				925	if best_u is None or d > max_d:
				926	max_d = d
				927	best_u = u
				928
				929	u = best_u
				930	s1.append(u)
				931	del G[u]
				932	for iu in u.outgoing:
				933	del iu.incoming[u]
				934	for iu in u.incoming:
				935	del iu.outgoing[u]
				936
				937	# Now record the sequence in the 'order' field of each transfer,
				938	# and by rearranging self.transfers to be in the chosen sequence.
				939
				940	new_transfers = []
				941	for x in itertools.chain(s1, s2):
				942	x.order = len(new_transfers)
				943	new_transfers.append(x)
				944	del x.incoming
				945	del x.outgoing
				946
				947	self.transfers = new_transfers
				948
				949	def GenerateDigraph(self):
				950	print("Generating digraph...")
				951	for a in self.transfers:
				952	for b in self.transfers:
Dan Albert	8b72aef	2015-03-23 19:13:21 -0700	[diff] [blame]	953	if a is b:
				954	continue
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	955
				956	# If the blocks written by A are read by B, then B needs to go before A.
				957	i = a.tgt_ranges.intersect(b.src_ranges)
				958	if i:
Doug Zongker	ab7ca1d	2014-08-26 10:40:28 -0700	[diff] [blame]	959	if b.src_name == "__ZERO":
				960	# the cost of removing source blocks for the __ZERO domain
				961	# is (nearly) zero.
				962	size = 0
				963	else:
				964	size = i.size()
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	965	b.goes_before[a] = size
				966	a.goes_after[b] = size
				967
				968	def FindTransfers(self):
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	969	"""Parse the file_map to generate all the transfers."""
				970
				971	def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
				972	split=False):
				973	"""Wrapper function for adding a Transfer().
				974
				975	For BBOTA v3, we need to stash source blocks for resumable feature.
				976	However, with the growth of file size and the shrink of the cache
				977	partition source blocks are too large to be stashed. If a file occupies
				978	too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
				979	into smaller pieces by getting multiple Transfer()s.
				980
				981	The downside is that after splitting, we can no longer use imgdiff but
				982	only bsdiff."""
				983
				984	MAX_BLOCKS_PER_DIFF_TRANSFER = 1024
				985
				986	# We care about diff transfers only.
				987	if style != "diff" or not split:
				988	Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
				989	return
				990
				991	# Change nothing for small files.
				992	if (tgt_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER and
				993	src_ranges.size() <= MAX_BLOCKS_PER_DIFF_TRANSFER):
				994	Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
				995	return
				996
				997	pieces = 0
				998	while (tgt_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER and
				999	src_ranges.size() > MAX_BLOCKS_PER_DIFF_TRANSFER):
				1000	tgt_split_name = "%s-%d" % (tgt_name, pieces)
				1001	src_split_name = "%s-%d" % (src_name, pieces)
				1002	tgt_first = tgt_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
				1003	src_first = src_ranges.first(MAX_BLOCKS_PER_DIFF_TRANSFER)
				1004	Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
				1005	by_id)
				1006
				1007	tgt_ranges = tgt_ranges.subtract(tgt_first)
				1008	src_ranges = src_ranges.subtract(src_first)
				1009	pieces += 1
				1010
				1011	# Handle remaining blocks.
				1012	if tgt_ranges.size() or src_ranges.size():
				1013	# Must be both non-empty.
				1014	assert tgt_ranges.size() and src_ranges.size()
				1015	tgt_split_name = "%s-%d" % (tgt_name, pieces)
				1016	src_split_name = "%s-%d" % (src_name, pieces)
				1017	Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
				1018	by_id)
				1019
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	1020	empty = RangeSet()
				1021	for tgt_fn, tgt_ranges in self.tgt.file_map.items():
				1022	if tgt_fn == "__ZERO":
				1023	# the special "__ZERO" domain is all the blocks not contained
				1024	# in any file and that are filled with zeros. We have a
				1025	# special transfer style for zero blocks.
				1026	src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	1027	AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
				1028	"zero", self.transfers)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	1029	continue
				1030
Tao Bao	ff77781	2015-05-12 11:42:31 -0700	[diff] [blame]	1031	elif tgt_fn == "__COPY":
				1032	# "__COPY" domain includes all the blocks not contained in any
				1033	# file and that need to be copied unconditionally to the target.
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	1034	AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Bao	ff77781	2015-05-12 11:42:31 -0700	[diff] [blame]	1035	continue
				1036
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	1037	elif tgt_fn in self.src.file_map:
				1038	# Look for an exact pathname match in the source.
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	1039	AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
				1040	"diff", self.transfers, self.version >= 3)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	1041	continue
				1042
				1043	b = os.path.basename(tgt_fn)
				1044	if b in self.src_basenames:
				1045	# Look for an exact basename match in the source.
				1046	src_fn = self.src_basenames[b]
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	1047	AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
				1048	"diff", self.transfers, self.version >= 3)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	1049	continue
				1050
				1051	b = re.sub("[0-9]+", "#", b)
				1052	if b in self.src_numpatterns:
				1053	# Look for a 'number pattern' match (a basename match after
				1054	# all runs of digits are replaced by "#"). (This is useful
				1055	# for .so files that contain version numbers in the filename
				1056	# that get bumped.)
				1057	src_fn = self.src_numpatterns[b]
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	1058	AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
				1059	"diff", self.transfers, self.version >= 3)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	1060	continue
				1061
Tao Bao	9a5caf2	2015-08-25 15:10:10 -0700	[diff] [blame]	1062	AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	1063
				1064	def AbbreviateSourceNames(self):
Doug Zongker	fc44a51	2014-08-26 13:10:25 -0700	[diff] [blame]	1065	for k in self.src.file_map.keys():
				1066	b = os.path.basename(k)
				1067	self.src_basenames[b] = k
				1068	b = re.sub("[0-9]+", "#", b)
				1069	self.src_numpatterns[b] = k
				1070
				1071	@staticmethod
				1072	def AssertPartition(total, seq):
				1073	"""Assert that all the RangeSets in 'seq' form a partition of the
				1074	'total' RangeSet (ie, they are nonintersecting and their union
				1075	equals 'total')."""
				1076	so_far = RangeSet()
				1077	for i in seq:
				1078	assert not so_far.overlaps(i)
				1079	so_far = so_far.union(i)
				1080	assert so_far == total