# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Tao Bao8dcf7382015-05-21 14:09:49 -070019import common
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070024import re
25import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import threading
27import tempfile
28
Dan Albert8b72aef2015-03-23 19:13:21 -070029from rangelib import RangeSet
30
Doug Zongkerfc44a512014-08-26 13:10:25 -070031
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070032__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
33
Dan Albert8b72aef2015-03-23 19:13:21 -070034
Doug Zongkerfc44a512014-08-26 13:10:25 -070035def compute_patch(src, tgt, imgdiff=False):
36 srcfd, srcfile = tempfile.mkstemp(prefix="src-")
37 tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
38 patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
39 os.close(patchfd)
40
41 try:
42 with os.fdopen(srcfd, "wb") as f_src:
43 for p in src:
44 f_src.write(p)
45
46 with os.fdopen(tgtfd, "wb") as f_tgt:
47 for p in tgt:
48 f_tgt.write(p)
49 try:
50 os.unlink(patchfile)
51 except OSError:
52 pass
53 if imgdiff:
54 p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
55 stdout=open("/dev/null", "a"),
56 stderr=subprocess.STDOUT)
57 else:
58 p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])
59
60 if p:
61 raise ValueError("diff failed: " + str(p))
62
63 with open(patchfile, "rb") as f:
64 return f.read()
65 finally:
66 try:
67 os.unlink(srcfile)
68 os.unlink(tgtfile)
69 os.unlink(patchfile)
70 except OSError:
71 pass
72
Dan Albert8b72aef2015-03-23 19:13:21 -070073
74class Image(object):
75 def ReadRangeSet(self, ranges):
76 raise NotImplementedError
77
Tao Bao68658c02015-06-01 13:40:49 -070078 def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert8b72aef2015-03-23 19:13:21 -070079 raise NotImplementedError
80
81
82class EmptyImage(Image):
Doug Zongkerfc44a512014-08-26 13:10:25 -070083 """A zero-length image."""
84 blocksize = 4096
85 care_map = RangeSet()
Tao Baoff777812015-05-12 11:42:31 -070086 clobbered_blocks = RangeSet()
Tao Baoe9b61912015-07-09 17:37:49 -070087 extended = RangeSet()
Doug Zongkerfc44a512014-08-26 13:10:25 -070088 total_blocks = 0
89 file_map = {}
90 def ReadRangeSet(self, ranges):
91 return ()
Tao Bao68658c02015-06-01 13:40:49 -070092 def TotalSha1(self, include_clobbered_blocks=False):
93 # EmptyImage always carries empty clobbered_blocks, so
94 # include_clobbered_blocks can be ignored.
95 assert self.clobbered_blocks.size() == 0
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070096 return sha1().hexdigest()
97
98
Dan Albert8b72aef2015-03-23 19:13:21 -070099class DataImage(Image):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700100 """An image wrapped around a single string of data."""
101
102 def __init__(self, data, trim=False, pad=False):
103 self.data = data
104 self.blocksize = 4096
105
106 assert not (trim and pad)
107
108 partial = len(self.data) % self.blocksize
Tao Bao7589e962015-09-05 20:35:32 -0700109 padded = False
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700110 if partial > 0:
111 if trim:
112 self.data = self.data[:-partial]
113 elif pad:
114 self.data += '\0' * (self.blocksize - partial)
Tao Bao7589e962015-09-05 20:35:32 -0700115 padded = True
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700116 else:
117 raise ValueError(("data for DataImage must be multiple of %d bytes "
118 "unless trim or pad is specified") %
119 (self.blocksize,))
120
121 assert len(self.data) % self.blocksize == 0
122
123 self.total_blocks = len(self.data) / self.blocksize
124 self.care_map = RangeSet(data=(0, self.total_blocks))
Tao Bao7589e962015-09-05 20:35:32 -0700125 # When the last block is padded, we always write the whole block even for
126 # incremental OTAs. Because otherwise the last block may get skipped if
127 # unchanged for an incremental, but would fail the post-install
128 # verification if it has non-zero contents in the padding bytes.
129 # Bug: 23828506
130 if padded:
Tao Bao42206c32015-09-08 13:39:40 -0700131 clobbered_blocks = [self.total_blocks-1, self.total_blocks]
Tao Bao7589e962015-09-05 20:35:32 -0700132 else:
Tao Bao42206c32015-09-08 13:39:40 -0700133 clobbered_blocks = []
134 self.clobbered_blocks = clobbered_blocks
Tao Baoe9b61912015-07-09 17:37:49 -0700135 self.extended = RangeSet()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700136
137 zero_blocks = []
138 nonzero_blocks = []
139 reference = '\0' * self.blocksize
140
Tao Bao7589e962015-09-05 20:35:32 -0700141 for i in range(self.total_blocks-1 if padded else self.total_blocks):
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700142 d = self.data[i*self.blocksize : (i+1)*self.blocksize]
143 if d == reference:
144 zero_blocks.append(i)
145 zero_blocks.append(i+1)
146 else:
147 nonzero_blocks.append(i)
148 nonzero_blocks.append(i+1)
149
Tao Bao42206c32015-09-08 13:39:40 -0700150 assert zero_blocks or nonzero_blocks or clobbered_blocks
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700151
Tao Bao42206c32015-09-08 13:39:40 -0700152 self.file_map = dict()
153 if zero_blocks:
154 self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
155 if nonzero_blocks:
156 self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
157 if clobbered_blocks:
158 self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)
Tao Bao7589e962015-09-05 20:35:32 -0700159
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700160 def ReadRangeSet(self, ranges):
161 return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]
162
Tao Bao68658c02015-06-01 13:40:49 -0700163 def TotalSha1(self, include_clobbered_blocks=False):
Tao Bao7589e962015-09-05 20:35:32 -0700164 if not include_clobbered_blocks:
165 ranges = self.care_map.subtract(self.clobbered_blocks)
166 return sha1(self.ReadRangeSet(ranges)).hexdigest()
167 else:
168 return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700169
Doug Zongkerfc44a512014-08-26 13:10:25 -0700170
171class Transfer(object):
172 def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
173 self.tgt_name = tgt_name
174 self.src_name = src_name
175 self.tgt_ranges = tgt_ranges
176 self.src_ranges = src_ranges
177 self.style = style
178 self.intact = (getattr(tgt_ranges, "monotonic", False) and
179 getattr(src_ranges, "monotonic", False))
Tao Baob8c87172015-03-19 19:42:12 -0700180
181 # We use OrderedDict rather than dict so that the output is repeatable;
182 # otherwise it would depend on the hash values of the Transfer objects.
183 self.goes_before = OrderedDict()
184 self.goes_after = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700185
Doug Zongker62338182014-09-08 08:29:55 -0700186 self.stash_before = []
187 self.use_stash = []
188
Doug Zongkerfc44a512014-08-26 13:10:25 -0700189 self.id = len(by_id)
190 by_id.append(self)
191
Doug Zongker62338182014-09-08 08:29:55 -0700192 def NetStashChange(self):
193 return (sum(sr.size() for (_, sr) in self.stash_before) -
194 sum(sr.size() for (_, sr) in self.use_stash))
195
Tao Bao82c47982015-08-17 09:45:13 -0700196 def ConvertToNew(self):
197 assert self.style != "new"
198 self.use_stash = []
199 self.style = "new"
200 self.src_ranges = RangeSet()
201
Doug Zongkerfc44a512014-08-26 13:10:25 -0700202 def __str__(self):
203 return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
204 " to " + str(self.tgt_ranges) + ">")
205
206
# BlockImageDiff works on two image objects.  An image object is
# anything that provides the following attributes:
#
#    blocksize: the size in bytes of a block, currently must be 4096.
#
#    total_blocks: the total size of the partition/image, in blocks.
#
#    care_map: a RangeSet containing which blocks (in the range [0,
#      total_blocks) we actually care about; i.e. which blocks contain
#      data.
#
#    file_map: a dict that partitions the blocks contained in care_map
#      into smaller domains that are useful for doing diffs on.
#      (Typically a domain is a file, and the key in file_map is the
#      pathname.)
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS. They need to be excluded when
#      verifying the partition integrity.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet.  The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (ie, all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.
242
243class BlockImageDiff(object):
Tao Baoeba409c2015-10-21 13:30:43 -0700244 def __init__(self, tgt, src=None, threads=None, version=4):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700245 if threads is None:
246 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700247 if threads == 0:
248 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700249 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700250 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700251 self.transfers = []
252 self.src_basenames = {}
253 self.src_numpatterns = {}
Tao Baod8d14be2016-02-04 14:26:02 -0800254 self._max_stashed_size = 0
Doug Zongker62338182014-09-08 08:29:55 -0700255
Tao Baoeba409c2015-10-21 13:30:43 -0700256 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700257
258 self.tgt = tgt
259 if src is None:
260 src = EmptyImage()
261 self.src = src
262
263 # The updater code that installs the patch always uses 4k blocks.
264 assert tgt.blocksize == 4096
265 assert src.blocksize == 4096
266
267 # The range sets in each filemap should comprise a partition of
268 # the care map.
269 self.AssertPartition(src.care_map, src.file_map.values())
270 self.AssertPartition(tgt.care_map, tgt.file_map.values())
271
Tao Baod8d14be2016-02-04 14:26:02 -0800272 @property
273 def max_stashed_size(self):
274 return self._max_stashed_size
275
Doug Zongkerfc44a512014-08-26 13:10:25 -0700276 def Compute(self, prefix):
277 # When looking for a source file to use as the diff input for a
278 # target file, we try:
279 # 1) an exact path match if available, otherwise
280 # 2) a exact basename match if available, otherwise
281 # 3) a basename match after all runs of digits are replaced by
282 # "#" if available, otherwise
283 # 4) we have no source for this target.
284 self.AbbreviateSourceNames()
285 self.FindTransfers()
286
287 # Find the ordering dependencies among transfers (this is O(n^2)
288 # in the number of transfers).
289 self.GenerateDigraph()
290 # Find a sequence of transfers that satisfies as many ordering
291 # dependencies as possible (heuristically).
292 self.FindVertexSequence()
293 # Fix up the ordering dependencies that the sequence didn't
294 # satisfy.
Doug Zongker62338182014-09-08 08:29:55 -0700295 if self.version == 1:
296 self.RemoveBackwardEdges()
297 else:
298 self.ReverseBackwardEdges()
299 self.ImproveVertexSequence()
300
Tao Bao82c47982015-08-17 09:45:13 -0700301 # Ensure the runtime stash size is under the limit.
302 if self.version >= 2 and common.OPTIONS.cache_size is not None:
303 self.ReviseStashSize()
304
Doug Zongkerfc44a512014-08-26 13:10:25 -0700305 # Double-check our work.
306 self.AssertSequenceGood()
307
308 self.ComputePatches(prefix)
309 self.WriteTransfers(prefix)
310
Dan Albert8b72aef2015-03-23 19:13:21 -0700311 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000312 data = source.ReadRangeSet(ranges)
313 ctx = sha1()
314
315 for p in data:
316 ctx.update(p)
317
318 return ctx.hexdigest()
319
Doug Zongkerfc44a512014-08-26 13:10:25 -0700320 def WriteTransfers(self, prefix):
321 out = []
322
Doug Zongkerfc44a512014-08-26 13:10:25 -0700323 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700324
Doug Zongker62338182014-09-08 08:29:55 -0700325 stashes = {}
326 stashed_blocks = 0
327 max_stashed_blocks = 0
328
329 free_stash_ids = []
330 next_stash_id = 0
331
Doug Zongkerfc44a512014-08-26 13:10:25 -0700332 for xf in self.transfers:
333
Doug Zongker62338182014-09-08 08:29:55 -0700334 if self.version < 2:
335 assert not xf.stash_before
336 assert not xf.use_stash
337
338 for s, sr in xf.stash_before:
339 assert s not in stashes
340 if free_stash_ids:
341 sid = heapq.heappop(free_stash_ids)
342 else:
343 sid = next_stash_id
344 next_stash_id += 1
345 stashes[s] = sid
Sami Tolvanendd67a292014-12-09 16:40:34 +0000346 if self.version == 2:
caozhiyuan21b37d82015-10-21 15:14:03 +0800347 stashed_blocks += sr.size()
Sami Tolvanendd67a292014-12-09 16:40:34 +0000348 out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
349 else:
350 sh = self.HashBlocks(self.src, sr)
351 if sh in stashes:
352 stashes[sh] += 1
353 else:
354 stashes[sh] = 1
caozhiyuan21b37d82015-10-21 15:14:03 +0800355 stashed_blocks += sr.size()
Sami Tolvanendd67a292014-12-09 16:40:34 +0000356 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700357
358 if stashed_blocks > max_stashed_blocks:
359 max_stashed_blocks = stashed_blocks
360
Jesse Zhao7b985f62015-03-02 16:53:08 -0800361 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800362 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800363
Doug Zongker62338182014-09-08 08:29:55 -0700364 if self.version == 1:
Tao Bao4fcb77e2015-10-21 13:36:01 -0700365 src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
Sami Tolvanendd67a292014-12-09 16:40:34 +0000366 elif self.version >= 2:
Doug Zongker62338182014-09-08 08:29:55 -0700367
368 # <# blocks> <src ranges>
369 # OR
370 # <# blocks> <src ranges> <src locs> <stash refs...>
371 # OR
372 # <# blocks> - <stash refs...>
373
374 size = xf.src_ranges.size()
Dan Albert8b72aef2015-03-23 19:13:21 -0700375 src_str = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700376
377 unstashed_src_ranges = xf.src_ranges
378 mapped_stashes = []
379 for s, sr in xf.use_stash:
380 sid = stashes.pop(s)
Doug Zongker62338182014-09-08 08:29:55 -0700381 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
Sami Tolvanendd67a292014-12-09 16:40:34 +0000382 sh = self.HashBlocks(self.src, sr)
Doug Zongker62338182014-09-08 08:29:55 -0700383 sr = xf.src_ranges.map_within(sr)
384 mapped_stashes.append(sr)
Sami Tolvanendd67a292014-12-09 16:40:34 +0000385 if self.version == 2:
Dan Albert8b72aef2015-03-23 19:13:21 -0700386 src_str.append("%d:%s" % (sid, sr.to_string_raw()))
Tao Baobb625d22015-08-13 14:44:15 -0700387 # A stash will be used only once. We need to free the stash
388 # immediately after the use, instead of waiting for the automatic
389 # clean-up at the end. Because otherwise it may take up extra space
390 # and lead to OTA failures.
391 # Bug: 23119955
392 free_string.append("free %d\n" % (sid,))
caozhiyuan21b37d82015-10-21 15:14:03 +0800393 free_size += sr.size()
Sami Tolvanendd67a292014-12-09 16:40:34 +0000394 else:
395 assert sh in stashes
Dan Albert8b72aef2015-03-23 19:13:21 -0700396 src_str.append("%s:%s" % (sh, sr.to_string_raw()))
Sami Tolvanendd67a292014-12-09 16:40:34 +0000397 stashes[sh] -= 1
398 if stashes[sh] == 0:
caozhiyuan21b37d82015-10-21 15:14:03 +0800399 free_size += sr.size()
Sami Tolvanendd67a292014-12-09 16:40:34 +0000400 free_string.append("free %s\n" % (sh))
401 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700402 heapq.heappush(free_stash_ids, sid)
403
404 if unstashed_src_ranges:
Dan Albert8b72aef2015-03-23 19:13:21 -0700405 src_str.insert(1, unstashed_src_ranges.to_string_raw())
Doug Zongker62338182014-09-08 08:29:55 -0700406 if xf.use_stash:
407 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Dan Albert8b72aef2015-03-23 19:13:21 -0700408 src_str.insert(2, mapped_unstashed.to_string_raw())
Doug Zongker62338182014-09-08 08:29:55 -0700409 mapped_stashes.append(mapped_unstashed)
410 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
411 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700412 src_str.insert(1, "-")
Doug Zongker62338182014-09-08 08:29:55 -0700413 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
414
Dan Albert8b72aef2015-03-23 19:13:21 -0700415 src_str = " ".join(src_str)
Doug Zongker62338182014-09-08 08:29:55 -0700416
Sami Tolvanendd67a292014-12-09 16:40:34 +0000417 # all versions:
Doug Zongker62338182014-09-08 08:29:55 -0700418 # zero <rangeset>
419 # new <rangeset>
420 # erase <rangeset>
421 #
422 # version 1:
423 # bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
424 # imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
425 # move <src rangeset> <tgt rangeset>
426 #
427 # version 2:
Dan Albert8b72aef2015-03-23 19:13:21 -0700428 # bsdiff patchstart patchlen <tgt rangeset> <src_str>
429 # imgdiff patchstart patchlen <tgt rangeset> <src_str>
430 # move <tgt rangeset> <src_str>
Sami Tolvanendd67a292014-12-09 16:40:34 +0000431 #
432 # version 3:
Dan Albert8b72aef2015-03-23 19:13:21 -0700433 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
434 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
435 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700436
437 tgt_size = xf.tgt_ranges.size()
438
439 if xf.style == "new":
440 assert xf.tgt_ranges
441 out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
442 total += tgt_size
443 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700444 assert xf.tgt_ranges
445 assert xf.src_ranges.size() == tgt_size
446 if xf.src_ranges != xf.tgt_ranges:
Doug Zongker62338182014-09-08 08:29:55 -0700447 if self.version == 1:
448 out.append("%s %s %s\n" % (
449 xf.style,
450 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
451 elif self.version == 2:
452 out.append("%s %s %s\n" % (
453 xf.style,
Dan Albert8b72aef2015-03-23 19:13:21 -0700454 xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanendd67a292014-12-09 16:40:34 +0000455 elif self.version >= 3:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100456 # take into account automatic stashing of overlapping blocks
457 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700458 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100459 if temp_stash_usage > max_stashed_blocks:
460 max_stashed_blocks = temp_stash_usage
461
Sami Tolvanendd67a292014-12-09 16:40:34 +0000462 out.append("%s %s %s %s\n" % (
463 xf.style,
464 self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert8b72aef2015-03-23 19:13:21 -0700465 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700466 total += tgt_size
467 elif xf.style in ("bsdiff", "imgdiff"):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700468 assert xf.tgt_ranges
469 assert xf.src_ranges
Doug Zongker62338182014-09-08 08:29:55 -0700470 if self.version == 1:
471 out.append("%s %d %d %s %s\n" % (
472 xf.style, xf.patch_start, xf.patch_len,
473 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
474 elif self.version == 2:
475 out.append("%s %d %d %s %s\n" % (
476 xf.style, xf.patch_start, xf.patch_len,
Dan Albert8b72aef2015-03-23 19:13:21 -0700477 xf.tgt_ranges.to_string_raw(), src_str))
Sami Tolvanendd67a292014-12-09 16:40:34 +0000478 elif self.version >= 3:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100479 # take into account automatic stashing of overlapping blocks
480 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700481 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100482 if temp_stash_usage > max_stashed_blocks:
483 max_stashed_blocks = temp_stash_usage
484
Sami Tolvanendd67a292014-12-09 16:40:34 +0000485 out.append("%s %d %d %s %s %s %s\n" % (
486 xf.style,
487 xf.patch_start, xf.patch_len,
488 self.HashBlocks(self.src, xf.src_ranges),
489 self.HashBlocks(self.tgt, xf.tgt_ranges),
Dan Albert8b72aef2015-03-23 19:13:21 -0700490 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700491 total += tgt_size
492 elif xf.style == "zero":
493 assert xf.tgt_ranges
494 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
495 if to_zero:
496 out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
497 total += to_zero.size()
498 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700499 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700500
Sami Tolvanendd67a292014-12-09 16:40:34 +0000501 if free_string:
502 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800503 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000504
Tao Bao575d68a2015-08-07 19:49:45 -0700505 if self.version >= 2 and common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700506 # Sanity check: abort if we're going to need more stash space than
507 # the allowed size (cache_size * threshold). There are two purposes
508 # of having a threshold here. a) Part of the cache may have been
509 # occupied by some recovery logs. b) It will buy us some time to deal
510 # with the oversize issue.
511 cache_size = common.OPTIONS.cache_size
512 stash_threshold = common.OPTIONS.stash_threshold
513 max_allowed = cache_size * stash_threshold
514 assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
515 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
516 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
517 self.tgt.blocksize, max_allowed, cache_size,
518 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700519
Tao Baoe9b61912015-07-09 17:37:49 -0700520 # Zero out extended blocks as a workaround for bug 20881595.
521 if self.tgt.extended:
522 out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
Tao Baob32d56e2015-09-09 11:55:01 -0700523 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700524
525 # We erase all the blocks on the partition that a) don't contain useful
526 # data in the new image and b) will not be touched by dm-verity.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700527 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700528 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
529 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
530 if new_dontcare:
531 out.append("erase %s\n" % (new_dontcare.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700532
533 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700534 out.insert(1, "%d\n" % (total,))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700535 if self.version >= 2:
536 # version 2 only: after the total block count, we give the number
537 # of stash slots needed, and the maximum size needed (in blocks)
538 out.insert(2, str(next_stash_id) + "\n")
539 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700540
541 with open(prefix + ".transfer.list", "wb") as f:
542 for i in out:
543 f.write(i)
544
Doug Zongker62338182014-09-08 08:29:55 -0700545 if self.version >= 2:
Tao Baod8d14be2016-02-04 14:26:02 -0800546 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
Tao Bao575d68a2015-08-07 19:49:45 -0700547 OPTIONS = common.OPTIONS
548 if OPTIONS.cache_size is not None:
549 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
550 print("max stashed blocks: %d (%d bytes), "
551 "limit: %d bytes (%.2f%%)\n" % (
Tao Baod8d14be2016-02-04 14:26:02 -0800552 max_stashed_blocks, self._max_stashed_size, max_allowed,
553 self._max_stashed_size * 100.0 / max_allowed))
Tao Bao575d68a2015-08-07 19:49:45 -0700554 else:
555 print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
Tao Baod8d14be2016-02-04 14:26:02 -0800556 max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700557
Tao Bao82c47982015-08-17 09:45:13 -0700558 def ReviseStashSize(self):
559 print("Revising stash size...")
560 stashes = {}
561
562 # Create the map between a stash and its def/use points. For example, for a
563 # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
564 for xf in self.transfers:
565 # Command xf defines (stores) all the stashes in stash_before.
566 for idx, sr in xf.stash_before:
567 stashes[idx] = (sr, xf)
568
569 # Record all the stashes command xf uses.
570 for idx, _ in xf.use_stash:
571 stashes[idx] += (xf,)
572
573 # Compute the maximum blocks available for stash based on /cache size and
574 # the threshold.
575 cache_size = common.OPTIONS.cache_size
576 stash_threshold = common.OPTIONS.stash_threshold
577 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
578
579 stashed_blocks = 0
Tao Bao9a5caf22015-08-25 15:10:10 -0700580 new_blocks = 0
Tao Bao82c47982015-08-17 09:45:13 -0700581
582 # Now go through all the commands. Compute the required stash size on the
583 # fly. If a command requires excess stash than available, it deletes the
584 # stash by replacing the command that uses the stash with a "new" command
585 # instead.
586 for xf in self.transfers:
587 replaced_cmds = []
588
589 # xf.stash_before generates explicit stash commands.
590 for idx, sr in xf.stash_before:
591 if stashed_blocks + sr.size() > max_allowed:
592 # We cannot stash this one for a later command. Find out the command
593 # that will use this stash and replace the command with "new".
594 use_cmd = stashes[idx][2]
595 replaced_cmds.append(use_cmd)
Tao Bao9a5caf22015-08-25 15:10:10 -0700596 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao82c47982015-08-17 09:45:13 -0700597 else:
598 stashed_blocks += sr.size()
599
600 # xf.use_stash generates free commands.
601 for _, sr in xf.use_stash:
602 stashed_blocks -= sr.size()
603
604 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
605 # ComputePatches(), they both have the style of "diff".
606 if xf.style == "diff" and self.version >= 3:
607 assert xf.tgt_ranges and xf.src_ranges
608 if xf.src_ranges.overlaps(xf.tgt_ranges):
609 if stashed_blocks + xf.src_ranges.size() > max_allowed:
610 replaced_cmds.append(xf)
Tao Bao9a5caf22015-08-25 15:10:10 -0700611 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao82c47982015-08-17 09:45:13 -0700612
613 # Replace the commands in replaced_cmds with "new"s.
614 for cmd in replaced_cmds:
615 # It no longer uses any commands in "use_stash". Remove the def points
616 # for all those stashes.
617 for idx, sr in cmd.use_stash:
618 def_cmd = stashes[idx][1]
619 assert (idx, sr) in def_cmd.stash_before
620 def_cmd.stash_before.remove((idx, sr))
621
Tianjie Xuebe39a02016-01-14 14:12:26 -0800622 # Add up blocks that violates space limit and print total number to
623 # screen later.
624 new_blocks += cmd.tgt_ranges.size()
Tao Bao82c47982015-08-17 09:45:13 -0700625 cmd.ConvertToNew()
626
Tianjie Xuebe39a02016-01-14 14:12:26 -0800627 num_of_bytes = new_blocks * self.tgt.blocksize
628 print(" Total %d blocks (%d bytes) are packed as new blocks due to "
629 "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao9a5caf22015-08-25 15:10:10 -0700630
Doug Zongkerfc44a512014-08-26 13:10:25 -0700631 def ComputePatches(self, prefix):
632 print("Reticulating splines...")
633 diff_q = []
634 patch_num = 0
635 with open(prefix + ".new.dat", "wb") as new_f:
636 for xf in self.transfers:
637 if xf.style == "zero":
638 pass
639 elif xf.style == "new":
640 for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
641 new_f.write(piece)
642 elif xf.style == "diff":
643 src = self.src.ReadRangeSet(xf.src_ranges)
644 tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
645
646 # We can't compare src and tgt directly because they may have
647 # the same content but be broken up into blocks differently, eg:
648 #
649 # ["he", "llo"] vs ["h", "ello"]
650 #
651 # We want those to compare equal, ideally without having to
652 # actually concatenate the strings (these may be tens of
653 # megabytes).
654
655 src_sha1 = sha1()
656 for p in src:
657 src_sha1.update(p)
658 tgt_sha1 = sha1()
659 tgt_size = 0
660 for p in tgt:
661 tgt_sha1.update(p)
662 tgt_size += len(p)
663
664 if src_sha1.digest() == tgt_sha1.digest():
665 # These are identical; we don't need to generate a patch,
666 # just issue copy commands on the device.
667 xf.style = "move"
668 else:
669 # For files in zip format (eg, APKs, JARs, etc.) we would
670 # like to use imgdiff -z if possible (because it usually
671 # produces significantly smaller patches than bsdiff).
672 # This is permissible if:
673 #
674 # - the source and target files are monotonic (ie, the
675 # data is stored with blocks in increasing order), and
676 # - we haven't removed any blocks from the source set.
677 #
678 # If these conditions are satisfied then appending all the
679 # blocks in the set together in order will produce a valid
680 # zip file (plus possibly extra zeros in the last block),
681 # which is what imgdiff needs to operate. (imgdiff is
682 # fine with extra zeros at the end of the file.)
683 imgdiff = (xf.intact and
684 xf.tgt_name.split(".")[-1].lower()
685 in ("apk", "jar", "zip"))
686 xf.style = "imgdiff" if imgdiff else "bsdiff"
687 diff_q.append((tgt_size, src, tgt, xf, patch_num))
688 patch_num += 1
689
690 else:
691 assert False, "unknown style " + xf.style
692
693 if diff_q:
694 if self.threads > 1:
695 print("Computing patches (using %d threads)..." % (self.threads,))
696 else:
697 print("Computing patches...")
698 diff_q.sort()
699
700 patches = [None] * patch_num
701
Dan Albert8b72aef2015-03-23 19:13:21 -0700702 # TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongkerfc44a512014-08-26 13:10:25 -0700703 lock = threading.Lock()
704 def diff_worker():
705 while True:
706 with lock:
Dan Albert8b72aef2015-03-23 19:13:21 -0700707 if not diff_q:
708 return
Doug Zongkerfc44a512014-08-26 13:10:25 -0700709 tgt_size, src, tgt, xf, patchnum = diff_q.pop()
710 patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
711 size = len(patch)
712 with lock:
713 patches[patchnum] = (patch, xf)
714 print("%10d %10d (%6.2f%%) %7s %s" % (
715 size, tgt_size, size * 100.0 / tgt_size, xf.style,
716 xf.tgt_name if xf.tgt_name == xf.src_name else (
717 xf.tgt_name + " (from " + xf.src_name + ")")))
718
719 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700720 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700721 for th in threads:
722 th.start()
723 while threads:
724 threads.pop().join()
725 else:
726 patches = []
727
728 p = 0
729 with open(prefix + ".patch.dat", "wb") as patch_f:
730 for patch, xf in patches:
731 xf.patch_start = p
732 xf.patch_len = len(patch)
733 patch_f.write(patch)
734 p += len(patch)
735
736 def AssertSequenceGood(self):
737 # Simulate the sequences of transfers we will output, and check that:
738 # - we never read a block after writing it, and
739 # - we write every block we care about exactly once.
740
741 # Start with no blocks having been touched yet.
742 touched = RangeSet()
743
744 # Imagine processing the transfers in order.
745 for xf in self.transfers:
746 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700747
748 x = xf.src_ranges
749 if self.version >= 2:
750 for _, sr in xf.use_stash:
751 x = x.subtract(sr)
752
753 assert not touched.overlaps(x)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700754 # Check that the output blocks for this transfer haven't yet been touched.
755 assert not touched.overlaps(xf.tgt_ranges)
756 # Touch all the blocks written by this transfer.
757 touched = touched.union(xf.tgt_ranges)
758
759 # Check that we've written every target block.
760 assert touched == self.tgt.care_map
761
Doug Zongker62338182014-09-08 08:29:55 -0700762 def ImproveVertexSequence(self):
763 print("Improving vertex order...")
764
765 # At this point our digraph is acyclic; we reversed any edges that
766 # were backwards in the heuristically-generated sequence. The
767 # previously-generated order is still acceptable, but we hope to
768 # find a better order that needs less memory for stashed data.
769 # Now we do a topological sort to generate a new vertex order,
770 # using a greedy algorithm to choose which vertex goes next
771 # whenever we have a choice.
772
773 # Make a copy of the edge set; this copy will get destroyed by the
774 # algorithm.
775 for xf in self.transfers:
776 xf.incoming = xf.goes_after.copy()
777 xf.outgoing = xf.goes_before.copy()
778
779 L = [] # the new vertex order
780
781 # S is the set of sources in the remaining graph; we always choose
782 # the one that leaves the least amount of stashed data after it's
783 # executed.
784 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
785 if not u.incoming]
786 heapq.heapify(S)
787
788 while S:
789 _, _, xf = heapq.heappop(S)
790 L.append(xf)
791 for u in xf.outgoing:
792 del u.incoming[xf]
793 if not u.incoming:
794 heapq.heappush(S, (u.NetStashChange(), u.order, u))
795
796 # if this fails then our graph had a cycle.
797 assert len(L) == len(self.transfers)
798
799 self.transfers = L
800 for i, xf in enumerate(L):
801 xf.order = i
802
Doug Zongkerfc44a512014-08-26 13:10:25 -0700803 def RemoveBackwardEdges(self):
804 print("Removing backward edges...")
805 in_order = 0
806 out_of_order = 0
807 lost_source = 0
808
809 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700810 lost = 0
811 size = xf.src_ranges.size()
812 for u in xf.goes_before:
813 # xf should go before u
814 if xf.order < u.order:
815 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700816 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700817 else:
818 # it doesn't, boo. trim the blocks that u writes from xf's
819 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700820 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700821 assert xf.src_ranges.overlaps(u.tgt_ranges)
822 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
823 xf.intact = False
824
825 if xf.style == "diff" and not xf.src_ranges:
826 # nothing left to diff from; treat as new data
827 xf.style = "new"
828
829 lost = size - xf.src_ranges.size()
830 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700831
832 print((" %d/%d dependencies (%.2f%%) were violated; "
833 "%d source blocks removed.") %
834 (out_of_order, in_order + out_of_order,
835 (out_of_order * 100.0 / (in_order + out_of_order))
836 if (in_order + out_of_order) else 0.0,
837 lost_source))
838
Doug Zongker62338182014-09-08 08:29:55 -0700839 def ReverseBackwardEdges(self):
840 print("Reversing backward edges...")
841 in_order = 0
842 out_of_order = 0
843 stashes = 0
844 stash_size = 0
845
846 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700847 for u in xf.goes_before.copy():
848 # xf should go before u
849 if xf.order < u.order:
850 # it does, hurray!
851 in_order += 1
852 else:
853 # it doesn't, boo. modify u to stash the blocks that it
854 # writes that xf wants to read, and then require u to go
855 # before xf.
856 out_of_order += 1
857
858 overlap = xf.src_ranges.intersect(u.tgt_ranges)
859 assert overlap
860
861 u.stash_before.append((stashes, overlap))
862 xf.use_stash.append((stashes, overlap))
863 stashes += 1
864 stash_size += overlap.size()
865
866 # reverse the edge direction; now xf must go after u
867 del xf.goes_before[u]
868 del u.goes_after[xf]
869 xf.goes_after[u] = None # value doesn't matter
870 u.goes_before[xf] = None
871
872 print((" %d/%d dependencies (%.2f%%) were violated; "
873 "%d source blocks stashed.") %
874 (out_of_order, in_order + out_of_order,
875 (out_of_order * 100.0 / (in_order + out_of_order))
876 if (in_order + out_of_order) else 0.0,
877 stash_size))
878
Doug Zongkerfc44a512014-08-26 13:10:25 -0700879 def FindVertexSequence(self):
880 print("Finding vertex sequence...")
881
882 # This is based on "A Fast & Effective Heuristic for the Feedback
883 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
884 # it as starting with the digraph G and moving all the vertices to
885 # be on a horizontal line in some order, trying to minimize the
886 # number of edges that end up pointing to the left. Left-pointing
887 # edges will get removed to turn the digraph into a DAG. In this
888 # case each edge has a weight which is the number of source blocks
889 # we'll lose if that edge is removed; we try to minimize the total
890 # weight rather than just the number of edges.
891
892 # Make a copy of the edge set; this copy will get destroyed by the
893 # algorithm.
894 for xf in self.transfers:
895 xf.incoming = xf.goes_after.copy()
896 xf.outgoing = xf.goes_before.copy()
897
898 # We use an OrderedDict instead of just a set so that the output
899 # is repeatable; otherwise it would depend on the hash values of
900 # the transfer objects.
901 G = OrderedDict()
902 for xf in self.transfers:
903 G[xf] = None
904 s1 = deque() # the left side of the sequence, built from left to right
905 s2 = deque() # the right side of the sequence, built from right to left
906
907 while G:
908
909 # Put all sinks at the end of the sequence.
910 while True:
911 sinks = [u for u in G if not u.outgoing]
Dan Albert8b72aef2015-03-23 19:13:21 -0700912 if not sinks:
913 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700914 for u in sinks:
915 s2.appendleft(u)
916 del G[u]
917 for iu in u.incoming:
918 del iu.outgoing[u]
919
920 # Put all the sources at the beginning of the sequence.
921 while True:
922 sources = [u for u in G if not u.incoming]
Dan Albert8b72aef2015-03-23 19:13:21 -0700923 if not sources:
924 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700925 for u in sources:
926 s1.append(u)
927 del G[u]
928 for iu in u.outgoing:
929 del iu.incoming[u]
930
Dan Albert8b72aef2015-03-23 19:13:21 -0700931 if not G:
932 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700933
934 # Find the "best" vertex to put next. "Best" is the one that
935 # maximizes the net difference in source blocks saved we get by
936 # pretending it's a source rather than a sink.
937
938 max_d = None
939 best_u = None
940 for u in G:
941 d = sum(u.outgoing.values()) - sum(u.incoming.values())
942 if best_u is None or d > max_d:
943 max_d = d
944 best_u = u
945
946 u = best_u
947 s1.append(u)
948 del G[u]
949 for iu in u.outgoing:
950 del iu.incoming[u]
951 for iu in u.incoming:
952 del iu.outgoing[u]
953
954 # Now record the sequence in the 'order' field of each transfer,
955 # and by rearranging self.transfers to be in the chosen sequence.
956
957 new_transfers = []
958 for x in itertools.chain(s1, s2):
959 x.order = len(new_transfers)
960 new_transfers.append(x)
961 del x.incoming
962 del x.outgoing
963
964 self.transfers = new_transfers
965
966 def GenerateDigraph(self):
967 print("Generating digraph...")
968 for a in self.transfers:
969 for b in self.transfers:
Dan Albert8b72aef2015-03-23 19:13:21 -0700970 if a is b:
971 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700972
973 # If the blocks written by A are read by B, then B needs to go before A.
974 i = a.tgt_ranges.intersect(b.src_ranges)
975 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700976 if b.src_name == "__ZERO":
977 # the cost of removing source blocks for the __ZERO domain
978 # is (nearly) zero.
979 size = 0
980 else:
981 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700982 b.goes_before[a] = size
983 a.goes_after[b] = size
984
985 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -0700986 """Parse the file_map to generate all the transfers."""
987
988 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
989 split=False):
990 """Wrapper function for adding a Transfer().
991
992 For BBOTA v3, we need to stash source blocks for resumable feature.
993 However, with the growth of file size and the shrink of the cache
994 partition source blocks are too large to be stashed. If a file occupies
995 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
996 into smaller pieces by getting multiple Transfer()s.
997
Tianjie Xubb86e1d2016-01-13 16:14:10 -0800998 The downside is that after splitting, we may increase the package size
999 since the split pieces don't align well. According to our experiments,
1000 1/8 of the cache size as the per-piece limit appears to be optimal.
1001 Compared to the fixed 1024-block limit, it reduces the overall package
1002 size by 30% volantis, and 20% for angler and bullhead."""
Tao Bao9a5caf22015-08-25 15:10:10 -07001003
1004 # We care about diff transfers only.
1005 if style != "diff" or not split:
1006 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1007 return
1008
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001009 pieces = 0
1010 cache_size = common.OPTIONS.cache_size
1011 split_threshold = 0.125
1012 max_blocks_per_transfer = int(cache_size * split_threshold /
1013 self.tgt.blocksize)
1014
Tao Bao9a5caf22015-08-25 15:10:10 -07001015 # Change nothing for small files.
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001016 if (tgt_ranges.size() <= max_blocks_per_transfer and
1017 src_ranges.size() <= max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001018 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1019 return
1020
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001021 while (tgt_ranges.size() > max_blocks_per_transfer and
1022 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001023 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1024 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001025 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1026 src_first = src_ranges.first(max_blocks_per_transfer)
1027
Tao Bao9a5caf22015-08-25 15:10:10 -07001028 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
1029 by_id)
1030
1031 tgt_ranges = tgt_ranges.subtract(tgt_first)
1032 src_ranges = src_ranges.subtract(src_first)
1033 pieces += 1
1034
1035 # Handle remaining blocks.
1036 if tgt_ranges.size() or src_ranges.size():
1037 # Must be both non-empty.
1038 assert tgt_ranges.size() and src_ranges.size()
1039 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1040 src_split_name = "%s-%d" % (src_name, pieces)
1041 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1042 by_id)
1043
Doug Zongkerfc44a512014-08-26 13:10:25 -07001044 empty = RangeSet()
1045 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1046 if tgt_fn == "__ZERO":
1047 # the special "__ZERO" domain is all the blocks not contained
1048 # in any file and that are filled with zeros. We have a
1049 # special transfer style for zero blocks.
1050 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001051 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1052 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001053 continue
1054
Tao Baoff777812015-05-12 11:42:31 -07001055 elif tgt_fn == "__COPY":
1056 # "__COPY" domain includes all the blocks not contained in any
1057 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001058 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001059 continue
1060
Doug Zongkerfc44a512014-08-26 13:10:25 -07001061 elif tgt_fn in self.src.file_map:
1062 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001063 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1064 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001065 continue
1066
1067 b = os.path.basename(tgt_fn)
1068 if b in self.src_basenames:
1069 # Look for an exact basename match in the source.
1070 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001071 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1072 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001073 continue
1074
1075 b = re.sub("[0-9]+", "#", b)
1076 if b in self.src_numpatterns:
1077 # Look for a 'number pattern' match (a basename match after
1078 # all runs of digits are replaced by "#"). (This is useful
1079 # for .so files that contain version numbers in the filename
1080 # that get bumped.)
1081 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001082 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1083 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001084 continue
1085
Tao Bao9a5caf22015-08-25 15:10:10 -07001086 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001087
1088 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001089 for k in self.src.file_map.keys():
1090 b = os.path.basename(k)
1091 self.src_basenames[b] = k
1092 b = re.sub("[0-9]+", "#", b)
1093 self.src_numpatterns[b] = k
1094
1095 @staticmethod
1096 def AssertPartition(total, seq):
1097 """Assert that all the RangeSets in 'seq' form a partition of the
1098 'total' RangeSet (ie, they are nonintersecting and their union
1099 equals 'total')."""
1100 so_far = RangeSet()
1101 for i in seq:
1102 assert not so_far.overlaps(i)
1103 so_far = so_far.union(i)
1104 assert so_far == total