blob: 662b4ec935cc5256c7622c89e4ca061f43cb66bc [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import array
Tao Bao8dcf7382015-05-21 14:09:49 -070020import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080021import functools
Doug Zongker62338182014-09-08 08:29:55 -070022import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import itertools
24import multiprocessing
25import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongker6ab2a502016-02-09 08:28:09 -080029import time
Doug Zongkerfc44a512014-08-26 13:10:25 -070030import tempfile
31
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that transforms src into tgt.

  Args:
    src: an iterable of byte strings with the source data.
    tgt: an iterable of byte strings with the target data.
    imgdiff: if True, run "imgdiff -z" (better for zip-format data);
        otherwise run "bsdiff".

  Returns:
    The patch contents as a byte string.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)

    # The diff tools want to create the patch file themselves.
    try:
      os.unlink(patchfile)
    except OSError:
      pass

    if imgdiff:
      # Use os.devnull and a context manager so the handle is closed
      # (the old open("/dev/null", "a") leaked a file descriptor per call).
      with open(os.devnull, "ab") as devnull:
        p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                            stdout=devnull,
                            stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Unlink each temp file independently: a failure removing one must not
    # leak the others (the old single try block stopped at the first OSError).
    for path in (srcfile, tgtfile, patchfile):
      try:
        os.unlink(path)
      except OSError:
        pass
75
Dan Albert8b72aef2015-03-23 19:13:21 -070076
class Image(object):
  """Abstract interface for image objects consumed by BlockImageDiff.

  Concrete images (EmptyImage, DataImage, sparse images, ...) must
  override both methods below.
  """

  def ReadRangeSet(self, ranges):
    """Return the data for the given RangeSet as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 hex digest of the image's data."""
    raise NotImplementedError
83
84
class EmptyImage(Image):
  """A zero-length image."""

  blocksize = 4096
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  total_blocks = 0
  file_map = {}

  def ReadRangeSet(self, ranges):
    """There is no data; every read yields nothing."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of zero bytes of data.

    EmptyImage always carries empty clobbered_blocks, so
    include_clobbered_blocks can be ignored.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
100
101
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as a 4096-byte-block image.

    Args:
      data: the raw image contents as a single string.
      trim: if True, drop a trailing partial block.
      pad: if True, zero-fill a trailing partial block to a full block.

    Raises:
      ValueError: if data is not block-aligned and neither trim nor pad
          is specified.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so the count stays an int under Python 3 as well.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Bug fix: store clobbered_blocks as a RangeSet rather than a raw list.
    # The documented Image interface (and EmptyImage) expose a RangeSet, and
    # TotalSha1() below passes it to RangeSet.subtract().
    self.clobbered_blocks = RangeSet(data=clobbered_blocks)
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # A padded final block is excluded here; it is covered by "__COPY".
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def ReadRangeSet(self, ranges):
    """Return the data for 'ranges' as a list of per-range slices."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of the image data, optionally skipping clobbered
    (padded) blocks."""
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # Bug fix: ReadRangeSet() returns a list of pieces; sha1() needs a
      # single string, so join the pieces before hashing.
      return sha1("".join(self.ReadRangeSet(ranges))).hexdigest()
    else:
      return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700172
Doug Zongkerfc44a512014-08-26 13:10:25 -0700173
class Transfer(object):
  """One command in the transfer list: produce tgt_ranges from src_ranges
  using the given style ("new", "zero", "move", "diff", ...)."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # Intact only when both range sets are monotonic.
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register this transfer and take the next sequential id.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Blocks this command stashes minus blocks it consumes from stash."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    consumed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - consumed

  def ConvertToNew(self):
    """Turn this transfer into a plain "new" command with no source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
208
209
@functools.total_ordering
class HeapItem(object):
  """Wrapper ordering items by *descending* item.score in a min-heap."""

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    """Drop the payload; a cleared HeapItem tests False."""
    self.item = None

  def __bool__(self):
    # Bug fix: this previously returned "self.item is None", which made a
    # live item falsy and a cleared one truthy -- exactly inverted relative
    # to clear() above.
    return self.item is not None

  # Python 2 looks for __nonzero__ rather than __bool__.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
225
226
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# BlockImageDiff works on two image objects. An image object is
228# anything that provides the following attributes:
229#
230# blocksize: the size in bytes of a block, currently must be 4096.
231#
232# total_blocks: the total size of the partition/image, in blocks.
233#
234# care_map: a RangeSet containing which blocks (in the range [0,
235# total_blocks) we actually care about; i.e. which blocks contain
236# data.
237#
238# file_map: a dict that partitions the blocks contained in care_map
239# into smaller domains that are useful for doing diffs on.
240# (Typically a domain is a file, and the key in file_map is the
241# pathname.)
242#
Tao Baoff777812015-05-12 11:42:31 -0700243# clobbered_blocks: a RangeSet containing which blocks contain data
244# but may be altered by the FS. They need to be excluded when
245# verifying the partition integrity.
246#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700247# ReadRangeSet(): a function that takes a RangeSet and returns the
248# data contained in the image blocks of that RangeSet. The data
249# is returned as a list or tuple of strings; concatenating the
250# elements together should produce the requested data.
251# Implementations are free to break up the data into list/tuple
252# elements in any way that is convenient.
253#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700254# TotalSha1(): a function that returns (as a hex string) the SHA-1
255# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700256# care_map minus clobbered_blocks, or including the clobbered
257# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700258#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# When creating a BlockImageDiff, the src image may be None, in which
260# case the list of transfers produced will never read from the
261# original image.
262
263class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700264 def __init__(self, tgt, src=None, threads=None, version=4,
265 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700266 if threads is None:
267 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700268 if threads == 0:
269 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700270 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700271 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700272 self.transfers = []
273 self.src_basenames = {}
274 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800275 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700276 self.touched_src_ranges = RangeSet()
277 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700278 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700279
Tao Baoeba409c2015-10-21 13:30:43 -0700280 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700281
282 self.tgt = tgt
283 if src is None:
284 src = EmptyImage()
285 self.src = src
286
287 # The updater code that installs the patch always uses 4k blocks.
288 assert tgt.blocksize == 4096
289 assert src.blocksize == 4096
290
291 # The range sets in each filemap should comprise a partition of
292 # the care map.
293 self.AssertPartition(src.care_map, src.file_map.values())
294 self.AssertPartition(tgt.care_map, tgt.file_map.values())
295
  @property
  def max_stashed_size(self):
    """Peak stash usage in bytes, as computed by WriteTransfers().

    0 until the transfer list has been written.
    """
    return self._max_stashed_size
299
  def Compute(self, prefix):
    """Run the full diff pipeline and write the "<prefix>.*" output files.

    Pairs target files with source files, builds and orders the transfer
    list, bounds stash usage, then emits the patch data and transfer list.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
334
Dan Albert8b72aef2015-03-23 19:13:21 -0700335 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000336 data = source.ReadRangeSet(ranges)
337 ctx = sha1()
338
339 for p in data:
340 ctx.update(p)
341
342 return ctx.hexdigest()
343
  def WriteTransfers(self, prefix):
    """Serialize self.transfers to the "<prefix>.transfer.list" text file.

    Emits one command per transfer in order, tracking stash usage as it
    goes. Also updates self.touched_src_ranges / self.touched_src_sha1
    (version >= 3) and self._max_stashed_size (version >= 2).
    """
    def WriteTransfersZero(out, to_zero):
      """Limit the number of blocks in command zero to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      zero_blocks_limit = 1024
      total = 0
      while to_zero:
        zero_blocks = to_zero.first(zero_blocks_limit)
        out.append("zero %s\n" % (zero_blocks.to_string_raw(),))
        total += zero_blocks.size()
        to_zero = to_zero.subtract(zero_blocks)
      return total

    out = []

    total = 0

    # Map of stash slot/hash -> id (v2) or reference count (v3+).
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # Stash ids are recycled via a min-heap so the smallest free id is
    # always reused first.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      # Version 1 has no stash support at all.
      if self.version < 2:
        assert not xf.stash_before
        assert not xf.use_stash

      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        if self.version == 2:
          stashed_blocks += sr.size()
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          # Version 3+ addresses stashes by content hash; identical content
          # is stored once and reference-counted.
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            stashed_blocks += sr.size()
            self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
      elif self.version >= 2:

        #   <# blocks> <src ranges>
        #     OR
        #   <# blocks> <src ranges> <src locs> <stash refs...>
        #     OR
        #   <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          # Express the stashed range relative to the command's source span.
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
            free_size += sr.size()
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            stashes[sh] -= 1
            # Free the slot once its last reference is consumed.
            if stashes[sh] == 0:
              free_size += sr.size()
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source and target are identical is a no-op: emit
        # nothing and count nothing.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            self.touched_src_ranges = self.touched_src_ranges.union(
                xf.src_ranges)

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        # Only zero blocks that aren't already zero in the source.
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteTransfersZero(out, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    if self.version >= 3:
      self.touched_src_sha1 = self.HashBlocks(
          self.src, self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteTransfersZero(out, self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, self._max_stashed_size, max_allowed,
                  self._max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700617
  def ReviseStashSize(self):
    """Keep runtime stash usage within the /cache budget.

    Walks the ordered transfer list simulating stash usage; any command
    whose stash (explicit or implicit) would push usage past the limit is
    converted to a plain "new" command instead.
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao9a5caf22015-08-25 15:10:10 -0700690
Doug Zongkerfc44a512014-08-26 13:10:25 -0700691 def ComputePatches(self, prefix):
692 print("Reticulating splines...")
693 diff_q = []
694 patch_num = 0
695 with open(prefix + ".new.dat", "wb") as new_f:
696 for xf in self.transfers:
697 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700698 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
699 print("%10d %10d (%6.2f%%) %7s %s %s" % (
700 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
701 str(xf.tgt_ranges)))
702
Doug Zongkerfc44a512014-08-26 13:10:25 -0700703 elif xf.style == "new":
704 for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
705 new_f.write(piece)
Tao Bao08c85832016-09-19 22:26:30 -0700706 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
707 print("%10d %10d (%6.2f%%) %7s %s %s" % (
708 tgt_size, tgt_size, 100.0, xf.style,
709 xf.tgt_name, str(xf.tgt_ranges)))
710
Doug Zongkerfc44a512014-08-26 13:10:25 -0700711 elif xf.style == "diff":
712 src = self.src.ReadRangeSet(xf.src_ranges)
713 tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
714
715 # We can't compare src and tgt directly because they may have
716 # the same content but be broken up into blocks differently, eg:
717 #
718 # ["he", "llo"] vs ["h", "ello"]
719 #
720 # We want those to compare equal, ideally without having to
721 # actually concatenate the strings (these may be tens of
722 # megabytes).
723
724 src_sha1 = sha1()
725 for p in src:
726 src_sha1.update(p)
727 tgt_sha1 = sha1()
728 tgt_size = 0
729 for p in tgt:
730 tgt_sha1.update(p)
731 tgt_size += len(p)
732
733 if src_sha1.digest() == tgt_sha1.digest():
734 # These are identical; we don't need to generate a patch,
735 # just issue copy commands on the device.
736 xf.style = "move"
Tao Bao08c85832016-09-19 22:26:30 -0700737 if xf.src_ranges != xf.tgt_ranges:
738 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
739 tgt_size, tgt_size, 100.0, xf.style,
740 xf.tgt_name if xf.tgt_name == xf.src_name else (
741 xf.tgt_name + " (from " + xf.src_name + ")"),
742 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700743 else:
744 # For files in zip format (eg, APKs, JARs, etc.) we would
745 # like to use imgdiff -z if possible (because it usually
746 # produces significantly smaller patches than bsdiff).
747 # This is permissible if:
748 #
Tao Bao293fd132016-06-11 12:19:23 -0700749 # - imgdiff is not disabled, and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700750 # - the source and target files are monotonic (ie, the
751 # data is stored with blocks in increasing order), and
752 # - we haven't removed any blocks from the source set.
753 #
754 # If these conditions are satisfied then appending all the
755 # blocks in the set together in order will produce a valid
756 # zip file (plus possibly extra zeros in the last block),
757 # which is what imgdiff needs to operate. (imgdiff is
758 # fine with extra zeros at the end of the file.)
Tao Bao293fd132016-06-11 12:19:23 -0700759 imgdiff = (not self.disable_imgdiff and xf.intact and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700760 xf.tgt_name.split(".")[-1].lower()
761 in ("apk", "jar", "zip"))
762 xf.style = "imgdiff" if imgdiff else "bsdiff"
763 diff_q.append((tgt_size, src, tgt, xf, patch_num))
764 patch_num += 1
765
766 else:
767 assert False, "unknown style " + xf.style
768
769 if diff_q:
770 if self.threads > 1:
771 print("Computing patches (using %d threads)..." % (self.threads,))
772 else:
773 print("Computing patches...")
774 diff_q.sort()
775
776 patches = [None] * patch_num
777
Dan Albert8b72aef2015-03-23 19:13:21 -0700778 # TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongkerfc44a512014-08-26 13:10:25 -0700779 lock = threading.Lock()
780 def diff_worker():
781 while True:
782 with lock:
Dan Albert8b72aef2015-03-23 19:13:21 -0700783 if not diff_q:
784 return
Doug Zongkerfc44a512014-08-26 13:10:25 -0700785 tgt_size, src, tgt, xf, patchnum = diff_q.pop()
786 patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
787 size = len(patch)
788 with lock:
789 patches[patchnum] = (patch, xf)
Tao Bao08c85832016-09-19 22:26:30 -0700790 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
Doug Zongkerfc44a512014-08-26 13:10:25 -0700791 size, tgt_size, size * 100.0 / tgt_size, xf.style,
792 xf.tgt_name if xf.tgt_name == xf.src_name else (
Tao Bao08c85832016-09-19 22:26:30 -0700793 xf.tgt_name + " (from " + xf.src_name + ")"),
794 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700795
796 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700797 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700798 for th in threads:
799 th.start()
800 while threads:
801 threads.pop().join()
802 else:
803 patches = []
804
805 p = 0
806 with open(prefix + ".patch.dat", "wb") as patch_f:
807 for patch, xf in patches:
808 xf.patch_start = p
809 xf.patch_len = len(patch)
810 patch_f.write(patch)
811 p += len(patch)
812
813 def AssertSequenceGood(self):
814 # Simulate the sequences of transfers we will output, and check that:
815 # - we never read a block after writing it, and
816 # - we write every block we care about exactly once.
817
818 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800819 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700820
821 # Imagine processing the transfers in order.
822 for xf in self.transfers:
823 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700824
825 x = xf.src_ranges
826 if self.version >= 2:
827 for _, sr in xf.use_stash:
828 x = x.subtract(sr)
829
Doug Zongker6ab2a502016-02-09 08:28:09 -0800830 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800831 # Source image could be larger. Don't check the blocks that are in the
832 # source image only. Since they are not in 'touched', and won't ever
833 # be touched.
834 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800835 assert touched[i] == 0
836
837 # Check that the output blocks for this transfer haven't yet
838 # been touched, and touch all the blocks written by this
839 # transfer.
840 for s, e in xf.tgt_ranges:
841 for i in range(s, e):
842 assert touched[i] == 0
843 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700844
845 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800846 for s, e in self.tgt.care_map:
847 for i in range(s, e):
848 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700849
Doug Zongker62338182014-09-08 08:29:55 -0700850 def ImproveVertexSequence(self):
851 print("Improving vertex order...")
852
853 # At this point our digraph is acyclic; we reversed any edges that
854 # were backwards in the heuristically-generated sequence. The
855 # previously-generated order is still acceptable, but we hope to
856 # find a better order that needs less memory for stashed data.
857 # Now we do a topological sort to generate a new vertex order,
858 # using a greedy algorithm to choose which vertex goes next
859 # whenever we have a choice.
860
861 # Make a copy of the edge set; this copy will get destroyed by the
862 # algorithm.
863 for xf in self.transfers:
864 xf.incoming = xf.goes_after.copy()
865 xf.outgoing = xf.goes_before.copy()
866
867 L = [] # the new vertex order
868
869 # S is the set of sources in the remaining graph; we always choose
870 # the one that leaves the least amount of stashed data after it's
871 # executed.
872 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
873 if not u.incoming]
874 heapq.heapify(S)
875
876 while S:
877 _, _, xf = heapq.heappop(S)
878 L.append(xf)
879 for u in xf.outgoing:
880 del u.incoming[xf]
881 if not u.incoming:
882 heapq.heappush(S, (u.NetStashChange(), u.order, u))
883
884 # if this fails then our graph had a cycle.
885 assert len(L) == len(self.transfers)
886
887 self.transfers = L
888 for i, xf in enumerate(L):
889 xf.order = i
890
Doug Zongkerfc44a512014-08-26 13:10:25 -0700891 def RemoveBackwardEdges(self):
892 print("Removing backward edges...")
893 in_order = 0
894 out_of_order = 0
895 lost_source = 0
896
897 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700898 lost = 0
899 size = xf.src_ranges.size()
900 for u in xf.goes_before:
901 # xf should go before u
902 if xf.order < u.order:
903 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700904 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700905 else:
906 # it doesn't, boo. trim the blocks that u writes from xf's
907 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700908 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700909 assert xf.src_ranges.overlaps(u.tgt_ranges)
910 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
911 xf.intact = False
912
913 if xf.style == "diff" and not xf.src_ranges:
914 # nothing left to diff from; treat as new data
915 xf.style = "new"
916
917 lost = size - xf.src_ranges.size()
918 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700919
920 print((" %d/%d dependencies (%.2f%%) were violated; "
921 "%d source blocks removed.") %
922 (out_of_order, in_order + out_of_order,
923 (out_of_order * 100.0 / (in_order + out_of_order))
924 if (in_order + out_of_order) else 0.0,
925 lost_source))
926
Doug Zongker62338182014-09-08 08:29:55 -0700927 def ReverseBackwardEdges(self):
928 print("Reversing backward edges...")
929 in_order = 0
930 out_of_order = 0
931 stashes = 0
932 stash_size = 0
933
934 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700935 for u in xf.goes_before.copy():
936 # xf should go before u
937 if xf.order < u.order:
938 # it does, hurray!
939 in_order += 1
940 else:
941 # it doesn't, boo. modify u to stash the blocks that it
942 # writes that xf wants to read, and then require u to go
943 # before xf.
944 out_of_order += 1
945
946 overlap = xf.src_ranges.intersect(u.tgt_ranges)
947 assert overlap
948
949 u.stash_before.append((stashes, overlap))
950 xf.use_stash.append((stashes, overlap))
951 stashes += 1
952 stash_size += overlap.size()
953
954 # reverse the edge direction; now xf must go after u
955 del xf.goes_before[u]
956 del u.goes_after[xf]
957 xf.goes_after[u] = None # value doesn't matter
958 u.goes_before[xf] = None
959
960 print((" %d/%d dependencies (%.2f%%) were violated; "
961 "%d source blocks stashed.") %
962 (out_of_order, in_order + out_of_order,
963 (out_of_order * 100.0 / (in_order + out_of_order))
964 if (in_order + out_of_order) else 0.0,
965 stash_size))
966
Doug Zongkerfc44a512014-08-26 13:10:25 -0700967 def FindVertexSequence(self):
968 print("Finding vertex sequence...")
969
970 # This is based on "A Fast & Effective Heuristic for the Feedback
971 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
972 # it as starting with the digraph G and moving all the vertices to
973 # be on a horizontal line in some order, trying to minimize the
974 # number of edges that end up pointing to the left. Left-pointing
975 # edges will get removed to turn the digraph into a DAG. In this
976 # case each edge has a weight which is the number of source blocks
977 # we'll lose if that edge is removed; we try to minimize the total
978 # weight rather than just the number of edges.
979
980 # Make a copy of the edge set; this copy will get destroyed by the
981 # algorithm.
982 for xf in self.transfers:
983 xf.incoming = xf.goes_after.copy()
984 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -0800985 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -0700986
987 # We use an OrderedDict instead of just a set so that the output
988 # is repeatable; otherwise it would depend on the hash values of
989 # the transfer objects.
990 G = OrderedDict()
991 for xf in self.transfers:
992 G[xf] = None
993 s1 = deque() # the left side of the sequence, built from left to right
994 s2 = deque() # the right side of the sequence, built from right to left
995
Doug Zongker6ab2a502016-02-09 08:28:09 -0800996 heap = []
997 for xf in self.transfers:
998 xf.heap_item = HeapItem(xf)
999 heap.append(xf.heap_item)
1000 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001001
Tao Bao33482282016-10-24 16:49:08 -07001002 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1003 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1004 # { key1: None, key2: None, ... }.
1005 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1006 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001007
1008 def adjust_score(iu, delta):
1009 iu.score += delta
1010 iu.heap_item.clear()
1011 iu.heap_item = HeapItem(iu)
1012 heapq.heappush(heap, iu.heap_item)
1013
1014 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001015 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001016 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001017 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001018 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001019 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001020 s2.appendleft(u)
1021 del G[u]
1022 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001023 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001024 if not iu.outgoing:
1025 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001026 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001027
1028 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001029 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001030 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001031 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001032 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001033 s1.append(u)
1034 del G[u]
1035 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001036 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001037 if not iu.incoming:
1038 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001039 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001040
Doug Zongker6ab2a502016-02-09 08:28:09 -08001041 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001042
1043 # Find the "best" vertex to put next. "Best" is the one that
1044 # maximizes the net difference in source blocks saved we get by
1045 # pretending it's a source rather than a sink.
1046
Doug Zongker6ab2a502016-02-09 08:28:09 -08001047 while True:
1048 u = heapq.heappop(heap)
1049 if u and u.item in G:
1050 u = u.item
1051 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001052
Doug Zongkerfc44a512014-08-26 13:10:25 -07001053 s1.append(u)
1054 del G[u]
1055 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001056 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001057 if not iu.incoming:
1058 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001059
Doug Zongkerfc44a512014-08-26 13:10:25 -07001060 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001061 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001062 if not iu.outgoing:
1063 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001064
1065 # Now record the sequence in the 'order' field of each transfer,
1066 # and by rearranging self.transfers to be in the chosen sequence.
1067
1068 new_transfers = []
1069 for x in itertools.chain(s1, s2):
1070 x.order = len(new_transfers)
1071 new_transfers.append(x)
1072 del x.incoming
1073 del x.outgoing
1074
1075 self.transfers = new_transfers
1076
1077 def GenerateDigraph(self):
1078 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001079
1080 # Each item of source_ranges will be:
1081 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001082 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001083 source_ranges = []
1084 for b in self.transfers:
1085 for s, e in b.src_ranges:
1086 if e > len(source_ranges):
1087 source_ranges.extend([None] * (e-len(source_ranges)))
1088 for i in range(s, e):
1089 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001090 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001091 else:
Tao Bao33482282016-10-24 16:49:08 -07001092 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001093
Doug Zongkerfc44a512014-08-26 13:10:25 -07001094 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001095 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001096 for s, e in a.tgt_ranges:
1097 for i in range(s, e):
1098 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001099 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1100 if source_ranges[i] is not None:
1101 for j in source_ranges[i]:
1102 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001103
1104 for b in intersections:
1105 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001106
1107 # If the blocks written by A are read by B, then B needs to go before A.
1108 i = a.tgt_ranges.intersect(b.src_ranges)
1109 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001110 if b.src_name == "__ZERO":
1111 # the cost of removing source blocks for the __ZERO domain
1112 # is (nearly) zero.
1113 size = 0
1114 else:
1115 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001116 b.goes_before[a] = size
1117 a.goes_after[b] = size
1118
  def FindTransfers(self):
    """Parse the file_map to generate all the transfers.

    Walks every file in the target file_map and appends Transfer()s to
    self.transfers, matching each target file to a source candidate by
    exact path, then exact basename, then digit-collapsed basename
    pattern.  Unmatched files (and the __COPY domain) become "new"
    transfers; oversized diffs are split into cache-sized pieces.
    """

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
                          style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""

      # Possibly split large files into smaller chunks.
      pieces = 0
      cache_size = common.OPTIONS.cache_size
      split_threshold = 0.125
      max_blocks_per_transfer = int(cache_size * split_threshold /
                                    self.tgt.blocksize)

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      # Peel off max_blocks_per_transfer-sized pieces while both sides are
      # still over the limit; each piece gets a "-<n>" suffixed name.
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
                 by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
                 by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer()."""

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only few changes (e.g. header),
      # we will patch the changed blocks only. This avoids stashing unchanged
      # blocks while patching. We limit the analysis to files without size
      # changes only. This is to avoid sacrificing the OTA generation cost too
      # much.
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff
        # that may have better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        # NOTE(review): next_item() presumably yields the individual blocks
        # of the RangeSet in order — confirm against rangelib.
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if no clear sign of benefits.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
                tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd as
          # the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    print("Finding transfers...")

    empty = RangeSet()
    for tgt_fn, tgt_ranges in self.tgt.file_map.items():
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros.  We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#").  (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      # No source candidate at all: ship the full target data.
      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001285
1286 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001287 for k in self.src.file_map.keys():
1288 b = os.path.basename(k)
1289 self.src_basenames[b] = k
1290 b = re.sub("[0-9]+", "#", b)
1291 self.src_numpatterns[b] = k
1292
1293 @staticmethod
1294 def AssertPartition(total, seq):
1295 """Assert that all the RangeSets in 'seq' form a partition of the
1296 'total' RangeSet (ie, they are nonintersecting and their union
1297 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001298
Doug Zongkerfc44a512014-08-26 13:10:25 -07001299 so_far = RangeSet()
1300 for i in seq:
1301 assert not so_far.overlaps(i)
1302 so_far = so_far.union(i)
1303 assert so_far == total