blob: cc06a425e88a1aac50405f7cc62fffa75e8d4681 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import array
Tao Bao8dcf7382015-05-21 14:09:49 -070020import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080021import functools
Doug Zongker62338182014-09-08 08:29:55 -070022import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import itertools
24import multiprocessing
25import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongker6ab2a502016-02-09 08:28:09 -080029import time
Doug Zongkerfc44a512014-08-26 13:10:25 -070030import tempfile
31
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(src, tgt, imgdiff=False):
  """Return patch bytes that transform the src data into the tgt data.

  Args:
    src: iterable of strings; concatenated, the source data.
    tgt: iterable of strings; concatenated, the target data.
    imgdiff: if True, run the zip-aware "imgdiff -z" tool instead of
        plain "bsdiff".

  Returns:
    The raw patch contents as a string.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  # Only the name of the patch file is needed; the tool writes it.
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)
    # Remove the placeholder so the diff tool creates the file itself.
    try:
      os.unlink(patchfile)
    except OSError:
      pass
    if imgdiff:
      # Discard imgdiff's stdout chatter; close the devnull handle
      # promptly instead of leaking the descriptor.
      with open(os.devnull, "a") as devnull:
        p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                            stdout=devnull,
                            stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Unlink each temp file independently: a failure on one must not
    # leave the others behind.
    for fn in (srcfile, tgtfile, patchfile):
      try:
        os.unlink(fn)
      except OSError:
        pass
75
Dan Albert8b72aef2015-03-23 19:13:21 -070076
class Image(object):
  """Abstract interface implemented by EmptyImage and DataImage."""

  def ReadRangeSet(self, ranges):
    """Return the data contained in the given blocks, as string pieces."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 over the image's data."""
    raise NotImplementedError
83
84
class EmptyImage(Image):
  """A zero-length image: no blocks, no data."""

  # An empty image is immutable, so all state lives on the class.
  blocksize = 4096
  total_blocks = 0
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  file_map = {}

  def ReadRangeSet(self, ranges):
    """There is nothing to read; always yield an empty sequence."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of zero bytes.

    EmptyImage always carries empty clobbered_blocks, so the
    include_clobbered_blocks flag makes no difference here.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
100
101
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as a 4K-block image.

    Args:
      data: the raw image contents.
      trim: drop a trailing partial block, if any.
      pad: zero-fill a trailing partial block, if any.

    Raises:
      ValueError: if data is not a multiple of the block size and
          neither trim nor pad was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Floor division keeps this an int on both Python 2 and 3; plain '/'
    # would produce a float under Python 3 and break RangeSet below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Store as a RangeSet for consistency with EmptyImage, and because
    # TotalSha1() passes it to RangeSet.subtract(), which expects a
    # RangeSet rather than a plain list.
    self.clobbered_blocks = RangeSet(data=clobbered_blocks)
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # A padded final block is excluded here; it is covered by __COPY.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def ReadRangeSet(self, ranges):
    """Return the data of the given blocks as a list of string pieces."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 over the image data.

    By default the clobbered (padded) blocks are excluded; pass
    include_clobbered_blocks=True to hash everything.
    """
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # ReadRangeSet() returns a list of pieces; feed them to the hash
      # one by one. (Passing the list itself to sha1() raises TypeError.)
      ctx = sha1()
      for piece in self.ReadRangeSet(ranges):
        ctx.update(piece)
      return ctx.hexdigest()
    else:
      return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700172
Doug Zongkerfc44a512014-08-26 13:10:25 -0700173
class Transfer(object):
  """A single transfer command: build tgt_ranges from src_ranges via style."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # "Intact" means both range sets are laid out monotonically on disk.
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # Stashes defined before / consumed by this command, as (id, ranges).
    self.stash_before = []
    self.use_stash = []

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    # Register in the shared id table; our id is our position in it.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Return blocks stashed by this command minus blocks it frees."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Degrade this command to a plain "new", reading nothing from source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%d: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
208
209
@functools.total_ordering
class HeapItem(object):
  """Wrapper ordering items by descending item.score in a min-heap.

  clear() drops the reference to the wrapped item; a cleared HeapItem is
  falsy, so stale heap entries can be detected and skipped cheaply.
  """
  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score
  def clear(self):
    self.item = None
  def __bool__(self):
    # Truthy while an item is still held. (The previous "is None" test
    # was inverted: live entries read as falsy and cleared ones as
    # truthy, defeating the stale-entry check.)
    return self.item is not None
  # Python 2 consults __nonzero__, not __bool__, for truth testing.
  __nonzero__ = __bool__
  def __eq__(self, other):
    return self.score == other.score
  def __le__(self, other):
    return self.score <= other.score
225
226
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# BlockImageDiff works on two image objects. An image object is
228# anything that provides the following attributes:
229#
230# blocksize: the size in bytes of a block, currently must be 4096.
231#
232# total_blocks: the total size of the partition/image, in blocks.
233#
234# care_map: a RangeSet containing which blocks (in the range [0,
235# total_blocks) we actually care about; i.e. which blocks contain
236# data.
237#
238# file_map: a dict that partitions the blocks contained in care_map
239# into smaller domains that are useful for doing diffs on.
240# (Typically a domain is a file, and the key in file_map is the
241# pathname.)
242#
Tao Baoff777812015-05-12 11:42:31 -0700243# clobbered_blocks: a RangeSet containing which blocks contain data
244# but may be altered by the FS. They need to be excluded when
245# verifying the partition integrity.
246#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700247# ReadRangeSet(): a function that takes a RangeSet and returns the
248# data contained in the image blocks of that RangeSet. The data
249# is returned as a list or tuple of strings; concatenating the
250# elements together should produce the requested data.
251# Implementations are free to break up the data into list/tuple
252# elements in any way that is convenient.
253#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700254# TotalSha1(): a function that returns (as a hex string) the SHA-1
255# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700256# care_map minus clobbered_blocks, or including the clobbered
257# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700258#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# When creating a BlockImageDiff, the src image may be None, in which
260# case the list of transfers produced will never read from the
261# original image.
262
263class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700264 def __init__(self, tgt, src=None, threads=None, version=4,
265 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700266 if threads is None:
267 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700268 if threads == 0:
269 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700270 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700271 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700272 self.transfers = []
273 self.src_basenames = {}
274 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800275 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700276 self.touched_src_ranges = RangeSet()
277 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700278 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700279
Tao Baoeba409c2015-10-21 13:30:43 -0700280 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700281
282 self.tgt = tgt
283 if src is None:
284 src = EmptyImage()
285 self.src = src
286
287 # The updater code that installs the patch always uses 4k blocks.
288 assert tgt.blocksize == 4096
289 assert src.blocksize == 4096
290
291 # The range sets in each filemap should comprise a partition of
292 # the care map.
293 self.AssertPartition(src.care_map, src.file_map.values())
294 self.AssertPartition(tgt.care_map, tgt.file_map.values())
295
  @property
  def max_stashed_size(self):
    # Read-only view of the peak stash footprint in bytes. Initialized to 0
    # in __init__ and updated by WriteTransfers() (version >= 2 only).
    return self._max_stashed_size
299
  def Compute(self, prefix):
    """Run the whole diff pipeline and write the output as <prefix>.*.

    The steps below are strictly ordered: each stage consumes the state
    produced by the previous one (transfers, the dependency digraph, the
    vertex ordering), so do not reorder these calls.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
334
Dan Albert8b72aef2015-03-23 19:13:21 -0700335 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000336 data = source.ReadRangeSet(ranges)
337 ctx = sha1()
338
339 for p in data:
340 ctx.update(p)
341
342 return ctx.hexdigest()
343
  def WriteTransfers(self, prefix):
    """Serialize self.transfers to the "<prefix>.transfer.list" file.

    Emits the on-device command stream (stash/free/move/bsdiff/imgdiff/
    new/zero/erase) in the format selected by self.version, tracking the
    peak number of simultaneously stashed blocks along the way. For
    version >= 2 the peak (in bytes) is recorded in self._max_stashed_size.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices.

      Returns the total number of blocks written.
      """

      assert (style == "new" or style == "zero")
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []

    # Total number of target blocks actually written by the script.
    total = 0

    # Map of live stashes: id -> slot (v2) or hash -> refcount (v3+).
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # Version 2 addresses stashes by numeric slot; slots are recycled.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      # Version 1 has no stash support at all.
      if self.version < 2:
        assert not xf.stash_before
        assert not xf.use_stash

      # Emit the stash commands this transfer requires before it runs.
      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        if self.version == 2:
          stashed_blocks += sr.size()
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          # Version 3+ addresses stashes by content hash, so identical
          # contents are stashed (and counted) only once.
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            stashed_blocks += sr.size()
            self.touched_src_ranges = self.touched_src_ranges.union(sr)
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      # "free" commands and the block count they release, flushed after
      # this transfer's own command below.
      free_string = []
      free_size = 0

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
      elif self.version >= 2:

        # Source specifier formats (v2+):
        #   <# blocks> <src ranges>
        #     OR
        #   <# blocks> <src ranges> <src locs> <stash refs...>
        #     OR
        #   <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
            free_size += sr.size()
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            # Hash-addressed stashes are refcounted; free on last use.
            stashes[sh] -= 1
            if stashes[sh] == 0:
              free_size += sr.size()
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op: emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            self.touched_src_ranges = self.touched_src_ranges.union(
                xf.src_ranges)

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        # Only zero blocks that aren't already zero in the source.
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    if self.version >= 3:
      self.touched_src_sha1 = self.HashBlocks(
          self.src, self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,)) # format version number
    out.insert(1, "%d\n" % (total,))
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, self._max_stashed_size, max_allowed,
                  self._max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700618
  def ReviseStashSize(self):
    """Keep the runtime stash requirement under the /cache-based limit.

    Walks the transfers in order, simulating stash usage. Any command
    whose stash (explicit or implicit) would push usage past the limit is
    converted to a plain "new" command, which reads nothing from source.
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao9a5caf22015-08-25 15:10:10 -0700691
Doug Zongkerfc44a512014-08-26 13:10:25 -0700692 def ComputePatches(self, prefix):
693 print("Reticulating splines...")
694 diff_q = []
695 patch_num = 0
696 with open(prefix + ".new.dat", "wb") as new_f:
697 for xf in self.transfers:
698 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700699 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
700 print("%10d %10d (%6.2f%%) %7s %s %s" % (
701 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
702 str(xf.tgt_ranges)))
703
Doug Zongkerfc44a512014-08-26 13:10:25 -0700704 elif xf.style == "new":
705 for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
706 new_f.write(piece)
Tao Bao08c85832016-09-19 22:26:30 -0700707 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
708 print("%10d %10d (%6.2f%%) %7s %s %s" % (
709 tgt_size, tgt_size, 100.0, xf.style,
710 xf.tgt_name, str(xf.tgt_ranges)))
711
Doug Zongkerfc44a512014-08-26 13:10:25 -0700712 elif xf.style == "diff":
713 src = self.src.ReadRangeSet(xf.src_ranges)
714 tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
715
716 # We can't compare src and tgt directly because they may have
717 # the same content but be broken up into blocks differently, eg:
718 #
719 # ["he", "llo"] vs ["h", "ello"]
720 #
721 # We want those to compare equal, ideally without having to
722 # actually concatenate the strings (these may be tens of
723 # megabytes).
724
725 src_sha1 = sha1()
726 for p in src:
727 src_sha1.update(p)
728 tgt_sha1 = sha1()
729 tgt_size = 0
730 for p in tgt:
731 tgt_sha1.update(p)
732 tgt_size += len(p)
733
734 if src_sha1.digest() == tgt_sha1.digest():
735 # These are identical; we don't need to generate a patch,
736 # just issue copy commands on the device.
737 xf.style = "move"
Tao Bao08c85832016-09-19 22:26:30 -0700738 if xf.src_ranges != xf.tgt_ranges:
739 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
740 tgt_size, tgt_size, 100.0, xf.style,
741 xf.tgt_name if xf.tgt_name == xf.src_name else (
742 xf.tgt_name + " (from " + xf.src_name + ")"),
743 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700744 else:
745 # For files in zip format (eg, APKs, JARs, etc.) we would
746 # like to use imgdiff -z if possible (because it usually
747 # produces significantly smaller patches than bsdiff).
748 # This is permissible if:
749 #
Tao Bao293fd132016-06-11 12:19:23 -0700750 # - imgdiff is not disabled, and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700751 # - the source and target files are monotonic (ie, the
752 # data is stored with blocks in increasing order), and
753 # - we haven't removed any blocks from the source set.
754 #
755 # If these conditions are satisfied then appending all the
756 # blocks in the set together in order will produce a valid
757 # zip file (plus possibly extra zeros in the last block),
758 # which is what imgdiff needs to operate. (imgdiff is
759 # fine with extra zeros at the end of the file.)
Tao Bao293fd132016-06-11 12:19:23 -0700760 imgdiff = (not self.disable_imgdiff and xf.intact and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700761 xf.tgt_name.split(".")[-1].lower()
762 in ("apk", "jar", "zip"))
763 xf.style = "imgdiff" if imgdiff else "bsdiff"
764 diff_q.append((tgt_size, src, tgt, xf, patch_num))
765 patch_num += 1
766
767 else:
768 assert False, "unknown style " + xf.style
769
770 if diff_q:
771 if self.threads > 1:
772 print("Computing patches (using %d threads)..." % (self.threads,))
773 else:
774 print("Computing patches...")
775 diff_q.sort()
776
777 patches = [None] * patch_num
778
Dan Albert8b72aef2015-03-23 19:13:21 -0700779 # TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongkerfc44a512014-08-26 13:10:25 -0700780 lock = threading.Lock()
781 def diff_worker():
782 while True:
783 with lock:
Dan Albert8b72aef2015-03-23 19:13:21 -0700784 if not diff_q:
785 return
Doug Zongkerfc44a512014-08-26 13:10:25 -0700786 tgt_size, src, tgt, xf, patchnum = diff_q.pop()
787 patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
788 size = len(patch)
789 with lock:
790 patches[patchnum] = (patch, xf)
Tao Bao08c85832016-09-19 22:26:30 -0700791 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
Doug Zongkerfc44a512014-08-26 13:10:25 -0700792 size, tgt_size, size * 100.0 / tgt_size, xf.style,
793 xf.tgt_name if xf.tgt_name == xf.src_name else (
Tao Bao08c85832016-09-19 22:26:30 -0700794 xf.tgt_name + " (from " + xf.src_name + ")"),
795 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700796
797 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700798 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700799 for th in threads:
800 th.start()
801 while threads:
802 threads.pop().join()
803 else:
804 patches = []
805
806 p = 0
807 with open(prefix + ".patch.dat", "wb") as patch_f:
808 for patch, xf in patches:
809 xf.patch_start = p
810 xf.patch_len = len(patch)
811 patch_f.write(patch)
812 p += len(patch)
813
814 def AssertSequenceGood(self):
815 # Simulate the sequences of transfers we will output, and check that:
816 # - we never read a block after writing it, and
817 # - we write every block we care about exactly once.
818
819 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800820 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700821
822 # Imagine processing the transfers in order.
823 for xf in self.transfers:
824 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700825
826 x = xf.src_ranges
827 if self.version >= 2:
828 for _, sr in xf.use_stash:
829 x = x.subtract(sr)
830
Doug Zongker6ab2a502016-02-09 08:28:09 -0800831 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800832 # Source image could be larger. Don't check the blocks that are in the
833 # source image only. Since they are not in 'touched', and won't ever
834 # be touched.
835 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800836 assert touched[i] == 0
837
838 # Check that the output blocks for this transfer haven't yet
839 # been touched, and touch all the blocks written by this
840 # transfer.
841 for s, e in xf.tgt_ranges:
842 for i in range(s, e):
843 assert touched[i] == 0
844 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700845
846 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800847 for s, e in self.tgt.care_map:
848 for i in range(s, e):
849 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700850
Doug Zongker62338182014-09-08 08:29:55 -0700851 def ImproveVertexSequence(self):
852 print("Improving vertex order...")
853
854 # At this point our digraph is acyclic; we reversed any edges that
855 # were backwards in the heuristically-generated sequence. The
856 # previously-generated order is still acceptable, but we hope to
857 # find a better order that needs less memory for stashed data.
858 # Now we do a topological sort to generate a new vertex order,
859 # using a greedy algorithm to choose which vertex goes next
860 # whenever we have a choice.
861
862 # Make a copy of the edge set; this copy will get destroyed by the
863 # algorithm.
864 for xf in self.transfers:
865 xf.incoming = xf.goes_after.copy()
866 xf.outgoing = xf.goes_before.copy()
867
868 L = [] # the new vertex order
869
870 # S is the set of sources in the remaining graph; we always choose
871 # the one that leaves the least amount of stashed data after it's
872 # executed.
873 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
874 if not u.incoming]
875 heapq.heapify(S)
876
877 while S:
878 _, _, xf = heapq.heappop(S)
879 L.append(xf)
880 for u in xf.outgoing:
881 del u.incoming[xf]
882 if not u.incoming:
883 heapq.heappush(S, (u.NetStashChange(), u.order, u))
884
885 # if this fails then our graph had a cycle.
886 assert len(L) == len(self.transfers)
887
888 self.transfers = L
889 for i, xf in enumerate(L):
890 xf.order = i
891
Doug Zongkerfc44a512014-08-26 13:10:25 -0700892 def RemoveBackwardEdges(self):
893 print("Removing backward edges...")
894 in_order = 0
895 out_of_order = 0
896 lost_source = 0
897
898 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700899 lost = 0
900 size = xf.src_ranges.size()
901 for u in xf.goes_before:
902 # xf should go before u
903 if xf.order < u.order:
904 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700905 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700906 else:
907 # it doesn't, boo. trim the blocks that u writes from xf's
908 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700909 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700910 assert xf.src_ranges.overlaps(u.tgt_ranges)
911 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
912 xf.intact = False
913
914 if xf.style == "diff" and not xf.src_ranges:
915 # nothing left to diff from; treat as new data
916 xf.style = "new"
917
918 lost = size - xf.src_ranges.size()
919 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700920
921 print((" %d/%d dependencies (%.2f%%) were violated; "
922 "%d source blocks removed.") %
923 (out_of_order, in_order + out_of_order,
924 (out_of_order * 100.0 / (in_order + out_of_order))
925 if (in_order + out_of_order) else 0.0,
926 lost_source))
927
Doug Zongker62338182014-09-08 08:29:55 -0700928 def ReverseBackwardEdges(self):
929 print("Reversing backward edges...")
930 in_order = 0
931 out_of_order = 0
932 stashes = 0
933 stash_size = 0
934
935 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700936 for u in xf.goes_before.copy():
937 # xf should go before u
938 if xf.order < u.order:
939 # it does, hurray!
940 in_order += 1
941 else:
942 # it doesn't, boo. modify u to stash the blocks that it
943 # writes that xf wants to read, and then require u to go
944 # before xf.
945 out_of_order += 1
946
947 overlap = xf.src_ranges.intersect(u.tgt_ranges)
948 assert overlap
949
950 u.stash_before.append((stashes, overlap))
951 xf.use_stash.append((stashes, overlap))
952 stashes += 1
953 stash_size += overlap.size()
954
955 # reverse the edge direction; now xf must go after u
956 del xf.goes_before[u]
957 del u.goes_after[xf]
958 xf.goes_after[u] = None # value doesn't matter
959 u.goes_before[xf] = None
960
961 print((" %d/%d dependencies (%.2f%%) were violated; "
962 "%d source blocks stashed.") %
963 (out_of_order, in_order + out_of_order,
964 (out_of_order * 100.0 / (in_order + out_of_order))
965 if (in_order + out_of_order) else 0.0,
966 stash_size))
967
Doug Zongkerfc44a512014-08-26 13:10:25 -0700968 def FindVertexSequence(self):
969 print("Finding vertex sequence...")
970
971 # This is based on "A Fast & Effective Heuristic for the Feedback
972 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
973 # it as starting with the digraph G and moving all the vertices to
974 # be on a horizontal line in some order, trying to minimize the
975 # number of edges that end up pointing to the left. Left-pointing
976 # edges will get removed to turn the digraph into a DAG. In this
977 # case each edge has a weight which is the number of source blocks
978 # we'll lose if that edge is removed; we try to minimize the total
979 # weight rather than just the number of edges.
980
981 # Make a copy of the edge set; this copy will get destroyed by the
982 # algorithm.
983 for xf in self.transfers:
984 xf.incoming = xf.goes_after.copy()
985 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -0800986 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -0700987
988 # We use an OrderedDict instead of just a set so that the output
989 # is repeatable; otherwise it would depend on the hash values of
990 # the transfer objects.
991 G = OrderedDict()
992 for xf in self.transfers:
993 G[xf] = None
994 s1 = deque() # the left side of the sequence, built from left to right
995 s2 = deque() # the right side of the sequence, built from right to left
996
Doug Zongker6ab2a502016-02-09 08:28:09 -0800997 heap = []
998 for xf in self.transfers:
999 xf.heap_item = HeapItem(xf)
1000 heap.append(xf.heap_item)
1001 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001002
Tao Bao33482282016-10-24 16:49:08 -07001003 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1004 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1005 # { key1: None, key2: None, ... }.
1006 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1007 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001008
1009 def adjust_score(iu, delta):
1010 iu.score += delta
1011 iu.heap_item.clear()
1012 iu.heap_item = HeapItem(iu)
1013 heapq.heappush(heap, iu.heap_item)
1014
1015 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001016 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001017 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001018 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001019 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001020 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001021 s2.appendleft(u)
1022 del G[u]
1023 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001024 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001025 if not iu.outgoing:
1026 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001027 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001028
1029 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001030 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001031 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001032 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001033 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001034 s1.append(u)
1035 del G[u]
1036 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001037 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001038 if not iu.incoming:
1039 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001040 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001041
Doug Zongker6ab2a502016-02-09 08:28:09 -08001042 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001043
1044 # Find the "best" vertex to put next. "Best" is the one that
1045 # maximizes the net difference in source blocks saved we get by
1046 # pretending it's a source rather than a sink.
1047
Doug Zongker6ab2a502016-02-09 08:28:09 -08001048 while True:
1049 u = heapq.heappop(heap)
1050 if u and u.item in G:
1051 u = u.item
1052 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001053
Doug Zongkerfc44a512014-08-26 13:10:25 -07001054 s1.append(u)
1055 del G[u]
1056 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001057 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001058 if not iu.incoming:
1059 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001060
Doug Zongkerfc44a512014-08-26 13:10:25 -07001061 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001062 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001063 if not iu.outgoing:
1064 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001065
1066 # Now record the sequence in the 'order' field of each transfer,
1067 # and by rearranging self.transfers to be in the chosen sequence.
1068
1069 new_transfers = []
1070 for x in itertools.chain(s1, s2):
1071 x.order = len(new_transfers)
1072 new_transfers.append(x)
1073 del x.incoming
1074 del x.outgoing
1075
1076 self.transfers = new_transfers
1077
1078 def GenerateDigraph(self):
1079 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001080
1081 # Each item of source_ranges will be:
1082 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001083 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001084 source_ranges = []
1085 for b in self.transfers:
1086 for s, e in b.src_ranges:
1087 if e > len(source_ranges):
1088 source_ranges.extend([None] * (e-len(source_ranges)))
1089 for i in range(s, e):
1090 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001091 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001092 else:
Tao Bao33482282016-10-24 16:49:08 -07001093 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001094
Doug Zongkerfc44a512014-08-26 13:10:25 -07001095 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001096 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001097 for s, e in a.tgt_ranges:
1098 for i in range(s, e):
1099 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001100 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1101 if source_ranges[i] is not None:
1102 for j in source_ranges[i]:
1103 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001104
1105 for b in intersections:
1106 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001107
1108 # If the blocks written by A are read by B, then B needs to go before A.
1109 i = a.tgt_ranges.intersect(b.src_ranges)
1110 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001111 if b.src_name == "__ZERO":
1112 # the cost of removing source blocks for the __ZERO domain
1113 # is (nearly) zero.
1114 size = 0
1115 else:
1116 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001117 b.goes_before[a] = size
1118 a.goes_after[b] = size
1119
  def FindTransfers(self):
    """Parse the file_map to generate all the transfers.

    For every target file, find the best source match (exact path, exact
    basename, or basename with digit runs replaced by '#'), then emit one
    or more Transfer()s into self.transfers.  Large files are split into
    cache-sized pieces; .odex files get a block-wise crop optimization.
    """

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
                          style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""

      # Possibly split large files into smaller chunks.
      pieces = 0
      cache_size = common.OPTIONS.cache_size
      split_threshold = 0.125
      max_blocks_per_transfer = int(cache_size * split_threshold /
                                    self.tgt.blocksize)

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      # Peel off max-sized pieces while BOTH sides are still oversized;
      # each piece gets a "-<n>" suffix on both names.
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
                 by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
                 by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer()."""

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only few changes (e.g. header),
      # we will patch the changed blocks only. This avoids stashing unchanged
      # blocks while patching. We limit the analysis to files without size
      # changes only. This is to avoid sacrificing the OTA generation cost too
      # much.
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff
        # that may have better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        # Compare the two files block by block (next_item() appears to
        # yield individual blocks, parseable back into RangeSets).
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if no clear sign of benefits.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
                tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd as
          # the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    print("Finding transfers...")

    # Matching cascade, most to least specific: special domains, exact
    # path, exact basename, digit-insensitive basename, then "new" data.
    empty = RangeSet()
    for tgt_fn, tgt_ranges in self.tgt.file_map.items():
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros.  We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#").  (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, self.version >= 3)
        continue

      # No source match at all: send the full target data.
      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001286
1287 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001288 for k in self.src.file_map.keys():
1289 b = os.path.basename(k)
1290 self.src_basenames[b] = k
1291 b = re.sub("[0-9]+", "#", b)
1292 self.src_numpatterns[b] = k
1293
1294 @staticmethod
1295 def AssertPartition(total, seq):
1296 """Assert that all the RangeSets in 'seq' form a partition of the
1297 'total' RangeSet (ie, they are nonintersecting and their union
1298 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001299
Doug Zongkerfc44a512014-08-26 13:10:25 -07001300 so_far = RangeSet()
1301 for i in seq:
1302 assert not so_far.overlaps(i)
1303 so_far = so_far.union(i)
1304 assert so_far == total