blob: 6c842dc1ef6dfb690314ecc34a9f10e2215fd46d [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Tao Bao3a2e3502016-12-28 09:14:39 -080024import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070025import re
26import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070029
Tao Bao3a2e3502016-12-28 09:14:39 -080030from collections import deque, OrderedDict
31from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Computes a binary patch that transforms srcfile into tgtfile.

  Args:
    srcfile: path to the source file.
    tgtfile: path to the target file.
    imgdiff: if True, use imgdiff (with "-z" for deflate-aware diffing);
        otherwise use bsdiff.

  Returns:
    The patch contents as a string.

  Raises:
    ValueError: if the external diff tool exits with a non-zero status.
  """
  patchfile = common.MakeTempFile(prefix="patch-")

  if imgdiff:
    cmd = ["imgdiff", "-z", srcfile, tgtfile, patchfile]
  else:
    cmd = ["bsdiff", srcfile, tgtfile, patchfile]

  # Use a context manager so the devnull handle is always closed; the
  # tool's output is discarded (stderr folded into stdout).
  with open(os.devnull, 'w') as devnull:
    p = subprocess.call(cmd, stdout=devnull, stderr=subprocess.STDOUT)

  if p:
    raise ValueError("diff failed: " + str(p))

  with open(patchfile, "rb") as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070057
Dan Albert8b72aef2015-03-23 19:13:21 -070058
class Image(object):
  """Abstract interface implemented by the image objects BlockImageDiff
  consumes (see the module comment above BlockImageDiff for the full
  contract, including the data attributes)."""

  def RangeSha1(self, ranges):
    """Returns (as a hex string) the SHA-1 of the data in 'ranges'."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns the data in 'ranges' as a list/tuple of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns (as a hex string) the SHA-1 of the whole image."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in 'ranges' to the file object 'fd'."""
    raise NotImplementedError
71
Dan Albert8b72aef2015-03-23 19:13:21 -070072
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    # Nothing to care about, clobber, or extend in an empty image.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    """Returns the SHA-1 of the empty byte string."""
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    """There is no data to read; always returns an empty tuple."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so the
    # include_clobbered_blocks flag makes no difference here.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
98
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070099
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wraps 'data' as an image.

    A partial trailing block is either dropped (trim=True) or zero-filled
    (pad=True); the two options are mutually exclusive. Without either,
    the data length must be an exact multiple of the block size.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    leftover = len(self.data) % self.blocksize
    padded = False
    if leftover > 0:
      if trim:
        # Drop the partial trailing block.
        self.data = self.data[:-leftover]
      elif pad:
        # Zero-fill the partial trailing block.
        self.data += '\0' * (self.blocksize - leftover)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    self.total_blocks = len(self.data) / self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    clobbered_blocks = (
        [self.total_blocks-1, self.total_blocks] if padded else [])
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    # Classify every (non-padded) block as zero or nonzero. Each list is a
    # flat sequence of [start, end) boundaries consumable by RangeSet.
    zero_blocks = []
    nonzero_blocks = []
    zero_block = '\0' * self.blocksize

    blocks_to_scan = self.total_blocks-1 if padded else self.total_blocks
    for blk in range(blocks_to_scan):
      chunk = self.data[blk*self.blocksize : (blk+1)*self.blocksize]
      if chunk == zero_block:
        zero_blocks.extend((blk, blk+1))
      else:
        nonzero_blocks.extend((blk, blk+1))

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = {}
    for key, blocks in (("__ZERO", zero_blocks),
                        ("__NONZERO", nonzero_blocks),
                        ("__COPY", clobbered_blocks)):
      if blocks:
        self.file_map[key] = RangeSet(data=blocks)

  def _GetRangeData(self, ranges):
    """Yields the data chunk for each [start, end) pair in 'ranges'."""
    for start, end in ranges:
      yield self.data[start*self.blocksize:end*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 over the data in the given block ranges."""
    hasher = sha1()
    for chunk in self._GetRangeData(ranges):
      hasher.update(chunk)
    return hasher.hexdigest()

  def ReadRangeSet(self, ranges):
    """Returns the range data wrapped in a single-element list."""
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the image, minus the clobbered blocks
    unless include_clobbered_blocks is True."""
    if include_clobbered_blocks:
      return sha1(self.data).hexdigest()
    return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in the given ranges to the file object 'fd'."""
    for chunk in self._GetRangeData(ranges):
      fd.write(chunk)
183
Doug Zongkerfc44a512014-08-26 13:10:25 -0700184
class Transfer(object):
  """One command in the transfer list: produce tgt_ranges from src_ranges."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # The transfer is "intact" only when both range sets are monotonic.
    def _monotonic(rs):
      return getattr(rs, "monotonic", False)
    self.intact = _monotonic(tgt_ranges) and _monotonic(src_ranges)

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    # (stash_raw_id, RangeSet) pairs filled in while ordering transfers.
    self.stash_before = []
    self.use_stash = []

    # Register in the shared id->Transfer list; the id is the position.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Returns blocks stashed minus blocks unstashed by this transfer."""
    stashed = sum(sr.size() for (_, sr) in self.stash_before)
    unstashed = sum(sr.size() for (_, sr) in self.use_stash)
    return stashed - unstashed

  def ConvertToNew(self):
    """Downgrades this transfer to a "new" command reading no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
222
223
@functools.total_ordering
class HeapItem(object):
  """Wrapper adapting a scored item for use with Python's heapq.

  The score is negated because heapq is a min-heap and we want to pop the
  item with the maximum score first. Removed entries are handled by lazy
  deletion: clear() drops the item and makes the wrapper falsy.
  """
  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    """Marks this heap entry as deleted (lazy removal)."""
    self.item = None

  def __bool__(self):
    # Bug fix: a HeapItem must be truthy while it still holds an item.
    # The previous "self.item is None" inverted the test, so cleared
    # entries looked live and live entries looked cleared.
    return self.item is not None

  # This file is Python 2 code ("print_function" import, str data); Python 2
  # consults __nonzero__ for truth testing, so alias it to __bool__.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
239
240
Doug Zongkerfc44a512014-08-26 13:10:25 -0700241# BlockImageDiff works on two image objects. An image object is
242# anything that provides the following attributes:
243#
244# blocksize: the size in bytes of a block, currently must be 4096.
245#
246# total_blocks: the total size of the partition/image, in blocks.
247#
248# care_map: a RangeSet containing which blocks (in the range [0,
249# total_blocks) we actually care about; i.e. which blocks contain
250# data.
251#
252# file_map: a dict that partitions the blocks contained in care_map
253# into smaller domains that are useful for doing diffs on.
254# (Typically a domain is a file, and the key in file_map is the
255# pathname.)
256#
Tao Baoff777812015-05-12 11:42:31 -0700257# clobbered_blocks: a RangeSet containing which blocks contain data
258# but may be altered by the FS. They need to be excluded when
259# verifying the partition integrity.
260#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700261# ReadRangeSet(): a function that takes a RangeSet and returns the
262# data contained in the image blocks of that RangeSet. The data
263# is returned as a list or tuple of strings; concatenating the
264# elements together should produce the requested data.
265# Implementations are free to break up the data into list/tuple
266# elements in any way that is convenient.
267#
Tao Bao183e56e2017-03-05 17:05:09 -0800268# RangeSha1(): a function that returns (as a hex string) the SHA-1
269# hash of all the data in the specified range.
270#
# TotalSha1(): a function that returns (as a hex string) the SHA-1
#    hash of all the data in the image (i.e., all the blocks in the
#    care_map minus clobbered_blocks, or including the clobbered
#    blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700275#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700276# When creating a BlockImageDiff, the src image may be None, in which
277# case the list of transfers produced will never read from the
278# original image.
279
280class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700281 def __init__(self, tgt, src=None, threads=None, version=4,
282 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700283 if threads is None:
284 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700285 if threads == 0:
286 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700287 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700288 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700289 self.transfers = []
290 self.src_basenames = {}
291 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800292 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700293 self.touched_src_ranges = RangeSet()
294 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700295 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700296
Tao Baoeba409c2015-10-21 13:30:43 -0700297 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700298
299 self.tgt = tgt
300 if src is None:
301 src = EmptyImage()
302 self.src = src
303
304 # The updater code that installs the patch always uses 4k blocks.
305 assert tgt.blocksize == 4096
306 assert src.blocksize == 4096
307
308 # The range sets in each filemap should comprise a partition of
309 # the care map.
310 self.AssertPartition(src.care_map, src.file_map.values())
311 self.AssertPartition(tgt.care_map, tgt.file_map.values())
312
  @property
  def max_stashed_size(self):
    # Read-only view of the peak stash footprint in bytes; computed and
    # stored by WriteTransfers() (0 until that has run).
    return self._max_stashed_size
316
  def Compute(self, prefix):
    """Computes the full transfer list and writes the output files.

    Runs the whole pipeline: find transfers, order them, bound the stash
    size, generate patches, and emit '<prefix>.transfer.list' (plus the
    patch data via ComputePatches/WriteTransfers).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
351
  def WriteTransfers(self, prefix):
    """Serializes the ordered transfers into '<prefix>.transfer.list'.

    Emits the header (format version, total blocks written, stash slot
    count, max stash blocks), one command per transfer with its stash
    bookkeeping, and erase commands for blocks holding no useful data in
    the new image. Also updates self.touched_src_ranges / touched_src_sha1
    (v3+) and self._max_stashed_size (v2+).
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v2, 'stashes' records the map from 'stash_raw_id' to 'stash_id'
    # (aka 'sid', which is the stash slot id). The stash in a 'stash_id' will
    # be freed immediately after its use. So unlike 'stash_raw_id' (which
    # uniquely identifies each pair of stashed blocks), the same 'stash_id'
    # may be reused during the life cycle of an update (maintained by
    # 'free_stash_ids' heap and 'next_stash_id').
    #
    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    if self.version == 2:
      free_stash_ids = []
      next_stash_id = 0

    for xf in self.transfers:

      if self.version < 2:
        # v1 has no stash support at all.
        assert not xf.stash_before
        assert not xf.use_stash

      for stash_raw_id, sr in xf.stash_before:
        if self.version == 2:
          assert stash_raw_id not in stashes
          # Reuse a freed slot id if possible, otherwise allocate a new one.
          if free_stash_ids:
            sid = heapq.heappop(free_stash_ids)
          else:
            sid = next_stash_id
            next_stash_id += 1
          stashes[stash_raw_id] = sid
          stashed_blocks += sr.size()
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          # v3+: slots are keyed by content hash and ref-counted, so
          # identical blocks are stashed (and counted) only once.
          sh = self.src.RangeSha1(sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            stashed_blocks += sr.size()
            self.touched_src_ranges = self.touched_src_ranges.union(sr)
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
      elif self.version >= 2:

        # <# blocks> <src ranges>
        #   OR
        # <# blocks> <src ranges> <src locs> <stash refs...>
        #   OR
        # <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for stash_raw_id, sr in xf.use_stash:
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.src.RangeSha1(sr)
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            sid = stashes.pop(stash_raw_id)
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
            free_size += sr.size()
            heapq.heappush(free_stash_ids, sid)
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            stashes[sh] -= 1
            if stashes[sh] == 0:
              free_string.append("free %s\n" % (sh,))
              free_size += sr.size()
              stashes.pop(sh)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            self.touched_src_ranges = self.touched_src_ranges.union(
                xf.src_ranges)

            out.append("%s %s %s %s\n" % (
                xf.style,
                xf.tgt_sha1,
                xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              xf.src_sha1,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
               'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                   max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                   self.tgt.blocksize, max_allowed, cache_size,
                   stash_threshold)

    if self.version >= 3:
      self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    if self.version == 2:
      # v2 only: after the total block count, we give the number of stash slots
      # needed, and the maximum size needed (in blocks).
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")
    elif self.version >= 3:
      # v3+: the number of stash slots is unused.
      out.insert(2, "0\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, self._max_stashed_size, max_allowed,
                  self._max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700639
  def ReviseStashSize(self):
    """Rewrite transfers that would overflow the stash budget as "new".

    Walks self.transfers in their final order, simulating stash usage on
    the fly. Whenever stashing a range (explicitly via stash_before, or
    implicitly for an overlapping "diff"/"move" in BBOTA v3+) would exceed
    the cache budget, the command that consumes that stash is converted to
    a "new" command instead, so its data ships in the package rather than
    being read from the (overwritten) source.

    Returns:
      The total number of target blocks that were converted to "new".
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    if self.version == 2:
      # v2 stashes are keyed by slot id; freed slot ids are recycled through
      # a min-heap so the smallest id is reused first.
      free_stash_ids = []
      next_stash_id = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        if self.version == 2:
          stashed_blocks_after += sr.size()
        else:
          # v3+ dedupes stashes by content hash: re-stashing content that is
          # already stashed costs no additional space.
          sh = self.src.RangeSha1(sr)
          if sh not in stashes:
            stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if self.version == 2:
            assert stash_raw_id not in stashes
            if free_stash_ids:
              sid = heapq.heappop(free_stash_ids)
            else:
              sid = next_stash_id
              next_stash_id += 1
            stashes[stash_raw_id] = sid
          else:
            # Reference-count identical stashed content (keyed by SHA-1).
            if sh in stashes:
              stashes[sh] += 1
            else:
              stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for stash_raw_id, sr in xf.use_stash:
        if self.version == 2:
          sid = stashes.pop(stash_raw_id)
          stashed_blocks -= sr.size()
          heapq.heappush(free_stash_ids, sid)
        else:
          # Drop one reference; the space is only reclaimed when the last
          # user of this content consumes the stash.
          sh = self.src.RangeSha1(sr)
          assert sh in stashes
          stashes[sh] -= 1
          if stashes[sh] == 0:
            stashed_blocks -= sr.size()
            stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700752
Doug Zongkerfc44a512014-08-26 13:10:25 -0700753 def ComputePatches(self, prefix):
754 print("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800755 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700756 patch_num = 0
757 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800758 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700759 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700760 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
761 print("%10d %10d (%6.2f%%) %7s %s %s" % (
762 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
763 str(xf.tgt_ranges)))
764
Doug Zongkerfc44a512014-08-26 13:10:25 -0700765 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800766 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700767 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
768 print("%10d %10d (%6.2f%%) %7s %s %s" % (
769 tgt_size, tgt_size, 100.0, xf.style,
770 xf.tgt_name, str(xf.tgt_ranges)))
771
Doug Zongkerfc44a512014-08-26 13:10:25 -0700772 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700773 # We can't compare src and tgt directly because they may have
774 # the same content but be broken up into blocks differently, eg:
775 #
776 # ["he", "llo"] vs ["h", "ello"]
777 #
778 # We want those to compare equal, ideally without having to
779 # actually concatenate the strings (these may be tens of
780 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800781 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700782 # These are identical; we don't need to generate a patch,
783 # just issue copy commands on the device.
784 xf.style = "move"
Tao Bao183e56e2017-03-05 17:05:09 -0800785 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700786 if xf.src_ranges != xf.tgt_ranges:
787 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
788 tgt_size, tgt_size, 100.0, xf.style,
789 xf.tgt_name if xf.tgt_name == xf.src_name else (
790 xf.tgt_name + " (from " + xf.src_name + ")"),
791 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700792 else:
793 # For files in zip format (eg, APKs, JARs, etc.) we would
794 # like to use imgdiff -z if possible (because it usually
795 # produces significantly smaller patches than bsdiff).
796 # This is permissible if:
797 #
Tao Bao293fd132016-06-11 12:19:23 -0700798 # - imgdiff is not disabled, and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700799 # - the source and target files are monotonic (ie, the
800 # data is stored with blocks in increasing order), and
801 # - we haven't removed any blocks from the source set.
802 #
803 # If these conditions are satisfied then appending all the
804 # blocks in the set together in order will produce a valid
805 # zip file (plus possibly extra zeros in the last block),
806 # which is what imgdiff needs to operate. (imgdiff is
807 # fine with extra zeros at the end of the file.)
Tao Bao293fd132016-06-11 12:19:23 -0700808 imgdiff = (not self.disable_imgdiff and xf.intact and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700809 xf.tgt_name.split(".")[-1].lower()
810 in ("apk", "jar", "zip"))
811 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800812 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700813 patch_num += 1
814
815 else:
816 assert False, "unknown style " + xf.style
817
Tao Bao183e56e2017-03-05 17:05:09 -0800818 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700819 if self.threads > 1:
820 print("Computing patches (using %d threads)..." % (self.threads,))
821 else:
822 print("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700823
Tao Bao183e56e2017-03-05 17:05:09 -0800824 diff_total = len(diff_queue)
825 patches = [None] * diff_total
Doug Zongkerfc44a512014-08-26 13:10:25 -0700826
Tao Bao183e56e2017-03-05 17:05:09 -0800827 # Using multiprocessing doesn't give additional benefits, due to the
828 # pattern of the code. The diffing work is done by subprocess.call, which
829 # already runs in a separate process (not affected much by the GIL -
830 # Global Interpreter Lock). Using multiprocess also requires either a)
831 # writing the diff input files in the main process before forking, or b)
832 # reopening the image file (SparseImage) in the worker processes. Doing
833 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700834 lock = threading.Lock()
835 def diff_worker():
836 while True:
837 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800838 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700839 return
Tao Bao183e56e2017-03-05 17:05:09 -0800840 xf_index, imgdiff, patch_index = diff_queue.pop()
841
842 xf = self.transfers[xf_index]
843 src_ranges = xf.src_ranges
844 tgt_ranges = xf.tgt_ranges
845
846 # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
Doug Zongkerfc44a512014-08-26 13:10:25 -0700847 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800848 src_file = common.MakeTempFile(prefix="src-")
849 with open(src_file, "wb") as fd:
850 self.src.WriteRangeDataToFd(src_ranges, fd)
851
852 tgt_file = common.MakeTempFile(prefix="tgt-")
853 with open(tgt_file, "wb") as fd:
854 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
855
856 try:
857 patch = compute_patch(src_file, tgt_file, imgdiff)
858 except ValueError as e:
859 raise ValueError(
860 "Failed to generate diff for %s: src=%s, tgt=%s: %s" % (
861 xf.tgt_name, xf.src_ranges, xf.tgt_ranges, e.message))
862
863 with lock:
864 patches[patch_index] = (xf_index, patch)
865 if sys.stdout.isatty():
866 progress = len(patches) * 100 / diff_total
867 # '\033[K' is to clear to EOL.
868 print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
869 sys.stdout.flush()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700870
871 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700872 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700873 for th in threads:
874 th.start()
875 while threads:
876 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800877
878 if sys.stdout.isatty():
879 print('\n')
Doug Zongkerfc44a512014-08-26 13:10:25 -0700880 else:
881 patches = []
882
Tao Bao183e56e2017-03-05 17:05:09 -0800883 offset = 0
884 with open(prefix + ".patch.dat", "wb") as patch_fd:
885 for index, patch in patches:
886 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700887 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800888 xf.patch_start = offset
889 offset += xf.patch_len
890 patch_fd.write(patch)
891
892 if common.OPTIONS.verbose:
893 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
894 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
895 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
896 xf.style,
897 xf.tgt_name if xf.tgt_name == xf.src_name else (
898 xf.tgt_name + " (from " + xf.src_name + ")"),
899 xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700900
901 def AssertSequenceGood(self):
902 # Simulate the sequences of transfers we will output, and check that:
903 # - we never read a block after writing it, and
904 # - we write every block we care about exactly once.
905
906 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800907 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700908
909 # Imagine processing the transfers in order.
910 for xf in self.transfers:
911 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700912
913 x = xf.src_ranges
914 if self.version >= 2:
915 for _, sr in xf.use_stash:
916 x = x.subtract(sr)
917
Doug Zongker6ab2a502016-02-09 08:28:09 -0800918 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800919 # Source image could be larger. Don't check the blocks that are in the
920 # source image only. Since they are not in 'touched', and won't ever
921 # be touched.
922 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800923 assert touched[i] == 0
924
925 # Check that the output blocks for this transfer haven't yet
926 # been touched, and touch all the blocks written by this
927 # transfer.
928 for s, e in xf.tgt_ranges:
929 for i in range(s, e):
930 assert touched[i] == 0
931 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700932
933 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800934 for s, e in self.tgt.care_map:
935 for i in range(s, e):
936 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700937
Doug Zongker62338182014-09-08 08:29:55 -0700938 def ImproveVertexSequence(self):
939 print("Improving vertex order...")
940
941 # At this point our digraph is acyclic; we reversed any edges that
942 # were backwards in the heuristically-generated sequence. The
943 # previously-generated order is still acceptable, but we hope to
944 # find a better order that needs less memory for stashed data.
945 # Now we do a topological sort to generate a new vertex order,
946 # using a greedy algorithm to choose which vertex goes next
947 # whenever we have a choice.
948
949 # Make a copy of the edge set; this copy will get destroyed by the
950 # algorithm.
951 for xf in self.transfers:
952 xf.incoming = xf.goes_after.copy()
953 xf.outgoing = xf.goes_before.copy()
954
955 L = [] # the new vertex order
956
957 # S is the set of sources in the remaining graph; we always choose
958 # the one that leaves the least amount of stashed data after it's
959 # executed.
960 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
961 if not u.incoming]
962 heapq.heapify(S)
963
964 while S:
965 _, _, xf = heapq.heappop(S)
966 L.append(xf)
967 for u in xf.outgoing:
968 del u.incoming[xf]
969 if not u.incoming:
970 heapq.heappush(S, (u.NetStashChange(), u.order, u))
971
972 # if this fails then our graph had a cycle.
973 assert len(L) == len(self.transfers)
974
975 self.transfers = L
976 for i, xf in enumerate(L):
977 xf.order = i
978
Doug Zongkerfc44a512014-08-26 13:10:25 -0700979 def RemoveBackwardEdges(self):
980 print("Removing backward edges...")
981 in_order = 0
982 out_of_order = 0
983 lost_source = 0
984
985 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700986 lost = 0
987 size = xf.src_ranges.size()
988 for u in xf.goes_before:
989 # xf should go before u
990 if xf.order < u.order:
991 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700992 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700993 else:
994 # it doesn't, boo. trim the blocks that u writes from xf's
995 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700996 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700997 assert xf.src_ranges.overlaps(u.tgt_ranges)
998 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
999 xf.intact = False
1000
1001 if xf.style == "diff" and not xf.src_ranges:
1002 # nothing left to diff from; treat as new data
1003 xf.style = "new"
1004
1005 lost = size - xf.src_ranges.size()
1006 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -07001007
1008 print((" %d/%d dependencies (%.2f%%) were violated; "
1009 "%d source blocks removed.") %
1010 (out_of_order, in_order + out_of_order,
1011 (out_of_order * 100.0 / (in_order + out_of_order))
1012 if (in_order + out_of_order) else 0.0,
1013 lost_source))
1014
Doug Zongker62338182014-09-08 08:29:55 -07001015 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001016 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1017
1018 For each transfer, make sure it properly stashes the blocks it touches and
1019 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1020 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1021 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1022 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1023 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1024 blocks will be written to the same stash slot in WriteTransfers().
1025 """
1026
Doug Zongker62338182014-09-08 08:29:55 -07001027 print("Reversing backward edges...")
1028 in_order = 0
1029 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001030 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001031 stash_size = 0
1032
1033 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001034 for u in xf.goes_before.copy():
1035 # xf should go before u
1036 if xf.order < u.order:
1037 # it does, hurray!
1038 in_order += 1
1039 else:
1040 # it doesn't, boo. modify u to stash the blocks that it
1041 # writes that xf wants to read, and then require u to go
1042 # before xf.
1043 out_of_order += 1
1044
1045 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1046 assert overlap
1047
Tao Bao3a2e3502016-12-28 09:14:39 -08001048 u.stash_before.append((stash_raw_id, overlap))
1049 xf.use_stash.append((stash_raw_id, overlap))
1050 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001051 stash_size += overlap.size()
1052
1053 # reverse the edge direction; now xf must go after u
1054 del xf.goes_before[u]
1055 del u.goes_after[xf]
1056 xf.goes_after[u] = None # value doesn't matter
1057 u.goes_before[xf] = None
1058
1059 print((" %d/%d dependencies (%.2f%%) were violated; "
1060 "%d source blocks stashed.") %
1061 (out_of_order, in_order + out_of_order,
1062 (out_of_order * 100.0 / (in_order + out_of_order))
1063 if (in_order + out_of_order) else 0.0,
1064 stash_size))
1065
  def FindVertexSequence(self):
    """Choose an ordering of self.transfers that minimizes lost blocks.

    Produces a linear order of the transfer digraph such that the total
    weight of left-pointing (violated) edges is heuristically minimized;
    those edges are later removed/reversed to make the graph a DAG.
    Records the result by rewriting self.transfers and is consumed via
    each transfer's incoming/outgoing copies (deleted at the end).
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = (weight of edges out) - (weight of edges in); the benefit
      # of placing this vertex on the left rather than the right.
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    # Max-score lookup is done through a heap of HeapItem wrappers (one
    # live wrapper per transfer; stale ones are cleared, see below).
    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Heap entries can't be re-keyed in place: invalidate the old
      # HeapItem and push a fresh one with the updated score. Stale
      # (cleared) items are skipped when popped.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G: continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G: continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G: break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until a live entry for a still-present vertex is found; a
      # cleared HeapItem tests falsy (presumably by design of HeapItem --
      # see its definition earlier in this file).
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1175
1176 def GenerateDigraph(self):
1177 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001178
1179 # Each item of source_ranges will be:
1180 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001181 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001182 source_ranges = []
1183 for b in self.transfers:
1184 for s, e in b.src_ranges:
1185 if e > len(source_ranges):
1186 source_ranges.extend([None] * (e-len(source_ranges)))
1187 for i in range(s, e):
1188 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001189 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001190 else:
Tao Bao33482282016-10-24 16:49:08 -07001191 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001192
Doug Zongkerfc44a512014-08-26 13:10:25 -07001193 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001194 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001195 for s, e in a.tgt_ranges:
1196 for i in range(s, e):
1197 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001198 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1199 if source_ranges[i] is not None:
1200 for j in source_ranges[i]:
1201 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001202
1203 for b in intersections:
1204 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001205
1206 # If the blocks written by A are read by B, then B needs to go before A.
1207 i = a.tgt_ranges.intersect(b.src_ranges)
1208 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001209 if b.src_name == "__ZERO":
1210 # the cost of removing source blocks for the __ZERO domain
1211 # is (nearly) zero.
1212 size = 0
1213 else:
1214 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001215 b.goes_before[a] = size
1216 a.goes_after[b] = size
1217
1218 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001219 """Parse the file_map to generate all the transfers."""
1220
Tao Bao08c85832016-09-19 22:26:30 -07001221 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
1222 style, by_id):
1223 """Add one or multiple Transfer()s by splitting large files.
Tao Bao9a5caf22015-08-25 15:10:10 -07001224
1225 For BBOTA v3, we need to stash source blocks for resumable feature.
1226 However, with the growth of file size and the shrink of the cache
1227 partition source blocks are too large to be stashed. If a file occupies
Tao Bao08c85832016-09-19 22:26:30 -07001228 too many blocks, we split it into smaller pieces by getting multiple
1229 Transfer()s.
Tao Bao9a5caf22015-08-25 15:10:10 -07001230
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001231 The downside is that after splitting, we may increase the package size
1232 since the split pieces don't align well. According to our experiments,
1233 1/8 of the cache size as the per-piece limit appears to be optimal.
1234 Compared to the fixed 1024-block limit, it reduces the overall package
Tao Bao08c85832016-09-19 22:26:30 -07001235 size by 30% for volantis, and 20% for angler and bullhead."""
Tao Bao9a5caf22015-08-25 15:10:10 -07001236
Tao Bao08c85832016-09-19 22:26:30 -07001237 # Possibly split large files into smaller chunks.
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001238 pieces = 0
1239 cache_size = common.OPTIONS.cache_size
1240 split_threshold = 0.125
1241 max_blocks_per_transfer = int(cache_size * split_threshold /
1242 self.tgt.blocksize)
1243
Tao Bao9a5caf22015-08-25 15:10:10 -07001244 # Change nothing for small files.
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001245 if (tgt_ranges.size() <= max_blocks_per_transfer and
1246 src_ranges.size() <= max_blocks_per_transfer):
Tao Bao183e56e2017-03-05 17:05:09 -08001247 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1248 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1249 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001250 return
1251
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001252 while (tgt_ranges.size() > max_blocks_per_transfer and
1253 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001254 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1255 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001256 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1257 src_first = src_ranges.first(max_blocks_per_transfer)
1258
Tao Bao183e56e2017-03-05 17:05:09 -08001259 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1260 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1261 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001262
1263 tgt_ranges = tgt_ranges.subtract(tgt_first)
1264 src_ranges = src_ranges.subtract(src_first)
1265 pieces += 1
1266
1267 # Handle remaining blocks.
1268 if tgt_ranges.size() or src_ranges.size():
1269 # Must be both non-empty.
1270 assert tgt_ranges.size() and src_ranges.size()
1271 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1272 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001273 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1274 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1275 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001276
    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Add Transfer()s for the given target/source range pair.

      Non-"diff" styles (and unsplit diffs) are added as-is. For a "diff"
      transfer with split=True, a same-sized .odex file additionally gets a
      block-wise comparison first: identical blocks are emitted as a
      separate "-skipped" transfer (effectively a move) and only the
      changed ("-cropped") blocks are diffed, avoiding stashing unchanged
      data.
      """
      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only few changes (e.g. header),
      # we will patch the changed blocks only. This avoids stashing unchanged
      # blocks while patching. We limit the analysis to files without size
      # changes only. This is to avoid sacrificing the OTA generation cost too
      # much.
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff
        # that may have better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        # Compare the two files block by block (the size equality above
        # guarantees the zipped streams pair up one-to-one).
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if no clear sign of benefits.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
                tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd as
          # the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1344
1345 print("Finding transfers...")
1346
Doug Zongkerfc44a512014-08-26 13:10:25 -07001347 empty = RangeSet()
1348 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1349 if tgt_fn == "__ZERO":
1350 # the special "__ZERO" domain is all the blocks not contained
1351 # in any file and that are filled with zeros. We have a
1352 # special transfer style for zero blocks.
1353 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001354 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1355 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001356 continue
1357
Tao Baoff777812015-05-12 11:42:31 -07001358 elif tgt_fn == "__COPY":
1359 # "__COPY" domain includes all the blocks not contained in any
1360 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001361 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001362 continue
1363
Doug Zongkerfc44a512014-08-26 13:10:25 -07001364 elif tgt_fn in self.src.file_map:
1365 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001366 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1367 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001368 continue
1369
1370 b = os.path.basename(tgt_fn)
1371 if b in self.src_basenames:
1372 # Look for an exact basename match in the source.
1373 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001374 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1375 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001376 continue
1377
1378 b = re.sub("[0-9]+", "#", b)
1379 if b in self.src_numpatterns:
1380 # Look for a 'number pattern' match (a basename match after
1381 # all runs of digits are replaced by "#"). (This is useful
1382 # for .so files that contain version numbers in the filename
1383 # that get bumped.)
1384 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001385 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1386 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001387 continue
1388
Tao Bao9a5caf22015-08-25 15:10:10 -07001389 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001390
1391 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001392 for k in self.src.file_map.keys():
1393 b = os.path.basename(k)
1394 self.src_basenames[b] = k
1395 b = re.sub("[0-9]+", "#", b)
1396 self.src_numpatterns[b] = k
1397
1398 @staticmethod
1399 def AssertPartition(total, seq):
1400 """Assert that all the RangeSets in 'seq' form a partition of the
1401 'total' RangeSet (ie, they are nonintersecting and their union
1402 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001403
Doug Zongkerfc44a512014-08-26 13:10:25 -07001404 so_far = RangeSet()
1405 for i in seq:
1406 assert not so_far.overlaps(i)
1407 so_far = so_far.union(i)
1408 assert so_far == total