| Doug Zongker | 424296a | 2014-09-02 08:53:09 -0700 | [diff] [blame] | 1 | # Copyright (C) 2014 The Android Open Source Project | 
 | 2 | # | 
 | 3 | # Licensed under the Apache License, Version 2.0 (the "License"); | 
 | 4 | # you may not use this file except in compliance with the License. | 
 | 5 | # You may obtain a copy of the License at | 
 | 6 | # | 
 | 7 | #      http://www.apache.org/licenses/LICENSE-2.0 | 
 | 8 | # | 
 | 9 | # Unless required by applicable law or agreed to in writing, software | 
 | 10 | # distributed under the License is distributed on an "AS IS" BASIS, | 
 | 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | 12 | # See the License for the specific language governing permissions and | 
 | 13 | # limitations under the License. | 
 | 14 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 15 | from __future__ import print_function | 
 | 16 |  | 
 | 17 | from collections import deque, OrderedDict | 
 | 18 | from hashlib import sha1 | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 19 | import array | 
| Tao Bao | 8dcf738 | 2015-05-21 14:09:49 -0700 | [diff] [blame] | 20 | import common | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 21 | import functools | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 22 | import heapq | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 23 | import itertools | 
 | 24 | import multiprocessing | 
 | 25 | import os | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 26 | import re | 
 | 27 | import subprocess | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 28 | import threading | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 29 | import time | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 30 | import tempfile | 
 | 31 |  | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 32 | from rangelib import RangeSet | 
 | 33 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 34 |  | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 35 | __all__ = ["EmptyImage", "DataImage", "BlockImageDiff"] | 
 | 36 |  | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 37 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 38 | def compute_patch(src, tgt, imgdiff=False): | 
 | 39 |   srcfd, srcfile = tempfile.mkstemp(prefix="src-") | 
 | 40 |   tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-") | 
 | 41 |   patchfd, patchfile = tempfile.mkstemp(prefix="patch-") | 
 | 42 |   os.close(patchfd) | 
 | 43 |  | 
 | 44 |   try: | 
 | 45 |     with os.fdopen(srcfd, "wb") as f_src: | 
 | 46 |       for p in src: | 
 | 47 |         f_src.write(p) | 
 | 48 |  | 
 | 49 |     with os.fdopen(tgtfd, "wb") as f_tgt: | 
 | 50 |       for p in tgt: | 
 | 51 |         f_tgt.write(p) | 
 | 52 |     try: | 
 | 53 |       os.unlink(patchfile) | 
 | 54 |     except OSError: | 
 | 55 |       pass | 
 | 56 |     if imgdiff: | 
 | 57 |       p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile], | 
 | 58 |                           stdout=open("/dev/null", "a"), | 
 | 59 |                           stderr=subprocess.STDOUT) | 
 | 60 |     else: | 
 | 61 |       p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile]) | 
 | 62 |  | 
 | 63 |     if p: | 
 | 64 |       raise ValueError("diff failed: " + str(p)) | 
 | 65 |  | 
 | 66 |     with open(patchfile, "rb") as f: | 
 | 67 |       return f.read() | 
 | 68 |   finally: | 
 | 69 |     try: | 
 | 70 |       os.unlink(srcfile) | 
 | 71 |       os.unlink(tgtfile) | 
 | 72 |       os.unlink(patchfile) | 
 | 73 |     except OSError: | 
 | 74 |       pass | 
 | 75 |  | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 76 |  | 
 | 77 | class Image(object): | 
 | 78 |   def ReadRangeSet(self, ranges): | 
 | 79 |     raise NotImplementedError | 
 | 80 |  | 
| Tao Bao | 68658c0 | 2015-06-01 13:40:49 -0700 | [diff] [blame] | 81 |   def TotalSha1(self, include_clobbered_blocks=False): | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 82 |     raise NotImplementedError | 
 | 83 |  | 
 | 84 |  | 
 | 85 | class EmptyImage(Image): | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 86 |   """A zero-length image.""" | 
 | 87 |   blocksize = 4096 | 
 | 88 |   care_map = RangeSet() | 
| Tao Bao | ff77781 | 2015-05-12 11:42:31 -0700 | [diff] [blame] | 89 |   clobbered_blocks = RangeSet() | 
| Tao Bao | e9b6191 | 2015-07-09 17:37:49 -0700 | [diff] [blame] | 90 |   extended = RangeSet() | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 91 |   total_blocks = 0 | 
 | 92 |   file_map = {} | 
 | 93 |   def ReadRangeSet(self, ranges): | 
 | 94 |     return () | 
| Tao Bao | 68658c0 | 2015-06-01 13:40:49 -0700 | [diff] [blame] | 95 |   def TotalSha1(self, include_clobbered_blocks=False): | 
 | 96 |     # EmptyImage always carries empty clobbered_blocks, so | 
 | 97 |     # include_clobbered_blocks can be ignored. | 
 | 98 |     assert self.clobbered_blocks.size() == 0 | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 99 |     return sha1().hexdigest() | 
 | 100 |  | 
 | 101 |  | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 102 | class DataImage(Image): | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 103 |   """An image wrapped around a single string of data.""" | 
 | 104 |  | 
 | 105 |   def __init__(self, data, trim=False, pad=False): | 
 | 106 |     self.data = data | 
 | 107 |     self.blocksize = 4096 | 
 | 108 |  | 
 | 109 |     assert not (trim and pad) | 
 | 110 |  | 
 | 111 |     partial = len(self.data) % self.blocksize | 
| Tao Bao | 7589e96 | 2015-09-05 20:35:32 -0700 | [diff] [blame] | 112 |     padded = False | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 113 |     if partial > 0: | 
 | 114 |       if trim: | 
 | 115 |         self.data = self.data[:-partial] | 
 | 116 |       elif pad: | 
 | 117 |         self.data += '\0' * (self.blocksize - partial) | 
| Tao Bao | 7589e96 | 2015-09-05 20:35:32 -0700 | [diff] [blame] | 118 |         padded = True | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 119 |       else: | 
 | 120 |         raise ValueError(("data for DataImage must be multiple of %d bytes " | 
 | 121 |                           "unless trim or pad is specified") % | 
 | 122 |                          (self.blocksize,)) | 
 | 123 |  | 
 | 124 |     assert len(self.data) % self.blocksize == 0 | 
 | 125 |  | 
 | 126 |     self.total_blocks = len(self.data) / self.blocksize | 
 | 127 |     self.care_map = RangeSet(data=(0, self.total_blocks)) | 
| Tao Bao | 7589e96 | 2015-09-05 20:35:32 -0700 | [diff] [blame] | 128 |     # When the last block is padded, we always write the whole block even for | 
 | 129 |     # incremental OTAs. Because otherwise the last block may get skipped if | 
 | 130 |     # unchanged for an incremental, but would fail the post-install | 
 | 131 |     # verification if it has non-zero contents in the padding bytes. | 
 | 132 |     # Bug: 23828506 | 
 | 133 |     if padded: | 
| Tao Bao | 42206c3 | 2015-09-08 13:39:40 -0700 | [diff] [blame] | 134 |       clobbered_blocks = [self.total_blocks-1, self.total_blocks] | 
| Tao Bao | 7589e96 | 2015-09-05 20:35:32 -0700 | [diff] [blame] | 135 |     else: | 
| Tao Bao | 42206c3 | 2015-09-08 13:39:40 -0700 | [diff] [blame] | 136 |       clobbered_blocks = [] | 
 | 137 |     self.clobbered_blocks = clobbered_blocks | 
| Tao Bao | e9b6191 | 2015-07-09 17:37:49 -0700 | [diff] [blame] | 138 |     self.extended = RangeSet() | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 139 |  | 
 | 140 |     zero_blocks = [] | 
 | 141 |     nonzero_blocks = [] | 
 | 142 |     reference = '\0' * self.blocksize | 
 | 143 |  | 
| Tao Bao | 7589e96 | 2015-09-05 20:35:32 -0700 | [diff] [blame] | 144 |     for i in range(self.total_blocks-1 if padded else self.total_blocks): | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 145 |       d = self.data[i*self.blocksize : (i+1)*self.blocksize] | 
 | 146 |       if d == reference: | 
 | 147 |         zero_blocks.append(i) | 
 | 148 |         zero_blocks.append(i+1) | 
 | 149 |       else: | 
 | 150 |         nonzero_blocks.append(i) | 
 | 151 |         nonzero_blocks.append(i+1) | 
 | 152 |  | 
| Tao Bao | 42206c3 | 2015-09-08 13:39:40 -0700 | [diff] [blame] | 153 |     assert zero_blocks or nonzero_blocks or clobbered_blocks | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 154 |  | 
| Tao Bao | 42206c3 | 2015-09-08 13:39:40 -0700 | [diff] [blame] | 155 |     self.file_map = dict() | 
 | 156 |     if zero_blocks: | 
 | 157 |       self.file_map["__ZERO"] = RangeSet(data=zero_blocks) | 
 | 158 |     if nonzero_blocks: | 
 | 159 |       self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks) | 
 | 160 |     if clobbered_blocks: | 
 | 161 |       self.file_map["__COPY"] = RangeSet(data=clobbered_blocks) | 
| Tao Bao | 7589e96 | 2015-09-05 20:35:32 -0700 | [diff] [blame] | 162 |  | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 163 |   def ReadRangeSet(self, ranges): | 
 | 164 |     return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges] | 
 | 165 |  | 
| Tao Bao | 68658c0 | 2015-06-01 13:40:49 -0700 | [diff] [blame] | 166 |   def TotalSha1(self, include_clobbered_blocks=False): | 
| Tao Bao | 7589e96 | 2015-09-05 20:35:32 -0700 | [diff] [blame] | 167 |     if not include_clobbered_blocks: | 
 | 168 |       ranges = self.care_map.subtract(self.clobbered_blocks) | 
 | 169 |       return sha1(self.ReadRangeSet(ranges)).hexdigest() | 
 | 170 |     else: | 
 | 171 |       return sha1(self.data).hexdigest() | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 172 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 173 |  | 
 | 174 | class Transfer(object): | 
 | 175 |   def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id): | 
 | 176 |     self.tgt_name = tgt_name | 
 | 177 |     self.src_name = src_name | 
 | 178 |     self.tgt_ranges = tgt_ranges | 
 | 179 |     self.src_ranges = src_ranges | 
 | 180 |     self.style = style | 
 | 181 |     self.intact = (getattr(tgt_ranges, "monotonic", False) and | 
 | 182 |                    getattr(src_ranges, "monotonic", False)) | 
| Tao Bao | b8c8717 | 2015-03-19 19:42:12 -0700 | [diff] [blame] | 183 |  | 
 | 184 |     # We use OrderedDict rather than dict so that the output is repeatable; | 
 | 185 |     # otherwise it would depend on the hash values of the Transfer objects. | 
 | 186 |     self.goes_before = OrderedDict() | 
 | 187 |     self.goes_after = OrderedDict() | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 188 |  | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 189 |     self.stash_before = [] | 
 | 190 |     self.use_stash = [] | 
 | 191 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 192 |     self.id = len(by_id) | 
 | 193 |     by_id.append(self) | 
 | 194 |  | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 195 |   def NetStashChange(self): | 
 | 196 |     return (sum(sr.size() for (_, sr) in self.stash_before) - | 
 | 197 |             sum(sr.size() for (_, sr) in self.use_stash)) | 
 | 198 |  | 
| Tao Bao | 82c4798 | 2015-08-17 09:45:13 -0700 | [diff] [blame] | 199 |   def ConvertToNew(self): | 
 | 200 |     assert self.style != "new" | 
 | 201 |     self.use_stash = [] | 
 | 202 |     self.style = "new" | 
 | 203 |     self.src_ranges = RangeSet() | 
 | 204 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 205 |   def __str__(self): | 
 | 206 |     return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style + | 
 | 207 |             " to " + str(self.tgt_ranges) + ">") | 
 | 208 |  | 
 | 209 |  | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 210 | @functools.total_ordering | 
 | 211 | class HeapItem(object): | 
 | 212 |   def __init__(self, item): | 
 | 213 |     self.item = item | 
 | 214 |     # Negate the score since python's heap is a min-heap and we want | 
 | 215 |     # the maximum score. | 
 | 216 |     self.score = -item.score | 
 | 217 |   def clear(self): | 
 | 218 |     self.item = None | 
 | 219 |   def __bool__(self): | 
 | 220 |     return self.item is None | 
 | 221 |   def __eq__(self, other): | 
 | 222 |     return self.score == other.score | 
 | 223 |   def __le__(self, other): | 
 | 224 |     return self.score <= other.score | 
 | 225 |  | 
 | 226 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 227 | # BlockImageDiff works on two image objects.  An image object is | 
 | 228 | # anything that provides the following attributes: | 
 | 229 | # | 
 | 230 | #    blocksize: the size in bytes of a block, currently must be 4096. | 
 | 231 | # | 
 | 232 | #    total_blocks: the total size of the partition/image, in blocks. | 
 | 233 | # | 
 | 234 | #    care_map: a RangeSet containing which blocks (in the range [0, | 
 | 235 | #      total_blocks) we actually care about; i.e. which blocks contain | 
 | 236 | #      data. | 
 | 237 | # | 
 | 238 | #    file_map: a dict that partitions the blocks contained in care_map | 
 | 239 | #      into smaller domains that are useful for doing diffs on. | 
 | 240 | #      (Typically a domain is a file, and the key in file_map is the | 
 | 241 | #      pathname.) | 
 | 242 | # | 
| Tao Bao | ff77781 | 2015-05-12 11:42:31 -0700 | [diff] [blame] | 243 | #    clobbered_blocks: a RangeSet containing which blocks contain data | 
 | 244 | #      but may be altered by the FS. They need to be excluded when | 
 | 245 | #      verifying the partition integrity. | 
 | 246 | # | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 247 | #    ReadRangeSet(): a function that takes a RangeSet and returns the | 
 | 248 | #      data contained in the image blocks of that RangeSet.  The data | 
 | 249 | #      is returned as a list or tuple of strings; concatenating the | 
 | 250 | #      elements together should produce the requested data. | 
 | 251 | #      Implementations are free to break up the data into list/tuple | 
 | 252 | #      elements in any way that is convenient. | 
 | 253 | # | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 254 | #    TotalSha1(): a function that returns (as a hex string) the SHA-1 | 
 | 255 | #      hash of all the data in the image (ie, all the blocks in the | 
| Tao Bao | 68658c0 | 2015-06-01 13:40:49 -0700 | [diff] [blame] | 256 | #      care_map minus clobbered_blocks, or including the clobbered | 
 | 257 | #      blocks if include_clobbered_blocks is True). | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 258 | # | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 259 | # When creating a BlockImageDiff, the src image may be None, in which | 
 | 260 | # case the list of transfers produced will never read from the | 
 | 261 | # original image. | 
 | 262 |  | 
 | 263 | class BlockImageDiff(object): | 
| Tao Bao | 293fd13 | 2016-06-11 12:19:23 -0700 | [diff] [blame] | 264 |   def __init__(self, tgt, src=None, threads=None, version=4, | 
 | 265 |                disable_imgdiff=False): | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 266 |     if threads is None: | 
 | 267 |       threads = multiprocessing.cpu_count() // 2 | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 268 |       if threads == 0: | 
 | 269 |         threads = 1 | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 270 |     self.threads = threads | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 271 |     self.version = version | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 272 |     self.transfers = [] | 
 | 273 |     self.src_basenames = {} | 
 | 274 |     self.src_numpatterns = {} | 
| Tao Bao | b4cfca5 | 2016-02-04 14:26:02 -0800 | [diff] [blame] | 275 |     self._max_stashed_size = 0 | 
| Tao Bao | d522bdc | 2016-04-12 15:53:16 -0700 | [diff] [blame] | 276 |     self.touched_src_ranges = RangeSet() | 
 | 277 |     self.touched_src_sha1 = None | 
| Tao Bao | 293fd13 | 2016-06-11 12:19:23 -0700 | [diff] [blame] | 278 |     self.disable_imgdiff = disable_imgdiff | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 279 |  | 
| Tao Bao | eba409c | 2015-10-21 13:30:43 -0700 | [diff] [blame] | 280 |     assert version in (1, 2, 3, 4) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 281 |  | 
 | 282 |     self.tgt = tgt | 
 | 283 |     if src is None: | 
 | 284 |       src = EmptyImage() | 
 | 285 |     self.src = src | 
 | 286 |  | 
 | 287 |     # The updater code that installs the patch always uses 4k blocks. | 
 | 288 |     assert tgt.blocksize == 4096 | 
 | 289 |     assert src.blocksize == 4096 | 
 | 290 |  | 
 | 291 |     # The range sets in each filemap should comprise a partition of | 
 | 292 |     # the care map. | 
 | 293 |     self.AssertPartition(src.care_map, src.file_map.values()) | 
 | 294 |     self.AssertPartition(tgt.care_map, tgt.file_map.values()) | 
 | 295 |  | 
| Tao Bao | b4cfca5 | 2016-02-04 14:26:02 -0800 | [diff] [blame] | 296 |   @property | 
 | 297 |   def max_stashed_size(self): | 
 | 298 |     return self._max_stashed_size | 
 | 299 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 300 |   def Compute(self, prefix): | 
 | 301 |     # When looking for a source file to use as the diff input for a | 
 | 302 |     # target file, we try: | 
 | 303 |     #   1) an exact path match if available, otherwise | 
 | 304 |     #   2) a exact basename match if available, otherwise | 
 | 305 |     #   3) a basename match after all runs of digits are replaced by | 
 | 306 |     #      "#" if available, otherwise | 
 | 307 |     #   4) we have no source for this target. | 
 | 308 |     self.AbbreviateSourceNames() | 
 | 309 |     self.FindTransfers() | 
 | 310 |  | 
 | 311 |     # Find the ordering dependencies among transfers (this is O(n^2) | 
 | 312 |     # in the number of transfers). | 
 | 313 |     self.GenerateDigraph() | 
 | 314 |     # Find a sequence of transfers that satisfies as many ordering | 
 | 315 |     # dependencies as possible (heuristically). | 
 | 316 |     self.FindVertexSequence() | 
 | 317 |     # Fix up the ordering dependencies that the sequence didn't | 
 | 318 |     # satisfy. | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 319 |     if self.version == 1: | 
 | 320 |       self.RemoveBackwardEdges() | 
 | 321 |     else: | 
 | 322 |       self.ReverseBackwardEdges() | 
 | 323 |       self.ImproveVertexSequence() | 
 | 324 |  | 
| Tao Bao | 82c4798 | 2015-08-17 09:45:13 -0700 | [diff] [blame] | 325 |     # Ensure the runtime stash size is under the limit. | 
 | 326 |     if self.version >= 2 and common.OPTIONS.cache_size is not None: | 
 | 327 |       self.ReviseStashSize() | 
 | 328 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 329 |     # Double-check our work. | 
 | 330 |     self.AssertSequenceGood() | 
 | 331 |  | 
 | 332 |     self.ComputePatches(prefix) | 
 | 333 |     self.WriteTransfers(prefix) | 
 | 334 |  | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 335 |   def HashBlocks(self, source, ranges): # pylint: disable=no-self-use | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 336 |     data = source.ReadRangeSet(ranges) | 
 | 337 |     ctx = sha1() | 
 | 338 |  | 
 | 339 |     for p in data: | 
 | 340 |       ctx.update(p) | 
 | 341 |  | 
 | 342 |     return ctx.hexdigest() | 
 | 343 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 344 |   def WriteTransfers(self, prefix): | 
| Tianjie Xu | 37e2930 | 2016-06-23 16:10:35 -0700 | [diff] [blame] | 345 |     def WriteSplitTransfers(out, style, target_blocks): | 
 | 346 |       """Limit the size of operand in command 'new' and 'zero' to 1024 blocks. | 
| Tianjie Xu | bb848c5 | 2016-06-21 15:54:09 -0700 | [diff] [blame] | 347 |  | 
 | 348 |       This prevents the target size of one command from being too large; and | 
 | 349 |       might help to avoid fsync errors on some devices.""" | 
 | 350 |  | 
| Tianjie Xu | 37e2930 | 2016-06-23 16:10:35 -0700 | [diff] [blame] | 351 |       assert (style == "new" or style == "zero") | 
 | 352 |       blocks_limit = 1024 | 
| Tianjie Xu | bb848c5 | 2016-06-21 15:54:09 -0700 | [diff] [blame] | 353 |       total = 0 | 
| Tianjie Xu | 37e2930 | 2016-06-23 16:10:35 -0700 | [diff] [blame] | 354 |       while target_blocks: | 
 | 355 |         blocks_to_write = target_blocks.first(blocks_limit) | 
 | 356 |         out.append("%s %s\n" % (style, blocks_to_write.to_string_raw())) | 
 | 357 |         total += blocks_to_write.size() | 
 | 358 |         target_blocks = target_blocks.subtract(blocks_to_write) | 
| Tianjie Xu | bb848c5 | 2016-06-21 15:54:09 -0700 | [diff] [blame] | 359 |       return total | 
 | 360 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 361 |     out = [] | 
 | 362 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 363 |     total = 0 | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 364 |  | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 365 |     stashes = {} | 
 | 366 |     stashed_blocks = 0 | 
 | 367 |     max_stashed_blocks = 0 | 
 | 368 |  | 
 | 369 |     free_stash_ids = [] | 
 | 370 |     next_stash_id = 0 | 
 | 371 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 372 |     for xf in self.transfers: | 
 | 373 |  | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 374 |       if self.version < 2: | 
 | 375 |         assert not xf.stash_before | 
 | 376 |         assert not xf.use_stash | 
 | 377 |  | 
 | 378 |       for s, sr in xf.stash_before: | 
 | 379 |         assert s not in stashes | 
 | 380 |         if free_stash_ids: | 
 | 381 |           sid = heapq.heappop(free_stash_ids) | 
 | 382 |         else: | 
 | 383 |           sid = next_stash_id | 
 | 384 |           next_stash_id += 1 | 
 | 385 |         stashes[s] = sid | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 386 |         if self.version == 2: | 
| caozhiyuan | 21b37d8 | 2015-10-21 15:14:03 +0800 | [diff] [blame] | 387 |           stashed_blocks += sr.size() | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 388 |           out.append("stash %d %s\n" % (sid, sr.to_string_raw())) | 
 | 389 |         else: | 
 | 390 |           sh = self.HashBlocks(self.src, sr) | 
 | 391 |           if sh in stashes: | 
 | 392 |             stashes[sh] += 1 | 
 | 393 |           else: | 
 | 394 |             stashes[sh] = 1 | 
| caozhiyuan | 21b37d8 | 2015-10-21 15:14:03 +0800 | [diff] [blame] | 395 |             stashed_blocks += sr.size() | 
| Tao Bao | d522bdc | 2016-04-12 15:53:16 -0700 | [diff] [blame] | 396 |             self.touched_src_ranges = self.touched_src_ranges.union(sr) | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 397 |             out.append("stash %s %s\n" % (sh, sr.to_string_raw())) | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 398 |  | 
 | 399 |       if stashed_blocks > max_stashed_blocks: | 
 | 400 |         max_stashed_blocks = stashed_blocks | 
 | 401 |  | 
| Jesse Zhao | 7b985f6 | 2015-03-02 16:53:08 -0800 | [diff] [blame] | 402 |       free_string = [] | 
| caozhiyuan | 21b37d8 | 2015-10-21 15:14:03 +0800 | [diff] [blame] | 403 |       free_size = 0 | 
| Jesse Zhao | 7b985f6 | 2015-03-02 16:53:08 -0800 | [diff] [blame] | 404 |  | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 405 |       if self.version == 1: | 
| Tao Bao | 4fcb77e | 2015-10-21 13:36:01 -0700 | [diff] [blame] | 406 |         src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else "" | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 407 |       elif self.version >= 2: | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 408 |  | 
 | 409 |         #   <# blocks> <src ranges> | 
 | 410 |         #     OR | 
 | 411 |         #   <# blocks> <src ranges> <src locs> <stash refs...> | 
 | 412 |         #     OR | 
 | 413 |         #   <# blocks> - <stash refs...> | 
 | 414 |  | 
 | 415 |         size = xf.src_ranges.size() | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 416 |         src_str = [str(size)] | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 417 |  | 
 | 418 |         unstashed_src_ranges = xf.src_ranges | 
 | 419 |         mapped_stashes = [] | 
 | 420 |         for s, sr in xf.use_stash: | 
 | 421 |           sid = stashes.pop(s) | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 422 |           unstashed_src_ranges = unstashed_src_ranges.subtract(sr) | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 423 |           sh = self.HashBlocks(self.src, sr) | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 424 |           sr = xf.src_ranges.map_within(sr) | 
 | 425 |           mapped_stashes.append(sr) | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 426 |           if self.version == 2: | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 427 |             src_str.append("%d:%s" % (sid, sr.to_string_raw())) | 
| Tao Bao | bb625d2 | 2015-08-13 14:44:15 -0700 | [diff] [blame] | 428 |             # A stash will be used only once. We need to free the stash | 
 | 429 |             # immediately after the use, instead of waiting for the automatic | 
 | 430 |             # clean-up at the end. Because otherwise it may take up extra space | 
 | 431 |             # and lead to OTA failures. | 
 | 432 |             # Bug: 23119955 | 
 | 433 |             free_string.append("free %d\n" % (sid,)) | 
| caozhiyuan | 21b37d8 | 2015-10-21 15:14:03 +0800 | [diff] [blame] | 434 |             free_size += sr.size() | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 435 |           else: | 
 | 436 |             assert sh in stashes | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 437 |             src_str.append("%s:%s" % (sh, sr.to_string_raw())) | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 438 |             stashes[sh] -= 1 | 
 | 439 |             if stashes[sh] == 0: | 
| caozhiyuan | 21b37d8 | 2015-10-21 15:14:03 +0800 | [diff] [blame] | 440 |               free_size += sr.size() | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 441 |               free_string.append("free %s\n" % (sh)) | 
 | 442 |               stashes.pop(sh) | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 443 |           heapq.heappush(free_stash_ids, sid) | 
 | 444 |  | 
 | 445 |         if unstashed_src_ranges: | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 446 |           src_str.insert(1, unstashed_src_ranges.to_string_raw()) | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 447 |           if xf.use_stash: | 
 | 448 |             mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges) | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 449 |             src_str.insert(2, mapped_unstashed.to_string_raw()) | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 450 |             mapped_stashes.append(mapped_unstashed) | 
 | 451 |             self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes) | 
 | 452 |         else: | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 453 |           src_str.insert(1, "-") | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 454 |           self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes) | 
 | 455 |  | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 456 |         src_str = " ".join(src_str) | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 457 |  | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 458 |       # all versions: | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 459 |       #   zero <rangeset> | 
 | 460 |       #   new <rangeset> | 
 | 461 |       #   erase <rangeset> | 
 | 462 |       # | 
 | 463 |       # version 1: | 
 | 464 |       #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset> | 
 | 465 |       #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset> | 
 | 466 |       #   move <src rangeset> <tgt rangeset> | 
 | 467 |       # | 
 | 468 |       # version 2: | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 469 |       #   bsdiff patchstart patchlen <tgt rangeset> <src_str> | 
 | 470 |       #   imgdiff patchstart patchlen <tgt rangeset> <src_str> | 
 | 471 |       #   move <tgt rangeset> <src_str> | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 472 |       # | 
 | 473 |       # version 3: | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 474 |       #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str> | 
 | 475 |       #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str> | 
 | 476 |       #   move hash <tgt rangeset> <src_str> | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 477 |  | 
 | 478 |       tgt_size = xf.tgt_ranges.size() | 
 | 479 |  | 
 | 480 |       if xf.style == "new": | 
 | 481 |         assert xf.tgt_ranges | 
| Tianjie Xu | 37e2930 | 2016-06-23 16:10:35 -0700 | [diff] [blame] | 482 |         assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 483 |         total += tgt_size | 
 | 484 |       elif xf.style == "move": | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 485 |         assert xf.tgt_ranges | 
 | 486 |         assert xf.src_ranges.size() == tgt_size | 
 | 487 |         if xf.src_ranges != xf.tgt_ranges: | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 488 |           if self.version == 1: | 
 | 489 |             out.append("%s %s %s\n" % ( | 
 | 490 |                 xf.style, | 
 | 491 |                 xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw())) | 
 | 492 |           elif self.version == 2: | 
 | 493 |             out.append("%s %s %s\n" % ( | 
 | 494 |                 xf.style, | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 495 |                 xf.tgt_ranges.to_string_raw(), src_str)) | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 496 |           elif self.version >= 3: | 
| Sami Tolvanen | 29f529f | 2015-04-17 16:28:08 +0100 | [diff] [blame] | 497 |             # take into account automatic stashing of overlapping blocks | 
 | 498 |             if xf.src_ranges.overlaps(xf.tgt_ranges): | 
| Tao Bao | e9b6191 | 2015-07-09 17:37:49 -0700 | [diff] [blame] | 499 |               temp_stash_usage = stashed_blocks + xf.src_ranges.size() | 
| Sami Tolvanen | 29f529f | 2015-04-17 16:28:08 +0100 | [diff] [blame] | 500 |               if temp_stash_usage > max_stashed_blocks: | 
 | 501 |                 max_stashed_blocks = temp_stash_usage | 
 | 502 |  | 
| Tao Bao | d522bdc | 2016-04-12 15:53:16 -0700 | [diff] [blame] | 503 |             self.touched_src_ranges = self.touched_src_ranges.union( | 
 | 504 |                 xf.src_ranges) | 
 | 505 |  | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 506 |             out.append("%s %s %s %s\n" % ( | 
 | 507 |                 xf.style, | 
 | 508 |                 self.HashBlocks(self.tgt, xf.tgt_ranges), | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 509 |                 xf.tgt_ranges.to_string_raw(), src_str)) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 510 |           total += tgt_size | 
 | 511 |       elif xf.style in ("bsdiff", "imgdiff"): | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 512 |         assert xf.tgt_ranges | 
 | 513 |         assert xf.src_ranges | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 514 |         if self.version == 1: | 
 | 515 |           out.append("%s %d %d %s %s\n" % ( | 
 | 516 |               xf.style, xf.patch_start, xf.patch_len, | 
 | 517 |               xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw())) | 
 | 518 |         elif self.version == 2: | 
 | 519 |           out.append("%s %d %d %s %s\n" % ( | 
 | 520 |               xf.style, xf.patch_start, xf.patch_len, | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 521 |               xf.tgt_ranges.to_string_raw(), src_str)) | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 522 |         elif self.version >= 3: | 
| Sami Tolvanen | 29f529f | 2015-04-17 16:28:08 +0100 | [diff] [blame] | 523 |           # take into account automatic stashing of overlapping blocks | 
 | 524 |           if xf.src_ranges.overlaps(xf.tgt_ranges): | 
| Tao Bao | e9b6191 | 2015-07-09 17:37:49 -0700 | [diff] [blame] | 525 |             temp_stash_usage = stashed_blocks + xf.src_ranges.size() | 
| Sami Tolvanen | 29f529f | 2015-04-17 16:28:08 +0100 | [diff] [blame] | 526 |             if temp_stash_usage > max_stashed_blocks: | 
 | 527 |               max_stashed_blocks = temp_stash_usage | 
 | 528 |  | 
| Tao Bao | d522bdc | 2016-04-12 15:53:16 -0700 | [diff] [blame] | 529 |           self.touched_src_ranges = self.touched_src_ranges.union( | 
 | 530 |               xf.src_ranges) | 
 | 531 |  | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 532 |           out.append("%s %d %d %s %s %s %s\n" % ( | 
 | 533 |               xf.style, | 
 | 534 |               xf.patch_start, xf.patch_len, | 
 | 535 |               self.HashBlocks(self.src, xf.src_ranges), | 
 | 536 |               self.HashBlocks(self.tgt, xf.tgt_ranges), | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 537 |               xf.tgt_ranges.to_string_raw(), src_str)) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 538 |         total += tgt_size | 
 | 539 |       elif xf.style == "zero": | 
 | 540 |         assert xf.tgt_ranges | 
 | 541 |         to_zero = xf.tgt_ranges.subtract(xf.src_ranges) | 
| Tianjie Xu | 37e2930 | 2016-06-23 16:10:35 -0700 | [diff] [blame] | 542 |         assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size() | 
| Tianjie Xu | bb848c5 | 2016-06-21 15:54:09 -0700 | [diff] [blame] | 543 |         total += to_zero.size() | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 544 |       else: | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 545 |         raise ValueError("unknown transfer style '%s'\n" % xf.style) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 546 |  | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 547 |       if free_string: | 
 | 548 |         out.append("".join(free_string)) | 
| caozhiyuan | 21b37d8 | 2015-10-21 15:14:03 +0800 | [diff] [blame] | 549 |         stashed_blocks -= free_size | 
| Sami Tolvanen | dd67a29 | 2014-12-09 16:40:34 +0000 | [diff] [blame] | 550 |  | 
| Tao Bao | 575d68a | 2015-08-07 19:49:45 -0700 | [diff] [blame] | 551 |       if self.version >= 2 and common.OPTIONS.cache_size is not None: | 
| Tao Bao | 8dcf738 | 2015-05-21 14:09:49 -0700 | [diff] [blame] | 552 |         # Sanity check: abort if we're going to need more stash space than | 
 | 553 |         # the allowed size (cache_size * threshold). There are two purposes | 
 | 554 |         # of having a threshold here. a) Part of the cache may have been | 
 | 555 |         # occupied by some recovery logs. b) It will buy us some time to deal | 
 | 556 |         # with the oversize issue. | 
 | 557 |         cache_size = common.OPTIONS.cache_size | 
 | 558 |         stash_threshold = common.OPTIONS.stash_threshold | 
 | 559 |         max_allowed = cache_size * stash_threshold | 
 | 560 |         assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \ | 
 | 561 |                'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % ( | 
 | 562 |                    max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks, | 
 | 563 |                    self.tgt.blocksize, max_allowed, cache_size, | 
 | 564 |                    stash_threshold) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 565 |  | 
| Tao Bao | d522bdc | 2016-04-12 15:53:16 -0700 | [diff] [blame] | 566 |     if self.version >= 3: | 
 | 567 |       self.touched_src_sha1 = self.HashBlocks( | 
 | 568 |           self.src, self.touched_src_ranges) | 
 | 569 |  | 
| Tao Bao | e9b6191 | 2015-07-09 17:37:49 -0700 | [diff] [blame] | 570 |     # Zero out extended blocks as a workaround for bug 20881595. | 
 | 571 |     if self.tgt.extended: | 
| Tianjie Xu | 37e2930 | 2016-06-23 16:10:35 -0700 | [diff] [blame] | 572 |       assert (WriteSplitTransfers(out, "zero", self.tgt.extended) == | 
| Tianjie Xu | bb848c5 | 2016-06-21 15:54:09 -0700 | [diff] [blame] | 573 |               self.tgt.extended.size()) | 
| Tao Bao | b32d56e | 2015-09-09 11:55:01 -0700 | [diff] [blame] | 574 |       total += self.tgt.extended.size() | 
| Tao Bao | e9b6191 | 2015-07-09 17:37:49 -0700 | [diff] [blame] | 575 |  | 
 | 576 |     # We erase all the blocks on the partition that a) don't contain useful | 
| Tao Bao | 66f1fa6 | 2016-05-03 10:02:01 -0700 | [diff] [blame] | 577 |     # data in the new image; b) will not be touched by dm-verity. Out of those | 
 | 578 |     # blocks, we erase the ones that won't be used in this update at the | 
 | 579 |     # beginning of an update. The rest would be erased at the end. This is to | 
 | 580 |     # work around the eMMC issue observed on some devices, which may otherwise | 
 | 581 |     # get starving for clean blocks and thus fail the update. (b/28347095) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 582 |     all_tgt = RangeSet(data=(0, self.tgt.total_blocks)) | 
| Tao Bao | e9b6191 | 2015-07-09 17:37:49 -0700 | [diff] [blame] | 583 |     all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended) | 
 | 584 |     new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map) | 
| Tao Bao | 66f1fa6 | 2016-05-03 10:02:01 -0700 | [diff] [blame] | 585 |  | 
 | 586 |     erase_first = new_dontcare.subtract(self.touched_src_ranges) | 
 | 587 |     if erase_first: | 
 | 588 |       out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),)) | 
 | 589 |  | 
 | 590 |     erase_last = new_dontcare.subtract(erase_first) | 
 | 591 |     if erase_last: | 
 | 592 |       out.append("erase %s\n" % (erase_last.to_string_raw(),)) | 
| Doug Zongker | e985f6f | 2014-09-09 12:38:47 -0700 | [diff] [blame] | 593 |  | 
 | 594 |     out.insert(0, "%d\n" % (self.version,))   # format version number | 
| Tao Bao | b32d56e | 2015-09-09 11:55:01 -0700 | [diff] [blame] | 595 |     out.insert(1, "%d\n" % (total,)) | 
| Doug Zongker | e985f6f | 2014-09-09 12:38:47 -0700 | [diff] [blame] | 596 |     if self.version >= 2: | 
 | 597 |       # version 2 only: after the total block count, we give the number | 
 | 598 |       # of stash slots needed, and the maximum size needed (in blocks) | 
 | 599 |       out.insert(2, str(next_stash_id) + "\n") | 
 | 600 |       out.insert(3, str(max_stashed_blocks) + "\n") | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 601 |  | 
 | 602 |     with open(prefix + ".transfer.list", "wb") as f: | 
 | 603 |       for i in out: | 
 | 604 |         f.write(i) | 
 | 605 |  | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 606 |     if self.version >= 2: | 
| Tao Bao | b4cfca5 | 2016-02-04 14:26:02 -0800 | [diff] [blame] | 607 |       self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize | 
| Tao Bao | 575d68a | 2015-08-07 19:49:45 -0700 | [diff] [blame] | 608 |       OPTIONS = common.OPTIONS | 
 | 609 |       if OPTIONS.cache_size is not None: | 
 | 610 |         max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold | 
 | 611 |         print("max stashed blocks: %d  (%d bytes), " | 
 | 612 |               "limit: %d bytes (%.2f%%)\n" % ( | 
| Tao Bao | b4cfca5 | 2016-02-04 14:26:02 -0800 | [diff] [blame] | 613 |               max_stashed_blocks, self._max_stashed_size, max_allowed, | 
 | 614 |               self._max_stashed_size * 100.0 / max_allowed)) | 
| Tao Bao | 575d68a | 2015-08-07 19:49:45 -0700 | [diff] [blame] | 615 |       else: | 
 | 616 |         print("max stashed blocks: %d  (%d bytes), limit: <unknown>\n" % ( | 
| Tao Bao | b4cfca5 | 2016-02-04 14:26:02 -0800 | [diff] [blame] | 617 |               max_stashed_blocks, self._max_stashed_size)) | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 618 |  | 
| Tao Bao | 82c4798 | 2015-08-17 09:45:13 -0700 | [diff] [blame] | 619 |   def ReviseStashSize(self): | 
 | 620 |     print("Revising stash size...") | 
 | 621 |     stashes = {} | 
 | 622 |  | 
 | 623 |     # Create the map between a stash and its def/use points. For example, for a | 
 | 624 |     # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd). | 
 | 625 |     for xf in self.transfers: | 
 | 626 |       # Command xf defines (stores) all the stashes in stash_before. | 
 | 627 |       for idx, sr in xf.stash_before: | 
 | 628 |         stashes[idx] = (sr, xf) | 
 | 629 |  | 
 | 630 |       # Record all the stashes command xf uses. | 
 | 631 |       for idx, _ in xf.use_stash: | 
 | 632 |         stashes[idx] += (xf,) | 
 | 633 |  | 
 | 634 |     # Compute the maximum blocks available for stash based on /cache size and | 
 | 635 |     # the threshold. | 
 | 636 |     cache_size = common.OPTIONS.cache_size | 
 | 637 |     stash_threshold = common.OPTIONS.stash_threshold | 
 | 638 |     max_allowed = cache_size * stash_threshold / self.tgt.blocksize | 
 | 639 |  | 
 | 640 |     stashed_blocks = 0 | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 641 |     new_blocks = 0 | 
| Tao Bao | 82c4798 | 2015-08-17 09:45:13 -0700 | [diff] [blame] | 642 |  | 
 | 643 |     # Now go through all the commands. Compute the required stash size on the | 
 | 644 |     # fly. If a command requires excess stash than available, it deletes the | 
 | 645 |     # stash by replacing the command that uses the stash with a "new" command | 
 | 646 |     # instead. | 
 | 647 |     for xf in self.transfers: | 
 | 648 |       replaced_cmds = [] | 
 | 649 |  | 
 | 650 |       # xf.stash_before generates explicit stash commands. | 
 | 651 |       for idx, sr in xf.stash_before: | 
 | 652 |         if stashed_blocks + sr.size() > max_allowed: | 
 | 653 |           # We cannot stash this one for a later command. Find out the command | 
 | 654 |           # that will use this stash and replace the command with "new". | 
 | 655 |           use_cmd = stashes[idx][2] | 
 | 656 |           replaced_cmds.append(use_cmd) | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 657 |           print("%10d  %9s  %s" % (sr.size(), "explicit", use_cmd)) | 
| Tao Bao | 82c4798 | 2015-08-17 09:45:13 -0700 | [diff] [blame] | 658 |         else: | 
 | 659 |           stashed_blocks += sr.size() | 
 | 660 |  | 
 | 661 |       # xf.use_stash generates free commands. | 
 | 662 |       for _, sr in xf.use_stash: | 
 | 663 |         stashed_blocks -= sr.size() | 
 | 664 |  | 
 | 665 |       # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to | 
 | 666 |       # ComputePatches(), they both have the style of "diff". | 
 | 667 |       if xf.style == "diff" and self.version >= 3: | 
 | 668 |         assert xf.tgt_ranges and xf.src_ranges | 
 | 669 |         if xf.src_ranges.overlaps(xf.tgt_ranges): | 
 | 670 |           if stashed_blocks + xf.src_ranges.size() > max_allowed: | 
 | 671 |             replaced_cmds.append(xf) | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 672 |             print("%10d  %9s  %s" % (xf.src_ranges.size(), "implicit", xf)) | 
| Tao Bao | 82c4798 | 2015-08-17 09:45:13 -0700 | [diff] [blame] | 673 |  | 
 | 674 |       # Replace the commands in replaced_cmds with "new"s. | 
 | 675 |       for cmd in replaced_cmds: | 
 | 676 |         # It no longer uses any commands in "use_stash". Remove the def points | 
 | 677 |         # for all those stashes. | 
 | 678 |         for idx, sr in cmd.use_stash: | 
 | 679 |           def_cmd = stashes[idx][1] | 
 | 680 |           assert (idx, sr) in def_cmd.stash_before | 
 | 681 |           def_cmd.stash_before.remove((idx, sr)) | 
 | 682 |  | 
| Tianjie Xu | ebe39a0 | 2016-01-14 14:12:26 -0800 | [diff] [blame] | 683 |         # Add up blocks that violates space limit and print total number to | 
 | 684 |         # screen later. | 
 | 685 |         new_blocks += cmd.tgt_ranges.size() | 
| Tao Bao | 82c4798 | 2015-08-17 09:45:13 -0700 | [diff] [blame] | 686 |         cmd.ConvertToNew() | 
 | 687 |  | 
| Tianjie Xu | ebe39a0 | 2016-01-14 14:12:26 -0800 | [diff] [blame] | 688 |     num_of_bytes = new_blocks * self.tgt.blocksize | 
 | 689 |     print("  Total %d blocks (%d bytes) are packed as new blocks due to " | 
 | 690 |           "insufficient cache size." % (new_blocks, num_of_bytes)) | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 691 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 692 |   def ComputePatches(self, prefix): | 
 | 693 |     print("Reticulating splines...") | 
 | 694 |     diff_q = [] | 
 | 695 |     patch_num = 0 | 
 | 696 |     with open(prefix + ".new.dat", "wb") as new_f: | 
 | 697 |       for xf in self.transfers: | 
 | 698 |         if xf.style == "zero": | 
| Tao Bao | 08c8583 | 2016-09-19 22:26:30 -0700 | [diff] [blame] | 699 |           tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize | 
 | 700 |           print("%10d %10d (%6.2f%%) %7s %s %s" % ( | 
 | 701 |               tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name, | 
 | 702 |               str(xf.tgt_ranges))) | 
 | 703 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 704 |         elif xf.style == "new": | 
 | 705 |           for piece in self.tgt.ReadRangeSet(xf.tgt_ranges): | 
 | 706 |             new_f.write(piece) | 
| Tao Bao | 08c8583 | 2016-09-19 22:26:30 -0700 | [diff] [blame] | 707 |           tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize | 
 | 708 |           print("%10d %10d (%6.2f%%) %7s %s %s" % ( | 
 | 709 |               tgt_size, tgt_size, 100.0, xf.style, | 
 | 710 |               xf.tgt_name, str(xf.tgt_ranges))) | 
 | 711 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 712 |         elif xf.style == "diff": | 
 | 713 |           src = self.src.ReadRangeSet(xf.src_ranges) | 
 | 714 |           tgt = self.tgt.ReadRangeSet(xf.tgt_ranges) | 
 | 715 |  | 
 | 716 |           # We can't compare src and tgt directly because they may have | 
 | 717 |           # the same content but be broken up into blocks differently, eg: | 
 | 718 |           # | 
 | 719 |           #    ["he", "llo"]  vs  ["h", "ello"] | 
 | 720 |           # | 
 | 721 |           # We want those to compare equal, ideally without having to | 
 | 722 |           # actually concatenate the strings (these may be tens of | 
 | 723 |           # megabytes). | 
 | 724 |  | 
 | 725 |           src_sha1 = sha1() | 
 | 726 |           for p in src: | 
 | 727 |             src_sha1.update(p) | 
 | 728 |           tgt_sha1 = sha1() | 
 | 729 |           tgt_size = 0 | 
 | 730 |           for p in tgt: | 
 | 731 |             tgt_sha1.update(p) | 
 | 732 |             tgt_size += len(p) | 
 | 733 |  | 
 | 734 |           if src_sha1.digest() == tgt_sha1.digest(): | 
 | 735 |             # These are identical; we don't need to generate a patch, | 
 | 736 |             # just issue copy commands on the device. | 
 | 737 |             xf.style = "move" | 
| Tao Bao | 08c8583 | 2016-09-19 22:26:30 -0700 | [diff] [blame] | 738 |             if xf.src_ranges != xf.tgt_ranges: | 
 | 739 |               print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % ( | 
 | 740 |                   tgt_size, tgt_size, 100.0, xf.style, | 
 | 741 |                   xf.tgt_name if xf.tgt_name == xf.src_name else ( | 
 | 742 |                       xf.tgt_name + " (from " + xf.src_name + ")"), | 
 | 743 |                   str(xf.tgt_ranges), str(xf.src_ranges))) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 744 |           else: | 
 | 745 |             # For files in zip format (eg, APKs, JARs, etc.) we would | 
 | 746 |             # like to use imgdiff -z if possible (because it usually | 
 | 747 |             # produces significantly smaller patches than bsdiff). | 
 | 748 |             # This is permissible if: | 
 | 749 |             # | 
| Tao Bao | 293fd13 | 2016-06-11 12:19:23 -0700 | [diff] [blame] | 750 |             #  - imgdiff is not disabled, and | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 751 |             #  - the source and target files are monotonic (ie, the | 
 | 752 |             #    data is stored with blocks in increasing order), and | 
 | 753 |             #  - we haven't removed any blocks from the source set. | 
 | 754 |             # | 
 | 755 |             # If these conditions are satisfied then appending all the | 
 | 756 |             # blocks in the set together in order will produce a valid | 
 | 757 |             # zip file (plus possibly extra zeros in the last block), | 
 | 758 |             # which is what imgdiff needs to operate.  (imgdiff is | 
 | 759 |             # fine with extra zeros at the end of the file.) | 
| Tao Bao | 293fd13 | 2016-06-11 12:19:23 -0700 | [diff] [blame] | 760 |             imgdiff = (not self.disable_imgdiff and xf.intact and | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 761 |                        xf.tgt_name.split(".")[-1].lower() | 
 | 762 |                        in ("apk", "jar", "zip")) | 
 | 763 |             xf.style = "imgdiff" if imgdiff else "bsdiff" | 
 | 764 |             diff_q.append((tgt_size, src, tgt, xf, patch_num)) | 
 | 765 |             patch_num += 1 | 
 | 766 |  | 
 | 767 |         else: | 
 | 768 |           assert False, "unknown style " + xf.style | 
 | 769 |  | 
 | 770 |     if diff_q: | 
 | 771 |       if self.threads > 1: | 
 | 772 |         print("Computing patches (using %d threads)..." % (self.threads,)) | 
 | 773 |       else: | 
 | 774 |         print("Computing patches...") | 
 | 775 |       diff_q.sort() | 
 | 776 |  | 
 | 777 |       patches = [None] * patch_num | 
 | 778 |  | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 779 |       # TODO: Rewrite with multiprocessing.ThreadPool? | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 780 |       lock = threading.Lock() | 
 | 781 |       def diff_worker(): | 
 | 782 |         while True: | 
 | 783 |           with lock: | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 784 |             if not diff_q: | 
 | 785 |               return | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 786 |             tgt_size, src, tgt, xf, patchnum = diff_q.pop() | 
 | 787 |           patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff")) | 
 | 788 |           size = len(patch) | 
 | 789 |           with lock: | 
 | 790 |             patches[patchnum] = (patch, xf) | 
| Tao Bao | 08c8583 | 2016-09-19 22:26:30 -0700 | [diff] [blame] | 791 |             print("%10d %10d (%6.2f%%) %7s %s %s %s" % ( | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 792 |                 size, tgt_size, size * 100.0 / tgt_size, xf.style, | 
 | 793 |                 xf.tgt_name if xf.tgt_name == xf.src_name else ( | 
| Tao Bao | 08c8583 | 2016-09-19 22:26:30 -0700 | [diff] [blame] | 794 |                     xf.tgt_name + " (from " + xf.src_name + ")"), | 
 | 795 |                 str(xf.tgt_ranges), str(xf.src_ranges))) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 796 |  | 
 | 797 |       threads = [threading.Thread(target=diff_worker) | 
| Dan Albert | 8b72aef | 2015-03-23 19:13:21 -0700 | [diff] [blame] | 798 |                  for _ in range(self.threads)] | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 799 |       for th in threads: | 
 | 800 |         th.start() | 
 | 801 |       while threads: | 
 | 802 |         threads.pop().join() | 
 | 803 |     else: | 
 | 804 |       patches = [] | 
 | 805 |  | 
 | 806 |     p = 0 | 
 | 807 |     with open(prefix + ".patch.dat", "wb") as patch_f: | 
 | 808 |       for patch, xf in patches: | 
 | 809 |         xf.patch_start = p | 
 | 810 |         xf.patch_len = len(patch) | 
 | 811 |         patch_f.write(patch) | 
 | 812 |         p += len(patch) | 
 | 813 |  | 
 | 814 |   def AssertSequenceGood(self): | 
 | 815 |     # Simulate the sequences of transfers we will output, and check that: | 
 | 816 |     # - we never read a block after writing it, and | 
 | 817 |     # - we write every block we care about exactly once. | 
 | 818 |  | 
 | 819 |     # Start with no blocks having been touched yet. | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 820 |     touched = array.array("B", "\0" * self.tgt.total_blocks) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 821 |  | 
 | 822 |     # Imagine processing the transfers in order. | 
 | 823 |     for xf in self.transfers: | 
 | 824 |       # Check that the input blocks for this transfer haven't yet been touched. | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 825 |  | 
 | 826 |       x = xf.src_ranges | 
 | 827 |       if self.version >= 2: | 
 | 828 |         for _, sr in xf.use_stash: | 
 | 829 |           x = x.subtract(sr) | 
 | 830 |  | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 831 |       for s, e in x: | 
| Tao Bao | ff75c23 | 2016-03-04 15:23:34 -0800 | [diff] [blame] | 832 |         # Source image could be larger. Don't check the blocks that are in the | 
 | 833 |         # source image only. Since they are not in 'touched', and won't ever | 
 | 834 |         # be touched. | 
 | 835 |         for i in range(s, min(e, self.tgt.total_blocks)): | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 836 |           assert touched[i] == 0 | 
 | 837 |  | 
 | 838 |       # Check that the output blocks for this transfer haven't yet | 
 | 839 |       # been touched, and touch all the blocks written by this | 
 | 840 |       # transfer. | 
 | 841 |       for s, e in xf.tgt_ranges: | 
 | 842 |         for i in range(s, e): | 
 | 843 |           assert touched[i] == 0 | 
 | 844 |           touched[i] = 1 | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 845 |  | 
 | 846 |     # Check that we've written every target block. | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 847 |     for s, e in self.tgt.care_map: | 
 | 848 |       for i in range(s, e): | 
 | 849 |         assert touched[i] == 1 | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 850 |  | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 851 |   def ImproveVertexSequence(self): | 
 | 852 |     print("Improving vertex order...") | 
 | 853 |  | 
 | 854 |     # At this point our digraph is acyclic; we reversed any edges that | 
 | 855 |     # were backwards in the heuristically-generated sequence.  The | 
 | 856 |     # previously-generated order is still acceptable, but we hope to | 
 | 857 |     # find a better order that needs less memory for stashed data. | 
 | 858 |     # Now we do a topological sort to generate a new vertex order, | 
 | 859 |     # using a greedy algorithm to choose which vertex goes next | 
 | 860 |     # whenever we have a choice. | 
 | 861 |  | 
 | 862 |     # Make a copy of the edge set; this copy will get destroyed by the | 
 | 863 |     # algorithm. | 
 | 864 |     for xf in self.transfers: | 
 | 865 |       xf.incoming = xf.goes_after.copy() | 
 | 866 |       xf.outgoing = xf.goes_before.copy() | 
 | 867 |  | 
 | 868 |     L = []   # the new vertex order | 
 | 869 |  | 
 | 870 |     # S is the set of sources in the remaining graph; we always choose | 
 | 871 |     # the one that leaves the least amount of stashed data after it's | 
 | 872 |     # executed. | 
 | 873 |     S = [(u.NetStashChange(), u.order, u) for u in self.transfers | 
 | 874 |          if not u.incoming] | 
 | 875 |     heapq.heapify(S) | 
 | 876 |  | 
 | 877 |     while S: | 
 | 878 |       _, _, xf = heapq.heappop(S) | 
 | 879 |       L.append(xf) | 
 | 880 |       for u in xf.outgoing: | 
 | 881 |         del u.incoming[xf] | 
 | 882 |         if not u.incoming: | 
 | 883 |           heapq.heappush(S, (u.NetStashChange(), u.order, u)) | 
 | 884 |  | 
 | 885 |     # if this fails then our graph had a cycle. | 
 | 886 |     assert len(L) == len(self.transfers) | 
 | 887 |  | 
 | 888 |     self.transfers = L | 
 | 889 |     for i, xf in enumerate(L): | 
 | 890 |       xf.order = i | 
 | 891 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 892 |   def RemoveBackwardEdges(self): | 
 | 893 |     print("Removing backward edges...") | 
 | 894 |     in_order = 0 | 
 | 895 |     out_of_order = 0 | 
 | 896 |     lost_source = 0 | 
 | 897 |  | 
 | 898 |     for xf in self.transfers: | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 899 |       lost = 0 | 
 | 900 |       size = xf.src_ranges.size() | 
 | 901 |       for u in xf.goes_before: | 
 | 902 |         # xf should go before u | 
 | 903 |         if xf.order < u.order: | 
 | 904 |           # it does, hurray! | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 905 |           in_order += 1 | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 906 |         else: | 
 | 907 |           # it doesn't, boo.  trim the blocks that u writes from xf's | 
 | 908 |           # source, so that xf can go after u. | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 909 |           out_of_order += 1 | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 910 |           assert xf.src_ranges.overlaps(u.tgt_ranges) | 
 | 911 |           xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges) | 
 | 912 |           xf.intact = False | 
 | 913 |  | 
 | 914 |       if xf.style == "diff" and not xf.src_ranges: | 
 | 915 |         # nothing left to diff from; treat as new data | 
 | 916 |         xf.style = "new" | 
 | 917 |  | 
 | 918 |       lost = size - xf.src_ranges.size() | 
 | 919 |       lost_source += lost | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 920 |  | 
 | 921 |     print(("  %d/%d dependencies (%.2f%%) were violated; " | 
 | 922 |            "%d source blocks removed.") % | 
 | 923 |           (out_of_order, in_order + out_of_order, | 
 | 924 |            (out_of_order * 100.0 / (in_order + out_of_order)) | 
 | 925 |            if (in_order + out_of_order) else 0.0, | 
 | 926 |            lost_source)) | 
 | 927 |  | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 928 |   def ReverseBackwardEdges(self): | 
 | 929 |     print("Reversing backward edges...") | 
 | 930 |     in_order = 0 | 
 | 931 |     out_of_order = 0 | 
 | 932 |     stashes = 0 | 
 | 933 |     stash_size = 0 | 
 | 934 |  | 
 | 935 |     for xf in self.transfers: | 
| Doug Zongker | 6233818 | 2014-09-08 08:29:55 -0700 | [diff] [blame] | 936 |       for u in xf.goes_before.copy(): | 
 | 937 |         # xf should go before u | 
 | 938 |         if xf.order < u.order: | 
 | 939 |           # it does, hurray! | 
 | 940 |           in_order += 1 | 
 | 941 |         else: | 
 | 942 |           # it doesn't, boo.  modify u to stash the blocks that it | 
 | 943 |           # writes that xf wants to read, and then require u to go | 
 | 944 |           # before xf. | 
 | 945 |           out_of_order += 1 | 
 | 946 |  | 
 | 947 |           overlap = xf.src_ranges.intersect(u.tgt_ranges) | 
 | 948 |           assert overlap | 
 | 949 |  | 
 | 950 |           u.stash_before.append((stashes, overlap)) | 
 | 951 |           xf.use_stash.append((stashes, overlap)) | 
 | 952 |           stashes += 1 | 
 | 953 |           stash_size += overlap.size() | 
 | 954 |  | 
 | 955 |           # reverse the edge direction; now xf must go after u | 
 | 956 |           del xf.goes_before[u] | 
 | 957 |           del u.goes_after[xf] | 
 | 958 |           xf.goes_after[u] = None    # value doesn't matter | 
 | 959 |           u.goes_before[xf] = None | 
 | 960 |  | 
 | 961 |     print(("  %d/%d dependencies (%.2f%%) were violated; " | 
 | 962 |            "%d source blocks stashed.") % | 
 | 963 |           (out_of_order, in_order + out_of_order, | 
 | 964 |            (out_of_order * 100.0 / (in_order + out_of_order)) | 
 | 965 |            if (in_order + out_of_order) else 0.0, | 
 | 966 |            stash_size)) | 
 | 967 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 968 |   def FindVertexSequence(self): | 
 | 969 |     print("Finding vertex sequence...") | 
 | 970 |  | 
 | 971 |     # This is based on "A Fast & Effective Heuristic for the Feedback | 
 | 972 |     # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of | 
 | 973 |     # it as starting with the digraph G and moving all the vertices to | 
 | 974 |     # be on a horizontal line in some order, trying to minimize the | 
 | 975 |     # number of edges that end up pointing to the left.  Left-pointing | 
 | 976 |     # edges will get removed to turn the digraph into a DAG.  In this | 
 | 977 |     # case each edge has a weight which is the number of source blocks | 
 | 978 |     # we'll lose if that edge is removed; we try to minimize the total | 
 | 979 |     # weight rather than just the number of edges. | 
 | 980 |  | 
 | 981 |     # Make a copy of the edge set; this copy will get destroyed by the | 
 | 982 |     # algorithm. | 
 | 983 |     for xf in self.transfers: | 
 | 984 |       xf.incoming = xf.goes_after.copy() | 
 | 985 |       xf.outgoing = xf.goes_before.copy() | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 986 |       xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values()) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 987 |  | 
 | 988 |     # We use an OrderedDict instead of just a set so that the output | 
 | 989 |     # is repeatable; otherwise it would depend on the hash values of | 
 | 990 |     # the transfer objects. | 
 | 991 |     G = OrderedDict() | 
 | 992 |     for xf in self.transfers: | 
 | 993 |       G[xf] = None | 
 | 994 |     s1 = deque()  # the left side of the sequence, built from left to right | 
 | 995 |     s2 = deque()  # the right side of the sequence, built from right to left | 
 | 996 |  | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 997 |     heap = [] | 
 | 998 |     for xf in self.transfers: | 
 | 999 |       xf.heap_item = HeapItem(xf) | 
 | 1000 |       heap.append(xf.heap_item) | 
 | 1001 |     heapq.heapify(heap) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1002 |  | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1003 |     # Use OrderedDict() instead of set() to preserve the insertion order. Need | 
 | 1004 |     # to use 'sinks[key] = None' to add key into the set. sinks will look like | 
 | 1005 |     # { key1: None, key2: None, ... }. | 
 | 1006 |     sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing) | 
 | 1007 |     sources = OrderedDict.fromkeys(u for u in G if not u.incoming) | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1008 |  | 
 | 1009 |     def adjust_score(iu, delta): | 
 | 1010 |       iu.score += delta | 
 | 1011 |       iu.heap_item.clear() | 
 | 1012 |       iu.heap_item = HeapItem(iu) | 
 | 1013 |       heapq.heappush(heap, iu.heap_item) | 
 | 1014 |  | 
 | 1015 |     while G: | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1016 |       # Put all sinks at the end of the sequence. | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1017 |       while sinks: | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1018 |         new_sinks = OrderedDict() | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1019 |         for u in sinks: | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1020 |           if u not in G: continue | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1021 |           s2.appendleft(u) | 
 | 1022 |           del G[u] | 
 | 1023 |           for iu in u.incoming: | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1024 |             adjust_score(iu, -iu.outgoing.pop(u)) | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1025 |             if not iu.outgoing: | 
 | 1026 |               new_sinks[iu] = None | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1027 |         sinks = new_sinks | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1028 |  | 
 | 1029 |       # Put all the sources at the beginning of the sequence. | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1030 |       while sources: | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1031 |         new_sources = OrderedDict() | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1032 |         for u in sources: | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1033 |           if u not in G: continue | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1034 |           s1.append(u) | 
 | 1035 |           del G[u] | 
 | 1036 |           for iu in u.outgoing: | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1037 |             adjust_score(iu, +iu.incoming.pop(u)) | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1038 |             if not iu.incoming: | 
 | 1039 |               new_sources[iu] = None | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1040 |         sources = new_sources | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1041 |  | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1042 |       if not G: break | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1043 |  | 
 | 1044 |       # Find the "best" vertex to put next.  "Best" is the one that | 
 | 1045 |       # maximizes the net difference in source blocks saved we get by | 
 | 1046 |       # pretending it's a source rather than a sink. | 
 | 1047 |  | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1048 |       while True: | 
 | 1049 |         u = heapq.heappop(heap) | 
 | 1050 |         if u and u.item in G: | 
 | 1051 |           u = u.item | 
 | 1052 |           break | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1053 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1054 |       s1.append(u) | 
 | 1055 |       del G[u] | 
 | 1056 |       for iu in u.outgoing: | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1057 |         adjust_score(iu, +iu.incoming.pop(u)) | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1058 |         if not iu.incoming: | 
 | 1059 |           sources[iu] = None | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1060 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1061 |       for iu in u.incoming: | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1062 |         adjust_score(iu, -iu.outgoing.pop(u)) | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1063 |         if not iu.outgoing: | 
 | 1064 |           sinks[iu] = None | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1065 |  | 
 | 1066 |     # Now record the sequence in the 'order' field of each transfer, | 
 | 1067 |     # and by rearranging self.transfers to be in the chosen sequence. | 
 | 1068 |  | 
 | 1069 |     new_transfers = [] | 
 | 1070 |     for x in itertools.chain(s1, s2): | 
 | 1071 |       x.order = len(new_transfers) | 
 | 1072 |       new_transfers.append(x) | 
 | 1073 |       del x.incoming | 
 | 1074 |       del x.outgoing | 
 | 1075 |  | 
 | 1076 |     self.transfers = new_transfers | 
 | 1077 |  | 
 | 1078 |   def GenerateDigraph(self): | 
 | 1079 |     print("Generating digraph...") | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1080 |  | 
 | 1081 |     # Each item of source_ranges will be: | 
 | 1082 |     #   - None, if that block is not used as a source, | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1083 |     #   - an ordered set of transfers. | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1084 |     source_ranges = [] | 
 | 1085 |     for b in self.transfers: | 
 | 1086 |       for s, e in b.src_ranges: | 
 | 1087 |         if e > len(source_ranges): | 
 | 1088 |           source_ranges.extend([None] * (e-len(source_ranges))) | 
 | 1089 |         for i in range(s, e): | 
 | 1090 |           if source_ranges[i] is None: | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1091 |             source_ranges[i] = OrderedDict.fromkeys([b]) | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1092 |           else: | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1093 |             source_ranges[i][b] = None | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1094 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1095 |     for a in self.transfers: | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1096 |       intersections = OrderedDict() | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1097 |       for s, e in a.tgt_ranges: | 
 | 1098 |         for i in range(s, e): | 
 | 1099 |           if i >= len(source_ranges): break | 
| Tao Bao | 3348228 | 2016-10-24 16:49:08 -0700 | [diff] [blame] | 1100 |           # Add all the Transfers in source_ranges[i] to the (ordered) set. | 
 | 1101 |           if source_ranges[i] is not None: | 
 | 1102 |             for j in source_ranges[i]: | 
 | 1103 |               intersections[j] = None | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1104 |  | 
 | 1105 |       for b in intersections: | 
 | 1106 |         if a is b: continue | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1107 |  | 
 | 1108 |         # If the blocks written by A are read by B, then B needs to go before A. | 
 | 1109 |         i = a.tgt_ranges.intersect(b.src_ranges) | 
 | 1110 |         if i: | 
| Doug Zongker | ab7ca1d | 2014-08-26 10:40:28 -0700 | [diff] [blame] | 1111 |           if b.src_name == "__ZERO": | 
 | 1112 |             # the cost of removing source blocks for the __ZERO domain | 
 | 1113 |             # is (nearly) zero. | 
 | 1114 |             size = 0 | 
 | 1115 |           else: | 
 | 1116 |             size = i.size() | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1117 |           b.goes_before[a] = size | 
 | 1118 |           a.goes_after[b] = size | 
 | 1119 |  | 
 | 1120 |   def FindTransfers(self): | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1121 |     """Parse the file_map to generate all the transfers.""" | 
 | 1122 |  | 
| Tao Bao | 08c8583 | 2016-09-19 22:26:30 -0700 | [diff] [blame] | 1123 |     def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, | 
 | 1124 |                           style, by_id): | 
 | 1125 |       """Add one or multiple Transfer()s by splitting large files. | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1126 |  | 
 | 1127 |       For BBOTA v3, we need to stash source blocks for resumable feature. | 
 | 1128 |       However, with the growth of file size and the shrink of the cache | 
 | 1129 |       partition source blocks are too large to be stashed. If a file occupies | 
| Tao Bao | 08c8583 | 2016-09-19 22:26:30 -0700 | [diff] [blame] | 1130 |       too many blocks, we split it into smaller pieces by getting multiple | 
 | 1131 |       Transfer()s. | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1132 |  | 
| Tianjie Xu | bb86e1d | 2016-01-13 16:14:10 -0800 | [diff] [blame] | 1133 |       The downside is that after splitting, we may increase the package size | 
 | 1134 |       since the split pieces don't align well. According to our experiments, | 
 | 1135 |       1/8 of the cache size as the per-piece limit appears to be optimal. | 
 | 1136 |       Compared to the fixed 1024-block limit, it reduces the overall package | 
| Tao Bao | 08c8583 | 2016-09-19 22:26:30 -0700 | [diff] [blame] | 1137 |       size by 30% for volantis, and 20% for angler and bullhead.""" | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1138 |  | 
| Tao Bao | 08c8583 | 2016-09-19 22:26:30 -0700 | [diff] [blame] | 1139 |       # Possibly split large files into smaller chunks. | 
| Tianjie Xu | bb86e1d | 2016-01-13 16:14:10 -0800 | [diff] [blame] | 1140 |       pieces = 0 | 
 | 1141 |       cache_size = common.OPTIONS.cache_size | 
 | 1142 |       split_threshold = 0.125 | 
 | 1143 |       max_blocks_per_transfer = int(cache_size * split_threshold / | 
 | 1144 |                                     self.tgt.blocksize) | 
 | 1145 |  | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1146 |       # Change nothing for small files. | 
| Tianjie Xu | bb86e1d | 2016-01-13 16:14:10 -0800 | [diff] [blame] | 1147 |       if (tgt_ranges.size() <= max_blocks_per_transfer and | 
 | 1148 |           src_ranges.size() <= max_blocks_per_transfer): | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1149 |         Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id) | 
 | 1150 |         return | 
 | 1151 |  | 
| Tianjie Xu | bb86e1d | 2016-01-13 16:14:10 -0800 | [diff] [blame] | 1152 |       while (tgt_ranges.size() > max_blocks_per_transfer and | 
 | 1153 |              src_ranges.size() > max_blocks_per_transfer): | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1154 |         tgt_split_name = "%s-%d" % (tgt_name, pieces) | 
 | 1155 |         src_split_name = "%s-%d" % (src_name, pieces) | 
| Tianjie Xu | bb86e1d | 2016-01-13 16:14:10 -0800 | [diff] [blame] | 1156 |         tgt_first = tgt_ranges.first(max_blocks_per_transfer) | 
 | 1157 |         src_first = src_ranges.first(max_blocks_per_transfer) | 
 | 1158 |  | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1159 |         Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style, | 
 | 1160 |                  by_id) | 
 | 1161 |  | 
 | 1162 |         tgt_ranges = tgt_ranges.subtract(tgt_first) | 
 | 1163 |         src_ranges = src_ranges.subtract(src_first) | 
 | 1164 |         pieces += 1 | 
 | 1165 |  | 
 | 1166 |       # Handle remaining blocks. | 
 | 1167 |       if tgt_ranges.size() or src_ranges.size(): | 
 | 1168 |         # Must be both non-empty. | 
 | 1169 |         assert tgt_ranges.size() and src_ranges.size() | 
 | 1170 |         tgt_split_name = "%s-%d" % (tgt_name, pieces) | 
 | 1171 |         src_split_name = "%s-%d" % (src_name, pieces) | 
 | 1172 |         Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style, | 
 | 1173 |                  by_id) | 
 | 1174 |  | 
| Tao Bao | 08c8583 | 2016-09-19 22:26:30 -0700 | [diff] [blame] | 1175 |     def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id, | 
 | 1176 |                     split=False): | 
 | 1177 |       """Wrapper function for adding a Transfer().""" | 
 | 1178 |  | 
 | 1179 |       # We specialize diff transfers only (which covers bsdiff/imgdiff/move); | 
 | 1180 |       # otherwise add the Transfer() as is. | 
 | 1181 |       if style != "diff" or not split: | 
 | 1182 |         Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id) | 
 | 1183 |         return | 
 | 1184 |  | 
 | 1185 |       # Handle .odex files specially to analyze the block-wise difference. If | 
 | 1186 |       # most of the blocks are identical with only few changes (e.g. header), | 
 | 1187 |       # we will patch the changed blocks only. This avoids stashing unchanged | 
 | 1188 |       # blocks while patching. We limit the analysis to files without size | 
 | 1189 |       # changes only. This is to avoid sacrificing the OTA generation cost too | 
 | 1190 |       # much. | 
 | 1191 |       if (tgt_name.split(".")[-1].lower() == 'odex' and | 
 | 1192 |           tgt_ranges.size() == src_ranges.size()): | 
 | 1193 |  | 
 | 1194 |         # 0.5 threshold can be further tuned. The tradeoff is: if only very | 
 | 1195 |         # few blocks remain identical, we lose the opportunity to use imgdiff | 
 | 1196 |         # that may have better compression ratio than bsdiff. | 
 | 1197 |         crop_threshold = 0.5 | 
 | 1198 |  | 
 | 1199 |         tgt_skipped = RangeSet() | 
 | 1200 |         src_skipped = RangeSet() | 
 | 1201 |         tgt_size = tgt_ranges.size() | 
 | 1202 |         tgt_changed = 0 | 
 | 1203 |         for src_block, tgt_block in zip(src_ranges.next_item(), | 
 | 1204 |                                         tgt_ranges.next_item()): | 
 | 1205 |           src_rs = RangeSet(str(src_block)) | 
 | 1206 |           tgt_rs = RangeSet(str(tgt_block)) | 
 | 1207 |           if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs): | 
 | 1208 |             tgt_skipped = tgt_skipped.union(tgt_rs) | 
 | 1209 |             src_skipped = src_skipped.union(src_rs) | 
 | 1210 |           else: | 
 | 1211 |             tgt_changed += tgt_rs.size() | 
 | 1212 |  | 
 | 1213 |           # Terminate early if no clear sign of benefits. | 
 | 1214 |           if tgt_changed > tgt_size * crop_threshold: | 
 | 1215 |             break | 
 | 1216 |  | 
 | 1217 |         if tgt_changed < tgt_size * crop_threshold: | 
 | 1218 |           assert tgt_changed + tgt_skipped.size() == tgt_size | 
 | 1219 |           print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size, | 
 | 1220 |                 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)) | 
 | 1221 |           AddSplitTransfers( | 
 | 1222 |               "%s-skipped" % (tgt_name,), | 
 | 1223 |               "%s-skipped" % (src_name,), | 
 | 1224 |               tgt_skipped, src_skipped, style, by_id) | 
 | 1225 |  | 
 | 1226 |           # Intentionally change the file extension to avoid being imgdiff'd as | 
 | 1227 |           # the files are no longer in their original format. | 
 | 1228 |           tgt_name = "%s-cropped" % (tgt_name,) | 
 | 1229 |           src_name = "%s-cropped" % (src_name,) | 
 | 1230 |           tgt_ranges = tgt_ranges.subtract(tgt_skipped) | 
 | 1231 |           src_ranges = src_ranges.subtract(src_skipped) | 
 | 1232 |  | 
 | 1233 |           # Possibly having no changed blocks. | 
 | 1234 |           if not tgt_ranges: | 
 | 1235 |             return | 
 | 1236 |  | 
 | 1237 |       # Add the transfer(s). | 
 | 1238 |       AddSplitTransfers( | 
 | 1239 |           tgt_name, src_name, tgt_ranges, src_ranges, style, by_id) | 
 | 1240 |  | 
 | 1241 |     print("Finding transfers...") | 
 | 1242 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1243 |     empty = RangeSet() | 
 | 1244 |     for tgt_fn, tgt_ranges in self.tgt.file_map.items(): | 
 | 1245 |       if tgt_fn == "__ZERO": | 
 | 1246 |         # the special "__ZERO" domain is all the blocks not contained | 
 | 1247 |         # in any file and that are filled with zeros.  We have a | 
 | 1248 |         # special transfer style for zero blocks. | 
 | 1249 |         src_ranges = self.src.file_map.get("__ZERO", empty) | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1250 |         AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges, | 
 | 1251 |                     "zero", self.transfers) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1252 |         continue | 
 | 1253 |  | 
| Tao Bao | ff77781 | 2015-05-12 11:42:31 -0700 | [diff] [blame] | 1254 |       elif tgt_fn == "__COPY": | 
 | 1255 |         # "__COPY" domain includes all the blocks not contained in any | 
 | 1256 |         # file and that need to be copied unconditionally to the target. | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1257 |         AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers) | 
| Tao Bao | ff77781 | 2015-05-12 11:42:31 -0700 | [diff] [blame] | 1258 |         continue | 
 | 1259 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1260 |       elif tgt_fn in self.src.file_map: | 
 | 1261 |         # Look for an exact pathname match in the source. | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1262 |         AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn], | 
 | 1263 |                     "diff", self.transfers, self.version >= 3) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1264 |         continue | 
 | 1265 |  | 
 | 1266 |       b = os.path.basename(tgt_fn) | 
 | 1267 |       if b in self.src_basenames: | 
 | 1268 |         # Look for an exact basename match in the source. | 
 | 1269 |         src_fn = self.src_basenames[b] | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1270 |         AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn], | 
 | 1271 |                     "diff", self.transfers, self.version >= 3) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1272 |         continue | 
 | 1273 |  | 
 | 1274 |       b = re.sub("[0-9]+", "#", b) | 
 | 1275 |       if b in self.src_numpatterns: | 
 | 1276 |         # Look for a 'number pattern' match (a basename match after | 
 | 1277 |         # all runs of digits are replaced by "#").  (This is useful | 
 | 1278 |         # for .so files that contain version numbers in the filename | 
 | 1279 |         # that get bumped.) | 
 | 1280 |         src_fn = self.src_numpatterns[b] | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1281 |         AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn], | 
 | 1282 |                     "diff", self.transfers, self.version >= 3) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1283 |         continue | 
 | 1284 |  | 
| Tao Bao | 9a5caf2 | 2015-08-25 15:10:10 -0700 | [diff] [blame] | 1285 |       AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers) | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1286 |  | 
 | 1287 |   def AbbreviateSourceNames(self): | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1288 |     for k in self.src.file_map.keys(): | 
 | 1289 |       b = os.path.basename(k) | 
 | 1290 |       self.src_basenames[b] = k | 
 | 1291 |       b = re.sub("[0-9]+", "#", b) | 
 | 1292 |       self.src_numpatterns[b] = k | 
 | 1293 |  | 
 | 1294 |   @staticmethod | 
 | 1295 |   def AssertPartition(total, seq): | 
 | 1296 |     """Assert that all the RangeSets in 'seq' form a partition of the | 
 | 1297 |     'total' RangeSet (ie, they are nonintersecting and their union | 
 | 1298 |     equals 'total').""" | 
| Doug Zongker | 6ab2a50 | 2016-02-09 08:28:09 -0800 | [diff] [blame] | 1299 |  | 
| Doug Zongker | fc44a51 | 2014-08-26 13:10:25 -0700 | [diff] [blame] | 1300 |     so_far = RangeSet() | 
 | 1301 |     for i in seq: | 
 | 1302 |       assert not so_far.overlaps(i) | 
 | 1303 |       so_far = so_far.union(i) | 
 | 1304 |     assert so_far == total |