blob: 7919ba4a17deaf244e4c645f6bbc8a3254152cb6 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015import bisect
Tao Bao32fcdab2018-10-12 10:30:39 -070016import logging
Doug Zongkerfc44a512014-08-26 13:10:25 -070017import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070018import struct
Tianjie Xudf1166e2018-01-27 17:35:41 -080019import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070020from hashlib import sha1
21
Dan Albert8b72aef2015-03-23 19:13:21 -070022import rangelib
23
Tao Bao32fcdab2018-10-12 10:30:39 -070024logger = logging.getLogger(__name__)
25
Doug Zongkerfc44a512014-08-26 13:10:25 -070026
class SparseImage(object):
  """Wraps a sparse image file into an image object.

  Wraps a sparse image file (and optional file map and clobbered_blocks) into
  an image object suitable for passing to BlockImageDiff. file_map contains
  the mapping between files and their blocks. clobbered_blocks contains the set
  of blocks that should be always written to the target regardless of the old
  contents (i.e. copying instead of patching). clobbered_blocks should be in
  the form of a string like "0" or "0 1-5 8".
  """

  def __init__(self, simg_fn, file_map_fn=None, clobbered_blocks=None,
               mode="rb", build_map=True, allow_shared_blocks=False,
               hashtree_info_generator=None):
    """Parses the sparse image header and (optionally) builds the block maps.

    Args:
      simg_fn: Path to the sparse image file to open.
      file_map_fn: Optional path to a block map file; when given,
        LoadFileBlockMap() is called to populate self.file_map.
      clobbered_blocks: Blocks to always write to the target regardless of
        old contents, in RangeSet string form (e.g. "0" or "0 1-5 8").
      mode: Open mode for the image file (default "rb").
      build_map: When False, only the header is parsed and validated; no
        offset/care maps are built and __init__ returns early.
      allow_shared_blocks: Forwarded to LoadFileBlockMap(); whether a file's
        blocks may overlap blocks already claimed by another file.
      hashtree_info_generator: Optional generator object; its Generate(self)
        result is stored as self.hashtree_info. Requires build_map=True.

    Raises:
      ValueError: If the header magic/version/sizes are invalid, or an
        unsupported chunk type is encountered.
    """
    self.simg_f = f = open(simg_fn, mode)

    # Sparse image file header, little-endian:
    # magic, major, minor, file_hdr_sz, chunk_hdr_sz, blk_sz, total_blks,
    # total_chunks (the trailing CRC field is not read here).
    header_bin = f.read(28)
    header = struct.unpack("<I4H4I", header_bin)

    magic = header[0]
    major_version = header[1]
    minor_version = header[2]
    file_hdr_sz = header[3]
    chunk_hdr_sz = header[4]
    self.blocksize = blk_sz = header[5]
    self.total_blocks = total_blks = header[6]
    self.total_chunks = total_chunks = header[7]

    if magic != 0xED26FF3A:
      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
    if major_version != 1 or minor_version != 0:
      raise ValueError("I know about version 1.0, but this is version %u.%u" %
                       (major_version, minor_version))
    if file_hdr_sz != 28:
      raise ValueError("File header size was expected to be 28, but is %u." %
                       (file_hdr_sz,))
    if chunk_hdr_sz != 12:
      raise ValueError("Chunk header size was expected to be 12, but is %u." %
                       (chunk_hdr_sz,))

    logger.info(
        "Total of %u %u-byte output blocks in %u input chunks.", total_blks,
        blk_sz, total_chunks)

    if not build_map:
      assert not hashtree_info_generator, \
          "Cannot generate the hashtree info without building the offset map."
      return

    pos = 0   # in blocks
    care_data = []
    # offset_map entries: (start_block, num_blocks, file_offset_or_None,
    # fill_data_or_None). Exactly one of the last two is set per chunk.
    self.offset_map = offset_map = []
    self.clobbered_blocks = rangelib.RangeSet(data=clobbered_blocks)

    for i in range(total_chunks):
      # Chunk header: type, reserved, chunk size (blocks), total size (bytes).
      header_bin = f.read(12)
      header = struct.unpack("<2H2I", header_bin)
      chunk_type = header[0]
      chunk_sz = header[2]
      total_sz = header[3]
      data_sz = total_sz - 12

      if chunk_type == 0xCAC1:   # CHUNK_TYPE_RAW
        if data_sz != (chunk_sz * blk_sz):
          raise ValueError(
              "Raw chunk input size (%u) does not match output size (%u)" %
              (data_sz, chunk_sz * blk_sz))
        else:
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          # Record where the raw payload lives; skip over it without reading.
          offset_map.append((pos, chunk_sz, f.tell(), None))
          pos += chunk_sz
          f.seek(data_sz, os.SEEK_CUR)

      elif chunk_type == 0xCAC2:   # CHUNK_TYPE_FILL
        # The 4-byte fill pattern is repeated to cover chunk_sz blocks.
        fill_data = f.read(4)
        care_data.append(pos)
        care_data.append(pos + chunk_sz)
        offset_map.append((pos, chunk_sz, None, fill_data))
        pos += chunk_sz

      elif chunk_type == 0xCAC3:   # CHUNK_TYPE_DONT_CARE
        if data_sz != 0:
          raise ValueError("Don't care chunk input size is non-zero (%u)" %
                           (data_sz))
        # Fills the don't care data ranges with zeros.
        # TODO(xunchang) pass the care_map to hashtree info generator.
        if hashtree_info_generator:
          # NOTE(review): '\x00' is a py2-era str literal; other fill_data
          # values come from f.read(4) — confirm str/bytes consistency if
          # this ever runs under Python 3.
          fill_data = '\x00' * 4
          # In order to compute verity hashtree on device, we need to write
          # zeros explicitly to the don't care ranges. Because these ranges may
          # contain non-zero data from the previous build.
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          offset_map.append((pos, chunk_sz, None, fill_data))

        pos += chunk_sz

      elif chunk_type == 0xCAC4:   # CHUNK_TYPE_CRC32
        raise ValueError("CRC32 chunks are not supported")

      else:
        raise ValueError("Unknown chunk type 0x%04X not supported" %
                         (chunk_type,))

    # Protects _GetRangeData(): only one instance of that generator may walk
    # the underlying file at a time.
    self.generator_lock = threading.Lock()

    self.care_map = rangelib.RangeSet(care_data)
    # Sorted start-blocks of offset_map entries, for bisect lookups.
    self.offset_index = [i[0] for i in offset_map]

    # Bug: 20881595
    # Introduce extended blocks as a workaround for the bug. dm-verity may
    # touch blocks that are not in the care_map due to block device
    # read-ahead. It will fail if such blocks contain non-zeroes. We zero out
    # the extended blocks explicitly to avoid dm-verity failures. 512 blocks
    # are the maximum read-ahead we configure for dm-verity block devices.
    extended = self.care_map.extend(512)
    all_blocks = rangelib.RangeSet(data=(0, self.total_blocks))
    extended = extended.intersect(all_blocks).subtract(self.care_map)
    self.extended = extended

    self.hashtree_info = None
    if hashtree_info_generator:
      self.hashtree_info = hashtree_info_generator.Generate(self)

    if file_map_fn:
      self.LoadFileBlockMap(file_map_fn, self.clobbered_blocks,
                            allow_shared_blocks)
    else:
      # No per-file map: treat the entire care region as one blob.
      self.file_map = {"__DATA": self.care_map}
157
Sami Tolvanen405e71d2016-02-09 12:28:58 -0800158 def AppendFillChunk(self, data, blocks):
159 f = self.simg_f
160
161 # Append a fill chunk
162 f.seek(0, os.SEEK_END)
163 f.write(struct.pack("<2H3I", 0xCAC2, 0, blocks, 16, data))
164
165 # Update the sparse header
166 self.total_blocks += blocks
167 self.total_chunks += 1
168
169 f.seek(16, os.SEEK_SET)
170 f.write(struct.pack("<2I", self.total_blocks, self.total_chunks))
171
Tao Bao183e56e2017-03-05 17:05:09 -0800172 def RangeSha1(self, ranges):
173 h = sha1()
174 for data in self._GetRangeData(ranges):
175 h.update(data)
176 return h.hexdigest()
177
Doug Zongkerfc44a512014-08-26 13:10:25 -0700178 def ReadRangeSet(self, ranges):
179 return [d for d in self._GetRangeData(ranges)]
180
Tao Bao5fcaaef2015-06-01 13:40:49 -0700181 def TotalSha1(self, include_clobbered_blocks=False):
182 """Return the SHA-1 hash of all data in the 'care' regions.
183
184 If include_clobbered_blocks is True, it returns the hash including the
185 clobbered_blocks."""
186 ranges = self.care_map
187 if not include_clobbered_blocks:
Tao Bao2b4ff172015-06-23 17:30:35 -0700188 ranges = ranges.subtract(self.clobbered_blocks)
Tao Bao183e56e2017-03-05 17:05:09 -0800189 return self.RangeSha1(ranges)
190
191 def WriteRangeDataToFd(self, ranges, fd):
192 for data in self._GetRangeData(ranges):
193 fd.write(data)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700194
  def _GetRangeData(self, ranges):
    """Generator that produces all the image data in 'ranges'. The
    number of individual pieces returned is arbitrary (and in
    particular is not necessarily equal to the number of ranges in
    'ranges'.

    Use a lock to protect the generator so that we will not run two
    instances of this generator on the same object simultaneously."""

    f = self.simg_f
    with self.generator_lock:
      for s, e in ranges:
        to_read = e-s
        # Locate the chunk containing block 's' via the sorted start-block
        # index built in __init__.
        idx = bisect.bisect_right(self.offset_index, s) - 1
        chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]

        # for the first chunk we may be starting partway through it.
        remain = chunk_len - (s - chunk_start)
        this_read = min(remain, to_read)
        if filepos is not None:
          # Raw chunk: seek to the block's byte position and read it.
          p = filepos + ((s - chunk_start) * self.blocksize)
          f.seek(p, os.SEEK_SET)
          yield f.read(this_read * self.blocksize)
        else:
          # Fill chunk: fill_data is 4 bytes, so repeat it blocksize/4
          # times per output block.
          yield fill_data * (this_read * (self.blocksize >> 2))
        to_read -= this_read

        while to_read > 0:
          # continue with following chunks if this range spans multiple chunks.
          idx += 1
          chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
          this_read = min(chunk_len, to_read)
          if filepos is not None:
            f.seek(filepos, os.SEEK_SET)
            yield f.read(this_read * self.blocksize)
          else:
            yield fill_data * (this_read * (self.blocksize >> 2))
          to_read -= this_read
233
  def LoadFileBlockMap(self, fn, clobbered_blocks, allow_shared_blocks):
    """Loads the given block map file.

    Populates self.file_map: filename -> RangeSet of its blocks, plus the
    special entries "__ZERO", "__NONZERO-%d", "__COPY" and "__HASHTREE" for
    care-map blocks not claimed by any file.

    Args:
      fn: The filename of the block map file.
      clobbered_blocks: A RangeSet instance for the clobbered blocks.
      allow_shared_blocks: Whether having shared blocks is allowed.
    """
    # Blocks in the care map not yet claimed by any file.
    remaining = self.care_map
    self.file_map = out = {}

    with open(fn) as f:
      for line in f:
        # Each line: "<filename> <ranges>", e.g. "/system/app/Foo.apk 10-20".
        fn, ranges = line.split(None, 1)
        ranges = rangelib.RangeSet.parse(ranges)

        if allow_shared_blocks:
          # Find the shared blocks that have been claimed by others. If so, tag
          # the entry so that we can skip applying imgdiff on this file.
          shared_blocks = ranges.subtract(remaining)
          if shared_blocks:
            non_shared = ranges.subtract(shared_blocks)
            # Every block is shared: drop the entry entirely.
            if not non_shared:
              continue

            # There shouldn't anything in the extra dict yet.
            assert not ranges.extra, "Non-empty RangeSet.extra"

            # Put the non-shared RangeSet as the value in the block map, which
            # has a copy of the original RangeSet.
            non_shared.extra['uses_shared_blocks'] = ranges
            ranges = non_shared

        out[fn] = ranges
        # Every claimed block must still have been unclaimed.
        assert ranges.size() == ranges.intersect(remaining).size()

        # Currently we assume that blocks in clobbered_blocks are not part of
        # any file.
        assert not clobbered_blocks.overlaps(ranges)
        remaining = remaining.subtract(ranges)

    remaining = remaining.subtract(clobbered_blocks)
    if self.hashtree_info:
      remaining = remaining.subtract(self.hashtree_info.hashtree_range)

    # For all the remaining blocks in the care_map (ie, those that
    # aren't part of the data for any file nor part of the clobbered_blocks),
    # divide them into blocks that are all zero and blocks that aren't.
    # (Zero blocks are handled specially because (1) there are usually
    # a lot of them and (2) bsdiff handles files with long sequences of
    # repeated bytes especially poorly.)

    zero_blocks = []
    nonzero_blocks = []
    # One block's worth of zero bytes, compared against actual block data.
    # NOTE(review): py2-era str literal; under py3 "rb" reads this comparison
    # would be str-vs-bytes — confirm before porting.
    reference = '\0' * self.blocksize

    # Workaround for bug 23227672. For squashfs, we don't have a system.map. So
    # the whole system image will be treated as a single file. But for some
    # unknown bug, the updater will be killed due to OOM when writing back the
    # patched image to flash (observed on lenok-userdebug MEA49). Prior to
    # getting a real fix, we evenly divide the non-zero blocks into smaller
    # groups (currently 1024 blocks or 4MB per group).
    # Bug: 23227672
    MAX_BLOCKS_PER_GROUP = 1024
    nonzero_groups = []

    f = self.simg_f
    for s, e in remaining:
      for b in range(s, e):
        # Look up the chunk holding block 'b' and fetch its contents.
        idx = bisect.bisect_right(self.offset_index, b) - 1
        chunk_start, _, filepos, fill_data = self.offset_map[idx]
        if filepos is not None:
          filepos += (b-chunk_start) * self.blocksize
          f.seek(filepos, os.SEEK_SET)
          data = f.read(self.blocksize)
        else:
          if fill_data == reference[:4]:   # fill with all zeros
            data = reference
          else:
            data = None

        if data == reference:
          # Stored as flat [start, end, start, end, ...] pairs for RangeSet.
          zero_blocks.append(b)
          zero_blocks.append(b+1)
        else:
          nonzero_blocks.append(b)
          nonzero_blocks.append(b+1)

        if len(nonzero_blocks) >= MAX_BLOCKS_PER_GROUP:
          nonzero_groups.append(nonzero_blocks)
          # Clear the list.
          nonzero_blocks = []

    # Flush the final partial group, if any.
    if nonzero_blocks:
      nonzero_groups.append(nonzero_blocks)
      nonzero_blocks = []

    assert zero_blocks or nonzero_groups or clobbered_blocks

    if zero_blocks:
      out["__ZERO"] = rangelib.RangeSet(data=zero_blocks)
    if nonzero_groups:
      for i, blocks in enumerate(nonzero_groups):
        out["__NONZERO-%d" % i] = rangelib.RangeSet(data=blocks)
    if clobbered_blocks:
      out["__COPY"] = clobbered_blocks
    if self.hashtree_info:
      out["__HASHTREE"] = self.hashtree_info.hashtree_range
Doug Zongkerfc44a512014-08-26 13:10:25 -0700342
343 def ResetFileMap(self):
344 """Throw away the file map and treat the entire image as
345 undifferentiated data."""
346 self.file_map = {"__DATA": self.care_map}