blob: 5ebb1f0572b856959448de3a3ca2d0e80f7ed2f3 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015import bisect
Tao Bao32fcdab2018-10-12 10:30:39 -070016import logging
Doug Zongkerfc44a512014-08-26 13:10:25 -070017import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070018import struct
Tianjie Xudf1166e2018-01-27 17:35:41 -080019import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070020from hashlib import sha1
21
Dan Albert8b72aef2015-03-23 19:13:21 -070022import rangelib
23
# Module-level logger, named after this module so output can be filtered
# via the standard logging hierarchy.
logger = logging.getLogger(__name__)
25
Doug Zongkerfc44a512014-08-26 13:10:25 -070026
class SparseImage(object):
  """Wraps a sparse image file into an image object.

  Wraps a sparse image file (and optional file map and clobbered_blocks) into
  an image object suitable for passing to BlockImageDiff. file_map contains
  the mapping between files and their blocks. clobbered_blocks contains the set
  of blocks that should be always written to the target regardless of the old
  contents (i.e. copying instead of patching). clobbered_blocks should be in
  the form of a string like "0" or "0 1-5 8".
  """

  def __init__(self, simg_fn, file_map_fn=None, clobbered_blocks=None,
               mode="rb", build_map=True, allow_shared_blocks=False,
               hashtree_info_generator=None):
    """Parses the sparse image header and (optionally) builds the block maps.

    Args:
      simg_fn: Path to the sparse image file.
      file_map_fn: Optional path to a block map file; see LoadFileBlockMap.
      clobbered_blocks: Blocks to always write to target, as a string
          parsable by rangelib.RangeSet (e.g. "0" or "0 1-5 8").
      mode: Mode for opening simg_fn; must be a binary mode since the chunk
          data is read as raw bytes.
      build_map: Whether to parse the chunks and build the offset/care maps.
          If False, only the 28-byte header is read and validated.
      allow_shared_blocks: Whether the file map may claim blocks already
          claimed by another file.
      hashtree_info_generator: Optional object whose Generate(self) computes
          verity hashtree info; requires build_map.

    Raises:
      ValueError: If the header is malformed, or the image contains CRC32 or
          unknown chunk types.
    """
    self.simg_f = f = open(simg_fn, mode)

    # Sparse image header (28 bytes, little-endian): magic, major/minor
    # version, file header size, chunk header size, block size, total output
    # blocks, total input chunks (the trailing image checksum is ignored).
    header_bin = f.read(28)
    header = struct.unpack("<I4H4I", header_bin)

    magic = header[0]
    major_version = header[1]
    minor_version = header[2]
    file_hdr_sz = header[3]
    chunk_hdr_sz = header[4]
    self.blocksize = blk_sz = header[5]
    self.total_blocks = total_blks = header[6]
    self.total_chunks = total_chunks = header[7]

    if magic != 0xED26FF3A:
      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
    if major_version != 1 or minor_version != 0:
      raise ValueError("I know about version 1.0, but this is version %u.%u" %
                       (major_version, minor_version))
    if file_hdr_sz != 28:
      raise ValueError("File header size was expected to be 28, but is %u." %
                       (file_hdr_sz,))
    if chunk_hdr_sz != 12:
      raise ValueError("Chunk header size was expected to be 12, but is %u." %
                       (chunk_hdr_sz,))

    logger.info(
        "Total of %u %u-byte output blocks in %u input chunks.", total_blks,
        blk_sz, total_chunks)

    if not build_map:
      assert not hashtree_info_generator, \
          "Cannot generate the hashtree info without building the offset map."
      return

    pos = 0  # in blocks
    care_data = []
    # offset_map entries are (start block, block count, file offset of raw
    # data or None, 4-byte fill pattern or None).
    self.offset_map = offset_map = []
    self.clobbered_blocks = rangelib.RangeSet(data=clobbered_blocks)

    for _ in range(total_chunks):
      # Chunk header (12 bytes): type, reserved, output size in blocks,
      # total chunk size in bytes (including this header).
      header_bin = f.read(12)
      header = struct.unpack("<2H2I", header_bin)
      chunk_type = header[0]
      chunk_sz = header[2]
      total_sz = header[3]
      data_sz = total_sz - 12

      if chunk_type == 0xCAC1:  # CHUNK_TYPE_RAW
        if data_sz != (chunk_sz * blk_sz):
          raise ValueError(
              "Raw chunk input size (%u) does not match output size (%u)" %
              (data_sz, chunk_sz * blk_sz))
        else:
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          # Record the current file position; skip over the payload without
          # reading it.
          offset_map.append((pos, chunk_sz, f.tell(), None))
          pos += chunk_sz
          f.seek(data_sz, os.SEEK_CUR)

      elif chunk_type == 0xCAC2:  # CHUNK_TYPE_FILL
        fill_data = f.read(4)
        care_data.append(pos)
        care_data.append(pos + chunk_sz)
        offset_map.append((pos, chunk_sz, None, fill_data))
        pos += chunk_sz

      elif chunk_type == 0xCAC3:  # CHUNK_TYPE_DONT_CARE
        if data_sz != 0:
          raise ValueError("Don't care chunk input size is non-zero (%u)" %
                           (data_sz))
        # Fills the don't care data ranges with zeros.
        # TODO(xunchang) pass the care_map to hashtree info generator.
        if hashtree_info_generator:
          # Must be bytes: the image is opened in binary mode, so this value
          # is yielded alongside bytes read from the file.
          fill_data = b'\x00' * 4
          # In order to compute verity hashtree on device, we need to write
          # zeros explicitly to the don't care ranges. Because these ranges may
          # contain non-zero data from the previous build.
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          offset_map.append((pos, chunk_sz, None, fill_data))

        pos += chunk_sz

      elif chunk_type == 0xCAC4:  # CHUNK_TYPE_CRC32
        raise ValueError("CRC32 chunks are not supported")

      else:
        raise ValueError("Unknown chunk type 0x%04X not supported" %
                         (chunk_type,))

    # Protects _GetRangeData, which seeks/reads the shared file object.
    self.generator_lock = threading.Lock()

    self.care_map = rangelib.RangeSet(care_data)
    # Sorted chunk start blocks, for bisecting a block into offset_map.
    self.offset_index = [entry[0] for entry in offset_map]

    # Bug: 20881595
    # Introduce extended blocks as a workaround for the bug. dm-verity may
    # touch blocks that are not in the care_map due to block device
    # read-ahead. It will fail if such blocks contain non-zeroes. We zero out
    # the extended blocks explicitly to avoid dm-verity failures. 512 blocks
    # are the maximum read-ahead we configure for dm-verity block devices.
    extended = self.care_map.extend(512)
    all_blocks = rangelib.RangeSet(data=(0, self.total_blocks))
    extended = extended.intersect(all_blocks).subtract(self.care_map)
    self.extended = extended

    self.hashtree_info = None
    if hashtree_info_generator:
      self.hashtree_info = hashtree_info_generator.Generate(self)

    if file_map_fn:
      self.LoadFileBlockMap(file_map_fn, self.clobbered_blocks,
                            allow_shared_blocks)
    else:
      self.file_map = {"__DATA": self.care_map}

  def AppendFillChunk(self, data, blocks):
    """Appends a fill chunk of `blocks` blocks with 4-byte pattern `data`.

    Updates (and rewrites) the total block/chunk counts in the sparse header;
    the file must have been opened in a writable mode.
    """
    f = self.simg_f

    # Append a fill chunk: type 0xCAC2, reserved 0, `blocks` output blocks,
    # 16 total bytes (12-byte header + 4-byte fill value).
    f.seek(0, os.SEEK_END)
    f.write(struct.pack("<2H3I", 0xCAC2, 0, blocks, 16, data))

    # Update the sparse header.
    self.total_blocks += blocks
    self.total_chunks += 1

    # total_blks and total_chunks live at byte offset 16 in the header.
    f.seek(16, os.SEEK_SET)
    f.write(struct.pack("<2I", self.total_blocks, self.total_chunks))

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data in the given block ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    """Returns the data in the given block ranges, as a list of pieces."""
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 hash of all data in the 'care' regions.

    If include_clobbered_blocks is True, it returns the hash including the
    clobbered_blocks."""
    ranges = self.care_map
    if not include_clobbered_blocks:
      ranges = ranges.subtract(self.clobbered_blocks)
    return self.RangeSha1(ranges)

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes all data in the given block ranges to the file object fd."""
    for data in self._GetRangeData(ranges):
      fd.write(data)

  def _GetRangeData(self, ranges):
    """Generator that produces all the image data in 'ranges'. The
    number of individual pieces returned is arbitrary (and in
    particular is not necessarily equal to the number of ranges in
    'ranges'.

    Use a lock to protect the generator so that we will not run two
    instances of this generator on the same object simultaneously."""

    f = self.simg_f
    with self.generator_lock:
      for s, e in ranges:
        to_read = e - s
        # Locate the chunk containing block s.
        idx = bisect.bisect_right(self.offset_index, s) - 1
        chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]

        # for the first chunk we may be starting partway through it.
        remain = chunk_len - (s - chunk_start)
        this_read = min(remain, to_read)
        if filepos is not None:
          p = filepos + ((s - chunk_start) * self.blocksize)
          f.seek(p, os.SEEK_SET)
          yield f.read(this_read * self.blocksize)
        else:
          # Fill chunk: replicate the 4-byte pattern to cover this_read
          # blocks (blocksize >> 2 patterns per block).
          yield fill_data * (this_read * (self.blocksize >> 2))
        to_read -= this_read

        while to_read > 0:
          # continue with following chunks if this range spans multiple
          # chunks.
          idx += 1
          chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
          this_read = min(chunk_len, to_read)
          if filepos is not None:
            f.seek(filepos, os.SEEK_SET)
            yield f.read(this_read * self.blocksize)
          else:
            yield fill_data * (this_read * (self.blocksize >> 2))
          to_read -= this_read

  def LoadFileBlockMap(self, fn, clobbered_blocks, allow_shared_blocks):
    """Loads the given block map file.

    Args:
      fn: The filename of the block map file.
      clobbered_blocks: A RangeSet instance for the clobbered blocks.
      allow_shared_blocks: Whether having shared blocks is allowed.
    """
    remaining = self.care_map
    self.file_map = out = {}

    with open(fn) as f:
      for line in f:
        fn, ranges = line.split(None, 1)
        ranges = rangelib.RangeSet.parse(ranges)

        if allow_shared_blocks:
          # Find the shared blocks that have been claimed by others.
          shared_blocks = ranges.subtract(remaining)
          if shared_blocks:
            ranges = ranges.subtract(shared_blocks)
            if not ranges:
              continue

            # Tag the entry so that we can skip applying imgdiff on this file.
            ranges.extra['uses_shared_blocks'] = True

        out[fn] = ranges
        assert ranges.size() == ranges.intersect(remaining).size()

        # Currently we assume that blocks in clobbered_blocks are not part of
        # any file.
        assert not clobbered_blocks.overlaps(ranges)
        remaining = remaining.subtract(ranges)

    remaining = remaining.subtract(clobbered_blocks)
    if self.hashtree_info:
      remaining = remaining.subtract(self.hashtree_info.hashtree_range)

    # For all the remaining blocks in the care_map (ie, those that
    # aren't part of the data for any file nor part of the clobbered_blocks),
    # divide them into blocks that are all zero and blocks that aren't.
    # (Zero blocks are handled specially because (1) there are usually
    # a lot of them and (2) bsdiff handles files with long sequences of
    # repeated bytes especially poorly.)

    zero_blocks = []
    nonzero_blocks = []
    # bytes, not str: block data is read from a binary file, and on Python 3
    # a str reference would never compare equal to it.
    reference = b'\0' * self.blocksize

    # Workaround for bug 23227672. For squashfs, we don't have a system.map. So
    # the whole system image will be treated as a single file. But for some
    # unknown bug, the updater will be killed due to OOM when writing back the
    # patched image to flash (observed on lenok-userdebug MEA49). Prior to
    # getting a real fix, we evenly divide the non-zero blocks into smaller
    # groups (currently 1024 blocks or 4MB per group).
    # Bug: 23227672
    MAX_BLOCKS_PER_GROUP = 1024
    nonzero_groups = []

    f = self.simg_f
    for s, e in remaining:
      for b in range(s, e):
        idx = bisect.bisect_right(self.offset_index, b) - 1
        chunk_start, _, filepos, fill_data = self.offset_map[idx]
        if filepos is not None:
          filepos += (b - chunk_start) * self.blocksize
          f.seek(filepos, os.SEEK_SET)
          data = f.read(self.blocksize)
        else:
          if fill_data == reference[:4]:  # fill with all zeros
            data = reference
          else:
            data = None

        if data == reference:
          zero_blocks.append(b)
          zero_blocks.append(b + 1)
        else:
          nonzero_blocks.append(b)
          nonzero_blocks.append(b + 1)

          if len(nonzero_blocks) >= MAX_BLOCKS_PER_GROUP:
            nonzero_groups.append(nonzero_blocks)
            # Clear the list.
            nonzero_blocks = []

    if nonzero_blocks:
      nonzero_groups.append(nonzero_blocks)
      nonzero_blocks = []

    assert zero_blocks or nonzero_groups or clobbered_blocks

    if zero_blocks:
      out["__ZERO"] = rangelib.RangeSet(data=zero_blocks)
    if nonzero_groups:
      for i, blocks in enumerate(nonzero_groups):
        out["__NONZERO-%d" % i] = rangelib.RangeSet(data=blocks)
    if clobbered_blocks:
      out["__COPY"] = clobbered_blocks
    if self.hashtree_info:
      out["__HASHTREE"] = self.hashtree_info.hashtree_range

  def ResetFileMap(self):
    """Throw away the file map and treat the entire image as
    undifferentiated data."""
    self.file_map = {"__DATA": self.care_map}