# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import bisect
import os
import struct
import threading
from hashlib import sha1

import rangelib

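# A minimal usage sketch (file names and values below are hypothetical):
#
#   simg = SparseImage("system.img", file_map_fn="system.map",
#                      clobbered_blocks="0")
#   print(simg.care_map)          # RangeSet of blocks that carry data
#   print(sorted(simg.file_map))  # file names plus entries like __ZERO
#   digest = simg.TotalSha1()     # SHA-1 over the care regions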
class SparseImage(object):
  """Wraps a sparse image file into an image object.

  Wraps a sparse image file (and optional file map and clobbered_blocks) into
  an image object suitable for passing to BlockImageDiff. file_map contains
  the mapping between files and their blocks. clobbered_blocks contains the
  set of blocks that should always be written to the target regardless of the
  old contents (i.e. copying instead of patching). clobbered_blocks should be
  in the form of a string like "0" or "0 1-5 8".
  """

  def __init__(self, simg_fn, file_map_fn=None, clobbered_blocks=None,
               mode="rb", build_map=True, allow_shared_blocks=False,
               hashtree_info_generator=None):
    self.simg_f = f = open(simg_fn, mode)

    header_bin = f.read(28)
    header = struct.unpack("<I4H4I", header_bin)

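    # Layout of the 28-byte sparse image header, as implied by the "<I4H4I"
    # format above and the checks below (the trailing word is the image
    # checksum in the sparse format, unused by this parser):
    #   magic           u32  0xED26FF3A
    #   major_version   u16  1
    #   minor_version   u16  0
    #   file_hdr_sz     u16  28
    #   chunk_hdr_sz    u16  12
    #   blk_sz          u32  block size in bytes (typically 4096)
    #   total_blks      u32  blocks in the output image
    #   total_chunks    u32  chunks in this file
    #   image_checksum  u32  unused here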
    magic = header[0]
    major_version = header[1]
    minor_version = header[2]
    file_hdr_sz = header[3]
    chunk_hdr_sz = header[4]
    self.blocksize = blk_sz = header[5]
    self.total_blocks = total_blks = header[6]
    self.total_chunks = total_chunks = header[7]

    if magic != 0xED26FF3A:
      raise ValueError("Magic should be 0xED26FF3A but is 0x%08X" % (magic,))
    if major_version != 1 or minor_version != 0:
      raise ValueError("I know about version 1.0, but this is version %u.%u" %
                       (major_version, minor_version))
    if file_hdr_sz != 28:
      raise ValueError("File header size was expected to be 28, but is %u." %
                       (file_hdr_sz,))
    if chunk_hdr_sz != 12:
      raise ValueError("Chunk header size was expected to be 12, but is %u." %
                       (chunk_hdr_sz,))

    print("Total of %u %u-byte output blocks in %u input chunks."
          % (total_blks, blk_sz, total_chunks))

    if not build_map:
      assert not hashtree_info_generator, \
          "Cannot generate the hashtree info without building the offset map."
      return

    pos = 0  # in blocks
    care_data = []
    self.offset_map = offset_map = []
    self.clobbered_blocks = rangelib.RangeSet(data=clobbered_blocks)

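    # Each chunk is preceded by a 12-byte header, unpacked below as "<2H2I":
    #   chunk_type  u16  0xCAC1 (raw), 0xCAC2 (fill), 0xCAC3 (don't care),
    #                    0xCAC4 (CRC32, unsupported here)
    #   reserved    u16  ignored
    #   chunk_sz    u32  chunk size in output blocks
    #   total_sz    u32  chunk size in this file, including the 12-byte header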
    for i in range(total_chunks):
      header_bin = f.read(12)
      header = struct.unpack("<2H2I", header_bin)
      chunk_type = header[0]
      chunk_sz = header[2]
      total_sz = header[3]
      data_sz = total_sz - 12

      if chunk_type == 0xCAC1:
        if data_sz != (chunk_sz * blk_sz):
          raise ValueError(
              "Raw chunk input size (%u) does not match output size (%u)" %
              (data_sz, chunk_sz * blk_sz))
        else:
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          offset_map.append((pos, chunk_sz, f.tell(), None))
          pos += chunk_sz
          f.seek(data_sz, os.SEEK_CUR)

      elif chunk_type == 0xCAC2:
        fill_data = f.read(4)
        care_data.append(pos)
        care_data.append(pos + chunk_sz)
        offset_map.append((pos, chunk_sz, None, fill_data))
        pos += chunk_sz

      elif chunk_type == 0xCAC3:
        if data_sz != 0:
          raise ValueError("Don't care chunk input size is non-zero (%u)" %
                           (data_sz))
        # Fill the don't care data ranges with zeros.
        # TODO(xunchang) pass the care_map to hashtree info generator.
        if hashtree_info_generator:
          fill_data = '\x00' * 4
          # In order to compute the verity hashtree on device, we need to
          # write zeros explicitly to the don't care ranges, because these
          # ranges may contain non-zero data from the previous build.
          care_data.append(pos)
          care_data.append(pos + chunk_sz)
          offset_map.append((pos, chunk_sz, None, fill_data))

        pos += chunk_sz

      elif chunk_type == 0xCAC4:
        raise ValueError("CRC32 chunks are not supported")

      else:
        raise ValueError("Unknown chunk type 0x%04X not supported" %
                         (chunk_type,))

    self.generator_lock = threading.Lock()

    self.care_map = rangelib.RangeSet(care_data)
    self.offset_index = [i[0] for i in offset_map]

    # Bug: 20881595
    # Introduce extended blocks as a workaround for the bug. dm-verity may
    # touch blocks that are not in the care_map due to block device
    # read-ahead. It will fail if such blocks contain non-zeroes. We zero out
    # the extended blocks explicitly to avoid dm-verity failures. 512 blocks
    # are the maximum read-ahead we configure for dm-verity block devices.
    extended = self.care_map.extend(512)
    all_blocks = rangelib.RangeSet(data=(0, self.total_blocks))
    extended = extended.intersect(all_blocks).subtract(self.care_map)
    self.extended = extended
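    # A worked example of the computation above (assuming RangeSet.extend(n)
    # grows each range by n blocks at both ends): with a care_map of "0-1023"
    # and total_blocks of 1200, self.extended ends up as "1024-1199".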

    self.hashtree_info = None
    if hashtree_info_generator:
      self.hashtree_info = hashtree_info_generator.Generate(self)

    if file_map_fn:
      self.LoadFileBlockMap(file_map_fn, self.clobbered_blocks,
                            allow_shared_blocks)
    else:
      self.file_map = {"__DATA": self.care_map}

  def AppendFillChunk(self, data, blocks):
    """Appends a fill chunk covering the given number of blocks.

    'data' is the 32-bit fill value; the sparse header is updated in place.
    """
    f = self.simg_f

    # Append a fill chunk.
    f.seek(0, os.SEEK_END)
    f.write(struct.pack("<2H3I", 0xCAC2, 0, blocks, 16, data))

    # Update the sparse header.
    self.total_blocks += blocks
    self.total_chunks += 1

    f.seek(16, os.SEEK_SET)
    f.write(struct.pack("<2I", self.total_blocks, self.total_chunks))

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data in the given block ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    """Returns the data in the given block ranges as a list of strings."""
    return [d for d in self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 hash of all data in the 'care' regions.

    If include_clobbered_blocks is True, it returns the hash including the
    clobbered_blocks."""
    ranges = self.care_map
    if not include_clobbered_blocks:
      ranges = ranges.subtract(self.clobbered_blocks)
    return self.RangeSha1(ranges)

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in the given block ranges to the given file object."""
    for data in self._GetRangeData(ranges):
      fd.write(data)

  def _GetRangeData(self, ranges):
    """Generator that produces all the image data in 'ranges'.

    The number of individual pieces returned is arbitrary (and in particular
    is not necessarily equal to the number of ranges in 'ranges').

    A lock protects the generator so that two instances of it are never run
    on the same object simultaneously."""

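    # offset_index holds the starting block of every chunk, in order, so a
    # bisect over it maps a block number to the chunk containing that block.
    # A short walk-through with hypothetical values:
    #   offset_index = [0, 100, 164]   # chunks start at blocks 0, 100, 164
    #   s = 130  ->  bisect_right(offset_index, 130) - 1 == 1, the chunk at 100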
    f = self.simg_f
    with self.generator_lock:
      for s, e in ranges:
        to_read = e-s
        idx = bisect.bisect_right(self.offset_index, s) - 1
        chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]

        # For the first chunk we may be starting partway through it.
        remain = chunk_len - (s - chunk_start)
        this_read = min(remain, to_read)
        if filepos is not None:
          p = filepos + ((s - chunk_start) * self.blocksize)
          f.seek(p, os.SEEK_SET)
          yield f.read(this_read * self.blocksize)
        else:
          yield fill_data * (this_read * (self.blocksize >> 2))
        to_read -= this_read

        while to_read > 0:
          # Continue with following chunks if this range spans multiple chunks.
          idx += 1
          chunk_start, chunk_len, filepos, fill_data = self.offset_map[idx]
          this_read = min(chunk_len, to_read)
          if filepos is not None:
            f.seek(filepos, os.SEEK_SET)
            yield f.read(this_read * self.blocksize)
          else:
            yield fill_data * (this_read * (self.blocksize >> 2))
          to_read -= this_read

  def LoadFileBlockMap(self, fn, clobbered_blocks, allow_shared_blocks):
    """Loads the given block map file.

    Args:
      fn: The filename of the block map file.
      clobbered_blocks: A RangeSet instance for the clobbered blocks.
      allow_shared_blocks: Whether having shared blocks is allowed.
    """
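    # Each line of the map file is a file name followed by its block ranges
    # in RangeSet text form; a hypothetical entry:
    #   /system/framework/framework.jar 8-12 76-78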
    remaining = self.care_map
    self.file_map = out = {}

    with open(fn) as f:
      for line in f:
        fn, ranges = line.split(None, 1)
        ranges = rangelib.RangeSet.parse(ranges)

        if allow_shared_blocks:
          # Find the shared blocks that have been claimed by others.
          shared_blocks = ranges.subtract(remaining)
          if shared_blocks:
            ranges = ranges.subtract(shared_blocks)
            if not ranges:
              continue

            # Tag the entry so that we can skip applying imgdiff on this file.
            ranges.extra['uses_shared_blocks'] = True

        out[fn] = ranges
        assert ranges.size() == ranges.intersect(remaining).size()

        # Currently we assume that blocks in clobbered_blocks are not part of
        # any file.
        assert not clobbered_blocks.overlaps(ranges)
        remaining = remaining.subtract(ranges)

    remaining = remaining.subtract(clobbered_blocks)
    if self.hashtree_info:
      remaining = remaining.subtract(self.hashtree_info.hashtree_range)

    # For all the remaining blocks in the care_map (i.e., those that aren't
    # part of the data for any file nor part of the clobbered_blocks), divide
    # them into blocks that are all zero and blocks that aren't.  (Zero blocks
    # are handled specially because (1) there are usually a lot of them and
    # (2) bsdiff handles files with long sequences of repeated bytes
    # especially poorly.)

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Workaround for bug 23227672. For squashfs, we don't have a system.map.
    # So the whole system image will be treated as a single file. But because
    # of an unknown bug, the updater will be killed due to OOM when writing
    # back the patched image to flash (observed on lenok-userdebug MEA49).
    # Prior to getting a real fix, we evenly divide the non-zero blocks into
    # smaller groups (currently 1024 blocks or 4MB per group).
    # Bug: 23227672
    MAX_BLOCKS_PER_GROUP = 1024
    nonzero_groups = []

    f = self.simg_f
    for s, e in remaining:
      for b in range(s, e):
        idx = bisect.bisect_right(self.offset_index, b) - 1
        chunk_start, _, filepos, fill_data = self.offset_map[idx]
        if filepos is not None:
          filepos += (b-chunk_start) * self.blocksize
          f.seek(filepos, os.SEEK_SET)
          data = f.read(self.blocksize)
        else:
          if fill_data == reference[:4]:  # fill with all zeros
            data = reference
          else:
            data = None

        if data == reference:
          zero_blocks.append(b)
          zero_blocks.append(b+1)
        else:
          nonzero_blocks.append(b)
          nonzero_blocks.append(b+1)

        if len(nonzero_blocks) >= MAX_BLOCKS_PER_GROUP:
          nonzero_groups.append(nonzero_blocks)
          # Clear the list.
          nonzero_blocks = []

    if nonzero_blocks:
      nonzero_groups.append(nonzero_blocks)
      nonzero_blocks = []

    assert zero_blocks or nonzero_groups or clobbered_blocks

    if zero_blocks:
      out["__ZERO"] = rangelib.RangeSet(data=zero_blocks)
    if nonzero_groups:
      for i, blocks in enumerate(nonzero_groups):
        out["__NONZERO-%d" % i] = rangelib.RangeSet(data=blocks)
    if clobbered_blocks:
      out["__COPY"] = clobbered_blocks
    if self.hashtree_info:
      out["__HASHTREE"] = self.hashtree_info.hashtree_range

  def ResetFileMap(self):
    """Throw away the file map and treat the entire image as
    undifferentiated data."""
    self.file_map = {"__DATA": self.care_map}