blob: 97c6024af93edfd63e7c6fb27559a98813904eb6 [file] [log] [blame]
Jiyong Parkbf9673a2021-08-11 15:58:07 +09001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
Jiyong Park4c2f33d2021-08-11 17:09:58 +090016
17//! `idsig` provides routines for creating the idsig file that is defined for the APK signature
18//! scheme v4 and for parsing the file.
19
Jiyong Parkbf9673a2021-08-11 15:58:07 +090020use ring::digest::{self, Algorithm};
21use std::io::{Cursor, Read, Result, Seek, SeekFrom, Write};
22
23/// `HashTree` is a merkle tree (and its root hash) that is compatible with fs-verity.
24pub struct HashTree {
Jiyong Park4c2f33d2021-08-11 17:09:58 +090025 /// Binary presentation of the merkle tree
Jiyong Parkbf9673a2021-08-11 15:58:07 +090026 pub tree: Vec<u8>,
Jiyong Park4c2f33d2021-08-11 17:09:58 +090027 /// Root hash
Jiyong Parkbf9673a2021-08-11 15:58:07 +090028 pub root_hash: Vec<u8>,
29}
30
31impl HashTree {
32 /// Creates merkle tree from `input`, using the given `salt` and hashing `algorithm`. `input`
33 /// is divided into `block_size` chunks.
34 pub fn from<R: Read>(
35 input: &mut R,
36 input_size: usize,
37 salt: &[u8],
38 block_size: usize,
39 algorithm: &'static Algorithm,
40 ) -> Result<Self> {
Jiyong Park4c2f33d2021-08-11 17:09:58 +090041 let salt = zero_pad_salt(salt, algorithm);
Jiyong Parkbf9673a2021-08-11 15:58:07 +090042 let tree = generate_hash_tree(input, input_size, &salt, block_size, algorithm)?;
43
44 // Root hash is from the first block of the hash or the input data if there is no hash tree
45 // generate which can happen when input data is smaller than block size
46 let root_hash = if tree.is_empty() {
47 hash_one_level(input, input_size, &salt, block_size, algorithm)?
48 } else {
49 let mut ctx = digest::Context::new(algorithm);
50 ctx.update(&salt);
51 ctx.update(&tree[0..block_size]);
52 ctx.finish().as_ref().to_vec()
53 };
54 Ok(HashTree { tree, root_hash })
55 }
56}
57
58/// Calculate hash tree for the blocks in `input`.
59///
60/// This function implements: https://www.kernel.org/doc/html/latest/filesystems/fsverity.html#merkle-tree
61///
62/// The file contents is divided into blocks, where the block size is configurable but is usually
63/// 4096 bytes. The end of the last block is zero-padded if needed. Each block is then hashed,
64/// producing the first level of hashes. Then, the hashes in this first level are grouped into
65/// blocksize-byte blocks (zero-padding the ends as needed) and these blocks are hashed,
66/// producing the second level of hashes. This proceeds up the tree until only a single block
67/// remains.
68fn generate_hash_tree<R: Read>(
69 input: &mut R,
70 input_size: usize,
71 salt: &[u8],
72 block_size: usize,
73 algorithm: &'static Algorithm,
74) -> Result<Vec<u8>> {
75 let digest_size = algorithm.output_len;
76 let (hash_level_offsets, tree_size) =
77 calc_hash_level_offsets(input_size, block_size, digest_size);
78
79 let mut hash_tree = Cursor::new(vec![0; tree_size]);
80 let mut input_size = input_size;
81 for (level, offset) in hash_level_offsets.iter().enumerate() {
82 let hashes = if level == 0 {
83 hash_one_level(input, input_size, salt, block_size, algorithm)?
84 } else {
85 // For the intermediate levels, input is the output from the previous level
86 hash_tree.seek(SeekFrom::Start(hash_level_offsets[level - 1] as u64)).unwrap();
87 hash_one_level(&mut hash_tree, input_size, salt, block_size, algorithm)?
88 };
89 hash_tree.seek(SeekFrom::Start(*offset as u64)).unwrap();
90 hash_tree.write_all(hashes.as_ref()).unwrap();
91 // Output from this level becomes input for the next level
92 input_size = hashes.len();
93 }
94 Ok(hash_tree.into_inner())
95}
96
97/// Calculate hashes for the blocks in `input`. The end of the last block is zero-padded if needed.
98/// Each block is then hashed, producing a stream of hashes for a level.
99fn hash_one_level<R: Read>(
100 input: &mut R,
101 input_size: usize,
102 salt: &[u8],
103 block_size: usize,
104 algorithm: &'static Algorithm,
105) -> Result<Vec<u8>> {
106 // Input is zero padded when it's not multiple of blocks. Note that `take()` is also needed to
107 // not read more than `input_size` from the `input` reader. This is required because `input`
108 // can be from the in-memory hashtree. We need to read only the part of hashtree that is for
109 // the current level.
110 let pad_size = round_to_multiple(input_size, block_size) - input_size;
111 let mut input = input.take(input_size as u64).chain(Cursor::new(vec![0; pad_size]));
112
113 // Read one block from input, write the hash of it to the output. Repeat that for all input
114 // blocks.
115 let mut hashes = Cursor::new(Vec::new());
116 let mut buf = vec![0; block_size];
117 let mut num_blocks = (input_size + block_size - 1) / block_size;
118 while num_blocks > 0 {
119 input.read_exact(&mut buf)?;
120 let mut ctx = digest::Context::new(algorithm);
121 ctx.update(salt);
122 ctx.update(&buf);
123 let hash = ctx.finish();
124 hashes.write_all(hash.as_ref())?;
125 num_blocks -= 1;
126 }
127 Ok(hashes.into_inner())
128}
129
130/// Calculate the size of hashes for each level, and also returns the total size of the hash tree.
131/// This function is needed because hash tree is stored upside down; hashes for level N is stored
132/// "after" hashes for level N + 1.
133fn calc_hash_level_offsets(
134 input_size: usize,
135 block_size: usize,
136 digest_size: usize,
137) -> (Vec<usize>, usize) {
138 // The input is split into multiple blocks and each block is hashed, which becomes the input
139 // for the next level. Size of a single hash is `digest_size`.
140 let mut level_sizes = Vec::new();
141 loop {
142 // Input for this level is from either the last level (if exists), or the input parameter.
143 let input_size = *level_sizes.last().unwrap_or(&input_size);
144 if input_size <= block_size {
145 break;
146 }
147 let num_blocks = (input_size + block_size - 1) / block_size;
148 let hashes_size = round_to_multiple(num_blocks * digest_size, block_size);
149 level_sizes.push(hashes_size);
150 }
151 if level_sizes.is_empty() {
152 return ([].to_vec(), 0);
153 }
154
155 // The hash tree is stored upside down. The top level is at offset 0. The second level comes
156 // next, and so on. Level 0 is located at the end.
157 //
158 // Given level_sizes [10, 3, 1], the offsets for each label are ...
159 //
160 // Level 2 is at offset 0
161 // Level 1 is at offset 1 (because Level 2 is of size 1)
162 // Level 0 is at offset 4 (because Level 1 is of size 3)
163 //
164 // This is done by accumulating the sizes in reverse order (i.e. from the highest level to the
165 // level 1 (not level 0)
166 let mut offsets = level_sizes.iter().rev().take(level_sizes.len() - 1).fold(
167 vec![0; 1], // offset for the top level
168 |mut offsets, size| {
169 offsets.push(offsets.last().unwrap() + size);
170 offsets
171 },
172 );
173 offsets.reverse(); // reverse the offsets again so that index N is for level N
174 let tree_size = level_sizes.iter().sum();
175 (offsets, tree_size)
176}
177
178/// Round `n` up to the nearest multiple of `unit`
179fn round_to_multiple(n: usize, unit: usize) -> usize {
180 (n + unit - 1) & !(unit - 1)
181}
182
183/// Pad zero to salt if necessary.
184///
185/// According to https://www.kernel.org/doc/html/latest/filesystems/fsverity.html:
186///
187/// If a salt was specified, then it’s zero-padded to the closest multiple of the input size of the
188/// hash algorithm’s compression function, e.g. 64 bytes for SHA-256 or 128 bytes for SHA-512. The
189/// padded salt is prepended to every data or Merkle tree block that is hashed.
190fn zero_pad_salt(salt: &[u8], algorithm: &Algorithm) -> Vec<u8> {
191 if salt.is_empty() {
192 salt.to_vec()
193 } else {
194 let padded_len = round_to_multiple(salt.len(), algorithm.block_len);
195 let mut salt = salt.to_vec();
196 salt.resize(padded_len, 0);
197 salt
198 }
199}
200
201#[cfg(test)]
202mod tests {
203 use crate::*;
204 use ring::digest;
205 use std::fs::{self, File};
206
207 #[test]
208 fn compare_with_golden_output() -> Result<()> {
209 // The golden outputs are generated by using the `fsverity` utility.
210 let sizes = ["512", "4K", "1M", "10000000"];
211 for size in sizes.iter() {
212 let input_name = format!("testdata/input.{}", size);
213 let mut input = File::open(&input_name)?;
214 let golden_hash_tree = fs::read(format!("testdata/input.{}.hash", size))?;
215 let golden_descriptor = fs::read(format!("testdata/input.{}.descriptor", size))?;
216 let golden_root_hash = &golden_descriptor[16..16 + 32];
217
218 let size = std::fs::metadata(&input_name)?.len() as usize;
219 let salt = vec![1, 2, 3, 4, 5, 6];
220 let ht = HashTree::from(&mut input, size, &salt, 4096, &digest::SHA256)?;
221
222 assert_eq!(golden_hash_tree.as_slice(), ht.tree.as_slice());
223 assert_eq!(golden_root_hash, ht.root_hash.as_slice());
224 }
225 Ok(())
226 }
227}