blob: 63f83ea8b04e52f3f1148ee88a0902f974558eb7 [file] [log] [blame]
Jiyong Parkbde94ab2021-08-11 18:32:01 +09001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Jiyong Parkec168e32021-08-13 10:26:34 +090017pub use ring::digest::{Algorithm, Digest};
18
19use ring::digest;
Jiyong Park58f9abc2021-08-12 16:48:54 +090020use std::io::{Cursor, Read, Result, Write};
Jiyong Parkbde94ab2021-08-11 18:32:01 +090021
22/// `HashTree` is a merkle tree (and its root hash) that is compatible with fs-verity.
23pub struct HashTree {
24 /// Binary presentation of the merkle tree
25 pub tree: Vec<u8>,
26 /// Root hash
27 pub root_hash: Vec<u8>,
28}
29
30impl HashTree {
31 /// Creates merkle tree from `input`, using the given `salt` and hashing `algorithm`. `input`
32 /// is divided into `block_size` chunks.
33 pub fn from<R: Read>(
34 input: &mut R,
35 input_size: usize,
36 salt: &[u8],
37 block_size: usize,
38 algorithm: &'static Algorithm,
39 ) -> Result<Self> {
40 let salt = zero_pad_salt(salt, algorithm);
41 let tree = generate_hash_tree(input, input_size, &salt, block_size, algorithm)?;
42
43 // Root hash is from the first block of the hash or the input data if there is no hash tree
Jiyong Park58f9abc2021-08-12 16:48:54 +090044 // generated which can happen when input data is smaller than block size
Jiyong Parkbde94ab2021-08-11 18:32:01 +090045 let root_hash = if tree.is_empty() {
Jiyong Park58f9abc2021-08-12 16:48:54 +090046 let mut data = Vec::new();
47 input.read_to_end(&mut data)?;
48 hash_one_block(&data, &salt, block_size, algorithm).as_ref().to_vec()
Jiyong Parkbde94ab2021-08-11 18:32:01 +090049 } else {
Jiyong Park58f9abc2021-08-12 16:48:54 +090050 let first_block = &tree[0..block_size];
51 hash_one_block(first_block, &salt, block_size, algorithm).as_ref().to_vec()
Jiyong Parkbde94ab2021-08-11 18:32:01 +090052 };
53 Ok(HashTree { tree, root_hash })
54 }
55}
56
57/// Calculate hash tree for the blocks in `input`.
58///
59/// This function implements: https://www.kernel.org/doc/html/latest/filesystems/fsverity.html#merkle-tree
60///
61/// The file contents is divided into blocks, where the block size is configurable but is usually
62/// 4096 bytes. The end of the last block is zero-padded if needed. Each block is then hashed,
63/// producing the first level of hashes. Then, the hashes in this first level are grouped into
64/// blocksize-byte blocks (zero-padding the ends as needed) and these blocks are hashed,
65/// producing the second level of hashes. This proceeds up the tree until only a single block
66/// remains.
Jiyong Parkec168e32021-08-13 10:26:34 +090067pub fn generate_hash_tree<R: Read>(
Jiyong Parkbde94ab2021-08-11 18:32:01 +090068 input: &mut R,
69 input_size: usize,
70 salt: &[u8],
71 block_size: usize,
72 algorithm: &'static Algorithm,
73) -> Result<Vec<u8>> {
74 let digest_size = algorithm.output_len;
Jiyong Park58f9abc2021-08-12 16:48:54 +090075 let levels = calc_hash_levels(input_size, block_size, digest_size);
76 let tree_size = levels.iter().map(|r| r.len()).sum();
Jiyong Parkbde94ab2021-08-11 18:32:01 +090077
Jiyong Park58f9abc2021-08-12 16:48:54 +090078 // The contiguous memory that holds the entire merkle tree
79 let mut hash_tree = vec![0; tree_size];
80
81 for (n, cur) in levels.iter().enumerate() {
82 if n == 0 {
83 // Level 0: the (zero-padded) input stream is hashed into level 0
84 let pad_size = round_to_multiple(input_size, block_size) - input_size;
85 let mut input = input.chain(Cursor::new(vec![0; pad_size]));
86 let mut level0 = Cursor::new(&mut hash_tree[cur.start..cur.end]);
87
88 let mut a_block = vec![0; block_size];
89 let mut num_blocks = (input_size + block_size - 1) / block_size;
90 while num_blocks > 0 {
91 input.read_exact(&mut a_block)?;
92 let h = hash_one_block(&a_block, salt, block_size, algorithm);
93 level0.write_all(h.as_ref()).unwrap();
94 num_blocks -= 1;
95 }
Jiyong Parkbde94ab2021-08-11 18:32:01 +090096 } else {
Jiyong Park58f9abc2021-08-12 16:48:54 +090097 // Intermediate levels: level n - 1 is hashed into level n
98 // Both levels belong to the same `hash_tree`. In order to have a mutable slice for
99 // level n while having a slice for level n - 1, take the mutable slice for both levels
100 // and split it.
101 let prev = &levels[n - 1];
102 let cur_and_prev = &mut hash_tree[cur.start..prev.end];
Jooyung Hana5e0bed2021-12-20 17:11:49 +0900103 let (cur, prev) = cur_and_prev.split_at_mut(prev.start - cur.start);
Jiyong Park58f9abc2021-08-12 16:48:54 +0900104 let mut cur = Cursor::new(cur);
105 prev.chunks(block_size).for_each(|data| {
106 let h = hash_one_block(data, salt, block_size, algorithm);
107 cur.write_all(h.as_ref()).unwrap();
108 });
109 }
Jiyong Parkbde94ab2021-08-11 18:32:01 +0900110 }
Jiyong Park58f9abc2021-08-12 16:48:54 +0900111 Ok(hash_tree)
Jiyong Parkbde94ab2021-08-11 18:32:01 +0900112}
113
Jiyong Park58f9abc2021-08-12 16:48:54 +0900114/// Hash one block of input using the given hash algorithm and the salt. Input might be smaller
115/// than a block, in which case zero is padded.
116fn hash_one_block(
117 input: &[u8],
Jiyong Parkbde94ab2021-08-11 18:32:01 +0900118 salt: &[u8],
119 block_size: usize,
120 algorithm: &'static Algorithm,
Jiyong Park58f9abc2021-08-12 16:48:54 +0900121) -> Digest {
122 let mut ctx = digest::Context::new(algorithm);
123 ctx.update(salt);
124 ctx.update(input);
125 let pad_size = block_size - input.len();
126 ctx.update(&vec![0; pad_size]);
127 ctx.finish()
Jiyong Parkbde94ab2021-08-11 18:32:01 +0900128}
129
Jiyong Park58f9abc2021-08-12 16:48:54 +0900130type Range = std::ops::Range<usize>;
131
132/// Calculate the ranges of hash for each level
133fn calc_hash_levels(input_size: usize, block_size: usize, digest_size: usize) -> Vec<Range> {
Jiyong Parkbde94ab2021-08-11 18:32:01 +0900134 // The input is split into multiple blocks and each block is hashed, which becomes the input
135 // for the next level. Size of a single hash is `digest_size`.
136 let mut level_sizes = Vec::new();
137 loop {
138 // Input for this level is from either the last level (if exists), or the input parameter.
139 let input_size = *level_sizes.last().unwrap_or(&input_size);
140 if input_size <= block_size {
141 break;
142 }
143 let num_blocks = (input_size + block_size - 1) / block_size;
144 let hashes_size = round_to_multiple(num_blocks * digest_size, block_size);
145 level_sizes.push(hashes_size);
146 }
Jiyong Parkbde94ab2021-08-11 18:32:01 +0900147
148 // The hash tree is stored upside down. The top level is at offset 0. The second level comes
149 // next, and so on. Level 0 is located at the end.
150 //
151 // Given level_sizes [10, 3, 1], the offsets for each label are ...
152 //
153 // Level 2 is at offset 0
154 // Level 1 is at offset 1 (because Level 2 is of size 1)
155 // Level 0 is at offset 4 (because Level 1 is of size 3)
156 //
Jiyong Park58f9abc2021-08-12 16:48:54 +0900157 // This is done by scanning the sizes in reverse order
158 let mut ranges = level_sizes
159 .iter()
160 .rev()
161 .scan(0, |prev_end, size| {
162 let range = *prev_end..*prev_end + size;
163 *prev_end = range.end;
164 Some(range)
165 })
166 .collect::<Vec<_>>();
167 ranges.reverse(); // reverse again so that index N is for level N
168 ranges
Jiyong Parkbde94ab2021-08-11 18:32:01 +0900169}
170
171/// Round `n` up to the nearest multiple of `unit`
172fn round_to_multiple(n: usize, unit: usize) -> usize {
173 (n + unit - 1) & !(unit - 1)
174}
175
176/// Pad zero to salt if necessary.
177///
178/// According to https://www.kernel.org/doc/html/latest/filesystems/fsverity.html:
179///
180/// If a salt was specified, then it’s zero-padded to the closest multiple of the input size of the
181/// hash algorithm’s compression function, e.g. 64 bytes for SHA-256 or 128 bytes for SHA-512. The
182/// padded salt is prepended to every data or Merkle tree block that is hashed.
183fn zero_pad_salt(salt: &[u8], algorithm: &Algorithm) -> Vec<u8> {
184 if salt.is_empty() {
185 salt.to_vec()
186 } else {
187 let padded_len = round_to_multiple(salt.len(), algorithm.block_len);
188 let mut salt = salt.to_vec();
189 salt.resize(padded_len, 0);
190 salt
191 }
192}
193
194#[cfg(test)]
195mod tests {
Andrew Walbran117cd5e2021-08-13 11:42:13 +0000196 use super::*;
Jiyong Parkbde94ab2021-08-11 18:32:01 +0900197 use ring::digest;
198 use std::fs::{self, File};
199
200 #[test]
201 fn compare_with_golden_output() -> Result<()> {
202 // The golden outputs are generated by using the `fsverity` utility.
Jooyung Hana5e0bed2021-12-20 17:11:49 +0900203 let sizes = ["512", "4K", "1M", "10000000", "272629760"];
Jiyong Parkbde94ab2021-08-11 18:32:01 +0900204 for size in sizes.iter() {
205 let input_name = format!("testdata/input.{}", size);
206 let mut input = File::open(&input_name)?;
207 let golden_hash_tree = fs::read(format!("testdata/input.{}.hash", size))?;
208 let golden_descriptor = fs::read(format!("testdata/input.{}.descriptor", size))?;
209 let golden_root_hash = &golden_descriptor[16..16 + 32];
210
211 let size = std::fs::metadata(&input_name)?.len() as usize;
212 let salt = vec![1, 2, 3, 4, 5, 6];
213 let ht = HashTree::from(&mut input, size, &salt, 4096, &digest::SHA256)?;
214
215 assert_eq!(golden_hash_tree.as_slice(), ht.tree.as_slice());
216 assert_eq!(golden_root_hash, ht.root_hash.as_slice());
217 }
218 Ok(())
219 }
220}