Victor Hsieh | dde1790 | 2021-02-26 12:35:31 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2021 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | use super::common::{build_fsverity_digest, merkle_tree_height, FsverityError}; |
Victor Hsieh | 9d0ab62 | 2021-04-26 17:07:02 -0700 | [diff] [blame^] | 18 | use crate::common::{divide_roundup, CHUNK_SIZE}; |
Victor Hsieh | dde1790 | 2021-02-26 12:35:31 -0800 | [diff] [blame] | 19 | use crate::crypto::{CryptoError, Sha256Hash, Sha256Hasher}; |
| 20 | |
| 21 | const HASH_SIZE: usize = Sha256Hasher::HASH_SIZE; |
| 22 | const HASH_PER_PAGE: usize = CHUNK_SIZE as usize / HASH_SIZE; |
| 23 | |
| 24 | /// MerkleLeaves can be used by the class' customer for bookkeeping integrity data for their bytes. |
| 25 | /// It can also be used to generate the standard fs-verity digest for the source data. |
| 26 | /// |
| 27 | /// It's in-memory because for the initial use cases, we don't need to read back an existing file, |
| 28 | /// and only need to deal with new files. Also, considering that the output file won't be large at |
| 29 | /// the moment, it is sufficient to simply keep the Merkle tree in memory in the trusted world. To |
| 30 | /// further simplify the initial implementation, we only need to keep the leaf nodes in memory, and |
| 31 | /// generate the tree / root hash when requested. |
| 32 | pub struct MerkleLeaves { |
| 33 | leaves: Vec<Sha256Hash>, |
| 34 | file_size: u64, |
| 35 | } |
| 36 | |
| 37 | fn hash_all_pages(source: &[Sha256Hash]) -> Result<Vec<Sha256Hash>, CryptoError> { |
| 38 | source |
| 39 | .chunks(HASH_PER_PAGE) |
| 40 | .map(|chunk| { |
| 41 | let padding_bytes = (HASH_PER_PAGE - chunk.len()) * HASH_SIZE; |
Victor Hsieh | 6cf75b5 | 2021-04-01 12:45:49 -0700 | [diff] [blame] | 42 | Sha256Hasher::new()?.update_from(chunk)?.update(&vec![0u8; padding_bytes])?.finalize() |
Victor Hsieh | dde1790 | 2021-02-26 12:35:31 -0800 | [diff] [blame] | 43 | }) |
| 44 | .collect() |
| 45 | } |
| 46 | |
Victor Hsieh | dde1790 | 2021-02-26 12:35:31 -0800 | [diff] [blame] | 47 | impl MerkleLeaves { |
| 48 | /// Creates a `MerkleLeaves` instance with empty data. |
| 49 | pub fn new() -> Self { |
| 50 | Self { leaves: Vec::new(), file_size: 0 } |
| 51 | } |
| 52 | |
Victor Hsieh | 6a47e7f | 2021-03-03 15:53:49 -0800 | [diff] [blame] | 53 | /// Gets size of the file represented by `MerkleLeaves`. |
| 54 | pub fn file_size(&self) -> u64 { |
| 55 | self.file_size |
| 56 | } |
| 57 | |
Victor Hsieh | 9d0ab62 | 2021-04-26 17:07:02 -0700 | [diff] [blame^] | 58 | /// Grows the `MerkleLeaves` to the new file size. To keep the consistency, if any new leaves |
| 59 | /// are added, the leaves/hashes are as if the extended content is all zero. |
| 60 | /// |
| 61 | /// However, when the change shrinks the leaf number, `MerkleLeaves` does not know if the hash |
| 62 | /// of the last chunk has changed, or what the new value should be. As the result, it is up to |
| 63 | /// the caller to fix the last leaf if needed. |
| 64 | pub fn resize(&mut self, new_file_size: usize) { |
| 65 | let new_file_size = new_file_size as u64; |
| 66 | let leaves_size = divide_roundup(new_file_size, CHUNK_SIZE); |
| 67 | self.leaves.resize(leaves_size as usize, Sha256Hasher::HASH_OF_4096_ZEROS); |
| 68 | self.file_size = new_file_size; |
| 69 | } |
| 70 | |
Victor Hsieh | dde1790 | 2021-02-26 12:35:31 -0800 | [diff] [blame] | 71 | /// Updates the hash of the `index`-th leaf, and increase the size to `size_at_least` if the |
| 72 | /// current size is smaller. |
| 73 | pub fn update_hash(&mut self, index: usize, hash: &Sha256Hash, size_at_least: u64) { |
| 74 | // +1 since index is zero-based. |
| 75 | if self.leaves.len() < index + 1 { |
| 76 | // When resizing, fill in hash of zeros by default. This makes it easy to handle holes |
| 77 | // in a file. |
| 78 | self.leaves.resize(index + 1, Sha256Hasher::HASH_OF_4096_ZEROS); |
| 79 | } |
| 80 | self.leaves[index].clone_from_slice(hash); |
| 81 | |
| 82 | if size_at_least > self.file_size { |
| 83 | self.file_size = size_at_least; |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | /// Returns whether `index` is within the bound of leaves. |
| 88 | pub fn is_index_valid(&self, index: usize) -> bool { |
| 89 | index < self.leaves.len() |
| 90 | } |
| 91 | |
| 92 | /// Returns whether the `index`-th hash is consistent to `hash`. |
| 93 | pub fn is_consistent(&self, index: usize, hash: &Sha256Hash) -> bool { |
| 94 | if let Some(element) = self.leaves.get(index) { |
| 95 | element == hash |
| 96 | } else { |
| 97 | false |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | fn calculate_root_hash(&self) -> Result<Sha256Hash, FsverityError> { |
| 102 | match self.leaves.len() { |
| 103 | // Special cases per fs-verity digest definition. |
| 104 | 0 => { |
| 105 | debug_assert_eq!(self.file_size, 0); |
| 106 | Ok([0u8; HASH_SIZE]) |
| 107 | } |
| 108 | 1 => { |
| 109 | debug_assert!(self.file_size <= CHUNK_SIZE && self.file_size > 0); |
| 110 | Ok(self.leaves[0]) |
| 111 | } |
| 112 | n => { |
| 113 | debug_assert_eq!((self.file_size - 1) / CHUNK_SIZE, n as u64); |
| 114 | let size_for_equivalent = n as u64 * CHUNK_SIZE; |
| 115 | let level = merkle_tree_height(size_for_equivalent).unwrap(); // safe since n > 0 |
| 116 | |
| 117 | // `leaves` is owned and can't be the initial state below. Here we manually hash it |
| 118 | // first to avoid a copy and to get the type right. |
| 119 | let second_level = hash_all_pages(&self.leaves)?; |
| 120 | let hashes = |
| 121 | (1..=level).try_fold(second_level, |source, _| hash_all_pages(&source))?; |
| 122 | if hashes.len() != 1 { |
| 123 | Err(FsverityError::InvalidState) |
| 124 | } else { |
| 125 | Ok(hashes.into_iter().next().unwrap()) |
| 126 | } |
| 127 | } |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | /// Returns the fs-verity digest based on the current tree and file size. |
| 132 | pub fn calculate_fsverity_digest(&self) -> Result<Sha256Hash, FsverityError> { |
| 133 | let root_hash = self.calculate_root_hash()?; |
| 134 | Ok(build_fsverity_digest(&root_hash, self.file_size)?) |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | #[cfg(test)] |
| 139 | mod tests { |
| 140 | // Test data below can be generated by: |
| 141 | // $ perl -e 'print "\x{00}" x 6000' > foo |
| 142 | // $ perl -e 'print "\x{01}" x 5000' >> foo |
| 143 | // $ fsverity digest foo |
| 144 | use super::*; |
| 145 | use anyhow::Result; |
| 146 | |
| 147 | #[test] |
| 148 | fn merkle_tree_empty_file() -> Result<()> { |
| 149 | assert_eq!( |
| 150 | to_u8_vec("3d248ca542a24fc62d1c43b916eae5016878e2533c88238480b26128a1f1af95"), |
| 151 | generate_fsverity_digest_sequentially(&Vec::new())? |
| 152 | ); |
| 153 | Ok(()) |
| 154 | } |
| 155 | |
| 156 | #[test] |
| 157 | fn merkle_tree_file_size_less_than_or_equal_to_4k() -> Result<()> { |
| 158 | // Test a file that contains 4096 '\01's. |
| 159 | assert_eq!( |
| 160 | to_u8_vec("cd0875ca59c7d37e962c5e8f5acd3770750ac80225e2df652ce5672fd34500af"), |
| 161 | generate_fsverity_digest_sequentially(&vec![1; 4096])? |
| 162 | ); |
| 163 | Ok(()) |
| 164 | } |
| 165 | |
| 166 | #[test] |
| 167 | fn merkle_tree_more_sizes() -> Result<()> { |
| 168 | // Test files that contains >4096 '\01's. |
| 169 | |
| 170 | assert_eq!( |
| 171 | to_u8_vec("2901b849fda2d91e3929524561c4a47e77bb64734319759507b2029f18b9cc52"), |
| 172 | generate_fsverity_digest_sequentially(&vec![1; 4097])? |
| 173 | ); |
| 174 | |
| 175 | assert_eq!( |
| 176 | to_u8_vec("2a476d58eb80394052a3a783111e1458ac3ecf68a7878183fed86ca0ff47ec0d"), |
| 177 | generate_fsverity_digest_sequentially(&vec![1; 8192])? |
| 178 | ); |
| 179 | |
| 180 | // Test with max size that still fits in 2 levels. |
| 181 | assert_eq!( |
| 182 | to_u8_vec("26b7c190a34e19f420808ee7ec233b09fa6c34543b5a9d2950530114c205d14f"), |
| 183 | generate_fsverity_digest_sequentially(&vec![1; 524288])? |
| 184 | ); |
| 185 | |
| 186 | // Test with data that requires 3 levels. |
| 187 | assert_eq!( |
| 188 | to_u8_vec("316835d9be1c95b5cd55d07ae7965d651689efad186e26cbf680e40b683a3262"), |
| 189 | generate_fsverity_digest_sequentially(&vec![1; 524289])? |
| 190 | ); |
| 191 | Ok(()) |
| 192 | } |
| 193 | |
| 194 | #[test] |
| 195 | fn merkle_tree_non_sequential() -> Result<()> { |
| 196 | let mut tree = MerkleLeaves::new(); |
| 197 | let hash = Sha256Hasher::new()?.update(&vec![1u8; CHUNK_SIZE as usize])?.finalize()?; |
| 198 | |
| 199 | // Update hashes of 4 1-blocks. |
| 200 | tree.update_hash(1, &hash, CHUNK_SIZE * 2); |
| 201 | tree.update_hash(3, &hash, CHUNK_SIZE * 4); |
| 202 | tree.update_hash(0, &hash, CHUNK_SIZE); |
| 203 | tree.update_hash(2, &hash, CHUNK_SIZE * 3); |
| 204 | |
| 205 | assert_eq!( |
| 206 | to_u8_vec("7d3c0d2e1dc54230b20ed875f5f3a4bd3f9873df601936b3ca8127d4db3548f3"), |
| 207 | tree.calculate_fsverity_digest()? |
| 208 | ); |
| 209 | Ok(()) |
| 210 | } |
| 211 | |
Victor Hsieh | 9d0ab62 | 2021-04-26 17:07:02 -0700 | [diff] [blame^] | 212 | #[test] |
| 213 | fn merkle_tree_grow_leaves() -> Result<()> { |
| 214 | let mut tree = MerkleLeaves::new(); |
| 215 | tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE); // fake hash of a CHUNK_SIZE block |
| 216 | |
| 217 | // Grows the leaves |
| 218 | tree.resize(4096 * 3 - 100); |
| 219 | |
| 220 | assert!(tree.is_index_valid(0)); |
| 221 | assert!(tree.is_index_valid(1)); |
| 222 | assert!(tree.is_index_valid(2)); |
| 223 | assert!(!tree.is_index_valid(3)); |
| 224 | assert!(tree.is_consistent(1, &Sha256Hasher::HASH_OF_4096_ZEROS)); |
| 225 | assert!(tree.is_consistent(2, &Sha256Hasher::HASH_OF_4096_ZEROS)); |
| 226 | Ok(()) |
| 227 | } |
| 228 | |
| 229 | #[test] |
| 230 | fn merkle_tree_shrink_leaves() -> Result<()> { |
| 231 | let mut tree = MerkleLeaves::new(); |
| 232 | tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE); |
| 233 | tree.update_hash(0, &[42; HASH_SIZE], CHUNK_SIZE * 3); |
| 234 | |
| 235 | // Shrink the leaves |
| 236 | tree.resize(CHUNK_SIZE as usize * 2 - 100); |
| 237 | |
| 238 | assert!(tree.is_index_valid(0)); |
| 239 | assert!(tree.is_index_valid(1)); |
| 240 | assert!(!tree.is_index_valid(2)); |
| 241 | // The second chunk is a hole and full of zero. When shrunk, with zero padding, the hash |
| 242 | // happens to be consistent to a full-zero chunk. |
| 243 | assert!(tree.is_consistent(1, &Sha256Hasher::HASH_OF_4096_ZEROS)); |
| 244 | Ok(()) |
| 245 | } |
| 246 | |
Victor Hsieh | dde1790 | 2021-02-26 12:35:31 -0800 | [diff] [blame] | 247 | fn generate_fsverity_digest_sequentially(test_data: &[u8]) -> Result<Sha256Hash> { |
| 248 | let mut tree = MerkleLeaves::new(); |
| 249 | for (index, chunk) in test_data.chunks(CHUNK_SIZE as usize).enumerate() { |
| 250 | let hash = Sha256Hasher::new()? |
| 251 | .update(&chunk)? |
| 252 | .update(&vec![0u8; CHUNK_SIZE as usize - chunk.len()])? |
| 253 | .finalize()?; |
| 254 | |
| 255 | tree.update_hash(index, &hash, CHUNK_SIZE * index as u64 + chunk.len() as u64); |
| 256 | } |
| 257 | Ok(tree.calculate_fsverity_digest()?) |
| 258 | } |
| 259 | |
| 260 | fn to_u8_vec(hex_str: &str) -> Vec<u8> { |
| 261 | assert!(hex_str.len() % 2 == 0); |
| 262 | (0..hex_str.len()) |
| 263 | .step_by(2) |
| 264 | .map(|i| u8::from_str_radix(&hex_str[i..i + 2], 16).unwrap()) |
| 265 | .collect() |
| 266 | } |
| 267 | } |