Victor Hsieh | dde1790 | 2021-02-26 12:35:31 -0800 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (C) 2021 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | use super::common::{build_fsverity_digest, merkle_tree_height, FsverityError}; |
| 18 | use crate::common::CHUNK_SIZE; |
| 19 | use crate::crypto::{CryptoError, Sha256Hash, Sha256Hasher}; |
| 20 | |
| 21 | const HASH_SIZE: usize = Sha256Hasher::HASH_SIZE; |
| 22 | const HASH_PER_PAGE: usize = CHUNK_SIZE as usize / HASH_SIZE; |
| 23 | |
| 24 | /// MerkleLeaves can be used by the class' customer for bookkeeping integrity data for their bytes. |
| 25 | /// It can also be used to generate the standard fs-verity digest for the source data. |
| 26 | /// |
| 27 | /// It's in-memory because for the initial use cases, we don't need to read back an existing file, |
| 28 | /// and only need to deal with new files. Also, considering that the output file won't be large at |
| 29 | /// the moment, it is sufficient to simply keep the Merkle tree in memory in the trusted world. To |
| 30 | /// further simplify the initial implementation, we only need to keep the leaf nodes in memory, and |
| 31 | /// generate the tree / root hash when requested. |
| 32 | pub struct MerkleLeaves { |
| 33 | leaves: Vec<Sha256Hash>, |
| 34 | file_size: u64, |
| 35 | } |
| 36 | |
| 37 | fn hash_all_pages(source: &[Sha256Hash]) -> Result<Vec<Sha256Hash>, CryptoError> { |
| 38 | source |
| 39 | .chunks(HASH_PER_PAGE) |
| 40 | .map(|chunk| { |
| 41 | let padding_bytes = (HASH_PER_PAGE - chunk.len()) * HASH_SIZE; |
| 42 | Ok(Sha256Hasher::new()? |
| 43 | .update_from(chunk)? |
| 44 | .update(&vec![0u8; padding_bytes])? |
| 45 | .finalize()?) |
| 46 | }) |
| 47 | .collect() |
| 48 | } |
| 49 | |
| 50 | #[allow(dead_code)] |
| 51 | impl MerkleLeaves { |
| 52 | /// Creates a `MerkleLeaves` instance with empty data. |
| 53 | pub fn new() -> Self { |
| 54 | Self { leaves: Vec::new(), file_size: 0 } |
| 55 | } |
| 56 | |
| 57 | /// Updates the hash of the `index`-th leaf, and increase the size to `size_at_least` if the |
| 58 | /// current size is smaller. |
| 59 | pub fn update_hash(&mut self, index: usize, hash: &Sha256Hash, size_at_least: u64) { |
| 60 | // +1 since index is zero-based. |
| 61 | if self.leaves.len() < index + 1 { |
| 62 | // When resizing, fill in hash of zeros by default. This makes it easy to handle holes |
| 63 | // in a file. |
| 64 | self.leaves.resize(index + 1, Sha256Hasher::HASH_OF_4096_ZEROS); |
| 65 | } |
| 66 | self.leaves[index].clone_from_slice(hash); |
| 67 | |
| 68 | if size_at_least > self.file_size { |
| 69 | self.file_size = size_at_least; |
| 70 | } |
| 71 | } |
| 72 | |
| 73 | /// Returns whether `index` is within the bound of leaves. |
| 74 | pub fn is_index_valid(&self, index: usize) -> bool { |
| 75 | index < self.leaves.len() |
| 76 | } |
| 77 | |
| 78 | /// Returns whether the `index`-th hash is consistent to `hash`. |
| 79 | pub fn is_consistent(&self, index: usize, hash: &Sha256Hash) -> bool { |
| 80 | if let Some(element) = self.leaves.get(index) { |
| 81 | element == hash |
| 82 | } else { |
| 83 | false |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | fn calculate_root_hash(&self) -> Result<Sha256Hash, FsverityError> { |
| 88 | match self.leaves.len() { |
| 89 | // Special cases per fs-verity digest definition. |
| 90 | 0 => { |
| 91 | debug_assert_eq!(self.file_size, 0); |
| 92 | Ok([0u8; HASH_SIZE]) |
| 93 | } |
| 94 | 1 => { |
| 95 | debug_assert!(self.file_size <= CHUNK_SIZE && self.file_size > 0); |
| 96 | Ok(self.leaves[0]) |
| 97 | } |
| 98 | n => { |
| 99 | debug_assert_eq!((self.file_size - 1) / CHUNK_SIZE, n as u64); |
| 100 | let size_for_equivalent = n as u64 * CHUNK_SIZE; |
| 101 | let level = merkle_tree_height(size_for_equivalent).unwrap(); // safe since n > 0 |
| 102 | |
| 103 | // `leaves` is owned and can't be the initial state below. Here we manually hash it |
| 104 | // first to avoid a copy and to get the type right. |
| 105 | let second_level = hash_all_pages(&self.leaves)?; |
| 106 | let hashes = |
| 107 | (1..=level).try_fold(second_level, |source, _| hash_all_pages(&source))?; |
| 108 | if hashes.len() != 1 { |
| 109 | Err(FsverityError::InvalidState) |
| 110 | } else { |
| 111 | Ok(hashes.into_iter().next().unwrap()) |
| 112 | } |
| 113 | } |
| 114 | } |
| 115 | } |
| 116 | |
| 117 | /// Returns the fs-verity digest based on the current tree and file size. |
| 118 | pub fn calculate_fsverity_digest(&self) -> Result<Sha256Hash, FsverityError> { |
| 119 | let root_hash = self.calculate_root_hash()?; |
| 120 | Ok(build_fsverity_digest(&root_hash, self.file_size)?) |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | #[cfg(test)] |
| 125 | mod tests { |
| 126 | // Test data below can be generated by: |
| 127 | // $ perl -e 'print "\x{00}" x 6000' > foo |
| 128 | // $ perl -e 'print "\x{01}" x 5000' >> foo |
| 129 | // $ fsverity digest foo |
| 130 | use super::*; |
| 131 | use anyhow::Result; |
| 132 | |
| 133 | #[test] |
| 134 | fn merkle_tree_empty_file() -> Result<()> { |
| 135 | assert_eq!( |
| 136 | to_u8_vec("3d248ca542a24fc62d1c43b916eae5016878e2533c88238480b26128a1f1af95"), |
| 137 | generate_fsverity_digest_sequentially(&Vec::new())? |
| 138 | ); |
| 139 | Ok(()) |
| 140 | } |
| 141 | |
| 142 | #[test] |
| 143 | fn merkle_tree_file_size_less_than_or_equal_to_4k() -> Result<()> { |
| 144 | // Test a file that contains 4096 '\01's. |
| 145 | assert_eq!( |
| 146 | to_u8_vec("cd0875ca59c7d37e962c5e8f5acd3770750ac80225e2df652ce5672fd34500af"), |
| 147 | generate_fsverity_digest_sequentially(&vec![1; 4096])? |
| 148 | ); |
| 149 | Ok(()) |
| 150 | } |
| 151 | |
| 152 | #[test] |
| 153 | fn merkle_tree_more_sizes() -> Result<()> { |
| 154 | // Test files that contains >4096 '\01's. |
| 155 | |
| 156 | assert_eq!( |
| 157 | to_u8_vec("2901b849fda2d91e3929524561c4a47e77bb64734319759507b2029f18b9cc52"), |
| 158 | generate_fsverity_digest_sequentially(&vec![1; 4097])? |
| 159 | ); |
| 160 | |
| 161 | assert_eq!( |
| 162 | to_u8_vec("2a476d58eb80394052a3a783111e1458ac3ecf68a7878183fed86ca0ff47ec0d"), |
| 163 | generate_fsverity_digest_sequentially(&vec![1; 8192])? |
| 164 | ); |
| 165 | |
| 166 | // Test with max size that still fits in 2 levels. |
| 167 | assert_eq!( |
| 168 | to_u8_vec("26b7c190a34e19f420808ee7ec233b09fa6c34543b5a9d2950530114c205d14f"), |
| 169 | generate_fsverity_digest_sequentially(&vec![1; 524288])? |
| 170 | ); |
| 171 | |
| 172 | // Test with data that requires 3 levels. |
| 173 | assert_eq!( |
| 174 | to_u8_vec("316835d9be1c95b5cd55d07ae7965d651689efad186e26cbf680e40b683a3262"), |
| 175 | generate_fsverity_digest_sequentially(&vec![1; 524289])? |
| 176 | ); |
| 177 | Ok(()) |
| 178 | } |
| 179 | |
| 180 | #[test] |
| 181 | fn merkle_tree_non_sequential() -> Result<()> { |
| 182 | let mut tree = MerkleLeaves::new(); |
| 183 | let hash = Sha256Hasher::new()?.update(&vec![1u8; CHUNK_SIZE as usize])?.finalize()?; |
| 184 | |
| 185 | // Update hashes of 4 1-blocks. |
| 186 | tree.update_hash(1, &hash, CHUNK_SIZE * 2); |
| 187 | tree.update_hash(3, &hash, CHUNK_SIZE * 4); |
| 188 | tree.update_hash(0, &hash, CHUNK_SIZE); |
| 189 | tree.update_hash(2, &hash, CHUNK_SIZE * 3); |
| 190 | |
| 191 | assert_eq!( |
| 192 | to_u8_vec("7d3c0d2e1dc54230b20ed875f5f3a4bd3f9873df601936b3ca8127d4db3548f3"), |
| 193 | tree.calculate_fsverity_digest()? |
| 194 | ); |
| 195 | Ok(()) |
| 196 | } |
| 197 | |
| 198 | fn generate_fsverity_digest_sequentially(test_data: &[u8]) -> Result<Sha256Hash> { |
| 199 | let mut tree = MerkleLeaves::new(); |
| 200 | for (index, chunk) in test_data.chunks(CHUNK_SIZE as usize).enumerate() { |
| 201 | let hash = Sha256Hasher::new()? |
| 202 | .update(&chunk)? |
| 203 | .update(&vec![0u8; CHUNK_SIZE as usize - chunk.len()])? |
| 204 | .finalize()?; |
| 205 | |
| 206 | tree.update_hash(index, &hash, CHUNK_SIZE * index as u64 + chunk.len() as u64); |
| 207 | } |
| 208 | Ok(tree.calculate_fsverity_digest()?) |
| 209 | } |
| 210 | |
| 211 | fn to_u8_vec(hex_str: &str) -> Vec<u8> { |
| 212 | assert!(hex_str.len() % 2 == 0); |
| 213 | (0..hex_str.len()) |
| 214 | .step_by(2) |
| 215 | .map(|i| u8::from_str_radix(&hex_str[i..i + 2], 16).unwrap()) |
| 216 | .collect() |
| 217 | } |
| 218 | } |