authfs: Add MerkleLeaves for integrity bookkeeping

MerkleLeaves will be used by a "writer" for remembering the hashes of
written blocks for integrity checking. For example, when a file is
written from a trusted environment to an untrusted storage / remote,
MerkleLeaves allows the writer to verify the reads later with a
cryptographical strong hash.

Besides verification, if requested, the tree can grow from the leaves(!)
to generate the root hash and fs-verity digest.

 - fsverity/builder.rs: implements MerkleLeaves
 - fsverity/verifier.rs: renamed from fsverity.rs with minor changes
 - fsverity/common.rs: common utils from the original fsverity.rs with
    one addition error in the enum
 - crypto.rs: more helper function / constant

Bug: 171279640
Test: atest authfs_device_test_src_lib

Change-Id: I76e5ebd81a2f2afa017e3c670774ccbb797766df
diff --git a/authfs/src/fsverity/builder.rs b/authfs/src/fsverity/builder.rs
new file mode 100644
index 0000000..607d3a7
--- /dev/null
+++ b/authfs/src/fsverity/builder.rs
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+use super::common::{build_fsverity_digest, merkle_tree_height, FsverityError};
+use crate::common::CHUNK_SIZE;
+use crate::crypto::{CryptoError, Sha256Hash, Sha256Hasher};
+
+const HASH_SIZE: usize = Sha256Hasher::HASH_SIZE;
+const HASH_PER_PAGE: usize = CHUNK_SIZE as usize / HASH_SIZE;
+
+/// MerkleLeaves can be used by the class' customer for bookkeeping integrity data for their bytes.
+/// It can also be used to generate the standard fs-verity digest for the source data.
+///
+/// It's in-memory because for the initial use cases, we don't need to read back an existing file,
+/// and only need to deal with new files. Also, considering that the output file won't be large at
+/// the moment, it is sufficient to simply keep the Merkle tree in memory in the trusted world. To
+/// further simplify the initial implementation, we only need to keep the leaf nodes in memory, and
+/// generate the tree / root hash when requested.
+pub struct MerkleLeaves {
+    leaves: Vec<Sha256Hash>,
+    file_size: u64,
+}
+
+fn hash_all_pages(source: &[Sha256Hash]) -> Result<Vec<Sha256Hash>, CryptoError> {
+    source
+        .chunks(HASH_PER_PAGE)
+        .map(|chunk| {
+            let padding_bytes = (HASH_PER_PAGE - chunk.len()) * HASH_SIZE;
+            Ok(Sha256Hasher::new()?
+                .update_from(chunk)?
+                .update(&vec![0u8; padding_bytes])?
+                .finalize()?)
+        })
+        .collect()
+}
+
+#[allow(dead_code)]
+impl MerkleLeaves {
+    /// Creates a `MerkleLeaves` instance with empty data.
+    pub fn new() -> Self {
+        Self { leaves: Vec::new(), file_size: 0 }
+    }
+
+    /// Updates the hash of the `index`-th leaf, and increase the size to `size_at_least` if the
+    /// current size is smaller.
+    pub fn update_hash(&mut self, index: usize, hash: &Sha256Hash, size_at_least: u64) {
+        // +1 since index is zero-based.
+        if self.leaves.len() < index + 1 {
+            // When resizing, fill in hash of zeros by default. This makes it easy to handle holes
+            // in a file.
+            self.leaves.resize(index + 1, Sha256Hasher::HASH_OF_4096_ZEROS);
+        }
+        self.leaves[index].clone_from_slice(hash);
+
+        if size_at_least > self.file_size {
+            self.file_size = size_at_least;
+        }
+    }
+
+    /// Returns whether `index` is within the bound of leaves.
+    pub fn is_index_valid(&self, index: usize) -> bool {
+        index < self.leaves.len()
+    }
+
+    /// Returns whether the `index`-th hash is consistent to `hash`.
+    pub fn is_consistent(&self, index: usize, hash: &Sha256Hash) -> bool {
+        if let Some(element) = self.leaves.get(index) {
+            element == hash
+        } else {
+            false
+        }
+    }
+
+    fn calculate_root_hash(&self) -> Result<Sha256Hash, FsverityError> {
+        match self.leaves.len() {
+            // Special cases per fs-verity digest definition.
+            0 => {
+                debug_assert_eq!(self.file_size, 0);
+                Ok([0u8; HASH_SIZE])
+            }
+            1 => {
+                debug_assert!(self.file_size <= CHUNK_SIZE && self.file_size > 0);
+                Ok(self.leaves[0])
+            }
+            n => {
+                debug_assert_eq!((self.file_size - 1) / CHUNK_SIZE, n as u64);
+                let size_for_equivalent = n as u64 * CHUNK_SIZE;
+                let level = merkle_tree_height(size_for_equivalent).unwrap(); // safe since n > 0
+
+                // `leaves` is owned and can't be the initial state below. Here we manually hash it
+                // first to avoid a copy and to get the type right.
+                let second_level = hash_all_pages(&self.leaves)?;
+                let hashes =
+                    (1..=level).try_fold(second_level, |source, _| hash_all_pages(&source))?;
+                if hashes.len() != 1 {
+                    Err(FsverityError::InvalidState)
+                } else {
+                    Ok(hashes.into_iter().next().unwrap())
+                }
+            }
+        }
+    }
+
+    /// Returns the fs-verity digest based on the current tree and file size.
+    pub fn calculate_fsverity_digest(&self) -> Result<Sha256Hash, FsverityError> {
+        let root_hash = self.calculate_root_hash()?;
+        Ok(build_fsverity_digest(&root_hash, self.file_size)?)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    // Test data below can be generated by:
+    //  $ perl -e 'print "\x{00}" x 6000' > foo
+    //  $ perl -e 'print "\x{01}" x 5000' >> foo
+    //  $ fsverity digest foo
+    use super::*;
+    use anyhow::Result;
+
+    #[test]
+    fn merkle_tree_empty_file() -> Result<()> {
+        assert_eq!(
+            to_u8_vec("3d248ca542a24fc62d1c43b916eae5016878e2533c88238480b26128a1f1af95"),
+            generate_fsverity_digest_sequentially(&Vec::new())?
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn merkle_tree_file_size_less_than_or_equal_to_4k() -> Result<()> {
+        // Test a file that contains 4096 '\01's.
+        assert_eq!(
+            to_u8_vec("cd0875ca59c7d37e962c5e8f5acd3770750ac80225e2df652ce5672fd34500af"),
+            generate_fsverity_digest_sequentially(&vec![1; 4096])?
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn merkle_tree_more_sizes() -> Result<()> {
+        // Test files that contains >4096 '\01's.
+
+        assert_eq!(
+            to_u8_vec("2901b849fda2d91e3929524561c4a47e77bb64734319759507b2029f18b9cc52"),
+            generate_fsverity_digest_sequentially(&vec![1; 4097])?
+        );
+
+        assert_eq!(
+            to_u8_vec("2a476d58eb80394052a3a783111e1458ac3ecf68a7878183fed86ca0ff47ec0d"),
+            generate_fsverity_digest_sequentially(&vec![1; 8192])?
+        );
+
+        // Test with max size that still fits in 2 levels.
+        assert_eq!(
+            to_u8_vec("26b7c190a34e19f420808ee7ec233b09fa6c34543b5a9d2950530114c205d14f"),
+            generate_fsverity_digest_sequentially(&vec![1; 524288])?
+        );
+
+        // Test with data that requires 3 levels.
+        assert_eq!(
+            to_u8_vec("316835d9be1c95b5cd55d07ae7965d651689efad186e26cbf680e40b683a3262"),
+            generate_fsverity_digest_sequentially(&vec![1; 524289])?
+        );
+        Ok(())
+    }
+
+    #[test]
+    fn merkle_tree_non_sequential() -> Result<()> {
+        let mut tree = MerkleLeaves::new();
+        let hash = Sha256Hasher::new()?.update(&vec![1u8; CHUNK_SIZE as usize])?.finalize()?;
+
+        // Update hashes of 4 1-blocks.
+        tree.update_hash(1, &hash, CHUNK_SIZE * 2);
+        tree.update_hash(3, &hash, CHUNK_SIZE * 4);
+        tree.update_hash(0, &hash, CHUNK_SIZE);
+        tree.update_hash(2, &hash, CHUNK_SIZE * 3);
+
+        assert_eq!(
+            to_u8_vec("7d3c0d2e1dc54230b20ed875f5f3a4bd3f9873df601936b3ca8127d4db3548f3"),
+            tree.calculate_fsverity_digest()?
+        );
+        Ok(())
+    }
+
+    fn generate_fsverity_digest_sequentially(test_data: &[u8]) -> Result<Sha256Hash> {
+        let mut tree = MerkleLeaves::new();
+        for (index, chunk) in test_data.chunks(CHUNK_SIZE as usize).enumerate() {
+            let hash = Sha256Hasher::new()?
+                .update(&chunk)?
+                .update(&vec![0u8; CHUNK_SIZE as usize - chunk.len()])?
+                .finalize()?;
+
+            tree.update_hash(index, &hash, CHUNK_SIZE * index as u64 + chunk.len() as u64);
+        }
+        Ok(tree.calculate_fsverity_digest()?)
+    }
+
+    fn to_u8_vec(hex_str: &str) -> Vec<u8> {
+        assert!(hex_str.len() % 2 == 0);
+        (0..hex_str.len())
+            .step_by(2)
+            .map(|i| u8::from_str_radix(&hex_str[i..i + 2], 16).unwrap())
+            .collect()
+    }
+}