/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

mod file;
mod mount;

use anyhow::{anyhow, bail, Result};
use fuse::filesystem::{
    Context, DirEntry, DirectoryIterator, Entry, FileSystem, FsOptions, GetxattrReply,
    SetattrValid, ZeroCopyReader, ZeroCopyWriter,
};
use fuse::sys::OpenOptions as FuseOpenOptions;
use log::{error, trace, warn};
use std::collections::{btree_map, BTreeMap};
use std::convert::{TryFrom, TryInto};
use std::ffi::{CStr, CString, OsStr};
use std::io;
use std::mem::{zeroed, MaybeUninit};
use std::option::Option;
use std::os::unix::ffi::OsStrExt;
use std::path::{Component, Path, PathBuf};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, RwLock};
use std::time::Duration;

use crate::common::{divide_roundup, ChunkedSizeIter, CHUNK_SIZE};
use crate::file::{
    validate_basename, Attr, InMemoryDir, RandomWrite, ReadByChunk, RemoteDirEditor,
    RemoteFileEditor, RemoteFileReader,
};
use crate::fsstat::RemoteFsStatsReader;
use crate::fsverity::VerifiedFileEditor;

pub use self::file::LazyVerifiedReadonlyFile;
pub use self::mount::mount_and_enter_message_loop;
use self::mount::MAX_WRITE_BYTES;

/// Inode number type. Also used as `FileSystem::Inode` by the `AuthFs` implementation.
pub type Inode = u64;
/// Handle number type. Directory handles are the keys of `DirHandleTable`.
type Handle = u64;

/// Maximum time for a file's metadata to be cached by the kernel. Since any file or directory
/// change (if not read-only) has to go through AuthFS to be trusted, the timeout can be the
/// maximum.
const DEFAULT_METADATA_TIMEOUT: Duration = Duration::MAX;

/// Inode number of the filesystem root directory.
const ROOT_INODE: Inode = 1;

/// `AuthFsEntry` defines the filesystem entry types supported by AuthFS.
pub enum AuthFsEntry {
    /// A read-only directory (writable during initialization). The root directory is an example.
    ReadonlyDirectory { dir: InMemoryDir },
    /// A file type that is verified against an fs-verity signature (thus read-only). The file is
    /// served from a remote server.
    VerifiedReadonly { reader: LazyVerifiedReadonlyFile },
    /// A file type that is a read-only passthrough from a file on a remote server.
    UnverifiedReadonly { reader: RemoteFileReader, file_size: u64 },
    /// A file type that is initially empty, and whose content is stored on a remote server. File
    /// integrity is guaranteed with a private Merkle tree.
    VerifiedNew { editor: VerifiedFileEditor<RemoteFileEditor>, attr: Attr },
    /// A directory type that is initially empty. One can create a new file (`VerifiedNew`) and a
    /// new directory (`VerifiedNewDirectory` itself) with integrity guaranteed within the VM.
    VerifiedNewDirectory { dir: RemoteDirEditor, attr: Attr },
}

impl AuthFsEntry {
    /// Checks that this entry is a directory that may be deleted, i.e. a `VerifiedNewDirectory`
    /// without any entry.
    ///
    /// Fails with `ENOTEMPTY` for a non-empty `VerifiedNewDirectory`, `EACCES` for a
    /// `ReadonlyDirectory`, and `ENOTDIR` for every other entry type.
    fn expect_empty_deletable_directory(&self) -> io::Result<()> {
        let errno = match self {
            AuthFsEntry::VerifiedNewDirectory { dir, .. } if dir.number_of_entries() == 0 => {
                return Ok(());
            }
            AuthFsEntry::VerifiedNewDirectory { .. } => libc::ENOTEMPTY,
            AuthFsEntry::ReadonlyDirectory { .. } => libc::EACCES,
            _ => libc::ENOTDIR,
        };
        Err(io::Error::from_raw_os_error(errno))
    }
}

/// Bookkeeping kept in the inode table for each inode: the entry itself plus the lifetime state.
struct InodeState {
    /// Actual inode entry.
    entry: AuthFsEntry,

    /// Number of `Handle`s (i.e. file descriptors) that are currently referring to this inode.
    ///
    /// Technically, this does not matter to readonly entries, since they live forever. The
    /// reference count is only needed for managing the lifetime of writable entries like
    /// `VerifiedNew` and `VerifiedNewDirectory`. That is, when an entry is deleted, the actual
    /// entry needs to stay alive until the reference count reaches zero.
    ///
    /// Note: This is not to be confused with hardlinks, which AuthFS doesn't currently implement.
    handle_ref_count: AtomicU64,

    /// Whether the inode is already unlinked, i.e. should be removed once `handle_ref_count` is
    /// down to zero.
    unlinked: bool,
}

impl InodeState {
    fn new(entry: AuthFsEntry) -> Self {
        InodeState { entry, handle_ref_count: AtomicU64::new(0), unlinked: false }
    }

    fn new_with_ref_count(entry: AuthFsEntry, handle_ref_count: u64) -> Self {
        InodeState { entry, handle_ref_count: AtomicU64::new(handle_ref_count), unlinked: false }
    }
}

/// Data type through which a directory implementation presents one of its entries to `AuthFs`.
#[derive(Clone)]
pub struct AuthFsDirEntry {
    /// Inode number of the entry.
    pub inode: Inode,
    /// Name of the entry.
    pub name: CString,
    /// Whether the entry is a directory; `readdir` reports `DT_DIR` if so and `DT_REG` otherwise.
    pub is_dir: bool,
}

/// A snapshot of a directory's entries for supporting the `readdir` operation.
///
/// The `readdir` implementation is required by FUSE to not return any entries that have been
/// returned previously (while it's fine to not return new entries). A snapshot is the easiest way
/// to be compliant. See `fuse::filesystem::readdir` for more details.
///
/// A `DirEntriesSnapshot` is created on `opendir`, and is associated with the returned
/// `Handle`/FD. The snapshot is deleted when the handle is released in `releasedir`.
type DirEntriesSnapshot = Vec<AuthFsDirEntry>;

/// An iterator for reading from a `DirEntriesSnapshot`.
pub struct DirEntriesSnapshotIterator {
    /// A shared reference to the `DirEntriesSnapshot` held in `AuthFs`.
    snapshot: Arc<DirEntriesSnapshot>,

    /// A value determined by `Self` to identify the last entry returned. 0 is a value reserved by
    /// FUSE to mean reading from the beginning.
    prev_offset: usize,
}

impl DirectoryIterator for DirEntriesSnapshotIterator {
    /// Returns the snapshot entry following the one identified by `prev_offset`, or `None` once
    /// the snapshot is exhausted.
    fn next(&mut self) -> Option<DirEntry> {
        // The snapshot is expected to be still held by `dir_handle_table` as well, i.e. the FD is
        // not closed yet. An iterator only exists (briefly) during `readdir`, which cannot run
        // against a closed FD, so this iterator should never be the sole owner.
        debug_assert!(Arc::strong_count(&self.snapshot) >= 2);

        // Offsets are 1-based because FUSE reserves 0 for "start from the beginning". Hence the
        // offset of the next entry is always the previous offset plus one, and its index in the
        // vector is that offset minus one.
        let current_offset = self.prev_offset + 1;
        if current_offset > self.snapshot.len() {
            return None;
        }

        let found = &self.snapshot[current_offset - 1];
        self.prev_offset = current_offset;
        Some(DirEntry {
            offset: current_offset as u64,
            ino: found.inode,
            name: &found.name,
            type_: if found.is_dir { libc::DT_DIR.into() } else { libc::DT_REG.into() },
        })
    }
}

/// Maps a directory `Handle` to the shared snapshot of directory entries associated with it.
type DirHandleTable = BTreeMap<Handle, Arc<DirEntriesSnapshot>>;

// AuthFS needs to be `Sync` to be used with the `fuse` crate.
/// The filesystem state: the inode table, the open directory handles, the counters used to
/// allocate new inode / handle numbers, and a reader for the remote filesystem stats.
pub struct AuthFs {
    /// Table for `Inode` to `InodeState` lookup.
    inode_table: RwLock<BTreeMap<Inode, InodeState>>,

    /// The next available inode number.
    next_inode: AtomicU64,

    /// Table for `Handle` to `Arc<DirEntriesSnapshot>` lookup. On `opendir`, a new directory handle
    /// is created and the snapshot of the current directory is created. This is not super
    /// efficient, but is the simplest way to be compliant to the FUSE contract (see
    /// `fuse::filesystem::readdir`).
    ///
    /// Currently, no code locks `dir_handle_table` and `inode_table` at the same time to avoid
    /// deadlock.
    dir_handle_table: RwLock<DirHandleTable>,

    /// The next available handle number.
    next_handle: AtomicU64,

    /// A reader to access the remote filesystem stats, which is supposed to be of "the" output
    /// directory. We assume all output are stored in the same partition.
    remote_fs_stats_reader: RemoteFsStatsReader,
}

// Implementation for preparing an `AuthFs` instance, before starting to serve.
// TODO(victorhsieh): Consider implement a builder to separate the mutable initialization from the
// immutable / interiorly mutable serving phase.
impl AuthFs {
    /// Creates a filesystem that only contains an empty, read-only root directory at
    /// `ROOT_INODE`. Inode allocation starts right after `ROOT_INODE`, handle allocation at 1.
    pub fn new(remote_fs_stats_reader: RemoteFsStatsReader) -> AuthFs {
        let mut inode_table = BTreeMap::new();
        inode_table.insert(
            ROOT_INODE,
            InodeState::new(AuthFsEntry::ReadonlyDirectory { dir: InMemoryDir::new() }),
        );

        AuthFs {
            inode_table: RwLock::new(inode_table),
            next_inode: AtomicU64::new(ROOT_INODE + 1),
            dir_handle_table: RwLock::new(BTreeMap::new()),
            next_handle: AtomicU64::new(1),
            remote_fs_stats_reader,
        }
    }

    /// Add an `AuthFsEntry` as `basename` to the filesystem root.
    ///
    /// Returns the inode allocated for the new entry. Fails if `basename` does not pass
    /// `validate_basename` or if the entry cannot be added to the root directory.
    pub fn add_entry_at_root_dir(
        &mut self,
        basename: PathBuf,
        entry: AuthFsEntry,
    ) -> Result<Inode> {
        validate_basename(&basename)?;
        self.add_entry_at_ro_dir_by_path(ROOT_INODE, &basename, entry)
    }

    /// Add an `AuthFsEntry` by path from the `ReadonlyDirectory` represented by `dir_inode`. The
    /// path must be a relative path. If some ancestor directories do not exist, they will be
    /// created (also as `ReadonlyDirectory`) automatically.
    ///
    /// Returns the inode allocated for the new entry.
    ///
    /// # Errors
    ///
    /// Fails (without panicking) if the path is absolute or not canonical, has no parent or no
    /// file name, if `dir_inode` or any intermediate inode is unknown, if `dir_inode` or any
    /// existing path component is not a `ReadonlyDirectory`, or if the directory rejects the new
    /// name.
    pub fn add_entry_at_ro_dir_by_path(
        &mut self,
        dir_inode: Inode,
        path: &Path,
        entry: AuthFsEntry,
    ) -> Result<Inode> {
        // 1. Make sure the parent directories all exist. Derive the entry's parent inode.
        let parent_path =
            path.parent().ok_or_else(|| anyhow!("No parent directory: {:?}", path))?;
        let parent_inode =
            parent_path.components().try_fold(dir_inode, |current_dir_inode, path_component| {
                match path_component {
                    Component::RootDir => bail!("Absolute path is not supported"),
                    Component::Normal(name) => {
                        let inode_table = self.inode_table.get_mut().unwrap();
                        // Locate the internal directory structure.
                        let current_dir_entry = &mut inode_table
                            .get_mut(&current_dir_inode)
                            .ok_or_else(|| {
                                anyhow!("Unknown directory inode {}", current_dir_inode)
                            })?
                            .entry;
                        let dir = match current_dir_entry {
                            AuthFsEntry::ReadonlyDirectory { dir } => dir,
                            // Reachable from caller input, e.g. when a path component names an
                            // existing file, so report an error instead of panicking.
                            _ => bail!("Inode {} is not a ReadonlyDirectory", current_dir_inode),
                        };
                        // Return directory inode. Create first if not exists.
                        if let Some(existing_inode) = dir.lookup_inode(name.as_ref()) {
                            Ok(existing_inode)
                        } else {
                            let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
                            let new_dir_entry =
                                AuthFsEntry::ReadonlyDirectory { dir: InMemoryDir::new() };

                            // Actually update the tables.
                            dir.add_dir(name.as_ref(), new_inode)?;
                            if inode_table
                                .insert(new_inode, InodeState::new(new_dir_entry))
                                .is_some()
                            {
                                bail!("Unexpected to find a duplicated inode");
                            }
                            Ok(new_inode)
                        }
                    }
                    _ => Err(anyhow!("Path is not canonical: {:?}", path)),
                }
            })?;

        // 2. Insert the entry to the parent directory, as well as the inode table.
        let inode_table = self.inode_table.get_mut().unwrap();
        // When the path has a single component, `parent_inode` is the caller-provided `dir_inode`
        // and has not been checked against the table yet.
        let inode_state = inode_table
            .get_mut(&parent_inode)
            .ok_or_else(|| anyhow!("Unknown directory inode {}", parent_inode))?;
        match &mut inode_state.entry {
            AuthFsEntry::ReadonlyDirectory { dir } => {
                let basename =
                    path.file_name().ok_or_else(|| anyhow!("Bad file name: {:?}", path))?;
                let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);

                // Actually update the tables.
                dir.add_file(basename.as_ref(), new_inode)?;
                if inode_table.insert(new_inode, InodeState::new(entry)).is_some() {
                    bail!("Unexpected to find a duplicated inode");
                }
                Ok(new_inode)
            }
            // Reachable when the last existing path component (or `dir_inode`) is a file.
            _ => bail!("Inode {} is not a ReadonlyDirectory", parent_inode),
        }
    }
}

// Implementation for serving requests.
impl AuthFs {
    /// Runs `handle_fn` on the entry stored for `inode` while the inode table is read-locked, and
    /// returns whatever `handle_fn` returns. The lookup itself is delegated to
    /// `handle_inode_locked`.
    fn handle_inode<F, R>(&self, inode: &Inode, handle_fn: F) -> io::Result<R>
    where
        F: FnOnce(&AuthFsEntry) -> io::Result<R>,
    {
        let table = self.inode_table.read().unwrap();
        handle_inode_locked(&table, inode, |state| handle_fn(&state.entry))
    }

    /// Creates a new entry called `name` under `parent_inode` and returns its inode. The new
    /// entry starts with a reference count of one.
    ///
    /// Two updates happen under the inode table's write lock: `create_fn` registers the name
    /// with a freshly allocated inode in the parent directory, and the entry it returns is then
    /// stored in the global inode table under that inode.
    ///
    /// `create_fn` is given the parent entry, the basename and the new inode number. The inode
    /// number is only allocated once the parent inode has been located.
    fn create_new_entry_with_ref_count<F>(
        &self,
        parent_inode: Inode,
        name: &CStr,
        create_fn: F,
    ) -> io::Result<Inode>
    where
        F: FnOnce(&mut AuthFsEntry, &Path, Inode) -> io::Result<AuthFsEntry>,
    {
        let mut table = self.inode_table.write().unwrap();
        let (allocated_inode, created_entry) =
            handle_inode_mut_locked(&mut table, &parent_inode, |InodeState { entry, .. }| {
                let allocated_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
                let created_entry = create_fn(entry, cstr_to_path(name), allocated_inode)?;
                Ok((allocated_inode, created_entry))
            })?;

        match table.entry(allocated_inode) {
            btree_map::Entry::Vacant(slot) => {
                slot.insert(InodeState::new_with_ref_count(created_entry, 1));
                Ok(allocated_inode)
            }
            btree_map::Entry::Occupied(_) => {
                unreachable!("Unexpected duplication of inode {}", allocated_inode)
            }
        }
    }

    /// Stores `dir_entries` as the snapshot of a newly allocated directory handle and returns
    /// that handle together with empty open options.
    fn open_dir_store_snapshot(
        &self,
        dir_entries: Vec<AuthFsDirEntry>,
    ) -> io::Result<(Option<Handle>, FuseOpenOptions)> {
        let new_handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
        let mut handles = self.dir_handle_table.write().unwrap();
        match handles.entry(new_handle) {
            btree_map::Entry::Vacant(slot) => {
                slot.insert(Arc::new(dir_entries));
                Ok((Some(new_handle), FuseOpenOptions::empty()))
            }
            btree_map::Entry::Occupied(_) => {
                unreachable!("Unexpected to see new handle {} to existing in the table", new_handle)
            }
        }
    }
}

fn check_access_mode(flags: u32, mode: libc::c_int) -> io::Result<()> {
    if (flags & libc::O_ACCMODE as u32) == mode as u32 {
        Ok(())
    } else {
        Err(io::Error::from_raw_os_error(libc::EACCES))
    }
}

// `blk_size` returns `CHUNK_SIZE`; `create_stat` stores it in `st_blksize`. The return type is
// `c_int` on 64-bit aarch64 and `c_long` on every other target, presumably to match the type of
// `st_blksize` in `libc::stat64` on those targets (TODO confirm against the libc crate).
cfg_if::cfg_if! {
    if #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))] {
        fn blk_size() -> libc::c_int { CHUNK_SIZE as libc::c_int }
    } else {
        fn blk_size() -> libc::c_long { CHUNK_SIZE as libc::c_long }
    }
}

/// How the mode bits of a stat are chosen by `create_stat` / `create_dir_stat`.
#[allow(clippy::enum_variant_names)]
enum AccessMode {
    /// Use the fixed read-only permission bits hard-coded in the stat helpers.
    ReadOnly,
    /// Use the given mode bits, OR-ed with the file type bits by the stat helpers.
    Variable(u32),
}

fn create_stat(
    ino: libc::ino_t,
    file_size: u64,
    access_mode: AccessMode,
) -> io::Result<libc::stat64> {
    // SAFETY: stat64 is a plan C struct without pointer.
    let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };

    st.st_ino = ino;
    st.st_mode = match access_mode {
        AccessMode::ReadOnly => {
            // Until needed, let's just grant the owner access.
            libc::S_IFREG | libc::S_IRUSR
        }
        AccessMode::Variable(mode) => libc::S_IFREG | mode,
    };
    st.st_nlink = 1;
    st.st_uid = 0;
    st.st_gid = 0;
    st.st_size = libc::off64_t::try_from(file_size)
        .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
    st.st_blksize = blk_size();
    // Per man stat(2), st_blocks is "Number of 512B blocks allocated".
    st.st_blocks = libc::c_longlong::try_from(divide_roundup(file_size, 512))
        .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
    Ok(st)
}

fn create_dir_stat(
    ino: libc::ino_t,
    file_number: u16,
    access_mode: AccessMode,
) -> io::Result<libc::stat64> {
    // SAFETY: stat64 is a plan C struct without pointer.
    let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };

    st.st_ino = ino;
    st.st_mode = match access_mode {
        AccessMode::ReadOnly => {
            // Until needed, let's just grant the owner access and search to group and others.
            libc::S_IFDIR | libc::S_IXUSR | libc::S_IRUSR | libc::S_IXGRP | libc::S_IXOTH
        }
        AccessMode::Variable(mode) => libc::S_IFDIR | mode,
    };

    // 2 extra for . and ..
    st.st_nlink = file_number
        .checked_add(2)
        .ok_or_else(|| io::Error::from_raw_os_error(libc::EOVERFLOW))?
        .into();

    st.st_uid = 0;
    st.st_gid = 0;
    Ok(st)
}

/// Returns the index of the `CHUNK_SIZE`-sized chunk that contains the byte at `offset`.
fn offset_to_chunk_index(offset: u64) -> u64 {
    offset / CHUNK_SIZE
}

/// Reads up to `size` bytes of `file` starting at `offset` and writes them to `w`, one chunk at
/// a time. Returns the number of bytes written.
///
/// The request is clamped to `file_size`, so reading at or past the end of the file writes
/// nothing and returns 0.
///
/// # Errors
///
/// * `ENODATA` if a chunk read returns fewer bytes than needed to cover the requested range.
/// * `EIO` if `w` accepts fewer bytes than offered.
/// * Any error returned by `file.read_chunk` or `w.write`.
fn read_chunks<W: io::Write, T: ReadByChunk>(
    mut w: W,
    file: &T,
    file_size: u64,
    offset: u64,
    size: u32,
) -> io::Result<usize> {
    let remaining = file_size.saturating_sub(offset);
    // Clamp in `u64` before narrowing: the result never exceeds `size` (a `u32`), whereas
    // casting `remaining` to `usize` first would truncate it on 32-bit targets.
    let size_to_read = std::cmp::min(u64::from(size), remaining) as usize;
    let total = ChunkedSizeIter::new(size_to_read, offset, CHUNK_SIZE as usize).try_fold(
        0,
        |total, (current_offset, planned_data_size)| {
            // TODO(victorhsieh): There might be a non-trivial way to avoid this copy. For example,
            // instead of accepting a buffer, the writer could expose the final destination buffer
            // for the reader to write to. It might not be generally applicable though, e.g. with
            // virtio transport, the buffer may not be continuous.
            let mut buf = [0u8; CHUNK_SIZE as usize];
            let read_size = file.read_chunk(offset_to_chunk_index(current_offset), &mut buf)?;

            let begin = (current_offset % CHUNK_SIZE) as usize;
            let end = begin + planned_data_size;
            // The chunk is read from its beginning, so the data actually read must reach `end`.
            // Otherwise part of `buf[begin..end]` would be the zero padding rather than file
            // content.
            if read_size < end {
                return Err(io::Error::from_raw_os_error(libc::ENODATA));
            }

            let s = w.write(&buf[begin..end])?;
            if s != planned_data_size {
                return Err(io::Error::from_raw_os_error(libc::EIO));
            }
            Ok(total + s)
        },
    )?;

    Ok(total)
}

impl FileSystem for AuthFs {
    type Inode = Inode;
    type Handle = Handle;
    type DirIter = DirEntriesSnapshotIterator;

    /// Returns `MAX_WRITE_BYTES` (defined in the `mount` module) as the maximum buffer size.
    fn max_buffer_size(&self) -> u32 {
        MAX_WRITE_BYTES
    }

    /// Returns the filesystem options to enable. `_capable` is not consulted: `WRITEBACK_CACHE`
    /// is always the only option returned.
    fn init(&self, _capable: FsOptions) -> io::Result<FsOptions> {
        // Enable writeback cache for better performance especially since our bandwidth to the
        // backend service is limited.
        Ok(FsOptions::WRITEBACK_CACHE)
    }

    fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry> {
        let inode_table = self.inode_table.read().unwrap();

        // Look up the entry's inode number in parent directory.
        let inode =
            handle_inode_locked(&inode_table, &parent, |inode_state| match &inode_state.entry {
                AuthFsEntry::ReadonlyDirectory { dir } => {
                    let path = cstr_to_path(name);
                    dir.lookup_inode(path).ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))
                }
                AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
                    let path = cstr_to_path(name);
                    dir.find_inode(path)
                }
                _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
            })?;

        // Create the entry's stat if found.
        let st = handle_inode_locked(
            &inode_table,
            &inode,
            |InodeState { entry, handle_ref_count, .. }| {
                let st = match entry {
                    AuthFsEntry::ReadonlyDirectory { dir } => {
                        create_dir_stat(inode, dir.number_of_entries(), AccessMode::ReadOnly)
                    }
                    AuthFsEntry::UnverifiedReadonly { file_size, .. } => {
                        create_stat(inode, *file_size, AccessMode::ReadOnly)
                    }
                    AuthFsEntry::VerifiedReadonly { reader } => {
                        create_stat(inode, reader.file_size()?, AccessMode::ReadOnly)
                    }
                    AuthFsEntry::VerifiedNew { editor, attr, .. } => {
                        create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))
                    }
                    AuthFsEntry::VerifiedNewDirectory { dir, attr } => create_dir_stat(
                        inode,
                        dir.number_of_entries(),
                        AccessMode::Variable(attr.mode()),
                    ),
                }?;
                if handle_ref_count.fetch_add(1, Ordering::Relaxed) == u64::MAX {
                    panic!("Handle reference count overflow");
                }
                Ok(st)
            },
        )?;

        Ok(Entry {
            inode,
            generation: 0,
            attr: st,
            entry_timeout: DEFAULT_METADATA_TIMEOUT,
            attr_timeout: DEFAULT_METADATA_TIMEOUT,
        })
    }

    /// Decreases the reference count of `inode` by `count`, and removes the inode from the table
    /// once it is both unlinked and no longer referenced.
    ///
    /// Panics if `count` exceeds the current reference count. A failure to locate the inode is
    /// only logged.
    fn forget(&self, _ctx: Context, inode: Self::Inode, count: u64) {
        let mut table = self.inode_table.write().unwrap();
        let outcome = handle_inode_mut_locked(&mut table, &inode, |state| {
            let current = state.handle_ref_count.get_mut();
            if count > *current {
                error!(
                    "Trying to decrease refcount of inode {} by {} (> current {})",
                    inode, count, *current
                );
                panic!(); // log to logcat with error!
            }
            *current -= count;
            Ok(state.unlinked && *current == 0)
        });

        match outcome {
            Ok(should_delete) => {
                if should_delete {
                    let _removed = table.remove(&inode).expect("Removed an existing entry");
                }
                // Otherwise, let the inode stay.
            }
            Err(e) => {
                warn!(
                    "Unexpected failure when tries to forget an inode {} by refcount {}: {:?}",
                    inode, count, e
                );
            }
        }
    }

    /// Returns the stat of `inode` together with `DEFAULT_METADATA_TIMEOUT` as the cache timeout.
    /// The handle is not used.
    fn getattr(
        &self,
        _ctx: Context,
        inode: Inode,
        _handle: Option<Handle>,
    ) -> io::Result<(libc::stat64, Duration)> {
        self.handle_inode(&inode, |entry| {
            let stat = match entry {
                AuthFsEntry::VerifiedNewDirectory { dir, attr } => create_dir_stat(
                    inode,
                    dir.number_of_entries(),
                    AccessMode::Variable(attr.mode()),
                ),
                AuthFsEntry::VerifiedNew { editor, attr, .. } => {
                    create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))
                }
                AuthFsEntry::VerifiedReadonly { reader } => {
                    create_stat(inode, reader.file_size()?, AccessMode::ReadOnly)
                }
                AuthFsEntry::UnverifiedReadonly { file_size, .. } => {
                    create_stat(inode, *file_size, AccessMode::ReadOnly)
                }
                AuthFsEntry::ReadonlyDirectory { dir } => {
                    create_dir_stat(inode, dir.number_of_entries(), AccessMode::ReadOnly)
                }
            }?;
            Ok((stat, DEFAULT_METADATA_TIMEOUT))
        })
    }

    fn open(
        &self,
        _ctx: Context,
        inode: Self::Inode,
        flags: u32,
    ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)> {
        // Since file handle is not really used in later operations (which use Inode directly),
        // return None as the handle.
        self.handle_inode(&inode, |config| {
            match config {
                AuthFsEntry::VerifiedReadonly { .. } | AuthFsEntry::UnverifiedReadonly { .. } => {
                    check_access_mode(flags, libc::O_RDONLY)?;
                }
                AuthFsEntry::VerifiedNew { .. } => {
                    // TODO(victorhsieh): Imeplement ACL check using the attr and ctx. Always allow
                    // for now.
                }
                AuthFsEntry::ReadonlyDirectory { .. }
                | AuthFsEntry::VerifiedNewDirectory { .. } => {
                    // TODO(victorhsieh): implement when needed.
                    return Err(io::Error::from_raw_os_error(libc::ENOSYS));
                }
            }
            // Always cache the file content. There is currently no need to support direct I/O or
            // avoid the cache buffer. Memory mapping is only possible with cache enabled.
            Ok((None, FuseOpenOptions::KEEP_CACHE))
        })
    }

    /// Creates a new `VerifiedNew` file called `name` in the directory `parent`.
    ///
    /// The file is created with the permission bits `mode & !umask`, and the returned stat
    /// reports those same bits. The new inode starts with a reference count of one, and no
    /// handle is returned (see also `open`).
    ///
    /// # Errors
    ///
    /// * `EEXIST` if the directory already has an entry called `name`.
    /// * `EBADF` if `parent` is not a `VerifiedNewDirectory`.
    /// * Any error from the directory's `create_file` or from locating `parent`.
    fn create(
        &self,
        _ctx: Context,
        parent: Self::Inode,
        name: &CStr,
        mode: u32,
        _flags: u32,
        umask: u32,
    ) -> io::Result<(Entry, Option<Self::Handle>, FuseOpenOptions)> {
        // Apply the umask once, so that the mode the file is created with and the mode reported
        // back are the same. The kernel may cache the returned stat for
        // `DEFAULT_METADATA_TIMEOUT`.
        let mode = mode & !umask;
        let new_inode = self.create_new_entry_with_ref_count(
            parent,
            name,
            |parent_entry, basename, new_inode| match parent_entry {
                AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
                    if dir.has_entry(basename) {
                        return Err(io::Error::from_raw_os_error(libc::EEXIST));
                    }
                    let (new_file, new_attr) = dir.create_file(basename, new_inode, mode)?;
                    Ok(AuthFsEntry::VerifiedNew { editor: new_file, attr: new_attr })
                }
                _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
            },
        )?;

        Ok((
            Entry {
                inode: new_inode,
                generation: 0,
                attr: create_stat(new_inode, /* file_size */ 0, AccessMode::Variable(mode))?,
                entry_timeout: DEFAULT_METADATA_TIMEOUT,
                attr_timeout: DEFAULT_METADATA_TIMEOUT,
            },
            // See also `open`.
            /* handle */ None,
            FuseOpenOptions::KEEP_CACHE,
        ))
    }

    /// Reads up to `size` bytes at `offset` from the file `inode` into `w` via `read_chunks`, and
    /// returns the number of bytes written to `w`. Fails with `EISDIR` for directories.
    fn read<W: io::Write + ZeroCopyWriter>(
        &self,
        _ctx: Context,
        inode: Inode,
        _handle: Handle,
        w: W,
        size: u32,
        offset: u64,
        _lock_owner: Option<u64>,
        _flags: u32,
    ) -> io::Result<usize> {
        self.handle_inode(&inode, |entry| match entry {
            AuthFsEntry::ReadonlyDirectory { .. } | AuthFsEntry::VerifiedNewDirectory { .. } => {
                Err(io::Error::from_raw_os_error(libc::EISDIR))
            }
            AuthFsEntry::VerifiedNew { editor, .. } => {
                // Note that with FsOptions::WRITEBACK_CACHE, it's possible for the kernel to
                // request a read even if the file is open with O_WRONLY.
                let current_size = editor.size();
                read_chunks(w, editor, current_size, offset, size)
            }
            AuthFsEntry::UnverifiedReadonly { reader, file_size } => {
                read_chunks(w, reader, *file_size, offset, size)
            }
            AuthFsEntry::VerifiedReadonly { reader } => {
                let verified_size = reader.file_size()?;
                read_chunks(w, reader, verified_size, offset, size)
            }
        })
    }

    fn write<R: io::Read + ZeroCopyReader>(
        &self,
        _ctx: Context,
        inode: Self::Inode,
        _handle: Self::Handle,
        mut r: R,
        size: u32,
        offset: u64,
        _lock_owner: Option<u64>,
        _delayed_write: bool,
        _flags: u32,
    ) -> io::Result<usize> {
        self.handle_inode(&inode, |config| match config {
            AuthFsEntry::VerifiedNew { editor, .. } => {
                let mut buf = vec![0; size as usize];
                r.read_exact(&mut buf)?;
                editor.write_at(&buf, offset)
            }
            AuthFsEntry::VerifiedReadonly { .. } | AuthFsEntry::UnverifiedReadonly { .. } => {
                Err(io::Error::from_raw_os_error(libc::EPERM))
            }
            AuthFsEntry::ReadonlyDirectory { .. } | AuthFsEntry::VerifiedNewDirectory { .. } => {
                Err(io::Error::from_raw_os_error(libc::EISDIR))
            }
        })
    }

    fn setattr(
        &self,
        _ctx: Context,
        inode: Inode,
        in_attr: libc::stat64,
        _handle: Option<Handle>,
        valid: SetattrValid,
    ) -> io::Result<(libc::stat64, Duration)> {
        let mut inode_table = self.inode_table.write().unwrap();
        handle_inode_mut_locked(&mut inode_table, &inode, |InodeState { entry, .. }| match entry {
            AuthFsEntry::VerifiedNew { editor, attr } => {
                check_unsupported_setattr_request(valid)?;

                // Initialize the default stat.
                let mut new_attr =
                    create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))?;
                // `valid` indicates what fields in `attr` are valid. Update to return correctly.
                if valid.contains(SetattrValid::SIZE) {
                    // st_size is i64, but the cast should be safe since kernel should not give a
                    // negative size.
                    debug_assert!(in_attr.st_size >= 0);
                    new_attr.st_size = in_attr.st_size;
                    editor.resize(in_attr.st_size as u64)?;
                }
                if valid.contains(SetattrValid::MODE) {
                    attr.set_mode(in_attr.st_mode)?;
                    new_attr.st_mode = in_attr.st_mode;
                }
                Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
            }
            AuthFsEntry::VerifiedNewDirectory { dir, attr } => {
                check_unsupported_setattr_request(valid)?;
                if valid.contains(SetattrValid::SIZE) {
                    return Err(io::Error::from_raw_os_error(libc::EISDIR));
                }

                // Initialize the default stat.
                let mut new_attr = create_dir_stat(
                    inode,
                    dir.number_of_entries(),
                    AccessMode::Variable(attr.mode()),
                )?;
                if valid.contains(SetattrValid::MODE) {
                    attr.set_mode(in_attr.st_mode)?;
                    new_attr.st_mode = in_attr.st_mode;
                }
                Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
            }
            _ => Err(io::Error::from_raw_os_error(libc::EPERM)),
        })
    }

    fn getxattr(
        &self,
        _ctx: Context,
        inode: Self::Inode,
        name: &CStr,
        size: u32,
    ) -> io::Result<GetxattrReply> {
        self.handle_inode(&inode, |config| {
            match config {
                AuthFsEntry::VerifiedNew { editor, .. } => {
                    // FUSE ioctl is limited, thus we can't implement fs-verity ioctls without a kernel
                    // change (see b/196635431). Until it's possible, use xattr to expose what we need
                    // as an authfs specific API.
                    if name != CStr::from_bytes_with_nul(b"authfs.fsverity.digest\0").unwrap() {
                        return Err(io::Error::from_raw_os_error(libc::ENODATA));
                    }

                    if size == 0 {
                        // Per protocol, when size is 0, return the value size.
                        Ok(GetxattrReply::Count(editor.get_fsverity_digest_size() as u32))
                    } else {
                        let digest = editor.calculate_fsverity_digest()?;
                        if digest.len() > size as usize {
                            Err(io::Error::from_raw_os_error(libc::ERANGE))
                        } else {
                            Ok(GetxattrReply::Value(digest.to_vec()))
                        }
                    }
                }
                _ => Err(io::Error::from_raw_os_error(libc::ENODATA)),
            }
        })
    }

    /// Creates the directory `name` under `parent` and returns its `Entry`.
    ///
    /// The effective permission bits are `mode & !umask`. The same value is stored with the new
    /// directory and reported in the returned attributes, so that the reply (which is handed out
    /// with `DEFAULT_METADATA_TIMEOUT`) agrees with what was actually created.
    ///
    /// # Errors
    ///
    /// * `EEXIST` if `parent` already has an entry called `name`.
    /// * `EACCES` if `parent` is a read-only directory.
    /// * `EBADF` if `parent` is not a directory.
    /// * Any error from `create_new_entry_with_ref_count`, the directory's `mkdir` or
    ///   `create_dir_stat`.
    fn mkdir(
        &self,
        _ctx: Context,
        parent: Self::Inode,
        name: &CStr,
        mode: u32,
        umask: u32,
    ) -> io::Result<Entry> {
        // Apply the umask once, up front, so every use below sees the same permission bits.
        let mode = mode & !umask;

        let new_inode = self.create_new_entry_with_ref_count(
            parent,
            name,
            |parent_entry, basename, new_inode| match parent_entry {
                AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
                    if dir.has_entry(basename) {
                        return Err(io::Error::from_raw_os_error(libc::EEXIST));
                    }
                    let (new_dir, new_attr) = dir.mkdir(basename, new_inode, mode)?;
                    Ok(AuthFsEntry::VerifiedNewDirectory { dir: new_dir, attr: new_attr })
                }
                AuthFsEntry::ReadonlyDirectory { .. } => {
                    Err(io::Error::from_raw_os_error(libc::EACCES))
                }
                _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
            },
        )?;

        Ok(Entry {
            inode: new_inode,
            generation: 0,
            // A freshly created directory has no entries yet.
            attr: create_dir_stat(new_inode, /* file_number */ 0, AccessMode::Variable(mode))?,
            entry_timeout: DEFAULT_METADATA_TIMEOUT,
            attr_timeout: DEFAULT_METADATA_TIMEOUT,
        })
    }

    /// Removes the file `name` from the directory `parent`.
    ///
    /// Only a `VerifiedNewDirectory` parent supports deletion. Other parents are rejected with
    /// `EACCES` (read-only directory), `ENOSYS` (`VerifiedNew`) or `ENOTDIR` (read-only files).
    fn unlink(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
        let mut table = self.inode_table.write().unwrap();
        handle_inode_mut_locked(&mut table, &parent, |parent_state| {
            let errno = match &mut parent_state.entry {
                AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
                    // Delete the file from both the local and remote directories.
                    let _inode = dir.delete_file(cstr_to_path(name))?;
                    // NOTE(review): this sets `unlinked` on the state looked up for `parent`,
                    // not on the state of the entry that was just deleted -- confirm that this
                    // is intended.
                    parent_state.unlinked = true;
                    return Ok(());
                }
                AuthFsEntry::ReadonlyDirectory { .. } => libc::EACCES,
                // Deleting an entry in filesystem root is not currently supported.
                AuthFsEntry::VerifiedNew { .. } => libc::ENOSYS,
                AuthFsEntry::UnverifiedReadonly { .. } | AuthFsEntry::VerifiedReadonly { .. } => {
                    libc::ENOTDIR
                }
            };
            Err(io::Error::from_raw_os_error(errno))
        })
    }

    fn rmdir(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
        let mut inode_table = self.inode_table.write().unwrap();

        // Check before actual removal, with readonly borrow.
        handle_inode_locked(&inode_table, &parent, |inode_state| match &inode_state.entry {
            AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
                let basename: &Path = cstr_to_path(name);
                let existing_inode = dir.find_inode(basename)?;
                handle_inode_locked(&inode_table, &existing_inode, |inode_state| {
                    inode_state.entry.expect_empty_deletable_directory()
                })
            }
            AuthFsEntry::ReadonlyDirectory { .. } => {
                Err(io::Error::from_raw_os_error(libc::EACCES))
            }
            _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
        })?;

        // Look up again, this time with mutable borrow. This needs to be done separately because
        // the previous lookup needs to borrow multiple entry references in the table.
        handle_inode_mut_locked(
            &mut inode_table,
            &parent,
            |InodeState { entry, unlinked, .. }| match entry {
                AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
                    let basename: &Path = cstr_to_path(name);
                    let _inode = dir.force_delete_directory(basename)?;
                    *unlinked = true;
                    Ok(())
                }
                _ => unreachable!("Mismatched entry type that is just checked"),
            },
        )
    }

    /// Opens the directory `inode` for listing.
    ///
    /// The directory's entries are retrieved once and handed to `open_dir_store_snapshot`, whose
    /// result is returned. Inodes that are not directories are rejected with `ENOTDIR`.
    fn opendir(
        &self,
        _ctx: Context,
        inode: Self::Inode,
        _flags: u32,
    ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)> {
        self.handle_inode(&inode, |entry| match entry {
            AuthFsEntry::ReadonlyDirectory { dir } => dir.retrieve_entries(),
            AuthFsEntry::VerifiedNewDirectory { dir, .. } => dir.retrieve_entries(),
            _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
        })
        .and_then(|listing| self.open_dir_store_snapshot(listing))
    }

    fn readdir(
        &self,
        _ctx: Context,
        _inode: Self::Inode,
        handle: Self::Handle,
        _size: u32,
        offset: u64,
    ) -> io::Result<Self::DirIter> {
        let dir_handle_table = self.dir_handle_table.read().unwrap();
        if let Some(entry) = dir_handle_table.get(&handle) {
            Ok(DirEntriesSnapshotIterator {
                snapshot: entry.clone(),
                prev_offset: offset.try_into().unwrap(),
            })
        } else {
            Err(io::Error::from_raw_os_error(libc::EBADF))
        }
    }

    /// Drops the entries snapshot registered under `handle`.
    ///
    /// # Panics
    ///
    /// Panics if `handle` is not present in the directory handle table.
    fn releasedir(
        &self,
        _ctx: Context,
        inode: Self::Inode,
        _flags: u32,
        handle: Self::Handle,
    ) -> io::Result<()> {
        let removed = self.dir_handle_table.write().unwrap().remove(&handle);
        match removed {
            Some(_) => Ok(()),
            None => unreachable!("Unknown directory handle {}, inode {}", handle, inode),
        }
    }

    /// Reports filesystem statistics, derived mostly from the remote filesystem's statistics.
    ///
    /// Fails with whatever error the remote stats reader returns.
    fn statfs(&self, _ctx: Context, _inode: Self::Inode) -> io::Result<libc::statvfs64> {
        let remote = self.remote_fs_stats_reader.statfs()?;
        // Number of inodes on the filesystem, taken from the size of the inode table.
        let inode_count = self.inode_table.read().unwrap().len() as u64;

        // SAFETY: We are zero-initializing a struct with only POD fields. Not all fields matter
        // to FUSE. See also:
        // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/fuse/inode.c?h=v5.15#n460
        let mut stat: libc::statvfs64 = unsafe { zeroed() };

        // The remote stat serves as the template, since what matters most is the writable
        // files/directories, and those are written to the remote.
        stat.f_bsize = remote.block_size;
        stat.f_frsize = remote.fragment_size;
        stat.f_namemax = remote.max_filename;
        stat.f_blocks = remote.block_numbers;
        stat.f_bavail = remote.block_available;
        stat.f_favail = remote.inodes_available;
        // We are assumed not to be privileged to use all the free space on the remote server, so
        // the "free" counters simply mirror the "available" ones.
        stat.f_bfree = stat.f_bavail;
        stat.f_ffree = stat.f_favail;
        stat.f_files = inode_count;

        Ok(stat)
    }
}

fn handle_inode_locked<F, R>(
    inode_table: &BTreeMap<Inode, InodeState>,
    inode: &Inode,
    handle_fn: F,
) -> io::Result<R>
where
    F: FnOnce(&InodeState) -> io::Result<R>,
{
    if let Some(inode_state) = inode_table.get(inode) {
        handle_fn(inode_state)
    } else {
        Err(io::Error::from_raw_os_error(libc::ENOENT))
    }
}

fn handle_inode_mut_locked<F, R>(
    inode_table: &mut BTreeMap<Inode, InodeState>,
    inode: &Inode,
    handle_fn: F,
) -> io::Result<R>
where
    F: FnOnce(&mut InodeState) -> io::Result<R>,
{
    if let Some(inode_state) = inode_table.get_mut(inode) {
        handle_fn(inode_state)
    } else {
        Err(io::Error::from_raw_os_error(libc::ENOENT))
    }
}

fn check_unsupported_setattr_request(valid: SetattrValid) -> io::Result<()> {
    if valid.contains(SetattrValid::UID) {
        warn!("Changing st_uid is not currently supported");
        return Err(io::Error::from_raw_os_error(libc::ENOSYS));
    }
    if valid.contains(SetattrValid::GID) {
        warn!("Changing st_gid is not currently supported");
        return Err(io::Error::from_raw_os_error(libc::ENOSYS));
    }
    if valid.intersects(
        SetattrValid::CTIME
            | SetattrValid::ATIME
            | SetattrValid::ATIME_NOW
            | SetattrValid::MTIME
            | SetattrValid::MTIME_NOW,
    ) {
        trace!("Ignoring ctime/atime/mtime change as authfs does not maintain timestamp currently");
    }
    Ok(())
}

/// Reinterprets the bytes of `cstr` (without the trailing NUL) as a borrowed `Path`.
///
/// No copy and no UTF-8 validation take place.
fn cstr_to_path(cstr: &CStr) -> &Path {
    let raw_bytes = cstr.to_bytes();
    Path::new(OsStr::from_bytes(raw_bytes))
}
