Re-organize authfs directories
* authfs/fd_server -> android/fd_server
* authfs/service -> guest/authfs_service
* authfs -> guest/authfs
* authfs/aidl -> libs/authfs_aidl_interface
* authfs/tests -> tests/authfs
* authfs/testdata -> tests/authfs/testdata
Bug: 352458998
Test: pass TH
Change-Id: I5962d2fafc9f05b240068740ee1b6369406eb1f5
diff --git a/guest/authfs/src/fusefs.rs b/guest/authfs/src/fusefs.rs
new file mode 100644
index 0000000..618b8ac
--- /dev/null
+++ b/guest/authfs/src/fusefs.rs
@@ -0,0 +1,1075 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+mod file;
+mod mount;
+
+use anyhow::{anyhow, bail, Result};
+use fuse::filesystem::{
+ Context, DirEntry, DirectoryIterator, Entry, FileSystem, FsOptions, GetxattrReply,
+ SetattrValid, ZeroCopyReader, ZeroCopyWriter,
+};
+use fuse::sys::OpenOptions as FuseOpenOptions;
+use log::{error, trace, warn};
+use std::collections::{btree_map, BTreeMap};
+use std::convert::{TryFrom, TryInto};
+use std::ffi::{CStr, CString, OsStr};
+use std::io;
+use std::mem::{zeroed, MaybeUninit};
+use std::option::Option;
+use std::os::unix::ffi::OsStrExt;
+use std::path::{Component, Path, PathBuf};
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::{Arc, RwLock};
+use std::time::Duration;
+
+use crate::common::{divide_roundup, ChunkedSizeIter, CHUNK_SIZE};
+use crate::file::{
+ validate_basename, Attr, InMemoryDir, RandomWrite, ReadByChunk, RemoteDirEditor,
+ RemoteFileEditor, RemoteFileReader,
+};
+use crate::fsstat::RemoteFsStatsReader;
+use crate::fsverity::VerifiedFileEditor;
+
+pub use self::file::LazyVerifiedReadonlyFile;
+pub use self::mount::mount_and_enter_message_loop;
+use self::mount::MAX_WRITE_BYTES;
+
+pub type Inode = u64;
+type Handle = u64;
+
+/// Maximum time for a file's metadata to be cached by the kernel. Since any file and directory
+/// changes (if not read-only) have to go through AuthFS to be trusted, the timeout can be maximum.
+const DEFAULT_METADATA_TIMEOUT: Duration = Duration::MAX;
+
+const ROOT_INODE: Inode = 1;
+
+/// `AuthFsEntry` defines the filesystem entry types supported by AuthFS.
+pub enum AuthFsEntry {
+ /// A read-only directory (writable during initialization). Root directory is an example.
+ ReadonlyDirectory { dir: InMemoryDir },
+ /// A file type that is verified against fs-verity signature (thus read-only). The file is
+ /// served from a remote server.
+ VerifiedReadonly { reader: LazyVerifiedReadonlyFile },
+ /// A file type that is a read-only passthrough from a file on a remote server.
+ UnverifiedReadonly { reader: RemoteFileReader, file_size: u64 },
+ /// A file type that is initially empty, and the content is stored on a remote server. File
+ /// integrity is guaranteed with a private Merkle tree.
+ VerifiedNew { editor: VerifiedFileEditor<RemoteFileEditor>, attr: Attr },
+ /// A directory type that is initially empty. One can create new files (`VerifiedNew`) and new
+ /// directories (`VerifiedNewDirectory` itself) with integrity guaranteed within the VM.
+ VerifiedNewDirectory { dir: RemoteDirEditor, attr: Attr },
+}
+
+impl AuthFsEntry {
+    /// Checks that this entry is a directory that is allowed to be removed.
+    ///
+    /// Returns `Ok(())` only for a `VerifiedNewDirectory` without entries. Otherwise fails with
+    /// `ENOTEMPTY` (non-empty `VerifiedNewDirectory`), `EACCES` (`ReadonlyDirectory`), or
+    /// `ENOTDIR` (any other entry type).
+    fn expect_empty_deletable_directory(&self) -> io::Result<()> {
+        let errno = match self {
+            AuthFsEntry::VerifiedNewDirectory { dir, .. } if dir.number_of_entries() == 0 => {
+                return Ok(());
+            }
+            AuthFsEntry::VerifiedNewDirectory { .. } => libc::ENOTEMPTY,
+            AuthFsEntry::ReadonlyDirectory { .. } => libc::EACCES,
+            _ => libc::ENOTDIR,
+        };
+        Err(io::Error::from_raw_os_error(errno))
+    }
+}
+
+struct InodeState {
+ /// Actual inode entry.
+ entry: AuthFsEntry,
+
+ /// Number of `Handle`s (i.e. file descriptors) that are currently referring to this inode.
+ ///
+ /// Technically, this does not matter to readonly entries, since they live forever. The
+ /// reference count is only needed for managing the lifetime of writable entries like
+ /// `VerifiedNew` and `VerifiedNewDirectory`. That is, when an entry is deleted, the actual
+ /// entry needs to stay alive until the reference count reaches zero.
+ ///
+ /// Note: This is not to be confused with hardlinks, which AuthFS doesn't currently implement.
+ handle_ref_count: AtomicU64,
+
+ /// Whether the inode is already unlinked, i.e. should be removed, once `handle_ref_count` is
+ /// down to zero.
+ unlinked: bool,
+}
+
+impl InodeState {
+    /// Wraps `entry` in a state that has no outstanding references and is not unlinked.
+    fn new(entry: AuthFsEntry) -> Self {
+        Self::new_with_ref_count(entry, 0)
+    }
+
+    /// Wraps `entry` in a state that starts with `handle_ref_count` references and is not
+    /// unlinked.
+    fn new_with_ref_count(entry: AuthFsEntry, handle_ref_count: u64) -> Self {
+        let handle_ref_count = AtomicU64::new(handle_ref_count);
+        Self { entry, handle_ref_count, unlinked: false }
+    }
+}
+
+/// Data type through which a directory implementation presents one of its entries to `AuthFs`.
+#[derive(Clone)]
+pub struct AuthFsDirEntry {
+ pub inode: Inode, // Inode number of the entry.
+ pub name: CString, // Entry name; the snapshot iterator hands it out as the `DirEntry` name.
+ pub is_dir: bool, // The snapshot iterator reports `DT_DIR` when true, `DT_REG` otherwise.
+}
+
+/// A snapshot of a directory's entries for supporting the `readdir` operation.
+///
+/// The `readdir` implementation is required by FUSE to not return any entries that have been
+/// returned previously (while it's fine to not return new entries). Snapshot is the easiest way to
+/// be compliant. See `fuse::filesystem::readdir` for more details.
+///
+/// A `DirEntriesSnapshot` is created on `opendir`, and is associated with the returned
+/// `Handle`/FD. The snapshot is deleted when the handle is released in `releasedir`.
+type DirEntriesSnapshot = Vec<AuthFsDirEntry>;
+
+/// An iterator for reading entries out of a `DirEntriesSnapshot`, resuming after `prev_offset`.
+pub struct DirEntriesSnapshotIterator {
+ /// A shared reference to the `DirEntriesSnapshot` that is stored in `AuthFs`.
+ snapshot: Arc<DirEntriesSnapshot>,
+
+ /// The 1-based position in the snapshot of the last returned entry. 0 is a reserved value by
+ /// FUSE to mean reading from the beginning.
+ prev_offset: usize,
+}
+
+impl DirectoryIterator for DirEntriesSnapshotIterator {
+    /// Returns the snapshot entry that follows the one identified by `prev_offset`, or `None`
+    /// once the snapshot is exhausted.
+    ///
+    /// Offsets handed out in `DirEntry` are 1-based positions in the snapshot, because 0 is
+    /// reserved by FUSE to mean reading from the beginning.
+    fn next(&mut self) -> Option<DirEntry> {
+        // This iterator should not be the only reference to the snapshot. The snapshot should
+        // still be held in `dir_handle_table`, i.e. when the FD is not yet closed.
+        //
+        // This code is unreachable when `readdir` is called with a closed FD. Only when the FD is
+        // not yet closed, `DirEntriesSnapshotIterator` can be created (but still short-lived
+        // during `readdir`).
+        debug_assert!(Arc::strong_count(&self.snapshot) >= 2);
+
+        // `prev_offset` is the 1-based position of the last returned entry (0 at the start),
+        // which makes it exactly the 0-based index of the entry to return now. The checked
+        // lookup ends the iteration for any out-of-range offset instead of overflowing the
+        // addition or panicking on the index.
+        let AuthFsDirEntry { inode, name, is_dir } = self.snapshot.get(self.prev_offset)?;
+
+        // Cannot overflow: `prev_offset` was just proven to be a valid index into the vector.
+        let current_offset = self.prev_offset + 1;
+        self.prev_offset = current_offset;
+        Some(DirEntry {
+            offset: current_offset as u64,
+            ino: *inode,
+            name,
+            type_: if *is_dir { libc::DT_DIR.into() } else { libc::DT_REG.into() },
+        })
+    }
+}
+
+type DirHandleTable = BTreeMap<Handle, Arc<DirEntriesSnapshot>>;
+
+// AuthFS needs to be `Sync` to be used with the `fuse` crate.
+pub struct AuthFs {
+ /// Table for `Inode` to `InodeState` lookup.
+ inode_table: RwLock<BTreeMap<Inode, InodeState>>,
+
+ /// The next available inode number.
+ next_inode: AtomicU64,
+
+ /// Table for `Handle` to `Arc<DirEntriesSnapshot>` lookup. On `opendir`, a new directory
+ /// handle is created and the snapshot of the current directory is created. This is not
+ /// super efficient, but is the simplest way to be compliant with the FUSE contract (see
+ /// `fuse::filesystem::readdir`).
+ ///
+ /// Currently, no code locks `dir_handle_table` and `inode_table` at the same time to avoid
+ /// deadlock.
+ dir_handle_table: RwLock<DirHandleTable>,
+
+ /// The next available handle number.
+ next_handle: AtomicU64,
+
+ /// A reader to access the remote filesystem stats, which is supposed to be of "the" output
+ /// directory. We assume all outputs are stored in the same partition.
+ remote_fs_stats_reader: RemoteFsStatsReader,
+}
+
+// Implementation for preparing an `AuthFs` instance, before starting to serve.
+// TODO(victorhsieh): Consider implementing a builder to separate the mutable initialization from
+// the immutable / interiorly mutable serving phase.
+impl AuthFs {
+    /// Creates a filesystem whose only entry is an empty, read-only root directory.
+    ///
+    /// `remote_fs_stats_reader` is kept for answering `statfs` requests.
+    pub fn new(remote_fs_stats_reader: RemoteFsStatsReader) -> AuthFs {
+        let root = InodeState::new(AuthFsEntry::ReadonlyDirectory { dir: InMemoryDir::new() });
+        let inode_table = BTreeMap::from([(ROOT_INODE, root)]);
+
+        AuthFs {
+            inode_table: RwLock::new(inode_table),
+            // The root directory has taken the first inode number.
+            next_inode: AtomicU64::new(ROOT_INODE + 1),
+            dir_handle_table: RwLock::new(DirHandleTable::new()),
+            next_handle: AtomicU64::new(1),
+            remote_fs_stats_reader,
+        }
+    }
+
+    /// Registers `entry` under the name `basename` directly inside the filesystem root.
+    ///
+    /// Fails if `validate_basename` rejects `basename`, or if the entry cannot be added to the
+    /// root directory. On success, returns the inode allocated for the entry.
+    pub fn add_entry_at_root_dir(
+        &mut self,
+        basename: PathBuf,
+        entry: AuthFsEntry,
+    ) -> Result<Inode> {
+        let name: &Path = basename.as_path();
+        validate_basename(name)?;
+        self.add_entry_at_ro_dir_by_path(ROOT_INODE, name, entry)
+    }
+
+ /// Add an `AuthFsEntry` by path from the `ReadonlyDirectory` represented by `dir_inode`. The
+ /// path must be a related path. If some ancestor directories do not exist, they will be
+ /// created (also as `ReadonlyDirectory`) automatically.
+ pub fn add_entry_at_ro_dir_by_path(
+ &mut self,
+ dir_inode: Inode,
+ path: &Path,
+ entry: AuthFsEntry,
+ ) -> Result<Inode> {
+ // 1. Make sure the parent directories all exist. Derive the entry's parent inode.
+ let parent_path =
+ path.parent().ok_or_else(|| anyhow!("No parent directory: {:?}", path))?;
+ let parent_inode =
+ parent_path.components().try_fold(dir_inode, |current_dir_inode, path_component| {
+ match path_component {
+ Component::RootDir => bail!("Absolute path is not supported"),
+ Component::Normal(name) => {
+ let inode_table = self.inode_table.get_mut().unwrap();
+ // Locate the internal directory structure.
+ let current_dir_entry = &mut inode_table
+ .get_mut(¤t_dir_inode)
+ .ok_or_else(|| {
+ anyhow!("Unknown directory inode {}", current_dir_inode)
+ })?
+ .entry;
+ let dir = match current_dir_entry {
+ AuthFsEntry::ReadonlyDirectory { dir } => dir,
+ _ => unreachable!("Not a ReadonlyDirectory"),
+ };
+ // Return directory inode. Create first if not exists.
+ if let Some(existing_inode) = dir.lookup_inode(name.as_ref()) {
+ Ok(existing_inode)
+ } else {
+ let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
+ let new_dir_entry =
+ AuthFsEntry::ReadonlyDirectory { dir: InMemoryDir::new() };
+
+ // Actually update the tables.
+ dir.add_dir(name.as_ref(), new_inode)?;
+ if inode_table
+ .insert(new_inode, InodeState::new(new_dir_entry))
+ .is_some()
+ {
+ bail!("Unexpected to find a duplicated inode");
+ }
+ Ok(new_inode)
+ }
+ }
+ _ => Err(anyhow!("Path is not canonical: {:?}", path)),
+ }
+ })?;
+
+ // 2. Insert the entry to the parent directory, as well as the inode table.
+ let inode_table = self.inode_table.get_mut().unwrap();
+ let inode_state = inode_table.get_mut(&parent_inode).expect("previously returned inode");
+ match &mut inode_state.entry {
+ AuthFsEntry::ReadonlyDirectory { dir } => {
+ let basename =
+ path.file_name().ok_or_else(|| anyhow!("Bad file name: {:?}", path))?;
+ let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
+
+ // Actually update the tables.
+ dir.add_file(basename.as_ref(), new_inode)?;
+ if inode_table.insert(new_inode, InodeState::new(entry)).is_some() {
+ bail!("Unexpected to find a duplicated inode");
+ }
+ Ok(new_inode)
+ }
+ _ => unreachable!("Not a ReadonlyDirectory"),
+ }
+ }
+}
+
+// Implementation for serving requests.
+impl AuthFs {
+    /// Runs `handle_fn` on the entry registered for `inode`, and returns whatever it returns.
+    ///
+    /// The inode table stays read-locked while `handle_fn` runs. Fails with `ENOENT` if `inode`
+    /// is not in the table.
+    fn handle_inode<F, R>(&self, inode: &Inode, handle_fn: F) -> io::Result<R>
+    where
+        F: FnOnce(&AuthFsEntry) -> io::Result<R>,
+    {
+        let table = self.inode_table.read().unwrap();
+        match table.get(inode) {
+            Some(state) => handle_fn(&state.entry),
+            None => Err(io::Error::from_raw_os_error(libc::ENOENT)),
+        }
+    }
+
+    /// Creates a new entry called `name` under `parent_inode`, registered with an initial ref
+    /// count of one, and returns its inode.
+    ///
+    /// Two tables change in the process: the parent directory learns the new name together with
+    /// a newly allocated inode number, and the global inode table learns the new inode together
+    /// with its `AuthFsEntry`.
+    ///
+    /// `create_fn` is given the parent entry, the new basename and the new inode number. It is
+    /// responsible for creating the entry in the parent directory, and the entry it returns is
+    /// what gets stored in the inode table.
+    ///
+    /// Fails with `ENOENT` if `parent_inode` is unknown, or with the error from `create_fn`.
+    fn create_new_entry_with_ref_count<F>(
+        &self,
+        parent_inode: Inode,
+        name: &CStr,
+        create_fn: F,
+    ) -> io::Result<Inode>
+    where
+        F: FnOnce(&mut AuthFsEntry, &Path, Inode) -> io::Result<AuthFsEntry>,
+    {
+        let mut inode_table = self.inode_table.write().unwrap();
+
+        // The parent has to exist before an inode number is spent on the new entry.
+        let parent_state = inode_table
+            .get_mut(&parent_inode)
+            .ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))?;
+        let new_inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
+        let new_entry = create_fn(&mut parent_state.entry, cstr_to_path(name), new_inode)?;
+
+        match inode_table.entry(new_inode) {
+            btree_map::Entry::Vacant(slot) => {
+                slot.insert(InodeState::new_with_ref_count(new_entry, 1));
+                Ok(new_inode)
+            }
+            btree_map::Entry::Occupied(_) => {
+                unreachable!("Unexpected duplication of inode {}", new_inode)
+            }
+        }
+    }
+
+    /// Stores `dir_entries` as the snapshot of a newly allocated directory handle, and returns
+    /// that handle together with empty open options.
+    fn open_dir_store_snapshot(
+        &self,
+        dir_entries: Vec<AuthFsDirEntry>,
+    ) -> io::Result<(Option<Handle>, FuseOpenOptions)> {
+        let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
+        let mut table = self.dir_handle_table.write().unwrap();
+        match table.entry(handle) {
+            btree_map::Entry::Vacant(slot) => {
+                slot.insert(Arc::new(dir_entries));
+                Ok((Some(handle), FuseOpenOptions::empty()))
+            }
+            btree_map::Entry::Occupied(_) => {
+                unreachable!("Unexpected to see new handle {} to existing in the table", handle)
+            }
+        }
+    }
+}
+
+/// Succeeds only if the access mode bits (`O_ACCMODE`) of `flags` are exactly `mode`, and fails
+/// with `EACCES` otherwise.
+fn check_access_mode(flags: u32, mode: libc::c_int) -> io::Result<()> {
+    let requested = flags & libc::O_ACCMODE as u32;
+    if requested != mode as u32 {
+        return Err(io::Error::from_raw_os_error(libc::EACCES));
+    }
+    Ok(())
+}
+
+/// Block size to report in `st_blksize`, which is `CHUNK_SIZE`.
+///
+/// The return type differs per target so that the value can be assigned to `st_blksize` as is.
+#[cfg(all(
+    any(target_arch = "aarch64", target_arch = "riscv64"),
+    target_pointer_width = "64"
+))]
+fn blk_size() -> libc::c_int {
+    CHUNK_SIZE as libc::c_int
+}
+
+/// Block size to report in `st_blksize`, which is `CHUNK_SIZE`.
+///
+/// The return type differs per target so that the value can be assigned to `st_blksize` as is.
+#[cfg(not(all(
+    any(target_arch = "aarch64", target_arch = "riscv64"),
+    target_pointer_width = "64"
+)))]
+fn blk_size() -> libc::c_long {
+    CHUNK_SIZE as libc::c_long
+}
+
+#[allow(clippy::enum_variant_names)] // Selects how the stat helpers fill in `st_mode`.
+enum AccessMode {
+ ReadOnly, // The stat helper uses its own fixed read-only permission bits.
+ Variable(u32), // The stat helper ORs these mode bits with the file type bit.
+}
+
+/// Builds the `stat64` of a regular file with inode number `ino` and size `file_size`.
+///
+/// The file is reported with a single link, uid/gid 0, and the permission selected by
+/// `access_mode`. Fails with `EFBIG` if `file_size`, or the number of 512-byte blocks it takes,
+/// does not fit in the corresponding `stat64` field.
+fn create_stat(
+    ino: libc::ino_t,
+    file_size: u64,
+    access_mode: AccessMode,
+) -> io::Result<libc::stat64> {
+    let permission = match access_mode {
+        // Until needed, let's just grant the owner access.
+        AccessMode::ReadOnly => libc::S_IRUSR,
+        AccessMode::Variable(mode) => mode,
+    };
+
+    // SAFETY: stat64 is a plain C struct without pointer.
+    let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
+
+    st.st_ino = ino;
+    st.st_mode = libc::S_IFREG | permission;
+    st.st_nlink = 1;
+    st.st_uid = 0;
+    st.st_gid = 0;
+    st.st_size = match libc::off64_t::try_from(file_size) {
+        Ok(size) => size,
+        Err(_) => return Err(io::Error::from_raw_os_error(libc::EFBIG)),
+    };
+    st.st_blksize = blk_size();
+    // Per man stat(2), st_blocks is "Number of 512B blocks allocated".
+    st.st_blocks = match libc::c_longlong::try_from(divide_roundup(file_size, 512)) {
+        Ok(blocks) => blocks,
+        Err(_) => return Err(io::Error::from_raw_os_error(libc::EFBIG)),
+    };
+    Ok(st)
+}
+
+/// Builds the `stat64` of a directory with inode number `ino` that holds `file_number` entries.
+///
+/// The directory is reported with uid/gid 0, the permission selected by `access_mode`, and a
+/// link count of `file_number + 2`. Fails with `EOVERFLOW` if that sum overflows `u16`.
+fn create_dir_stat(
+    ino: libc::ino_t,
+    file_number: u16,
+    access_mode: AccessMode,
+) -> io::Result<libc::stat64> {
+    let permission = match access_mode {
+        // Until needed, let's just grant the owner access and search to group and others.
+        AccessMode::ReadOnly => libc::S_IXUSR | libc::S_IRUSR | libc::S_IXGRP | libc::S_IXOTH,
+        AccessMode::Variable(mode) => mode,
+    };
+    // 2 extra for . and ..
+    let link_count = match file_number.checked_add(2) {
+        Some(count) => count,
+        None => return Err(io::Error::from_raw_os_error(libc::EOVERFLOW)),
+    };
+
+    // SAFETY: stat64 is a plain C struct without pointer.
+    let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
+
+    st.st_ino = ino;
+    st.st_mode = libc::S_IFDIR | permission;
+    st.st_nlink = link_count.into();
+    st.st_uid = 0;
+    st.st_gid = 0;
+    Ok(st)
+}
+
+fn offset_to_chunk_index(offset: u64) -> u64 {
+ offset / CHUNK_SIZE // Integer division: index of the chunk that contains byte `offset`.
+}
+
+/// Reads up to `size` bytes of `file`, starting at `offset`, and writes them to `w`.
+///
+/// The read is clamped to `file_size`, so a read that starts at or beyond the end of the file
+/// transfers 0 bytes. The data is fetched one chunk at a time with `ReadByChunk::read_chunk`.
+///
+/// Returns the number of bytes written to `w`. Fails with `ENODATA` if a chunk holds less data
+/// than planned, with `EIO` if `w` accepts only a part of a chunk's data, or with the error
+/// returned by `read_chunk` or by `w.write`.
+fn read_chunks<W: io::Write, T: ReadByChunk>(
+    mut w: W,
+    file: &T,
+    file_size: u64,
+    offset: u64,
+    size: u32,
+) -> io::Result<usize> {
+    let remaining = file_size.saturating_sub(offset);
+    // Clamp in the u64 domain before narrowing. The result never exceeds `size` (a u32), so the
+    // final cast is lossless, whereas casting `remaining` first would truncate it wherever usize
+    // is only 32 bits wide.
+    let size_to_read = std::cmp::min(u64::from(size), remaining) as usize;
+
+    let mut total = 0;
+    for (current_offset, planned_data_size) in
+        ChunkedSizeIter::new(size_to_read, offset, CHUNK_SIZE as usize)
+    {
+        // TODO(victorhsieh): There might be a non-trivial way to avoid this copy. For example,
+        // instead of accepting a buffer, the writer could expose the final destination buffer
+        // for the reader to write to. It might not be generally applicable though, e.g. with
+        // virtio transport, the buffer may not be continuous.
+        let mut buf = [0u8; CHUNK_SIZE as usize];
+        let read_size = file.read_chunk(offset_to_chunk_index(current_offset), &mut buf)?;
+        if read_size < planned_data_size {
+            return Err(io::Error::from_raw_os_error(libc::ENODATA));
+        }
+
+        // Only the first chunk may start in the middle of the chunk.
+        let begin = (current_offset % CHUNK_SIZE) as usize;
+        let end = begin + planned_data_size;
+        let written = w.write(&buf[begin..end])?;
+        if written != planned_data_size {
+            return Err(io::Error::from_raw_os_error(libc::EIO));
+        }
+        total += written;
+    }
+
+    Ok(total)
+}
+
+impl FileSystem for AuthFs {
+ type Inode = Inode;
+ type Handle = Handle;
+ type DirIter = DirEntriesSnapshotIterator;
+
+ fn max_buffer_size(&self) -> u32 {
+ MAX_WRITE_BYTES // The limit is imported from the `mount` module.
+ }
+
+ fn init(&self, _capable: FsOptions) -> io::Result<FsOptions> {
+ // Enable the writeback cache for better performance, especially since our bandwidth to the
+ // backend service is limited. No other option is requested.
+ Ok(FsOptions::WRITEBACK_CACHE)
+ }
+
+    /// Looks up `name` in the directory `parent`, and returns the found entry with its stat.
+    ///
+    /// A successful lookup also increments the reference count of the found inode. `forget` is
+    /// the operation that decrements it.
+    ///
+    /// Fails with `ENOENT` if `parent` or `name` cannot be found, and with `ENOTDIR` if `parent`
+    /// is not a directory.
+    fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry> {
+        let inode_table = self.inode_table.read().unwrap();
+
+        // Step 1: resolve the name to an inode number through the parent directory.
+        let parent_state = inode_table
+            .get(&parent)
+            .ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))?;
+        let inode = match &parent_state.entry {
+            AuthFsEntry::ReadonlyDirectory { dir } => dir
+                .lookup_inode(cstr_to_path(name))
+                .ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))?,
+            AuthFsEntry::VerifiedNewDirectory { dir, .. } => dir.find_inode(cstr_to_path(name))?,
+            _ => return Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
+        };
+
+        // Step 2: build the stat of the found entry.
+        let InodeState { entry, handle_ref_count, .. } = inode_table
+            .get(&inode)
+            .ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))?;
+        let st = match entry {
+            AuthFsEntry::ReadonlyDirectory { dir } => {
+                create_dir_stat(inode, dir.number_of_entries(), AccessMode::ReadOnly)?
+            }
+            AuthFsEntry::UnverifiedReadonly { file_size, .. } => {
+                create_stat(inode, *file_size, AccessMode::ReadOnly)?
+            }
+            AuthFsEntry::VerifiedReadonly { reader } => {
+                create_stat(inode, reader.file_size()?, AccessMode::ReadOnly)?
+            }
+            AuthFsEntry::VerifiedNew { editor, attr, .. } => {
+                create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))?
+            }
+            AuthFsEntry::VerifiedNewDirectory { dir, attr } => create_dir_stat(
+                inode,
+                dir.number_of_entries(),
+                AccessMode::Variable(attr.mode()),
+            )?,
+        };
+
+        // Step 3: only count the reference once nothing can fail anymore.
+        if handle_ref_count.fetch_add(1, Ordering::Relaxed) == u64::MAX {
+            panic!("Handle reference count overflow");
+        }
+
+        Ok(Entry {
+            inode,
+            generation: 0,
+            attr: st,
+            entry_timeout: DEFAULT_METADATA_TIMEOUT,
+            attr_timeout: DEFAULT_METADATA_TIMEOUT,
+        })
+    }
+
+    /// Drops `count` references from `inode`.
+    ///
+    /// If the inode has been unlinked and this was its last reference, the inode is removed from
+    /// the inode table. An unknown inode is only logged.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `count` is larger than the current reference count of the inode.
+    fn forget(&self, _ctx: Context, inode: Self::Inode, count: u64) {
+        let mut inode_table = self.inode_table.write().unwrap();
+
+        let state = match inode_table.get_mut(&inode) {
+            Some(state) => state,
+            None => {
+                let e = io::Error::from_raw_os_error(libc::ENOENT);
+                warn!(
+                    "Unexpected failure when tries to forget an inode {} by refcount {}: {:?}",
+                    inode, count, e
+                );
+                return;
+            }
+        };
+
+        let current = state.handle_ref_count.get_mut();
+        if count > *current {
+            error!(
+                "Trying to decrease refcount of inode {} by {} (> current {})",
+                inode, count, *current
+            );
+            panic!(); // log to logcat with error!
+        }
+        *current -= count;
+
+        let delete_now = state.unlinked && *current == 0;
+        if delete_now {
+            let _ignored = inode_table.remove(&inode).expect("Removed an existing entry");
+        }
+        // Otherwise, let the inode stay.
+    }
+
+    /// Returns the stat of `inode`, which the kernel may cache for `DEFAULT_METADATA_TIMEOUT`.
+    ///
+    /// Fails with `ENOENT` if `inode` is unknown, or with the error from building the stat.
+    fn getattr(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        _handle: Option<Handle>,
+    ) -> io::Result<(libc::stat64, Duration)> {
+        self.handle_inode(&inode, |entry| {
+            let st = match entry {
+                AuthFsEntry::ReadonlyDirectory { dir } => {
+                    create_dir_stat(inode, dir.number_of_entries(), AccessMode::ReadOnly)?
+                }
+                AuthFsEntry::UnverifiedReadonly { file_size, .. } => {
+                    create_stat(inode, *file_size, AccessMode::ReadOnly)?
+                }
+                AuthFsEntry::VerifiedReadonly { reader } => {
+                    create_stat(inode, reader.file_size()?, AccessMode::ReadOnly)?
+                }
+                AuthFsEntry::VerifiedNew { editor, attr, .. } => {
+                    create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))?
+                }
+                AuthFsEntry::VerifiedNewDirectory { dir, attr } => create_dir_stat(
+                    inode,
+                    dir.number_of_entries(),
+                    AccessMode::Variable(attr.mode()),
+                )?,
+            };
+            Ok((st, DEFAULT_METADATA_TIMEOUT))
+        })
+    }
+
+    /// Opens the file `inode`.
+    ///
+    /// Read-only files can only be opened with `O_RDONLY` (`EACCES` otherwise), `VerifiedNew`
+    /// files can always be opened, and directories are rejected with `ENOSYS`.
+    ///
+    /// No handle is returned, since later operations use the inode directly.
+    fn open(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        flags: u32,
+    ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)> {
+        self.handle_inode(&inode, |entry| {
+            let access_check = match entry {
+                AuthFsEntry::VerifiedReadonly { .. } | AuthFsEntry::UnverifiedReadonly { .. } => {
+                    check_access_mode(flags, libc::O_RDONLY)
+                }
+                // TODO(victorhsieh): Implement ACL check using the attr and ctx. Always allow
+                // for now.
+                AuthFsEntry::VerifiedNew { .. } => Ok(()),
+                // TODO(victorhsieh): implement when needed.
+                AuthFsEntry::ReadonlyDirectory { .. }
+                | AuthFsEntry::VerifiedNewDirectory { .. } => {
+                    Err(io::Error::from_raw_os_error(libc::ENOSYS))
+                }
+            };
+            // Always cache the file content. There is currently no need to support direct I/O or
+            // avoid the cache buffer. Memory mapping is only possible with cache enabled.
+            access_check.map(|()| (None, FuseOpenOptions::KEEP_CACHE))
+        })
+    }
+
+    /// Creates a new, empty file called `name` in the directory `parent`, with the permission
+    /// `mode & !umask`.
+    ///
+    /// Returns the new entry (registered with an initial ref count of one) without a handle; see
+    /// also `open`. Fails with `EEXIST` if the name is already taken, with `EBADF` if `parent` is
+    /// not a `VerifiedNewDirectory`, and with `ENOENT` if `parent` is unknown.
+    fn create(
+        &self,
+        _ctx: Context,
+        parent: Self::Inode,
+        name: &CStr,
+        mode: u32,
+        _flags: u32,
+        umask: u32,
+        _security_ctx: Option<&CStr>,
+    ) -> io::Result<(Entry, Option<Self::Handle>, FuseOpenOptions)> {
+        // Apply the umask once, so that the mode stored for the new file and the mode reported
+        // back to the kernel (which caches it for `DEFAULT_METADATA_TIMEOUT`) cannot disagree.
+        let mode = mode & !umask;
+        let new_inode = self.create_new_entry_with_ref_count(
+            parent,
+            name,
+            |parent_entry, basename, new_inode| match parent_entry {
+                AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
+                    if dir.has_entry(basename) {
+                        return Err(io::Error::from_raw_os_error(libc::EEXIST));
+                    }
+                    let (new_file, new_attr) = dir.create_file(basename, new_inode, mode)?;
+                    Ok(AuthFsEntry::VerifiedNew { editor: new_file, attr: new_attr })
+                }
+                _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
+            },
+        )?;
+
+        Ok((
+            Entry {
+                inode: new_inode,
+                generation: 0,
+                attr: create_stat(new_inode, /* file_size */ 0, AccessMode::Variable(mode))?,
+                entry_timeout: DEFAULT_METADATA_TIMEOUT,
+                attr_timeout: DEFAULT_METADATA_TIMEOUT,
+            },
+            // See also `open`.
+            /* handle */ None,
+            FuseOpenOptions::KEEP_CACHE,
+        ))
+    }
+
+    /// Reads up to `size` bytes of the file `inode`, starting at `offset`, into `w`.
+    ///
+    /// Returns the number of bytes read. Fails with `EISDIR` for a directory, and with `ENOENT`
+    /// if `inode` is unknown.
+    fn read<W: io::Write + ZeroCopyWriter>(
+        &self,
+        _ctx: Context,
+        inode: Inode,
+        _handle: Handle,
+        w: W,
+        size: u32,
+        offset: u64,
+        _lock_owner: Option<u64>,
+        _flags: u32,
+    ) -> io::Result<usize> {
+        self.handle_inode(&inode, |entry| match entry {
+            AuthFsEntry::ReadonlyDirectory { .. } | AuthFsEntry::VerifiedNewDirectory { .. } => {
+                Err(io::Error::from_raw_os_error(libc::EISDIR))
+            }
+            AuthFsEntry::VerifiedReadonly { reader } => {
+                let file_size = reader.file_size()?;
+                read_chunks(w, reader, file_size, offset, size)
+            }
+            AuthFsEntry::UnverifiedReadonly { reader, file_size } => {
+                read_chunks(w, reader, *file_size, offset, size)
+            }
+            // Note that with FsOptions::WRITEBACK_CACHE, it's possible for the kernel to
+            // request a read even if the file is open with O_WRONLY.
+            AuthFsEntry::VerifiedNew { editor, .. } => {
+                read_chunks(w, editor, editor.size(), offset, size)
+            }
+        })
+    }
+
+    /// Writes `size` bytes taken from `r` to the file `inode` at `offset`.
+    ///
+    /// Only `VerifiedNew` files are writable. Fails with `EPERM` for a read-only file, with
+    /// `EISDIR` for a directory, and with `ENOENT` if `inode` is unknown.
+    fn write<R: io::Read + ZeroCopyReader>(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        _handle: Self::Handle,
+        mut r: R,
+        size: u32,
+        offset: u64,
+        _lock_owner: Option<u64>,
+        _delayed_write: bool,
+        _flags: u32,
+    ) -> io::Result<usize> {
+        self.handle_inode(&inode, |entry| match entry {
+            AuthFsEntry::ReadonlyDirectory { .. } | AuthFsEntry::VerifiedNewDirectory { .. } => {
+                Err(io::Error::from_raw_os_error(libc::EISDIR))
+            }
+            AuthFsEntry::VerifiedReadonly { .. } | AuthFsEntry::UnverifiedReadonly { .. } => {
+                Err(io::Error::from_raw_os_error(libc::EPERM))
+            }
+            AuthFsEntry::VerifiedNew { editor, .. } => {
+                // Take the whole request from the reader before handing it to the editor.
+                let mut data = vec![0; size as usize];
+                r.read_exact(&mut data)?;
+                editor.write_at(&data, offset)
+            }
+        })
+    }
+
+ fn setattr(
+ &self,
+ _ctx: Context,
+ inode: Inode,
+ in_attr: libc::stat64,
+ _handle: Option<Handle>,
+ valid: SetattrValid,
+ ) -> io::Result<(libc::stat64, Duration)> {
+ let mut inode_table = self.inode_table.write().unwrap();
+ handle_inode_mut_locked(&mut inode_table, &inode, |InodeState { entry, .. }| match entry {
+ AuthFsEntry::VerifiedNew { editor, attr } => {
+ check_unsupported_setattr_request(valid)?;
+
+ // Start from the current stat of the file.
+ let mut new_attr =
+ create_stat(inode, editor.size(), AccessMode::Variable(attr.mode()))?;
+ // `valid` tells which `in_attr` fields to apply; mirror them in the returned stat.
+ if valid.contains(SetattrValid::SIZE) {
+ // st_size is i64, but the cast should be safe since kernel should not give a
+ // negative size.
+ debug_assert!(in_attr.st_size >= 0);
+ new_attr.st_size = in_attr.st_size;
+ editor.resize(in_attr.st_size as u64)?;
+ }
+ if valid.contains(SetattrValid::MODE) {
+ attr.set_mode(in_attr.st_mode)?;
+ new_attr.st_mode = in_attr.st_mode;
+ }
+ Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
+ }
+ AuthFsEntry::VerifiedNewDirectory { dir, attr } => {
+ check_unsupported_setattr_request(valid)?;
+ if valid.contains(SetattrValid::SIZE) {
+ return Err(io::Error::from_raw_os_error(libc::EISDIR));
+ }
+
+ // Start from the current stat of the directory.
+ let mut new_attr = create_dir_stat(
+ inode,
+ dir.number_of_entries(),
+ AccessMode::Variable(attr.mode()),
+ )?;
+ if valid.contains(SetattrValid::MODE) {
+ attr.set_mode(in_attr.st_mode)?;
+ new_attr.st_mode = in_attr.st_mode;
+ }
+ Ok((new_attr, DEFAULT_METADATA_TIMEOUT))
+ }
+ _ => Err(io::Error::from_raw_os_error(libc::EPERM)), // Other entry types are read-only.
+ })
+ }
+
+    /// Serves the `authfs.fsverity.digest` extended attribute of a `VerifiedNew` file.
+    ///
+    /// With `size == 0`, only the size of the digest is returned. Otherwise the digest itself is
+    /// returned, or `ERANGE` if it does not fit in `size` bytes. Any other attribute name or
+    /// entry type fails with `ENODATA`.
+    fn getxattr(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        name: &CStr,
+        size: u32,
+    ) -> io::Result<GetxattrReply> {
+        self.handle_inode(&inode, |entry| {
+            let editor = match entry {
+                AuthFsEntry::VerifiedNew { editor, .. } => editor,
+                _ => return Err(io::Error::from_raw_os_error(libc::ENODATA)),
+            };
+
+            // FUSE ioctl is limited, thus we can't implement fs-verity ioctls without a
+            // kernel change (see b/196635431). Until it's possible, use
+            // xattr to expose what we need as an authfs specific API.
+            if name != CStr::from_bytes_with_nul(b"authfs.fsverity.digest\0").unwrap() {
+                return Err(io::Error::from_raw_os_error(libc::ENODATA));
+            }
+
+            // Per protocol, when size is 0, return the value size.
+            if size == 0 {
+                return Ok(GetxattrReply::Count(editor.get_fsverity_digest_size() as u32));
+            }
+
+            let digest = editor.calculate_fsverity_digest()?;
+            if digest.len() > size as usize {
+                return Err(io::Error::from_raw_os_error(libc::ERANGE));
+            }
+            Ok(GetxattrReply::Value(digest.to_vec()))
+        })
+    }
+
+    /// Creates a new, empty directory called `name` in the directory `parent`, with the
+    /// permission `mode & !umask`.
+    ///
+    /// Returns the new entry, registered with an initial ref count of one. Fails with `EEXIST` if
+    /// the name is already taken, with `EACCES` if `parent` is a `ReadonlyDirectory`, with
+    /// `EBADF` if `parent` is not a directory, and with `ENOENT` if `parent` is unknown.
+    fn mkdir(
+        &self,
+        _ctx: Context,
+        parent: Self::Inode,
+        name: &CStr,
+        mode: u32,
+        umask: u32,
+        _security_ctx: Option<&CStr>,
+    ) -> io::Result<Entry> {
+        // Apply the umask once, so that the mode stored for the new directory and the mode
+        // reported back to the kernel (which caches it for `DEFAULT_METADATA_TIMEOUT`) cannot
+        // disagree.
+        let mode = mode & !umask;
+        let new_inode = self.create_new_entry_with_ref_count(
+            parent,
+            name,
+            |parent_entry, basename, new_inode| match parent_entry {
+                AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
+                    if dir.has_entry(basename) {
+                        return Err(io::Error::from_raw_os_error(libc::EEXIST));
+                    }
+                    let (new_dir, new_attr) = dir.mkdir(basename, new_inode, mode)?;
+                    Ok(AuthFsEntry::VerifiedNewDirectory { dir: new_dir, attr: new_attr })
+                }
+                AuthFsEntry::ReadonlyDirectory { .. } => {
+                    Err(io::Error::from_raw_os_error(libc::EACCES))
+                }
+                _ => Err(io::Error::from_raw_os_error(libc::EBADF)),
+            },
+        )?;
+
+        Ok(Entry {
+            inode: new_inode,
+            generation: 0,
+            attr: create_dir_stat(new_inode, /* file_number */ 0, AccessMode::Variable(mode))?,
+            entry_timeout: DEFAULT_METADATA_TIMEOUT,
+            attr_timeout: DEFAULT_METADATA_TIMEOUT,
+        })
+    }
+
+    /// Deletes the file `name` from the directory `parent`.
+    ///
+    /// The inode of the deleted file stays in the inode table, marked as unlinked, until `forget`
+    /// drops its last reference.
+    ///
+    /// Fails with `EACCES` if `parent` is a `ReadonlyDirectory`, with `ENOSYS` if `parent` is a
+    /// `VerifiedNew` file, with `ENOTDIR` if `parent` is a read-only file, and with `ENOENT` if
+    /// `parent` is unknown.
+    fn unlink(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
+        let mut inode_table = self.inode_table.write().unwrap();
+        let deleted_inode: Inode = handle_inode_mut_locked(
+            &mut inode_table,
+            &parent,
+            |InodeState { entry, .. }| match entry {
+                AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
+                    let basename: &Path = cstr_to_path(name);
+                    // Delete the file from both the local and remote directories.
+                    dir.delete_file(basename)
+                }
+                AuthFsEntry::ReadonlyDirectory { .. } => {
+                    Err(io::Error::from_raw_os_error(libc::EACCES))
+                }
+                AuthFsEntry::VerifiedNew { .. } => {
+                    // Deleting a entry in filesystem root is not currently supported.
+                    Err(io::Error::from_raw_os_error(libc::ENOSYS))
+                }
+                AuthFsEntry::UnverifiedReadonly { .. } | AuthFsEntry::VerifiedReadonly { .. } => {
+                    Err(io::Error::from_raw_os_error(libc::ENOTDIR))
+                }
+            },
+        )?;
+
+        // The unlinked flag belongs to the deleted inode, not to its parent directory: `forget`
+        // relies on it to drop the inode once its last reference is gone.
+        match inode_table.get_mut(&deleted_inode) {
+            Some(inode_state) => inode_state.unlinked = true,
+            None => warn!("Unlinked inode {} is missing from the inode table", deleted_inode),
+        }
+        Ok(())
+    }
+
+    /// Deletes the empty directory `name` from the directory `parent`.
+    ///
+    /// The inode of the deleted directory stays in the inode table, marked as unlinked, until
+    /// `forget` drops its last reference.
+    ///
+    /// Fails with `ENOTEMPTY` if the directory has entries, with `EACCES` if the directory or
+    /// `parent` is a `ReadonlyDirectory`, with `ENOTDIR` if either of them is not a directory,
+    /// and with `ENOENT` if `parent` is unknown.
+    fn rmdir(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<()> {
+        let mut inode_table = self.inode_table.write().unwrap();
+        let basename: &Path = cstr_to_path(name);
+
+        // Check before actual removal, with readonly borrow. Remember which inode is about to be
+        // deleted, so that it can be marked as unlinked afterwards.
+        let target_inode =
+            handle_inode_locked(&inode_table, &parent, |inode_state| match &inode_state.entry {
+                AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
+                    let existing_inode = dir.find_inode(basename)?;
+                    handle_inode_locked(&inode_table, &existing_inode, |inode_state| {
+                        inode_state.entry.expect_empty_deletable_directory()
+                    })?;
+                    Ok(existing_inode)
+                }
+                AuthFsEntry::ReadonlyDirectory { .. } => {
+                    Err(io::Error::from_raw_os_error(libc::EACCES))
+                }
+                _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
+            })?;
+
+        // Look up again, this time with mutable borrow. This needs to be done separately because
+        // the previous lookup needs to borrow multiple entry references in the table.
+        handle_inode_mut_locked(
+            &mut inode_table,
+            &parent,
+            |InodeState { entry, .. }| match entry {
+                AuthFsEntry::VerifiedNewDirectory { dir, .. } => {
+                    let _inode = dir.force_delete_directory(basename)?;
+                    Ok(())
+                }
+                _ => unreachable!("Mismatched entry type that is just checked"),
+            },
+        )?;
+
+        // The unlinked flag belongs to the deleted directory, not to its parent: `forget` relies
+        // on it to drop the inode once its last reference is gone.
+        handle_inode_mut_locked(&mut inode_table, &target_inode, |inode_state| {
+            inode_state.unlinked = true;
+            Ok(())
+        })
+    }
+
+    /// Opens the directory `inode` by taking a snapshot of its current entries.
+    ///
+    /// Returns the handle under which the snapshot is stored. Fails with `ENOTDIR` if `inode` is
+    /// not a directory, and with `ENOENT` if it is unknown.
+    fn opendir(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        _flags: u32,
+    ) -> io::Result<(Option<Self::Handle>, FuseOpenOptions)> {
+        let snapshot = self.handle_inode(&inode, |entry| match entry {
+            AuthFsEntry::ReadonlyDirectory { dir } => dir.retrieve_entries(),
+            AuthFsEntry::VerifiedNewDirectory { dir, .. } => dir.retrieve_entries(),
+            _ => Err(io::Error::from_raw_os_error(libc::ENOTDIR)),
+        })?;
+        self.open_dir_store_snapshot(snapshot)
+    }
+
+    /// Returns an iterator over the snapshot stored for `handle`, resuming after `offset`.
+    ///
+    /// Fails with `EBADF` if `handle` is not an open directory handle, and with `EINVAL` if
+    /// `offset` cannot be represented as an index on this platform.
+    fn readdir(
+        &self,
+        _ctx: Context,
+        _inode: Self::Inode,
+        handle: Self::Handle,
+        _size: u32,
+        offset: u64,
+    ) -> io::Result<Self::DirIter> {
+        let dir_handle_table = self.dir_handle_table.read().unwrap();
+        if let Some(entry) = dir_handle_table.get(&handle) {
+            // The offset comes with the request, so an unrepresentable value must be reported as
+            // an error rather than taking down the whole filesystem with a panic.
+            let prev_offset: usize =
+                offset.try_into().map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?;
+            Ok(DirEntriesSnapshotIterator { snapshot: entry.clone(), prev_offset })
+        } else {
+            Err(io::Error::from_raw_os_error(libc::EBADF))
+        }
+    }
+
+    /// Closes the directory handle `handle`, which drops the snapshot stored for it.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `handle` is not an open directory handle.
+    fn releasedir(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        _flags: u32,
+        handle: Self::Handle,
+    ) -> io::Result<()> {
+        let mut table = self.dir_handle_table.write().unwrap();
+        match table.remove(&handle) {
+            Some(_snapshot) => Ok(()),
+            None => unreachable!("Unknown directory handle {}, inode {}", handle, inode),
+        }
+    }
+
+    /// Reports filesystem statistics, mostly taken over from the remote filesystem.
+    ///
+    /// The number of files is the exception: it is the number of inodes known to AuthFS. Fails
+    /// with the error of the remote stats reader.
+    fn statfs(&self, _ctx: Context, _inode: Self::Inode) -> io::Result<libc::statvfs64> {
+        let remote = self.remote_fs_stats_reader.statfs()?;
+
+        // SAFETY: We are zero-initializing a struct with only POD fields. Not all fields matter to
+        // FUSE. See also:
+        // https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/fs/fuse/inode.c?h=v5.15#n460
+        let mut st: libc::statvfs64 = unsafe { zeroed() };
+
+        // Use the remote stat as a template, since it'd matter the most to consider the writable
+        // files/directories that are written to the remote.
+        st.f_bsize = remote.block_size;
+        st.f_frsize = remote.fragment_size;
+        st.f_blocks = remote.block_numbers;
+        st.f_namemax = remote.max_filename;
+
+        // Assuming we are not privileged to use all free spaces on the remote server, set the free
+        // blocks/fragment to the same available amount.
+        st.f_bavail = remote.block_available;
+        st.f_bfree = st.f_bavail;
+        st.f_favail = remote.inodes_available;
+        st.f_ffree = st.f_favail;
+
+        // Number of inodes on the filesystem
+        let inode_count = self.inode_table.read().unwrap().len();
+        st.f_files = inode_count as u64;
+
+        Ok(st)
+    }
+}
+
+/// Runs `handle_fn` on the state of `inode` in the already locked `inode_table`, and returns
+/// whatever it returns. Fails with `ENOENT` if `inode` is not in the table.
+fn handle_inode_locked<F, R>(
+    inode_table: &BTreeMap<Inode, InodeState>,
+    inode: &Inode,
+    handle_fn: F,
+) -> io::Result<R>
+where
+    F: FnOnce(&InodeState) -> io::Result<R>,
+{
+    match inode_table.get(inode) {
+        Some(inode_state) => handle_fn(inode_state),
+        None => Err(io::Error::from_raw_os_error(libc::ENOENT)),
+    }
+}
+
+/// Runs `handle_fn` on the mutable state of `inode` in the already locked `inode_table`, and
+/// returns whatever it returns. Fails with `ENOENT` if `inode` is not in the table.
+fn handle_inode_mut_locked<F, R>(
+    inode_table: &mut BTreeMap<Inode, InodeState>,
+    inode: &Inode,
+    handle_fn: F,
+) -> io::Result<R>
+where
+    F: FnOnce(&mut InodeState) -> io::Result<R>,
+{
+    match inode_table.get_mut(inode) {
+        Some(inode_state) => handle_fn(inode_state),
+        None => Err(io::Error::from_raw_os_error(libc::ENOENT)),
+    }
+}
+
+/// Rejects the `setattr` requests that AuthFS cannot honor.
+///
+/// Changing the owner (`UID`) or the group (`GID`) fails with `ENOSYS`. Timestamp changes are
+/// accepted, but only traced and otherwise ignored. Everything else passes.
+fn check_unsupported_setattr_request(valid: SetattrValid) -> io::Result<()> {
+    let unsupported = [
+        (SetattrValid::UID, "Changing st_uid is not currently supported"),
+        (SetattrValid::GID, "Changing st_gid is not currently supported"),
+    ];
+    for (flag, message) in unsupported {
+        if valid.contains(flag) {
+            warn!("{}", message);
+            return Err(io::Error::from_raw_os_error(libc::ENOSYS));
+        }
+    }
+
+    let timestamps = SetattrValid::CTIME
+        | SetattrValid::ATIME
+        | SetattrValid::ATIME_NOW
+        | SetattrValid::MTIME
+        | SetattrValid::MTIME_NOW;
+    if valid.intersects(timestamps) {
+        trace!("Ignoring ctime/atime/mtime change as authfs does not maintain timestamp currently");
+    }
+    Ok(())
+}
+
+/// Views the bytes of `cstr` (without the trailing NUL) as a filesystem path, without copying.
+fn cstr_to_path(cstr: &CStr) -> &Path {
+    let bytes = cstr.to_bytes();
+    Path::new(OsStr::from_bytes(bytes))
+}