zipfuse: a read-only FUSE implementation for zip archives

zipfuse is a read-only FUSE implementation for zip archives. It will be
used to make files embedded in APKs (which are zip files) accessible
as regular files in microdroid.

Bug: 186377508
Test: atest ZipFuseTest
Change-Id: Idf0fb4d32b8a2d957da9188fedeebb5610050e65
diff --git a/zipfuse/src/main.rs b/zipfuse/src/main.rs
new file mode 100644
index 0000000..d6710d6
--- /dev/null
+++ b/zipfuse/src/main.rs
@@ -0,0 +1,599 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//! `zipfuse` is a FUSE filesystem for zip archives. It provides transparent access to the files
+//! in a zip archive. This filesystem does not support writing files back to the zip archive.
+//! The filesystem has to be mounted read-only.
+
+mod inode;
+
+use anyhow::Result;
+use clap::{App, Arg};
+use fuse::filesystem::*;
+use fuse::mount::*;
+use std::collections::HashMap;
+use std::convert::TryFrom;
+use std::ffi::{CStr, CString, OsStr};
+use std::fs::{File, OpenOptions};
+use std::io;
+use std::io::Read;
+use std::os::unix::ffi::OsStrExt;
+use std::os::unix::io::AsRawFd;
+use std::path::Path;
+use std::sync::Mutex;
+
+use crate::inode::{DirectoryEntry, Inode, InodeData, InodeKind, InodeTable};
+
+/// Parses the `ZIPFILE` and `MOUNTPOINT` arguments and passes them on to [`run_fuse`].
+fn main() -> Result<()> {
+    let zipfile_arg = Arg::with_name("ZIPFILE").required(true);
+    let mountpoint_arg = Arg::with_name("MOUNTPOINT").required(true);
+    let args = App::new("zipfuse").arg(zipfile_arg).arg(mountpoint_arg).get_matches();
+
+    // Both arguments are declared as required, hence the `unwrap`s on their values.
+    let zip_file = Path::new(args.value_of("ZIPFILE").unwrap());
+    let mount_point = Path::new(args.value_of("MOUNTPOINT").unwrap());
+    run_fuse(zip_file, mount_point)
+}
+
+/// Mounts `zip_file` read-only on `mount_point` and serves FUSE requests for it.
+///
+/// The archive is opened and indexed before mounting so that an unreadable or malformed zip
+/// file fails early instead of leaving behind a mounted filesystem that nothing serves.
+pub fn run_fuse(zip_file: &Path, mount_point: &Path) -> Result<()> {
+    const MAX_READ: u32 = 1 << 20; // TODO(jiyong): tune this
+    const MAX_WRITE: u32 = 1 << 13; // This is a read-only filesystem
+
+    let dev_fuse = OpenOptions::new().read(true).write(true).open("/dev/fuse")?;
+    let zip_fuse = ZipFuse::new(zip_file)?;
+    let flags = libc::MS_NOSUID | libc::MS_NODEV | libc::MS_RDONLY;
+    let options = [
+        MountOption::FD(dev_fuse.as_raw_fd()),
+        MountOption::RootMode(libc::S_IFDIR | libc::S_IXUSR | libc::S_IXGRP | libc::S_IXOTH),
+        MountOption::AllowOther,
+        MountOption::UserId(0),
+        MountOption::GroupId(0),
+        MountOption::MaxRead(MAX_READ),
+    ];
+    fuse::mount(mount_point, "zipfuse", flags, &options)?;
+    Ok(fuse::worker::start_message_loop(dev_fuse, MAX_READ, MAX_WRITE, zip_fuse)?)
+}
+
+struct ZipFuse { // State of one mounted archive; every filesystem callback takes it by `&self`.
+    zip_archive: Mutex<zip::ZipArchive<File>>, // Archive reader, locked while `open` reads.
+    inode_table: InodeTable, // Built from the archive in `new` and only read afterwards.
+    open_files: Mutex<HashMap<Handle, OpenFileBuf>>, // Buffered contents of the open files.
+    open_dirs: Mutex<HashMap<Handle, OpenDirBuf>>, // Entry listings of the open directories.
+}
+
+/// Holds the (decompressed) contents of a [`zip::read::ZipFile`].
+///
+/// This buf is needed because `ZipFile` is in general not seekable due to the compression.
+///
+/// TODO(jiyong): do this only for compressed `ZipFile`s. Uncompressed (store) files don't need
+/// this; they can be directly read from `zip_archive`.
+struct OpenFileBuf {
+    open_count: u32, // multiple opens share the buf because this is a read-only filesystem
+    buf: Box<[u8]>, // the entire file content, filled in by the first `open`
+}
+
+/// Holds the directory entries in a directory opened by `opendir`.
+struct OpenDirBuf {
+    open_count: u32, // incremented by every `opendir` that shares this buf
+    buf: Box<[(CString, DirectoryEntry)]>, // (name, entry) pairs copied from the inode table
+}
+
+type Handle = u64; // `open` and `opendir` hand out the inode number itself as the handle
+
+fn ebadf() -> io::Error { // The error used for unknown inodes and for handles that are not open.
+    io::Error::from_raw_os_error(libc::EBADF)
+}
+
+fn timeout_max() -> std::time::Duration {
+    std::time::Duration::new(u64::MAX, 999_999_999) // the largest representable `Duration`
+}
+
+impl ZipFuse {
+    /// Opens `zip_file` read-only and builds the inode table from its entries.
+    fn new(zip_file: &Path) -> Result<ZipFuse> {
+        // TODO(jiyong): Use O_DIRECT to avoid double caching.
+        // `.custom_flags(nix::fcntl::OFlag::O_DIRECT.bits())` currently doesn't work.
+        let mut archive = zip::ZipArchive::new(File::open(zip_file)?)?;
+        let inode_table = InodeTable::from_zip(&mut archive)?;
+        Ok(ZipFuse {
+            zip_archive: Mutex::new(archive),
+            inode_table,
+            open_files: Mutex::new(HashMap::new()),
+            open_dirs: Mutex::new(HashMap::new()),
+        })
+    }
+
+    /// Looks up `inode` in the inode table, mapping a missing entry to `EBADF`.
+    fn find_inode(&self, inode: Inode) -> io::Result<&InodeData> {
+        self.inode_table.get(inode).ok_or_else(ebadf)
+    }
+
+    /// Builds the `stat64` reported for `inode`; an unknown inode yields `EBADF`.
+    fn stat_from(&self, inode: Inode) -> io::Result<libc::stat64> {
+        let data = self.find_inode(inode)?;
+        // SAFETY: `stat64` is a plain C struct for which the all-zero bit pattern is valid.
+        let mut st = unsafe { std::mem::MaybeUninit::<libc::stat64>::zeroed().assume_init() };
+        st.st_dev = 0;
+        st.st_ino = inode;
+        st.st_uid = 0;
+        st.st_gid = 0;
+        let type_bits = if data.is_dir() { libc::S_IFDIR } else { libc::S_IFREG };
+        st.st_mode = type_bits | data.mode;
+        // A directory gets 2 links for `.` and `..` plus one per entry. The `unwrap` is safe
+        // because the closure is only called after the `is_dir` check.
+        let dir_links = || 2 + data.get_directory().unwrap().len() as libc::nlink_t;
+        st.st_nlink = if data.is_dir() { dir_links() } else { 1 };
+        st.st_size = i64::try_from(data.size).unwrap_or(i64::MAX);
+        Ok(st)
+    }
+}
+
+impl fuse::filesystem::FileSystem for ZipFuse {
+    type Inode = Inode;
+    type Handle = Handle;
+    type DirIter = DirIter;
+
+    fn init(&self, _capable: FsOptions) -> std::io::Result<FsOptions> {
+        // Nothing is requested on top of the default options that the fuse crate adds itself.
+        Ok(FsOptions::empty())
+    }
+
+    /// Resolves `name` inside the directory `parent`: `EBADF` if `parent` is unknown or not a
+    /// directory, `ENOENT` if it has no such entry.
+    fn lookup(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<Entry> {
+        let parent_dir = self.find_inode(parent)?.get_directory().ok_or_else(ebadf)?;
+        let child = parent_dir
+            .get(OsStr::from_bytes(name.to_bytes()))
+            .ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))?;
+        Ok(Entry {
+            inode: child.inode,
+            generation: 0,
+            attr: self.stat_from(child.inode)?,
+            // This is a read-only fs, so neither the attributes nor the entry ever expire.
+            attr_timeout: timeout_max(),
+            entry_timeout: timeout_max(),
+        })
+    }
+
+    fn getattr(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        _handle: Option<Self::Handle>,
+    ) -> io::Result<(libc::stat64, std::time::Duration)> {
+        // The attributes are paired with the maximum timeout, as they are in `lookup`.
+        self.stat_from(inode).map(|attr| (attr, timeout_max()))
+    }
+
+    /// Opens the file `inode` and returns the inode number itself as the handle.
+    ///
+    /// All opens of one file share a single decompressed buffer, which the first open creates.
+    fn open(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        _flags: u32,
+    ) -> io::Result<(Option<Self::Handle>, fuse::filesystem::OpenOptions)> {
+        let mut open_files = self.open_files.lock().unwrap();
+        let handle = inode as Handle;
+
+        // If the file is already opened, just bump the reference count. Otherwise decompress
+        // the entire entry into a buffer; `read` later copies portions of it to the kernel.
+        // TODO(jiyong): do this only for compressed zip files. Files that are not compressed
+        // (store) can be directly read from zip_archive. That will help reduce the memory usage.
+        match open_files.get_mut(&handle) {
+            Some(shared) if shared.open_count == 0 => return Err(ebadf()),
+            Some(shared) => shared.open_count += 1,
+            None => {
+                let inode_data = self.find_inode(inode)?;
+                let zip_index = inode_data.get_zip_index().ok_or_else(ebadf)?;
+                let mut contents = Vec::with_capacity(inode_data.size as usize);
+                self.zip_archive.lock().unwrap().by_index(zip_index)?.read_to_end(&mut contents)?;
+                let buf = contents.into_boxed_slice();
+                open_files.insert(handle, OpenFileBuf { open_count: 1, buf });
+            }
+        }
+        // Note: we don't return `DIRECT_IO` here, because then applications wouldn't be able to
+        // mmap the files.
+        Ok((Some(handle), fuse::filesystem::OpenOptions::empty()))
+    }
+
+    /// Drops one reference to the buffer of `inode` and frees the buffer with the last one.
+    fn release(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        _flags: u32,
+        _handle: Self::Handle,
+        _flush: bool,
+        _flock_release: bool,
+        _lock_owner: Option<u64>,
+    ) -> io::Result<()> {
+        // Freeing the buffer once nobody has the file open saves memory, at the cost of
+        // decompressing again when the same file is opened in the future.
+        let mut open_files = self.open_files.lock().unwrap();
+        let handle = inode as Handle;
+        let ofb = open_files.get_mut(&handle).ok_or_else(ebadf)?;
+        // The decremented count has to be stored back: only comparing `open_count - 1` with 0
+        // would keep the buffer of a file that was opened more than once alive forever.
+        ofb.open_count = ofb.open_count.checked_sub(1).ok_or_else(ebadf)?;
+        if ofb.open_count == 0 {
+            open_files.remove(&handle);
+        }
+        Ok(())
+    }
+
+    /// Copies up to `size` bytes starting at `offset` from the buffered file `handle` into `w`.
+    /// A read that starts at or past the end of the file passes an empty slice to `w`.
+    fn read<W: io::Write + ZeroCopyWriter>(
+        &self,
+        _ctx: Context,
+        _inode: Self::Inode,
+        handle: Self::Handle,
+        mut w: W,
+        size: u32,
+        offset: u64,
+        _lock_owner: Option<u64>,
+        _flags: u32,
+    ) -> io::Result<usize> {
+        let open_files = self.open_files.lock().unwrap();
+        let ofb = open_files.get(&handle).filter(|ofb| ofb.open_count > 0).ok_or_else(ebadf)?;
+        // Clamp both ends to the buffer so that an out-of-range offset yields an empty slice
+        // instead of an overflow or a slice-index panic.
+        let len = ofb.buf.len();
+        let start = usize::try_from(offset).map_or(len, |o| o.min(len));
+        let end = start.saturating_add(size as usize).min(len);
+        w.write(&ofb.buf[start..end])
+    }
+
+    /// Opens the directory `inode`, copying its entries for the `readdir` calls that follow.
+    fn opendir(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        _flags: u32,
+    ) -> io::Result<(Option<Self::Handle>, fuse::filesystem::OpenOptions)> {
+        let mut open_dirs = self.open_dirs.lock().unwrap();
+        let handle = inode as Handle;
+        match open_dirs.get_mut(&handle) {
+            Some(odb) if odb.open_count == 0 => return Err(ebadf()),
+            Some(odb) => odb.open_count += 1,
+            None => {
+                let inode_data = self.find_inode(inode)?;
+                let directory = inode_data.get_directory().ok_or_else(ebadf)?;
+                let mut buf = Vec::with_capacity(directory.len());
+                for (name, dir_entry) in directory.iter() {
+                    // A name with an interior NUL fails the open instead of panicking.
+                    buf.push((CString::new(name.as_bytes())?, dir_entry.clone()));
+                }
+                open_dirs.insert(handle, OpenDirBuf { open_count: 1, buf: buf.into_boxed_slice() });
+            }
+        }
+        Ok((Some(handle), fuse::filesystem::OpenOptions::empty()))
+    }
+
+    /// Drops one reference to the listing of `inode` and frees the listing with the last one.
+    fn releasedir(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        _flags: u32,
+        _handle: Self::Handle,
+    ) -> io::Result<()> {
+        let mut open_dirs = self.open_dirs.lock().unwrap();
+        let handle = inode as Handle;
+        let odb = open_dirs.get_mut(&handle).ok_or_else(ebadf)?;
+        // Store the decremented count; otherwise a directory opened twice is never freed.
+        odb.open_count = odb.open_count.checked_sub(1).ok_or_else(ebadf)?;
+        if odb.open_count == 0 {
+            open_dirs.remove(&handle);
+        }
+        Ok(())
+    }
+
+    /// Returns an iterator over the entries of the open directory `inode`.
+    ///
+    /// Iteration starts at entry index `offset` and covers at most `size` entries.
+    fn readdir(
+        &self,
+        _ctx: Context,
+        inode: Self::Inode,
+        _handle: Self::Handle,
+        size: u32,
+        offset: u64,
+    ) -> io::Result<Self::DirIter> {
+        let open_dirs = self.open_dirs.lock().unwrap();
+        let handle = inode as Handle;
+        let odb = open_dirs.get(&handle).filter(|odb| odb.open_count > 0).ok_or_else(ebadf)?;
+        // Clamp the window to the listing so that an offset past the end yields an empty
+        // iterator instead of an arithmetic underflow and a slice-index panic.
+        let len = odb.buf.len();
+        let start = usize::try_from(offset).map_or(len, |o| o.min(len));
+        let end = start.saturating_add(size as usize).min(len);
+        // The portion of `buf` is *copied* to the iterator. This is not ideal, but inevitable
+        // because the `name` field in `fuse::filesystem::DirEntry` is `&CStr` not `CString`.
+        let inner = odb.buf[start..end].to_vec();
+        Ok(DirIter { inner, offset, cur: 0 })
+    }
+}
+
+struct DirIter { // The iterator returned by `readdir`, over a copied part of the listing.
+    inner: Vec<(CString, DirectoryEntry)>, // the (name, entry) pairs to yield, in order
+    offset: u64, // the offset where this iterator begins. `next` doesn't change this.
+    cur: usize,  // the current index in `inner`. `next` advances this.
+}
+
+impl fuse::filesystem::DirectoryIterator for DirIter {
+    /// Yields the next copied entry, or `None` once all of them have been returned.
+    fn next(&mut self) -> Option<fuse::filesystem::DirEntry> {
+        let (name, entry) = self.inner.get(self.cur)?;
+        self.cur += 1;
+        let file_type = match entry.kind {
+            InodeKind::Directory => libc::DT_DIR,
+            InodeKind::File => libc::DT_REG,
+        };
+        Some(fuse::filesystem::DirEntry {
+            ino: entry.inode as libc::ino64_t,
+            // `cur` is already advanced here, so the reported offset is the index at which
+            // `readdir` has to start in order to continue right after this entry.
+            offset: self.offset + self.cur as u64,
+            type_: file_type.into(),
+            name,
+        })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use anyhow::{bail, Result};
+    use nix::sys::statfs::{statfs, FsType};
+    use std::collections::HashSet;
+    use std::fs;
+    use std::fs::File;
+    use std::io::Write;
+    use std::path::{Path, PathBuf};
+    use std::time::{Duration, Instant};
+    use zip::write::FileOptions;
+
+    /// Serves the filesystem from a detached background thread of the test process.
+    #[cfg(not(target_os = "android"))]
+    fn start_fuse(zip_path: &Path, mnt_path: &Path) {
+        let (zip, mnt) = (zip_path.to_path_buf(), mnt_path.to_path_buf());
+        std::thread::spawn(move || {
+            crate::run_fuse(&zip, &mnt).unwrap();
+        });
+    }
+
+    #[cfg(target_os = "android")]
+    fn start_fuse(zip_path: &Path, mnt_path: &Path) {
+        // Note: for some unknown reason, running a thread to serve fuse doesn't work on Android.
+        // Explicitly spawn a zipfuse process instead.
+        // TODO(jiyong): fix this
+        // The paths are passed as separate arguments, without a shell, so they need no quoting.
+        std::process::Command::new("/data/local/tmp/zipfuse")
+            .args(&[zip_path, mnt_path])
+            .spawn()
+            .expect("failed to spawn /data/local/tmp/zipfuse");
+    }
+
+    /// Polls `mount_path` every 50 ms until `statfs` reports a FUSE filesystem on it, and
+    /// fails if that has not happened after 10 seconds or if `statfs` itself fails.
+    fn wait_for_mount(mount_path: &Path) -> Result<()> {
+        const POLL_INTERVAL: Duration = Duration::from_millis(50);
+        const TIMEOUT: Duration = Duration::from_secs(10);
+        // The filesystem type that marks `mount_path` as mounted.
+        const FUSE_SUPER_MAGIC: FsType = FsType(0x65735546);
+
+        let started = Instant::now();
+        while statfs(mount_path)?.filesystem_type() != FUSE_SUPER_MAGIC {
+            if started.elapsed() > TIMEOUT {
+                bail!("Time out mounting zipfuse");
+            }
+            std::thread::sleep(POLL_INTERVAL);
+        }
+        Ok(())
+    }
+
+    // Creates a zip file, adds some files to the zip file, mounts it using zipfuse, runs the check
+    // routine, and finally unmounts. Every fallible step is unwrapped so that a failure panics
+    // with the underlying error rather than with a bare "assertion failed".
+    fn run_test(add: fn(&mut zip::ZipWriter<File>), check: fn(&std::path::Path)) {
+        // Create an empty zip file
+        let test_dir = tempfile::TempDir::new().unwrap();
+        let zip_path = test_dir.path().join("test.zip");
+        let mut zip = zip::ZipWriter::new(File::create(&zip_path).unwrap());
+
+        // Let test users add files/dirs to the zip file
+        add(&mut zip);
+        zip.finish().unwrap();
+        drop(zip);
+
+        // Mount the zip file on the "mnt" dir using zipfuse.
+        let mnt_path = test_dir.path().join("mnt");
+        fs::create_dir(&mnt_path).unwrap();
+
+        start_fuse(&zip_path, &mnt_path);
+
+        // Give some time for the fuse to boot up
+        wait_for_mount(&mnt_path).unwrap();
+        // Run the check routine, and do the clean up.
+        check(&mnt_path);
+        // NOTE(review): a panic inside `check` skips this unmount and leaves the filesystem
+        // mounted under the temporary directory.
+        nix::mount::umount2(&mnt_path, nix::mount::MntFlags::empty()).unwrap();
+    }
+
+    /// Asserts that `file` under `root` is a regular file whose size and bytes match `content`.
+    fn check_file(root: &Path, file: &str, content: &[u8]) {
+        let path = root.join(file);
+        assert!(path.exists());
+
+        // The results are unwrapped instead of checked with `assert!(..is_ok())` so that a
+        // failure reports the actual error.
+        let metadata = fs::metadata(&path).unwrap();
+        assert!(metadata.is_file());
+        assert_eq!(content.len(), metadata.len() as usize);
+
+        // The whole file has to read back byte for byte.
+        let read_data = fs::read(&path).unwrap();
+        assert_eq!(content, read_data.as_slice());
+    }
+
+    /// Asserts that `dir` under `root` is a directory that contains exactly the directories
+    /// named in `dirs` and, as everything else, exactly the entries named in `files`.
+    fn check_dir(root: &Path, dir: &str, files: &[&str], dirs: &[&str]) {
+        let dir_path = root.join(dir);
+        assert!(dir_path.exists());
+
+        // Unwrapped instead of `assert!(..is_ok())` so that a failure reports the actual error.
+        let metadata = fs::metadata(&dir_path).unwrap();
+        assert!(metadata.is_dir());
+
+        // Split the listed entries into directories and everything else, each recorded by its
+        // path relative to `dir_path`.
+        let mut actual_files = HashSet::new();
+        let mut actual_dirs = HashSet::new();
+        for de in fs::read_dir(&dir_path).unwrap() {
+            let path = de.unwrap().path();
+            let name = path.strip_prefix(&dir_path).unwrap().to_path_buf();
+            if path.is_dir() {
+                actual_dirs.insert(name);
+            } else {
+                actual_files.insert(name);
+            }
+        }
+
+        let expected_files: HashSet<PathBuf> = files.iter().map(|&s| PathBuf::from(s)).collect();
+        let expected_dirs: HashSet<PathBuf> = dirs.iter().map(|&s| PathBuf::from(s)).collect();
+
+        // Set equality: nothing may be missing and nothing extra may be listed.
+        assert_eq!(expected_files, actual_files);
+        assert_eq!(expected_dirs, actual_dirs);
+    }
+
+    #[test]
+    fn empty() {
+        run_test(
+            |_| {}, // nothing is added to the archive
+            |root| {
+                check_dir(root, "", &[], &[]); // the root exists and lists no entries
+            },
+        );
+    }
+
+    #[test]
+    fn single_file() {
+        run_test(
+            |zip| {
+                zip.start_file("foo", FileOptions::default()).unwrap();
+                zip.write_all(b"0123456789").unwrap();
+            },
+            |root| {
+                check_dir(root, "", &["foo"], &[]); // the root lists exactly the one file
+                check_file(root, "foo", b"0123456789"); // size and content as written above
+            },
+        );
+    }
+
+    #[test]
+    fn single_dir() {
+        run_test(
+            |zip| {
+                zip.add_directory("dir", FileOptions::default()).unwrap();
+            },
+            |root| {
+                check_dir(root, "", &[], &["dir"]); // the root lists only the directory
+                check_dir(root, "dir", &[], &[]); // the directory itself is empty
+            },
+        );
+    }
+
+    #[test]
+    fn complex_hierarchy() {
+        // root/
+        //   a/
+        //     b1/
+        //     b2/
+        //       c1 (file)
+        //       c2/
+        //         d1 (file)
+        //         d2 (file)
+        //         d3 (file)
+        //   x/
+        //     y1 (file)
+        //     y2 (file)
+        //     y3/
+        //
+        //   foo (file)
+        //   bar (file)
+        run_test(
+            |zip| {
+                let opt = FileOptions::default();
+                zip.add_directory("a/b1", opt).unwrap(); // `a` is only ever added as a prefix
+
+                zip.start_file("a/b2/c1", opt).unwrap(); // likewise `a/b2` (and `a/b2/c2` below)
+
+                zip.start_file("a/b2/c2/d1", opt).unwrap();
+                zip.start_file("a/b2/c2/d2", opt).unwrap();
+                zip.start_file("a/b2/c2/d3", opt).unwrap();
+
+                zip.start_file("x/y1", opt).unwrap();
+                zip.start_file("x/y2", opt).unwrap();
+                zip.add_directory("x/y3", opt).unwrap();
+
+                zip.start_file("foo", opt).unwrap();
+                zip.start_file("bar", opt).unwrap();
+            },
+            |root| {
+                check_dir(root, "", &["foo", "bar"], &["a", "x"]);
+                check_dir(root, "a", &[], &["b1", "b2"]);
+                check_dir(root, "a/b1", &[], &[]);
+                check_dir(root, "a/b2", &["c1"], &["c2"]);
+                check_dir(root, "a/b2/c2", &["d1", "d2", "d3"], &[]);
+                check_dir(root, "x", &["y1", "y2"], &["y3"]);
+                check_dir(root, "x/y3", &[], &[]);
+                check_file(root, "a/b2/c1", &[]); // nothing was written, so each file is empty
+                check_file(root, "a/b2/c2/d1", &[]);
+                check_file(root, "a/b2/c2/d2", &[]);
+                check_file(root, "a/b2/c2/d3", &[]);
+                check_file(root, "x/y1", &[]);
+                check_file(root, "x/y2", &[]);
+                check_file(root, "foo", &[]);
+                check_file(root, "bar", &[]);
+            },
+        );
+    }
+
+    #[test]
+    fn large_file() {
+        run_test(
+            |zip| {
+                let data = vec![10; 2 << 20]; // 2 MiB, twice the 1 MiB `MAX_READ` of `run_fuse`
+                zip.start_file("foo", FileOptions::default()).unwrap();
+                zip.write_all(&data).unwrap();
+            },
+            |root| {
+                let data = vec![10; 2 << 20]; // must match the data written above
+                check_file(root, "foo", &data);
+            },
+        );
+    }
+}