zipfuse: optimize for uncompressed zip entries

Previously, when a zip entry was opened, its entire content was copied
into zipfuse and kept there until the file was closed. This wastes
memory: if the zip entry is stored uncompressed, it can be read
directly from the containing zip file.

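This CL implements the optimization: stored (uncompressed) entries are
read directly from the zip file on each read, while compressed entries
are still decompressed into memory when opened.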

Bug: 187878241
Test: atest zipfuse.test
Change-Id: Ia2d516d4d03d699ee7da72f576f337bd73516427
diff --git a/zipfuse/Android.bp b/zipfuse/Android.bp
index 24cfaa0..46f4b5a 100644
--- a/zipfuse/Android.bp
+++ b/zipfuse/Android.bp
@@ -14,6 +14,8 @@
         "libfuse_rust",
         "liblibc",
         "libzip",
+        "libscopeguard",
+        "liblog_rust",
     ],
     // libfuse_rust, etc don't support 32-bit targets
     multilib: {
diff --git a/zipfuse/Cargo.toml b/zipfuse/Cargo.toml
index c8f2f3a..17fd293 100644
--- a/zipfuse/Cargo.toml
+++ b/zipfuse/Cargo.toml
@@ -12,7 +12,8 @@
 zip = "0.5"
 tempfile = "3.2"
 nix = "0.20"
+scopeguard = "1.1"
+log = "0.4"
 
 [dev-dependencies]
 loopdev = "0.2"
-scopeguard = "1.1"
diff --git a/zipfuse/src/main.rs b/zipfuse/src/main.rs
index 4ab934d..a91642c 100644
--- a/zipfuse/src/main.rs
+++ b/zipfuse/src/main.rs
@@ -87,20 +87,23 @@
 
 struct ZipFuse {
     zip_archive: Mutex<zip::ZipArchive<File>>,
+    raw_file: Mutex<File>,
     inode_table: InodeTable,
-    open_files: Mutex<HashMap<Handle, OpenFileBuf>>,
+    open_files: Mutex<HashMap<Handle, OpenFile>>,
     open_dirs: Mutex<HashMap<Handle, OpenDirBuf>>,
 }
 
-/// Holds the (decompressed) contents of a [`ZipFile`].
-///
-/// This buf is needed because `ZipFile` is in general not seekable due to the compression.
-///
-/// TODO(jiyong): do this only for compressed `ZipFile`s. Uncompressed (store) files don't need
-/// this; they can be directly read from `zip_archive`.
-struct OpenFileBuf {
+/// Represents a [`ZipFile`] that is opened.
+struct OpenFile {
     open_count: u32, // multiple opens share the buf because this is a read-only filesystem
-    buf: Box<[u8]>,
+    content: OpenFileContent,
+}
+
+/// Holds the content of a [`ZipFile`]. Depending on whether it is compressed or not, the
+/// entire content is stored, or only the zip index is stored.
+enum OpenFileContent {
+    Compressed(Box<[u8]>),
+    Uncompressed(usize), // zip index
 }
 
 /// Holds the directory entries in a directory opened by [`opendir`].
@@ -123,11 +126,15 @@
     fn new(zip_file: &Path) -> Result<ZipFuse> {
         // TODO(jiyong): Use O_DIRECT to avoid double caching.
         // `.custom_flags(nix::fcntl::OFlag::O_DIRECT.bits())` currently doesn't work.
-        let f = OpenOptions::new().read(true).open(zip_file)?;
+        let f = File::open(zip_file)?;
         let mut z = zip::ZipArchive::new(f)?;
+        // Open the same file again so that uncompressed entries can be read directly from
+        // it at arbitrary offsets. `ZipFile` doesn't implement `Seek`.
+        let raw_file = File::open(zip_file)?;
         let it = InodeTable::from_zip(&mut z)?;
         Ok(ZipFuse {
             zip_archive: Mutex::new(z),
+            raw_file: Mutex::new(raw_file),
             inode_table: it,
             open_files: Mutex::new(HashMap::new()),
             open_dirs: Mutex::new(HashMap::new()),
@@ -208,21 +215,37 @@
         // If the file is already opened, just increase the reference counter. If not, read the
         // entire file content to the buffer. When `read` is called, a portion of the buffer is
         // copied to the kernel.
-        // TODO(jiyong): do this only for compressed zip files. Files that are not compressed
-        // (store) can be directly read from zip_archive. That will help reduce the memory usage.
-        if let Some(ofb) = open_files.get_mut(&handle) {
-            if ofb.open_count == 0 {
+        if let Some(file) = open_files.get_mut(&handle) {
+            if file.open_count == 0 {
                 return Err(ebadf());
             }
-            ofb.open_count += 1;
+            file.open_count += 1;
         } else {
             let inode_data = self.find_inode(inode)?;
             let zip_index = inode_data.get_zip_index().ok_or_else(ebadf)?;
             let mut zip_archive = self.zip_archive.lock().unwrap();
             let mut zip_file = zip_archive.by_index(zip_index)?;
-            let mut buf = Vec::with_capacity(inode_data.size as usize);
-            zip_file.read_to_end(&mut buf)?;
-            open_files.insert(handle, OpenFileBuf { open_count: 1, buf: buf.into_boxed_slice() });
+            let content = match zip_file.compression() {
+                zip::CompressionMethod::Stored => OpenFileContent::Uncompressed(zip_index),
+                _ => {
+                    if let Some(mode) = zip_file.unix_mode() {
+                        let is_reg_file = zip_file.is_file();
+                        let is_executable =
+                            mode & (libc::S_IXUSR | libc::S_IXGRP | libc::S_IXOTH) != 0;
+                        if is_reg_file && is_executable {
+                            log::warn!(
+                                "Executable file {:?} is stored compressed. Consider \
+                                storing it uncompressed to save memory",
+                                zip_file.mangled_name()
+                            );
+                        }
+                    }
+                    let mut buf = Vec::with_capacity(inode_data.size as usize);
+                    zip_file.read_to_end(&mut buf)?;
+                    OpenFileContent::Compressed(buf.into_boxed_slice())
+                }
+            };
+            open_files.insert(handle, OpenFile { open_count: 1, content });
         }
         // Note: we don't return `DIRECT_IO` here, because then applications wouldn't be able to
         // mmap the files.
@@ -244,8 +267,8 @@
         // again when the same file is opened in the future.
         let mut open_files = self.open_files.lock().unwrap();
         let handle = inode as Handle;
-        if let Some(ofb) = open_files.get_mut(&handle) {
-            if ofb.open_count.checked_sub(1).ok_or_else(ebadf)? == 0 {
+        if let Some(file) = open_files.get_mut(&handle) {
+            if file.open_count.checked_sub(1).ok_or_else(ebadf)? == 0 {
                 open_files.remove(&handle);
             }
             Ok(())
@@ -266,15 +289,28 @@
         _flags: u32,
     ) -> io::Result<usize> {
         let open_files = self.open_files.lock().unwrap();
-        let ofb = open_files.get(&handle).ok_or_else(ebadf)?;
-        if ofb.open_count == 0 {
+        let file = open_files.get(&handle).ok_or_else(ebadf)?;
+        if file.open_count == 0 {
             return Err(ebadf());
         }
-        let start = offset as usize;
-        let end = start + size as usize;
-        let end = std::cmp::min(end, ofb.buf.len());
-        let read_len = w.write(&ofb.buf[start..end])?;
-        Ok(read_len)
+        Ok(match &file.content {
+            OpenFileContent::Uncompressed(zip_index) => {
+                let mut zip_archive = self.zip_archive.lock().unwrap();
+                let zip_file = zip_archive.by_index(*zip_index)?;
+                let start = zip_file.data_start() + offset;
+                let remaining_size = zip_file.size() - offset;
+                let size = std::cmp::min(remaining_size, size.into());
+
+                let mut raw_file = self.raw_file.lock().unwrap();
+                w.write_from(&mut raw_file, size as usize, start)?
+            }
+            OpenFileContent::Compressed(buf) => {
+                let start = offset as usize;
+                let end = start + size as usize;
+                let end = std::cmp::min(end, buf.len());
+                w.write(&buf[start..end])?
+            }
+        })
     }
 
     fn opendir(
@@ -672,6 +708,25 @@
         run_fuse_and_check_test_zip(test_dir.path(), &zip_path);
     }
 
+    #[test]
+    fn supports_store() {
+        run_test(
+            |zip| {
+                let data = vec![10; 2 << 20];
+                zip.start_file(
+                    "foo",
+                    FileOptions::default().compression_method(zip::CompressionMethod::Stored),
+                )
+                .unwrap();
+                zip.write_all(&data).unwrap();
+            },
+            |root| {
+                let data = vec![10; 2 << 20];
+                check_file(root, "foo", &data);
+            },
+        );
+    }
+
     #[cfg(not(target_os = "android"))] // Android doesn't have the loopdev crate
     #[test]
     fn supports_zip_on_block_device() {