zipfuse: optimize for uncompressed zip entries
Previously, when a zip entry was opened, its entire content was copied
into zipfuse's memory and kept there until the file was closed. This
wastes memory for entries that are stored uncompressed, because they can
be read directly from the containing zip file.
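This CL implements that optimization: uncompressed (stored) entries are
read directly from the zip file, and only compressed entries are still
buffered in memory.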
Bug: 187878241
Test: atest zipfuse.test
Change-Id: Ia2d516d4d03d699ee7da72f576f337bd73516427
diff --git a/zipfuse/Android.bp b/zipfuse/Android.bp
index 24cfaa0..46f4b5a 100644
--- a/zipfuse/Android.bp
+++ b/zipfuse/Android.bp
@@ -14,6 +14,8 @@
"libfuse_rust",
"liblibc",
"libzip",
+ "libscopeguard",
+ "liblog_rust",
],
// libfuse_rust, etc don't support 32-bit targets
multilib: {
diff --git a/zipfuse/Cargo.toml b/zipfuse/Cargo.toml
index c8f2f3a..17fd293 100644
--- a/zipfuse/Cargo.toml
+++ b/zipfuse/Cargo.toml
@@ -12,7 +12,8 @@
zip = "0.5"
tempfile = "3.2"
nix = "0.20"
+scopeguard = "1.1"
+log = "0.4"
[dev-dependencies]
loopdev = "0.2"
-scopeguard = "1.1"
diff --git a/zipfuse/src/main.rs b/zipfuse/src/main.rs
index 4ab934d..a91642c 100644
--- a/zipfuse/src/main.rs
+++ b/zipfuse/src/main.rs
@@ -87,20 +87,23 @@
struct ZipFuse {
zip_archive: Mutex<zip::ZipArchive<File>>,
+ raw_file: Mutex<File>,
inode_table: InodeTable,
- open_files: Mutex<HashMap<Handle, OpenFileBuf>>,
+ open_files: Mutex<HashMap<Handle, OpenFile>>,
open_dirs: Mutex<HashMap<Handle, OpenDirBuf>>,
}
-/// Holds the (decompressed) contents of a [`ZipFile`].
-///
-/// This buf is needed because `ZipFile` is in general not seekable due to the compression.
-///
-/// TODO(jiyong): do this only for compressed `ZipFile`s. Uncompressed (store) files don't need
-/// this; they can be directly read from `zip_archive`.
-struct OpenFileBuf {
+/// Represents a [`ZipFile`] that is opened.
+struct OpenFile {
open_count: u32, // multiple opens share the buf because this is a read-only filesystem
- buf: Box<[u8]>,
+ content: OpenFileContent,
+}
+
+/// Holds the content of a [`ZipFile`]. Depending on whether it is compressed or not, the
+/// entire content is stored, or only the zip index is stored.
+enum OpenFileContent {
+ Compressed(Box<[u8]>),
+ Uncompressed(usize), // zip index
}
/// Holds the directory entries in a directory opened by [`opendir`].
@@ -123,11 +126,15 @@
fn new(zip_file: &Path) -> Result<ZipFuse> {
// TODO(jiyong): Use O_DIRECT to avoid double caching.
// `.custom_flags(nix::fcntl::OFlag::O_DIRECT.bits())` currently doesn't work.
- let f = OpenOptions::new().read(true).open(zip_file)?;
+ let f = File::open(zip_file)?;
let mut z = zip::ZipArchive::new(f)?;
+ // Open the same file again so that we can directly access it when accessing
+ // uncompressed zip_file entries in it. `ZipFile` doesn't implement `Seek`.
+ let raw_file = File::open(zip_file)?;
let it = InodeTable::from_zip(&mut z)?;
Ok(ZipFuse {
zip_archive: Mutex::new(z),
+ raw_file: Mutex::new(raw_file),
inode_table: it,
open_files: Mutex::new(HashMap::new()),
open_dirs: Mutex::new(HashMap::new()),
@@ -208,21 +215,37 @@
// If the file is already opened, just increase the reference counter. If not, read the
// entire file content to the buffer. When `read` is called, a portion of the buffer is
// copied to the kernel.
- // TODO(jiyong): do this only for compressed zip files. Files that are not compressed
- // (store) can be directly read from zip_archive. That will help reduce the memory usage.
- if let Some(ofb) = open_files.get_mut(&handle) {
- if ofb.open_count == 0 {
+ if let Some(file) = open_files.get_mut(&handle) {
+ if file.open_count == 0 {
return Err(ebadf());
}
- ofb.open_count += 1;
+ file.open_count += 1;
} else {
let inode_data = self.find_inode(inode)?;
let zip_index = inode_data.get_zip_index().ok_or_else(ebadf)?;
let mut zip_archive = self.zip_archive.lock().unwrap();
let mut zip_file = zip_archive.by_index(zip_index)?;
- let mut buf = Vec::with_capacity(inode_data.size as usize);
- zip_file.read_to_end(&mut buf)?;
- open_files.insert(handle, OpenFileBuf { open_count: 1, buf: buf.into_boxed_slice() });
+ let content = match zip_file.compression() {
+ zip::CompressionMethod::Stored => OpenFileContent::Uncompressed(zip_index),
+ _ => {
+ if let Some(mode) = zip_file.unix_mode() {
+ let is_reg_file = zip_file.is_file();
+ let is_executable =
+ mode & (libc::S_IXUSR | libc::S_IXGRP | libc::S_IXOTH) != 0;
+ if is_reg_file && is_executable {
+ log::warn!(
+ "Executable file {:?} is stored compressed. Consider \
+ storing it uncompressed to save memory",
+ zip_file.mangled_name()
+ );
+ }
+ }
+ let mut buf = Vec::with_capacity(inode_data.size as usize);
+ zip_file.read_to_end(&mut buf)?;
+ OpenFileContent::Compressed(buf.into_boxed_slice())
+ }
+ };
+ open_files.insert(handle, OpenFile { open_count: 1, content });
}
// Note: we don't return `DIRECT_IO` here, because then applications wouldn't be able to
// mmap the files.
@@ -244,8 +267,8 @@
// again when the same file is opened in the future.
let mut open_files = self.open_files.lock().unwrap();
let handle = inode as Handle;
- if let Some(ofb) = open_files.get_mut(&handle) {
- if ofb.open_count.checked_sub(1).ok_or_else(ebadf)? == 0 {
+ if let Some(file) = open_files.get_mut(&handle) {
+ if file.open_count.checked_sub(1).ok_or_else(ebadf)? == 0 {
open_files.remove(&handle);
}
Ok(())
@@ -266,15 +289,28 @@
_flags: u32,
) -> io::Result<usize> {
let open_files = self.open_files.lock().unwrap();
- let ofb = open_files.get(&handle).ok_or_else(ebadf)?;
- if ofb.open_count == 0 {
+ let file = open_files.get(&handle).ok_or_else(ebadf)?;
+ if file.open_count == 0 {
return Err(ebadf());
}
- let start = offset as usize;
- let end = start + size as usize;
- let end = std::cmp::min(end, ofb.buf.len());
- let read_len = w.write(&ofb.buf[start..end])?;
- Ok(read_len)
+ Ok(match &file.content {
+ OpenFileContent::Uncompressed(zip_index) => {
+ let mut zip_archive = self.zip_archive.lock().unwrap();
+ let zip_file = zip_archive.by_index(*zip_index)?;
+ let start = zip_file.data_start() + offset;
+ let remaining_size = zip_file.size() - offset;
+ let size = std::cmp::min(remaining_size, size.into());
+
+ let mut raw_file = self.raw_file.lock().unwrap();
+ w.write_from(&mut raw_file, size as usize, start)?
+ }
+ OpenFileContent::Compressed(buf) => {
+ let start = offset as usize;
+ let end = start + size as usize;
+ let end = std::cmp::min(end, buf.len());
+ w.write(&buf[start..end])?
+ }
+ })
}
fn opendir(
@@ -672,6 +708,25 @@
run_fuse_and_check_test_zip(test_dir.path(), &zip_path);
}
+ #[test]
+ fn supports_store() {
+ run_test(
+ |zip| {
+ let data = vec![10; 2 << 20];
+ zip.start_file(
+ "foo",
+ FileOptions::default().compression_method(zip::CompressionMethod::Stored),
+ )
+ .unwrap();
+ zip.write_all(&data).unwrap();
+ },
+ |root| {
+ let data = vec![10; 2 << 20];
+ check_file(root, "foo", &data);
+ },
+ );
+ }
+
#[cfg(not(target_os = "android"))] // Android doesn't have the loopdev crate
#[test]
fn supports_zip_on_block_device() {