blob: d0792d5d4f2c6932462b7cde9c212a1245ffb3c7 [file] [log] [blame]
/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
//! `zipfuse` is a FUSE filesystem for zip archives. It provides transparent access to the files
//! in a zip archive. This filesystem does not support writing files back to the zip archive.
//! The filesystem has to be mounted read only.
20
21mod inode;
22
23use anyhow::Result;
24use clap::{App, Arg};
25use fuse::filesystem::*;
26use fuse::mount::*;
27use std::collections::HashMap;
28use std::convert::TryFrom;
Jiyong Park851f68a2021-05-11 21:41:25 +090029use std::ffi::{CStr, CString};
Jiyong Park331d1ea2021-05-10 11:01:23 +090030use std::fs::{File, OpenOptions};
31use std::io;
32use std::io::Read;
Jiyong Park331d1ea2021-05-10 11:01:23 +090033use std::os::unix::io::AsRawFd;
34use std::path::Path;
35use std::sync::Mutex;
36
37use crate::inode::{DirectoryEntry, Inode, InodeData, InodeKind, InodeTable};
38
39fn main() -> Result<()> {
40 let matches = App::new("zipfuse")
41 .arg(Arg::with_name("ZIPFILE").required(true))
42 .arg(Arg::with_name("MOUNTPOINT").required(true))
43 .get_matches();
44
45 let zip_file = matches.value_of("ZIPFILE").unwrap().as_ref();
46 let mount_point = matches.value_of("MOUNTPOINT").unwrap().as_ref();
47 run_fuse(zip_file, mount_point)?;
48 Ok(())
49}
50
51/// Runs a fuse filesystem by mounting `zip_file` on `mount_point`.
52pub fn run_fuse(zip_file: &Path, mount_point: &Path) -> Result<()> {
53 const MAX_READ: u32 = 1 << 20; // TODO(jiyong): tune this
54 const MAX_WRITE: u32 = 1 << 13; // This is a read-only filesystem
55
56 let dev_fuse = OpenOptions::new().read(true).write(true).open("/dev/fuse")?;
57
58 fuse::mount(
59 mount_point,
60 "zipfuse",
61 libc::MS_NOSUID | libc::MS_NODEV | libc::MS_RDONLY,
62 &[
63 MountOption::FD(dev_fuse.as_raw_fd()),
64 MountOption::RootMode(libc::S_IFDIR | libc::S_IXUSR | libc::S_IXGRP | libc::S_IXOTH),
65 MountOption::AllowOther,
66 MountOption::UserId(0),
67 MountOption::GroupId(0),
68 MountOption::MaxRead(MAX_READ),
69 ],
70 )?;
71 Ok(fuse::worker::start_message_loop(dev_fuse, MAX_READ, MAX_WRITE, ZipFuse::new(zip_file)?)?)
72}
73
74struct ZipFuse {
75 zip_archive: Mutex<zip::ZipArchive<File>>,
76 inode_table: InodeTable,
77 open_files: Mutex<HashMap<Handle, OpenFileBuf>>,
78 open_dirs: Mutex<HashMap<Handle, OpenDirBuf>>,
79}
80
/// Holds the (decompressed) contents of a [`ZipFile`].
///
/// This buf is needed because `ZipFile` is in general not seekable due to the compression.
///
/// TODO(jiyong): do this only for compressed `ZipFile`s. Uncompressed (store) files don't need
/// this; they can be directly read from `zip_archive`.
struct OpenFileBuf {
    /// Number of outstanding opens. Multiple opens share the buf because this is a read-only
    /// filesystem.
    open_count: u32,
    /// The whole file content, read once when the file is first opened.
    buf: Box<[u8]>,
}
91
92/// Holds the directory entries in a directory opened by [`opendir`].
93struct OpenDirBuf {
94 open_count: u32,
95 buf: Box<[(CString, DirectoryEntry)]>,
96}
97
/// Handle type returned to the kernel for open files and directories. The `open` and `opendir`
/// implementations in this file use the inode number itself as the handle.
type Handle = u64;
99
100fn ebadf() -> io::Error {
101 io::Error::from_raw_os_error(libc::EBADF)
102}
103
/// Returns the largest representable [`std::time::Duration`]. Used as the attribute and entry
/// cache timeout because the contents of this read-only filesystem never change.
fn timeout_max() -> std::time::Duration {
    // One nanosecond short of a full second: the largest valid sub-second part.
    const MAX_SUBSEC_NANOS: u32 = 999_999_999;
    std::time::Duration::new(u64::MAX, MAX_SUBSEC_NANOS)
}
107
108impl ZipFuse {
109 fn new(zip_file: &Path) -> Result<ZipFuse> {
110 // TODO(jiyong): Use O_DIRECT to avoid double caching.
111 // `.custom_flags(nix::fcntl::OFlag::O_DIRECT.bits())` currently doesn't work.
112 let f = OpenOptions::new().read(true).open(zip_file)?;
113 let mut z = zip::ZipArchive::new(f)?;
114 let it = InodeTable::from_zip(&mut z)?;
115 Ok(ZipFuse {
116 zip_archive: Mutex::new(z),
117 inode_table: it,
118 open_files: Mutex::new(HashMap::new()),
119 open_dirs: Mutex::new(HashMap::new()),
120 })
121 }
122
123 fn find_inode(&self, inode: Inode) -> io::Result<&InodeData> {
124 self.inode_table.get(inode).ok_or_else(ebadf)
125 }
126
127 fn stat_from(&self, inode: Inode) -> io::Result<libc::stat64> {
128 let inode_data = self.find_inode(inode)?;
129 let mut st = unsafe { std::mem::MaybeUninit::<libc::stat64>::zeroed().assume_init() };
130 st.st_dev = 0;
131 st.st_nlink = if inode_data.is_dir() {
132 // 2 is for . and ..
133 // unwrap is safe because of the `is_dir` check.
134 2 + inode_data.get_directory().unwrap().len() as libc::nlink_t
135 } else {
136 1
137 };
138 st.st_ino = inode;
139 st.st_mode = if inode_data.is_dir() { libc::S_IFDIR } else { libc::S_IFREG };
140 st.st_mode |= inode_data.mode;
141 st.st_uid = 0;
142 st.st_gid = 0;
143 st.st_size = i64::try_from(inode_data.size).unwrap_or(i64::MAX);
144 Ok(st)
145 }
146}
147
148impl fuse::filesystem::FileSystem for ZipFuse {
149 type Inode = Inode;
150 type Handle = Handle;
151 type DirIter = DirIter;
152
153 fn init(&self, _capable: FsOptions) -> std::io::Result<FsOptions> {
154 // The default options added by the fuse crate are fine. We don't have additional options.
155 Ok(FsOptions::empty())
156 }
157
158 fn lookup(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<Entry> {
159 let inode = self.find_inode(parent)?;
160 let directory = inode.get_directory().ok_or_else(ebadf)?;
Jiyong Park331d1ea2021-05-10 11:01:23 +0900161 let entry = directory.get(name);
162 match entry {
163 Some(e) => Ok(Entry {
164 inode: e.inode,
165 generation: 0,
166 attr: self.stat_from(e.inode)?,
167 attr_timeout: timeout_max(), // this is a read-only fs
168 entry_timeout: timeout_max(),
169 }),
170 _ => Err(io::Error::from_raw_os_error(libc::ENOENT)),
171 }
172 }
173
174 fn getattr(
175 &self,
176 _ctx: Context,
177 inode: Self::Inode,
178 _handle: Option<Self::Handle>,
179 ) -> io::Result<(libc::stat64, std::time::Duration)> {
180 let st = self.stat_from(inode)?;
181 Ok((st, timeout_max()))
182 }
183
184 fn open(
185 &self,
186 _ctx: Context,
187 inode: Self::Inode,
188 _flags: u32,
189 ) -> io::Result<(Option<Self::Handle>, fuse::filesystem::OpenOptions)> {
190 let mut open_files = self.open_files.lock().unwrap();
191 let handle = inode as Handle;
192
193 // If the file is already opened, just increase the reference counter. If not, read the
194 // entire file content to the buffer. When `read` is called, a portion of the buffer is
195 // copied to the kernel.
196 // TODO(jiyong): do this only for compressed zip files. Files that are not compressed
197 // (store) can be directly read from zip_archive. That will help reduce the memory usage.
198 if let Some(ofb) = open_files.get_mut(&handle) {
199 if ofb.open_count == 0 {
200 return Err(ebadf());
201 }
202 ofb.open_count += 1;
203 } else {
204 let inode_data = self.find_inode(inode)?;
205 let zip_index = inode_data.get_zip_index().ok_or_else(ebadf)?;
206 let mut zip_archive = self.zip_archive.lock().unwrap();
207 let mut zip_file = zip_archive.by_index(zip_index)?;
208 let mut buf = Vec::with_capacity(inode_data.size as usize);
209 zip_file.read_to_end(&mut buf)?;
210 open_files.insert(handle, OpenFileBuf { open_count: 1, buf: buf.into_boxed_slice() });
211 }
212 // Note: we don't return `DIRECT_IO` here, because then applications wouldn't be able to
213 // mmap the files.
214 Ok((Some(handle), fuse::filesystem::OpenOptions::empty()))
215 }
216
217 fn release(
218 &self,
219 _ctx: Context,
220 inode: Self::Inode,
221 _flags: u32,
222 _handle: Self::Handle,
223 _flush: bool,
224 _flock_release: bool,
225 _lock_owner: Option<u64>,
226 ) -> io::Result<()> {
227 // Releases the buffer for the `handle` when it is opened for nobody. While this is good
228 // for saving memory, this has a performance implication because we need to decompress
229 // again when the same file is opened in the future.
230 let mut open_files = self.open_files.lock().unwrap();
231 let handle = inode as Handle;
232 if let Some(ofb) = open_files.get_mut(&handle) {
233 if ofb.open_count.checked_sub(1).ok_or_else(ebadf)? == 0 {
234 open_files.remove(&handle);
235 }
236 Ok(())
237 } else {
238 Err(ebadf())
239 }
240 }
241
242 fn read<W: io::Write + ZeroCopyWriter>(
243 &self,
244 _ctx: Context,
245 _inode: Self::Inode,
246 handle: Self::Handle,
247 mut w: W,
248 size: u32,
249 offset: u64,
250 _lock_owner: Option<u64>,
251 _flags: u32,
252 ) -> io::Result<usize> {
253 let open_files = self.open_files.lock().unwrap();
254 let ofb = open_files.get(&handle).ok_or_else(ebadf)?;
255 if ofb.open_count == 0 {
256 return Err(ebadf());
257 }
258 let start = offset as usize;
259 let end = start + size as usize;
260 let end = std::cmp::min(end, ofb.buf.len());
261 let read_len = w.write(&ofb.buf[start..end])?;
262 Ok(read_len)
263 }
264
265 fn opendir(
266 &self,
267 _ctx: Context,
268 inode: Self::Inode,
269 _flags: u32,
270 ) -> io::Result<(Option<Self::Handle>, fuse::filesystem::OpenOptions)> {
271 let mut open_dirs = self.open_dirs.lock().unwrap();
272 let handle = inode as Handle;
273 if let Some(odb) = open_dirs.get_mut(&handle) {
274 if odb.open_count == 0 {
275 return Err(ebadf());
276 }
277 odb.open_count += 1;
278 } else {
279 let inode_data = self.find_inode(inode)?;
280 let directory = inode_data.get_directory().ok_or_else(ebadf)?;
281 let mut buf: Vec<(CString, DirectoryEntry)> = Vec::with_capacity(directory.len());
282 for (name, dir_entry) in directory.iter() {
283 let name = CString::new(name.as_bytes()).unwrap();
284 buf.push((name, dir_entry.clone()));
285 }
286 open_dirs.insert(handle, OpenDirBuf { open_count: 1, buf: buf.into_boxed_slice() });
287 }
288 Ok((Some(handle), fuse::filesystem::OpenOptions::empty()))
289 }
290
291 fn releasedir(
292 &self,
293 _ctx: Context,
294 inode: Self::Inode,
295 _flags: u32,
296 _handle: Self::Handle,
297 ) -> io::Result<()> {
298 let mut open_dirs = self.open_dirs.lock().unwrap();
299 let handle = inode as Handle;
300 if let Some(odb) = open_dirs.get_mut(&handle) {
301 if odb.open_count.checked_sub(1).ok_or_else(ebadf)? == 0 {
302 open_dirs.remove(&handle);
303 }
304 Ok(())
305 } else {
306 Err(ebadf())
307 }
308 }
309
310 fn readdir(
311 &self,
312 _ctx: Context,
313 inode: Self::Inode,
314 _handle: Self::Handle,
315 size: u32,
316 offset: u64,
317 ) -> io::Result<Self::DirIter> {
318 let open_dirs = self.open_dirs.lock().unwrap();
319 let handle = inode as Handle;
320 let odb = open_dirs.get(&handle).ok_or_else(ebadf)?;
321 if odb.open_count == 0 {
322 return Err(ebadf());
323 }
324 let buf = &odb.buf;
325 let start = offset as usize;
326 let end = start + size as usize;
327 let end = std::cmp::min(end, buf.len());
328 let mut new_buf = Vec::with_capacity(end - start);
329 // The portion of `buf` is *copied* to the iterator. This is not ideal, but inevitable
330 // because the `name` field in `fuse::filesystem::DirEntry` is `&CStr` not `CString`.
331 new_buf.extend_from_slice(&buf[start..end]);
332 Ok(DirIter { inner: new_buf, offset, cur: 0 })
333 }
334}
335
336struct DirIter {
337 inner: Vec<(CString, DirectoryEntry)>,
338 offset: u64, // the offset where this iterator begins. `next` doesn't change this.
339 cur: usize, // the current index in `inner`. `next` advances this.
340}
341
342impl fuse::filesystem::DirectoryIterator for DirIter {
343 fn next(&mut self) -> Option<fuse::filesystem::DirEntry> {
344 if self.cur >= self.inner.len() {
345 return None;
346 }
347
348 let (name, entry) = &self.inner[self.cur];
349 self.cur += 1;
350 Some(fuse::filesystem::DirEntry {
351 ino: entry.inode as libc::ino64_t,
352 offset: self.offset + self.cur as u64,
353 type_: match entry.kind {
354 InodeKind::Directory => libc::DT_DIR.into(),
355 InodeKind::File => libc::DT_REG.into(),
356 },
357 name,
358 })
359 }
360}
361
#[cfg(test)]
mod tests {
    use anyhow::{bail, Result};
    use nix::sys::statfs::{statfs, FsType};
    use std::collections::HashSet;
    use std::fs;
    use std::fs::File;
    use std::io::Write;
    use std::path::{Path, PathBuf};
    use std::time::{Duration, Instant};
    use zip::write::FileOptions;

    /// Serves `zip_path` on `mnt_path` from a background thread of the test process.
    #[cfg(not(target_os = "android"))]
    fn start_fuse(zip_path: &Path, mnt_path: &Path) {
        let zip_path = PathBuf::from(zip_path);
        let mnt_path = PathBuf::from(mnt_path);
        std::thread::spawn(move || {
            crate::run_fuse(&zip_path, &mnt_path).unwrap();
        });
    }

    /// Serves `zip_path` on `mnt_path` from a separately spawned `zipfuse` process.
    #[cfg(target_os = "android")]
    fn start_fuse(zip_path: &Path, mnt_path: &Path) {
        // Note: for some unknown reason, running a thread to serve fuse doesn't work on Android.
        // Explicitly spawn a zipfuse process instead.
        // TODO(jiyong): fix this
        std::process::Command::new("sh")
            .arg("-c")
            .arg(format!("/data/local/tmp/zipfuse {} {}", zip_path.display(), mnt_path.display()))
            .spawn()
            .expect("failed to spawn the zipfuse process");
    }

    /// Polls `mount_path` until a FUSE filesystem is mounted on it, giving up after 10 seconds.
    fn wait_for_mount(mount_path: &Path) -> Result<()> {
        const POLL_INTERVAL: Duration = Duration::from_millis(50);
        const TIMEOUT: Duration = Duration::from_secs(10);
        const FUSE_SUPER_MAGIC: FsType = FsType(0x65735546);

        let start_time = Instant::now();
        while statfs(mount_path)?.filesystem_type() != FUSE_SUPER_MAGIC {
            if start_time.elapsed() > TIMEOUT {
                bail!("Time out mounting zipfuse");
            }
            std::thread::sleep(POLL_INTERVAL);
        }
        Ok(())
    }

    // Creates a zip file, adds some files to the zip file, mounts it using zipfuse, runs the check
    // routine, and finally unmounts. The unmount is performed even when the check routine
    // panics, so that a failing test doesn't leave a stale mount behind.
    fn run_test(add: fn(&mut zip::ZipWriter<File>), check: fn(&std::path::Path)) {
        // Create an empty zip file
        let test_dir = tempfile::TempDir::new().unwrap();
        let zip_path = test_dir.path().join("test.zip");
        let mut zip = zip::ZipWriter::new(File::create(&zip_path).unwrap());

        // Let test users add files/dirs to the zip file
        add(&mut zip);
        zip.finish().unwrap();
        drop(zip);

        // Mount the zip file on the "mnt" dir using zipfuse.
        let mnt_path = test_dir.path().join("mnt");
        fs::create_dir(&mnt_path).unwrap();

        start_fuse(&zip_path, &mnt_path);

        // Give some time for the fuse to boot up
        wait_for_mount(&mnt_path).unwrap();

        // Run the check routine, and do the clean up. A panic in the check routine is caught
        // first and re-raised only after the unmount has been attempted.
        let outcome = std::panic::catch_unwind(|| check(&mnt_path));
        let unmounted = nix::mount::umount2(&mnt_path, nix::mount::MntFlags::empty());
        if let Err(cause) = outcome {
            std::panic::resume_unwind(cause);
        }
        unmounted.unwrap();
    }

    /// Asserts that `file` under `root` is a regular file whose size and content equal
    /// `content`.
    fn check_file(root: &Path, file: &str, content: &[u8]) {
        let path = root.join(file);
        assert!(path.exists());

        let metadata = fs::metadata(&path).unwrap();
        assert!(metadata.is_file());
        assert_eq!(content.len(), metadata.len() as usize);

        let read_data = fs::read(&path).unwrap();
        assert_eq!(content, read_data.as_slice());
    }

    /// Asserts that `dir` under `root` is a directory containing exactly the given `files` and
    /// sub-directories `dirs`.
    fn check_dir(root: &Path, dir: &str, files: &[&str], dirs: &[&str]) {
        let dir_path = root.join(dir);
        assert!(dir_path.exists());

        let metadata = fs::metadata(&dir_path).unwrap();
        assert!(metadata.is_dir());

        let mut actual_files = HashSet::new();
        let mut actual_dirs = HashSet::new();
        for de in fs::read_dir(&dir_path).unwrap() {
            let path = de.unwrap().path();
            let name = path.strip_prefix(&dir_path).unwrap().to_path_buf();
            if path.is_dir() {
                actual_dirs.insert(name);
            } else {
                actual_files.insert(name);
            }
        }
        let expected_files: HashSet<PathBuf> = files.iter().map(|&s| PathBuf::from(s)).collect();
        let expected_dirs: HashSet<PathBuf> = dirs.iter().map(|&s| PathBuf::from(s)).collect();

        assert_eq!(expected_files, actual_files);
        assert_eq!(expected_dirs, actual_dirs);
    }

    #[test]
    fn empty() {
        run_test(
            |_| {},
            |root| {
                check_dir(root, "", &[], &[]);
            },
        );
    }

    #[test]
    fn single_file() {
        run_test(
            |zip| {
                zip.start_file("foo", FileOptions::default()).unwrap();
                zip.write_all(b"0123456789").unwrap();
            },
            |root| {
                check_dir(root, "", &["foo"], &[]);
                check_file(root, "foo", b"0123456789");
            },
        );
    }

    #[test]
    fn single_dir() {
        run_test(
            |zip| {
                zip.add_directory("dir", FileOptions::default()).unwrap();
            },
            |root| {
                check_dir(root, "", &[], &["dir"]);
                check_dir(root, "dir", &[], &[]);
            },
        );
    }

    #[test]
    fn complex_hierarchy() {
        // root/
        //   a/
        //     b1/
        //     b2/
        //       c1 (file)
        //       c2/
        //         d1 (file)
        //         d2 (file)
        //         d3 (file)
        //   x/
        //     y1 (file)
        //     y2 (file)
        //     y3/
        //
        //   foo (file)
        //   bar (file)
        run_test(
            |zip| {
                let opt = FileOptions::default();
                zip.add_directory("a/b1", opt).unwrap();

                zip.start_file("a/b2/c1", opt).unwrap();

                zip.start_file("a/b2/c2/d1", opt).unwrap();
                zip.start_file("a/b2/c2/d2", opt).unwrap();
                zip.start_file("a/b2/c2/d3", opt).unwrap();

                zip.start_file("x/y1", opt).unwrap();
                zip.start_file("x/y2", opt).unwrap();
                zip.add_directory("x/y3", opt).unwrap();

                zip.start_file("foo", opt).unwrap();
                zip.start_file("bar", opt).unwrap();
            },
            |root| {
                check_dir(root, "", &["foo", "bar"], &["a", "x"]);
                check_dir(root, "a", &[], &["b1", "b2"]);
                check_dir(root, "a/b1", &[], &[]);
                check_dir(root, "a/b2", &["c1"], &["c2"]);
                check_dir(root, "a/b2/c2", &["d1", "d2", "d3"], &[]);
                check_dir(root, "x", &["y1", "y2"], &["y3"]);
                check_dir(root, "x/y3", &[], &[]);
                check_file(root, "a/b2/c1", &[]);
                check_file(root, "a/b2/c2/d1", &[]);
                check_file(root, "a/b2/c2/d2", &[]);
                check_file(root, "a/b2/c2/d3", &[]);
                check_file(root, "x/y1", &[]);
                check_file(root, "x/y2", &[]);
                check_file(root, "foo", &[]);
                check_file(root, "bar", &[]);
            },
        );
    }

    #[test]
    fn large_file() {
        run_test(
            |zip| {
                let data = vec![10; 2 << 20];
                zip.start_file("foo", FileOptions::default()).unwrap();
                zip.write_all(&data).unwrap();
            },
            |root| {
                let data = vec![10; 2 << 20];
                check_file(root, "foo", &data);
            },
        );
    }
}
597}