blob: d6710d61784a43fc871b1dbbe8f54e26447ac8d0 [file] [log] [blame]
Jiyong Park331d1ea2021-05-10 11:01:23 +09001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
//! `zipfuse` is a FUSE filesystem for zip archives. It provides transparent access to the files
//! in a zip archive. This filesystem does not support writing files back to the zip archive.
//! The filesystem has to be mounted read only.
20
21mod inode;
22
23use anyhow::Result;
24use clap::{App, Arg};
25use fuse::filesystem::*;
26use fuse::mount::*;
27use std::collections::HashMap;
28use std::convert::TryFrom;
29use std::ffi::{CStr, CString, OsStr};
30use std::fs::{File, OpenOptions};
31use std::io;
32use std::io::Read;
33use std::os::unix::ffi::OsStrExt;
34use std::os::unix::io::AsRawFd;
35use std::path::Path;
36use std::sync::Mutex;
37
38use crate::inode::{DirectoryEntry, Inode, InodeData, InodeKind, InodeTable};
39
40fn main() -> Result<()> {
41 let matches = App::new("zipfuse")
42 .arg(Arg::with_name("ZIPFILE").required(true))
43 .arg(Arg::with_name("MOUNTPOINT").required(true))
44 .get_matches();
45
46 let zip_file = matches.value_of("ZIPFILE").unwrap().as_ref();
47 let mount_point = matches.value_of("MOUNTPOINT").unwrap().as_ref();
48 run_fuse(zip_file, mount_point)?;
49 Ok(())
50}
51
52/// Runs a fuse filesystem by mounting `zip_file` on `mount_point`.
53pub fn run_fuse(zip_file: &Path, mount_point: &Path) -> Result<()> {
54 const MAX_READ: u32 = 1 << 20; // TODO(jiyong): tune this
55 const MAX_WRITE: u32 = 1 << 13; // This is a read-only filesystem
56
57 let dev_fuse = OpenOptions::new().read(true).write(true).open("/dev/fuse")?;
58
59 fuse::mount(
60 mount_point,
61 "zipfuse",
62 libc::MS_NOSUID | libc::MS_NODEV | libc::MS_RDONLY,
63 &[
64 MountOption::FD(dev_fuse.as_raw_fd()),
65 MountOption::RootMode(libc::S_IFDIR | libc::S_IXUSR | libc::S_IXGRP | libc::S_IXOTH),
66 MountOption::AllowOther,
67 MountOption::UserId(0),
68 MountOption::GroupId(0),
69 MountOption::MaxRead(MAX_READ),
70 ],
71 )?;
72 Ok(fuse::worker::start_message_loop(dev_fuse, MAX_READ, MAX_WRITE, ZipFuse::new(zip_file)?)?)
73}
74
/// State of the FUSE filesystem that serves a single zip archive.
struct ZipFuse {
    /// The archive being served. It is locked whenever an entry has to be read from it.
    zip_archive: Mutex<zip::ZipArchive<File>>,
    /// Inode metadata built from the archive when the filesystem is created.
    inode_table: InodeTable,
    /// Contents of the currently opened files, keyed by handle.
    open_files: Mutex<HashMap<Handle, OpenFileBuf>>,
    /// Entry lists of the currently opened directories, keyed by handle.
    open_dirs: Mutex<HashMap<Handle, OpenDirBuf>>,
}
81
/// Holds the (decompressed) contents of a [`ZipFile`].
///
/// This buf is needed because `ZipFile` is in general not seekable due to the compression.
///
/// TODO(jiyong): do this only for compressed `ZipFile`s. Uncompressed (store) files don't need
/// this; they can be directly read from `zip_archive`.
struct OpenFileBuf {
    /// Number of opens that currently share `buf`.
    open_count: u32, // multiple opens share the buf because this is a read-only filesystem
    /// The entire file content, read from the archive when the file is first opened.
    buf: Box<[u8]>,
}
92
/// Holds the directory entries in a directory opened by [`opendir`].
struct OpenDirBuf {
    /// Number of opens that currently share `buf`.
    open_count: u32,
    /// Snapshot of the directory: one `(name, entry)` pair per child, taken when the directory
    /// is first opened.
    buf: Box<[(CString, DirectoryEntry)]>,
}
98
/// Handle type returned by `open` and `opendir`. In this file a handle is always the inode
/// number of the opened file or directory cast to `Handle`.
type Handle = u64;
100
101fn ebadf() -> io::Error {
102 io::Error::from_raw_os_error(libc::EBADF)
103}
104
/// Returns the longest representable `Duration`: `u64::MAX` seconds plus the largest valid
/// sub-second nanosecond count. It is used as the attribute/entry cache timeout.
fn timeout_max() -> std::time::Duration {
    use std::time::Duration;

    // One nanosecond short of a full second.
    const MAX_SUBSEC_NANOS: u32 = 999_999_999;
    Duration::new(u64::MAX, MAX_SUBSEC_NANOS)
}
108
109impl ZipFuse {
110 fn new(zip_file: &Path) -> Result<ZipFuse> {
111 // TODO(jiyong): Use O_DIRECT to avoid double caching.
112 // `.custom_flags(nix::fcntl::OFlag::O_DIRECT.bits())` currently doesn't work.
113 let f = OpenOptions::new().read(true).open(zip_file)?;
114 let mut z = zip::ZipArchive::new(f)?;
115 let it = InodeTable::from_zip(&mut z)?;
116 Ok(ZipFuse {
117 zip_archive: Mutex::new(z),
118 inode_table: it,
119 open_files: Mutex::new(HashMap::new()),
120 open_dirs: Mutex::new(HashMap::new()),
121 })
122 }
123
124 fn find_inode(&self, inode: Inode) -> io::Result<&InodeData> {
125 self.inode_table.get(inode).ok_or_else(ebadf)
126 }
127
128 fn stat_from(&self, inode: Inode) -> io::Result<libc::stat64> {
129 let inode_data = self.find_inode(inode)?;
130 let mut st = unsafe { std::mem::MaybeUninit::<libc::stat64>::zeroed().assume_init() };
131 st.st_dev = 0;
132 st.st_nlink = if inode_data.is_dir() {
133 // 2 is for . and ..
134 // unwrap is safe because of the `is_dir` check.
135 2 + inode_data.get_directory().unwrap().len() as libc::nlink_t
136 } else {
137 1
138 };
139 st.st_ino = inode;
140 st.st_mode = if inode_data.is_dir() { libc::S_IFDIR } else { libc::S_IFREG };
141 st.st_mode |= inode_data.mode;
142 st.st_uid = 0;
143 st.st_gid = 0;
144 st.st_size = i64::try_from(inode_data.size).unwrap_or(i64::MAX);
145 Ok(st)
146 }
147}
148
149impl fuse::filesystem::FileSystem for ZipFuse {
150 type Inode = Inode;
151 type Handle = Handle;
152 type DirIter = DirIter;
153
154 fn init(&self, _capable: FsOptions) -> std::io::Result<FsOptions> {
155 // The default options added by the fuse crate are fine. We don't have additional options.
156 Ok(FsOptions::empty())
157 }
158
159 fn lookup(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<Entry> {
160 let inode = self.find_inode(parent)?;
161 let directory = inode.get_directory().ok_or_else(ebadf)?;
162 let name = OsStr::from_bytes(name.to_bytes());
163 let entry = directory.get(name);
164 match entry {
165 Some(e) => Ok(Entry {
166 inode: e.inode,
167 generation: 0,
168 attr: self.stat_from(e.inode)?,
169 attr_timeout: timeout_max(), // this is a read-only fs
170 entry_timeout: timeout_max(),
171 }),
172 _ => Err(io::Error::from_raw_os_error(libc::ENOENT)),
173 }
174 }
175
176 fn getattr(
177 &self,
178 _ctx: Context,
179 inode: Self::Inode,
180 _handle: Option<Self::Handle>,
181 ) -> io::Result<(libc::stat64, std::time::Duration)> {
182 let st = self.stat_from(inode)?;
183 Ok((st, timeout_max()))
184 }
185
186 fn open(
187 &self,
188 _ctx: Context,
189 inode: Self::Inode,
190 _flags: u32,
191 ) -> io::Result<(Option<Self::Handle>, fuse::filesystem::OpenOptions)> {
192 let mut open_files = self.open_files.lock().unwrap();
193 let handle = inode as Handle;
194
195 // If the file is already opened, just increase the reference counter. If not, read the
196 // entire file content to the buffer. When `read` is called, a portion of the buffer is
197 // copied to the kernel.
198 // TODO(jiyong): do this only for compressed zip files. Files that are not compressed
199 // (store) can be directly read from zip_archive. That will help reduce the memory usage.
200 if let Some(ofb) = open_files.get_mut(&handle) {
201 if ofb.open_count == 0 {
202 return Err(ebadf());
203 }
204 ofb.open_count += 1;
205 } else {
206 let inode_data = self.find_inode(inode)?;
207 let zip_index = inode_data.get_zip_index().ok_or_else(ebadf)?;
208 let mut zip_archive = self.zip_archive.lock().unwrap();
209 let mut zip_file = zip_archive.by_index(zip_index)?;
210 let mut buf = Vec::with_capacity(inode_data.size as usize);
211 zip_file.read_to_end(&mut buf)?;
212 open_files.insert(handle, OpenFileBuf { open_count: 1, buf: buf.into_boxed_slice() });
213 }
214 // Note: we don't return `DIRECT_IO` here, because then applications wouldn't be able to
215 // mmap the files.
216 Ok((Some(handle), fuse::filesystem::OpenOptions::empty()))
217 }
218
219 fn release(
220 &self,
221 _ctx: Context,
222 inode: Self::Inode,
223 _flags: u32,
224 _handle: Self::Handle,
225 _flush: bool,
226 _flock_release: bool,
227 _lock_owner: Option<u64>,
228 ) -> io::Result<()> {
229 // Releases the buffer for the `handle` when it is opened for nobody. While this is good
230 // for saving memory, this has a performance implication because we need to decompress
231 // again when the same file is opened in the future.
232 let mut open_files = self.open_files.lock().unwrap();
233 let handle = inode as Handle;
234 if let Some(ofb) = open_files.get_mut(&handle) {
235 if ofb.open_count.checked_sub(1).ok_or_else(ebadf)? == 0 {
236 open_files.remove(&handle);
237 }
238 Ok(())
239 } else {
240 Err(ebadf())
241 }
242 }
243
244 fn read<W: io::Write + ZeroCopyWriter>(
245 &self,
246 _ctx: Context,
247 _inode: Self::Inode,
248 handle: Self::Handle,
249 mut w: W,
250 size: u32,
251 offset: u64,
252 _lock_owner: Option<u64>,
253 _flags: u32,
254 ) -> io::Result<usize> {
255 let open_files = self.open_files.lock().unwrap();
256 let ofb = open_files.get(&handle).ok_or_else(ebadf)?;
257 if ofb.open_count == 0 {
258 return Err(ebadf());
259 }
260 let start = offset as usize;
261 let end = start + size as usize;
262 let end = std::cmp::min(end, ofb.buf.len());
263 let read_len = w.write(&ofb.buf[start..end])?;
264 Ok(read_len)
265 }
266
267 fn opendir(
268 &self,
269 _ctx: Context,
270 inode: Self::Inode,
271 _flags: u32,
272 ) -> io::Result<(Option<Self::Handle>, fuse::filesystem::OpenOptions)> {
273 let mut open_dirs = self.open_dirs.lock().unwrap();
274 let handle = inode as Handle;
275 if let Some(odb) = open_dirs.get_mut(&handle) {
276 if odb.open_count == 0 {
277 return Err(ebadf());
278 }
279 odb.open_count += 1;
280 } else {
281 let inode_data = self.find_inode(inode)?;
282 let directory = inode_data.get_directory().ok_or_else(ebadf)?;
283 let mut buf: Vec<(CString, DirectoryEntry)> = Vec::with_capacity(directory.len());
284 for (name, dir_entry) in directory.iter() {
285 let name = CString::new(name.as_bytes()).unwrap();
286 buf.push((name, dir_entry.clone()));
287 }
288 open_dirs.insert(handle, OpenDirBuf { open_count: 1, buf: buf.into_boxed_slice() });
289 }
290 Ok((Some(handle), fuse::filesystem::OpenOptions::empty()))
291 }
292
293 fn releasedir(
294 &self,
295 _ctx: Context,
296 inode: Self::Inode,
297 _flags: u32,
298 _handle: Self::Handle,
299 ) -> io::Result<()> {
300 let mut open_dirs = self.open_dirs.lock().unwrap();
301 let handle = inode as Handle;
302 if let Some(odb) = open_dirs.get_mut(&handle) {
303 if odb.open_count.checked_sub(1).ok_or_else(ebadf)? == 0 {
304 open_dirs.remove(&handle);
305 }
306 Ok(())
307 } else {
308 Err(ebadf())
309 }
310 }
311
312 fn readdir(
313 &self,
314 _ctx: Context,
315 inode: Self::Inode,
316 _handle: Self::Handle,
317 size: u32,
318 offset: u64,
319 ) -> io::Result<Self::DirIter> {
320 let open_dirs = self.open_dirs.lock().unwrap();
321 let handle = inode as Handle;
322 let odb = open_dirs.get(&handle).ok_or_else(ebadf)?;
323 if odb.open_count == 0 {
324 return Err(ebadf());
325 }
326 let buf = &odb.buf;
327 let start = offset as usize;
328 let end = start + size as usize;
329 let end = std::cmp::min(end, buf.len());
330 let mut new_buf = Vec::with_capacity(end - start);
331 // The portion of `buf` is *copied* to the iterator. This is not ideal, but inevitable
332 // because the `name` field in `fuse::filesystem::DirEntry` is `&CStr` not `CString`.
333 new_buf.extend_from_slice(&buf[start..end]);
334 Ok(DirIter { inner: new_buf, offset, cur: 0 })
335 }
336}
337
/// Iterator over a copied range of directory entries, as returned by `readdir`.
struct DirIter {
    /// The entries to yield, as `(name, entry)` pairs.
    inner: Vec<(CString, DirectoryEntry)>,
    offset: u64, // the offset where this iterator begins. `next` doesn't change this.
    cur: usize,  // the current index in `inner`. `next` advances this.
}
343
344impl fuse::filesystem::DirectoryIterator for DirIter {
345 fn next(&mut self) -> Option<fuse::filesystem::DirEntry> {
346 if self.cur >= self.inner.len() {
347 return None;
348 }
349
350 let (name, entry) = &self.inner[self.cur];
351 self.cur += 1;
352 Some(fuse::filesystem::DirEntry {
353 ino: entry.inode as libc::ino64_t,
354 offset: self.offset + self.cur as u64,
355 type_: match entry.kind {
356 InodeKind::Directory => libc::DT_DIR.into(),
357 InodeKind::File => libc::DT_REG.into(),
358 },
359 name,
360 })
361 }
362}
363
#[cfg(test)]
mod tests {
    use anyhow::{bail, Result};
    use nix::sys::statfs::{statfs, FsType};
    use std::collections::HashSet;
    use std::fs;
    use std::fs::File;
    use std::io::Write;
    use std::path::{Path, PathBuf};
    use std::time::{Duration, Instant};
    use zip::write::FileOptions;

    /// Serves `zip_path` on `mnt_path` from a background thread of the test process.
    #[cfg(not(target_os = "android"))]
    fn start_fuse(zip_path: &Path, mnt_path: &Path) {
        let zip_path = PathBuf::from(zip_path);
        let mnt_path = PathBuf::from(mnt_path);
        std::thread::spawn(move || {
            crate::run_fuse(&zip_path, &mnt_path).unwrap();
        });
    }

    /// Serves `zip_path` on `mnt_path` from a separately spawned `zipfuse` process.
    #[cfg(target_os = "android")]
    fn start_fuse(zip_path: &Path, mnt_path: &Path) {
        // Note: for some unknown reason, running a thread to serve fuse doesn't work on Android.
        // Explicitly spawn a zipfuse process instead.
        // TODO(jiyong): fix this
        let _child = std::process::Command::new("sh")
            .arg("-c")
            .arg(format!("/data/local/tmp/zipfuse {} {}", zip_path.display(), mnt_path.display()))
            .spawn()
            .expect("failed to spawn the zipfuse process");
    }

    /// Polls `mount_path` until a FUSE filesystem is mounted on it, or fails after a timeout.
    fn wait_for_mount(mount_path: &Path) -> Result<()> {
        const POLL_INTERVAL: Duration = Duration::from_millis(50);
        const TIMEOUT: Duration = Duration::from_secs(10);
        const FUSE_SUPER_MAGIC: FsType = FsType(0x65735546);

        let start_time = Instant::now();
        while statfs(mount_path)?.filesystem_type() != FUSE_SUPER_MAGIC {
            if start_time.elapsed() > TIMEOUT {
                bail!("Time out mounting zipfuse");
            }
            std::thread::sleep(POLL_INTERVAL);
        }
        Ok(())
    }

    // Creates a zip file, adds some files to the zip file, mounts it using zipfuse, runs the check
    // routine, and finally unmounts. The unmount is attempted even when the check routine panics,
    // so that a failing test does not leave the mount behind.
    fn run_test(add: fn(&mut zip::ZipWriter<File>), check: fn(&std::path::Path)) {
        // Create an empty zip file
        let test_dir = tempfile::TempDir::new().unwrap();
        let zip_path = test_dir.path().join("test.zip");
        let zip_file = File::create(&zip_path).expect("failed to create the test zip file");
        let mut zip = zip::ZipWriter::new(zip_file);

        // Let test users add files/dirs to the zip file
        add(&mut zip);
        zip.finish().expect("failed to finalize the test zip file");
        drop(zip);

        // Mount the zip file on the "mnt" dir using zipfuse.
        let mnt_path = test_dir.path().join("mnt");
        fs::create_dir(&mnt_path).expect("failed to create the mount point");

        start_fuse(&zip_path, &mnt_path);

        // Give some time for the fuse to boot up
        wait_for_mount(&mnt_path).expect("zipfuse was not mounted");

        // Run the check routine, and do the clean up. A panic from the check routine is held
        // back until the unmount has been attempted, then re-raised.
        let outcome = std::panic::catch_unwind(|| check(&mnt_path));
        let unmounted = nix::mount::umount2(&mnt_path, nix::mount::MntFlags::empty());
        if let Err(payload) = outcome {
            std::panic::resume_unwind(payload);
        }
        unmounted.expect("failed to unmount zipfuse");
    }

    /// Asserts that `root/file` is a regular file whose size and content match `content`.
    fn check_file(root: &Path, file: &str, content: &[u8]) {
        let path = root.join(file);
        assert!(path.exists());

        let metadata = fs::metadata(&path).expect("failed to stat the file");
        assert!(metadata.is_file());
        assert_eq!(content.len(), metadata.len() as usize);

        let read_data = fs::read(&path).expect("failed to read the file");
        assert_eq!(content, read_data.as_slice());
    }

    /// Asserts that `root/dir` is a directory containing exactly the given `files` and `dirs`.
    fn check_dir(root: &Path, dir: &str, files: &[&str], dirs: &[&str]) {
        let dir_path = root.join(dir);
        assert!(dir_path.exists());

        let metadata = fs::metadata(&dir_path).expect("failed to stat the directory");
        assert!(metadata.is_dir());

        let iter = fs::read_dir(&dir_path).expect("failed to read the directory");
        let mut actual_files = HashSet::new();
        let mut actual_dirs = HashSet::new();
        for de in iter {
            let entry = de.unwrap();
            let path = entry.path();
            let relative = path.strip_prefix(&dir_path).unwrap().to_path_buf();
            if path.is_dir() {
                actual_dirs.insert(relative);
            } else {
                actual_files.insert(relative);
            }
        }
        let expected_files: HashSet<PathBuf> = files.iter().map(|&s| PathBuf::from(s)).collect();
        let expected_dirs: HashSet<PathBuf> = dirs.iter().map(|&s| PathBuf::from(s)).collect();

        assert_eq!(expected_files, actual_files);
        assert_eq!(expected_dirs, actual_dirs);
    }

    #[test]
    fn empty() {
        run_test(
            |_| {},
            |root| {
                check_dir(root, "", &[], &[]);
            },
        );
    }

    #[test]
    fn single_file() {
        run_test(
            |zip| {
                zip.start_file("foo", FileOptions::default()).unwrap();
                zip.write_all(b"0123456789").unwrap();
            },
            |root| {
                check_dir(root, "", &["foo"], &[]);
                check_file(root, "foo", b"0123456789");
            },
        );
    }

    #[test]
    fn single_dir() {
        run_test(
            |zip| {
                zip.add_directory("dir", FileOptions::default()).unwrap();
            },
            |root| {
                check_dir(root, "", &[], &["dir"]);
                check_dir(root, "dir", &[], &[]);
            },
        );
    }

    #[test]
    fn complex_hierarchy() {
        // root/
        //   a/
        //     b1/
        //     b2/
        //       c1 (file)
        //       c2/
        //         d1 (file)
        //         d2 (file)
        //         d3 (file)
        //   x/
        //     y1 (file)
        //     y2 (file)
        //     y3/
        //
        //   foo (file)
        //   bar (file)
        run_test(
            |zip| {
                let opt = FileOptions::default();
                zip.add_directory("a/b1", opt).unwrap();

                zip.start_file("a/b2/c1", opt).unwrap();

                zip.start_file("a/b2/c2/d1", opt).unwrap();
                zip.start_file("a/b2/c2/d2", opt).unwrap();
                zip.start_file("a/b2/c2/d3", opt).unwrap();

                zip.start_file("x/y1", opt).unwrap();
                zip.start_file("x/y2", opt).unwrap();
                zip.add_directory("x/y3", opt).unwrap();

                zip.start_file("foo", opt).unwrap();
                zip.start_file("bar", opt).unwrap();
            },
            |root| {
                check_dir(root, "", &["foo", "bar"], &["a", "x"]);
                check_dir(root, "a", &[], &["b1", "b2"]);
                check_dir(root, "a/b1", &[], &[]);
                check_dir(root, "a/b2", &["c1"], &["c2"]);
                check_dir(root, "a/b2/c2", &["d1", "d2", "d3"], &[]);
                check_dir(root, "x", &["y1", "y2"], &["y3"]);
                check_dir(root, "x/y3", &[], &[]);
                check_file(root, "a/b2/c1", &[]);
                check_file(root, "a/b2/c2/d1", &[]);
                check_file(root, "a/b2/c2/d2", &[]);
                check_file(root, "a/b2/c2/d3", &[]);
                check_file(root, "x/y1", &[]);
                check_file(root, "x/y2", &[]);
                check_file(root, "foo", &[]);
                check_file(root, "bar", &[]);
            },
        );
    }

    #[test]
    fn large_file() {
        run_test(
            |zip| {
                let data = vec![10; 2 << 20];
                zip.start_file("foo", FileOptions::default()).unwrap();
                zip.write_all(&data).unwrap();
            },
            |root| {
                let data = vec![10; 2 << 20];
                check_file(root, "foo", &data);
            },
        );
    }
}