blob: 56d7cabaf681201a2d5c99c1d1ca48987b54ce88 [file] [log] [blame]
Jiyong Park331d1ea2021-05-10 11:01:23 +09001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//! `zipfuse` is a FUSE filesystem for zip archives. It provides transparent access to the files
//! in a zip archive. This filesystem does not support writing files back to the zip archive.
19//! The filesystem has to be mounted read only.
20
21mod inode;
22
23use anyhow::Result;
24use clap::{App, Arg};
25use fuse::filesystem::*;
26use fuse::mount::*;
27use std::collections::HashMap;
28use std::convert::TryFrom;
Jiyong Park851f68a2021-05-11 21:41:25 +090029use std::ffi::{CStr, CString};
Jiyong Park331d1ea2021-05-10 11:01:23 +090030use std::fs::{File, OpenOptions};
31use std::io;
32use std::io::Read;
Jiyong Park331d1ea2021-05-10 11:01:23 +090033use std::os::unix::io::AsRawFd;
34use std::path::Path;
35use std::sync::Mutex;
36
37use crate::inode::{DirectoryEntry, Inode, InodeData, InodeKind, InodeTable};
38
39fn main() -> Result<()> {
40 let matches = App::new("zipfuse")
41 .arg(Arg::with_name("ZIPFILE").required(true))
42 .arg(Arg::with_name("MOUNTPOINT").required(true))
43 .get_matches();
44
45 let zip_file = matches.value_of("ZIPFILE").unwrap().as_ref();
46 let mount_point = matches.value_of("MOUNTPOINT").unwrap().as_ref();
47 run_fuse(zip_file, mount_point)?;
48 Ok(())
49}
50
51/// Runs a fuse filesystem by mounting `zip_file` on `mount_point`.
52pub fn run_fuse(zip_file: &Path, mount_point: &Path) -> Result<()> {
53 const MAX_READ: u32 = 1 << 20; // TODO(jiyong): tune this
54 const MAX_WRITE: u32 = 1 << 13; // This is a read-only filesystem
55
56 let dev_fuse = OpenOptions::new().read(true).write(true).open("/dev/fuse")?;
57
58 fuse::mount(
59 mount_point,
60 "zipfuse",
61 libc::MS_NOSUID | libc::MS_NODEV | libc::MS_RDONLY,
62 &[
63 MountOption::FD(dev_fuse.as_raw_fd()),
64 MountOption::RootMode(libc::S_IFDIR | libc::S_IXUSR | libc::S_IXGRP | libc::S_IXOTH),
65 MountOption::AllowOther,
66 MountOption::UserId(0),
67 MountOption::GroupId(0),
68 MountOption::MaxRead(MAX_READ),
69 ],
70 )?;
71 Ok(fuse::worker::start_message_loop(dev_fuse, MAX_READ, MAX_WRITE, ZipFuse::new(zip_file)?)?)
72}
73
/// The filesystem state behind one mounted zip archive.
struct ZipFuse {
    /// The archive being served. Locked while an entry is read out in `open`.
    zip_archive: Mutex<zip::ZipArchive<File>>,
    /// Inode metadata, built once from the archive in `ZipFuse::new`.
    inode_table: InodeTable,
    /// Buffered contents of the currently opened files, keyed by handle.
    open_files: Mutex<HashMap<Handle, OpenFileBuf>>,
    /// Buffered entry lists of the currently opened directories, keyed by handle.
    open_dirs: Mutex<HashMap<Handle, OpenDirBuf>>,
}
80
/// Holds the (decompressed) contents of a [`ZipFile`].
///
/// This buf is needed because `ZipFile` is in general not seekable due to the compression.
///
/// TODO(jiyong): do this only for compressed `ZipFile`s. Uncompressed (store) files don't need
/// this; they can be directly read from `zip_archive`.
struct OpenFileBuf {
    open_count: u32, // multiple opens share the buf because this is a read-only filesystem
    buf: Box<[u8]>,  // the whole file, read to the end when the file is first opened
}
91
/// Holds the directory entries in a directory opened by [`opendir`].
struct OpenDirBuf {
    open_count: u32, // number of outstanding opens sharing this buf
    buf: Box<[(CString, DirectoryEntry)]>, // (entry name, entry) pairs copied out at first open
}
97
/// File and directory handle type. `open` and `opendir` derive the handle directly from the
/// inode number (`inode as Handle`).
type Handle = u64;
99
100fn ebadf() -> io::Error {
101 io::Error::from_raw_os_error(libc::EBADF)
102}
103
/// Returns the largest representable `Duration`, used as the attribute/entry cache timeout.
fn timeout_max() -> std::time::Duration {
    const NANOS_PER_SEC: u32 = 1_000_000_000;
    // The nanosecond part must stay below one full second.
    std::time::Duration::new(u64::MAX, NANOS_PER_SEC - 1)
}
107
108impl ZipFuse {
109 fn new(zip_file: &Path) -> Result<ZipFuse> {
110 // TODO(jiyong): Use O_DIRECT to avoid double caching.
111 // `.custom_flags(nix::fcntl::OFlag::O_DIRECT.bits())` currently doesn't work.
112 let f = OpenOptions::new().read(true).open(zip_file)?;
113 let mut z = zip::ZipArchive::new(f)?;
114 let it = InodeTable::from_zip(&mut z)?;
115 Ok(ZipFuse {
116 zip_archive: Mutex::new(z),
117 inode_table: it,
118 open_files: Mutex::new(HashMap::new()),
119 open_dirs: Mutex::new(HashMap::new()),
120 })
121 }
122
123 fn find_inode(&self, inode: Inode) -> io::Result<&InodeData> {
124 self.inode_table.get(inode).ok_or_else(ebadf)
125 }
126
Jiyong Parkd5df9562021-05-13 00:50:23 +0900127 // TODO(jiyong) remove this. Right now this is needed to do the nlink_t to u64 conversion below
128 // on aosp_x86_64 target. That however is a useless conversion on other targets.
129 #[allow(clippy::useless_conversion)]
Jiyong Park331d1ea2021-05-10 11:01:23 +0900130 fn stat_from(&self, inode: Inode) -> io::Result<libc::stat64> {
131 let inode_data = self.find_inode(inode)?;
132 let mut st = unsafe { std::mem::MaybeUninit::<libc::stat64>::zeroed().assume_init() };
133 st.st_dev = 0;
Jiyong Parkd5df9562021-05-13 00:50:23 +0900134 st.st_nlink = if let Some(directory) = inode_data.get_directory() {
135 (2 + directory.len() as libc::nlink_t).into()
Jiyong Park331d1ea2021-05-10 11:01:23 +0900136 } else {
137 1
138 };
139 st.st_ino = inode;
140 st.st_mode = if inode_data.is_dir() { libc::S_IFDIR } else { libc::S_IFREG };
141 st.st_mode |= inode_data.mode;
142 st.st_uid = 0;
143 st.st_gid = 0;
144 st.st_size = i64::try_from(inode_data.size).unwrap_or(i64::MAX);
145 Ok(st)
146 }
147}
148
149impl fuse::filesystem::FileSystem for ZipFuse {
150 type Inode = Inode;
151 type Handle = Handle;
152 type DirIter = DirIter;
153
154 fn init(&self, _capable: FsOptions) -> std::io::Result<FsOptions> {
155 // The default options added by the fuse crate are fine. We don't have additional options.
156 Ok(FsOptions::empty())
157 }
158
159 fn lookup(&self, _ctx: Context, parent: Self::Inode, name: &CStr) -> io::Result<Entry> {
160 let inode = self.find_inode(parent)?;
161 let directory = inode.get_directory().ok_or_else(ebadf)?;
Jiyong Park331d1ea2021-05-10 11:01:23 +0900162 let entry = directory.get(name);
163 match entry {
164 Some(e) => Ok(Entry {
165 inode: e.inode,
166 generation: 0,
167 attr: self.stat_from(e.inode)?,
168 attr_timeout: timeout_max(), // this is a read-only fs
169 entry_timeout: timeout_max(),
170 }),
171 _ => Err(io::Error::from_raw_os_error(libc::ENOENT)),
172 }
173 }
174
175 fn getattr(
176 &self,
177 _ctx: Context,
178 inode: Self::Inode,
179 _handle: Option<Self::Handle>,
180 ) -> io::Result<(libc::stat64, std::time::Duration)> {
181 let st = self.stat_from(inode)?;
182 Ok((st, timeout_max()))
183 }
184
185 fn open(
186 &self,
187 _ctx: Context,
188 inode: Self::Inode,
189 _flags: u32,
190 ) -> io::Result<(Option<Self::Handle>, fuse::filesystem::OpenOptions)> {
191 let mut open_files = self.open_files.lock().unwrap();
192 let handle = inode as Handle;
193
194 // If the file is already opened, just increase the reference counter. If not, read the
195 // entire file content to the buffer. When `read` is called, a portion of the buffer is
196 // copied to the kernel.
197 // TODO(jiyong): do this only for compressed zip files. Files that are not compressed
198 // (store) can be directly read from zip_archive. That will help reduce the memory usage.
199 if let Some(ofb) = open_files.get_mut(&handle) {
200 if ofb.open_count == 0 {
201 return Err(ebadf());
202 }
203 ofb.open_count += 1;
204 } else {
205 let inode_data = self.find_inode(inode)?;
206 let zip_index = inode_data.get_zip_index().ok_or_else(ebadf)?;
207 let mut zip_archive = self.zip_archive.lock().unwrap();
208 let mut zip_file = zip_archive.by_index(zip_index)?;
209 let mut buf = Vec::with_capacity(inode_data.size as usize);
210 zip_file.read_to_end(&mut buf)?;
211 open_files.insert(handle, OpenFileBuf { open_count: 1, buf: buf.into_boxed_slice() });
212 }
213 // Note: we don't return `DIRECT_IO` here, because then applications wouldn't be able to
214 // mmap the files.
215 Ok((Some(handle), fuse::filesystem::OpenOptions::empty()))
216 }
217
218 fn release(
219 &self,
220 _ctx: Context,
221 inode: Self::Inode,
222 _flags: u32,
223 _handle: Self::Handle,
224 _flush: bool,
225 _flock_release: bool,
226 _lock_owner: Option<u64>,
227 ) -> io::Result<()> {
228 // Releases the buffer for the `handle` when it is opened for nobody. While this is good
229 // for saving memory, this has a performance implication because we need to decompress
230 // again when the same file is opened in the future.
231 let mut open_files = self.open_files.lock().unwrap();
232 let handle = inode as Handle;
233 if let Some(ofb) = open_files.get_mut(&handle) {
234 if ofb.open_count.checked_sub(1).ok_or_else(ebadf)? == 0 {
235 open_files.remove(&handle);
236 }
237 Ok(())
238 } else {
239 Err(ebadf())
240 }
241 }
242
243 fn read<W: io::Write + ZeroCopyWriter>(
244 &self,
245 _ctx: Context,
246 _inode: Self::Inode,
247 handle: Self::Handle,
248 mut w: W,
249 size: u32,
250 offset: u64,
251 _lock_owner: Option<u64>,
252 _flags: u32,
253 ) -> io::Result<usize> {
254 let open_files = self.open_files.lock().unwrap();
255 let ofb = open_files.get(&handle).ok_or_else(ebadf)?;
256 if ofb.open_count == 0 {
257 return Err(ebadf());
258 }
259 let start = offset as usize;
260 let end = start + size as usize;
261 let end = std::cmp::min(end, ofb.buf.len());
262 let read_len = w.write(&ofb.buf[start..end])?;
263 Ok(read_len)
264 }
265
266 fn opendir(
267 &self,
268 _ctx: Context,
269 inode: Self::Inode,
270 _flags: u32,
271 ) -> io::Result<(Option<Self::Handle>, fuse::filesystem::OpenOptions)> {
272 let mut open_dirs = self.open_dirs.lock().unwrap();
273 let handle = inode as Handle;
274 if let Some(odb) = open_dirs.get_mut(&handle) {
275 if odb.open_count == 0 {
276 return Err(ebadf());
277 }
278 odb.open_count += 1;
279 } else {
280 let inode_data = self.find_inode(inode)?;
281 let directory = inode_data.get_directory().ok_or_else(ebadf)?;
282 let mut buf: Vec<(CString, DirectoryEntry)> = Vec::with_capacity(directory.len());
283 for (name, dir_entry) in directory.iter() {
284 let name = CString::new(name.as_bytes()).unwrap();
285 buf.push((name, dir_entry.clone()));
286 }
287 open_dirs.insert(handle, OpenDirBuf { open_count: 1, buf: buf.into_boxed_slice() });
288 }
289 Ok((Some(handle), fuse::filesystem::OpenOptions::empty()))
290 }
291
292 fn releasedir(
293 &self,
294 _ctx: Context,
295 inode: Self::Inode,
296 _flags: u32,
297 _handle: Self::Handle,
298 ) -> io::Result<()> {
299 let mut open_dirs = self.open_dirs.lock().unwrap();
300 let handle = inode as Handle;
301 if let Some(odb) = open_dirs.get_mut(&handle) {
302 if odb.open_count.checked_sub(1).ok_or_else(ebadf)? == 0 {
303 open_dirs.remove(&handle);
304 }
305 Ok(())
306 } else {
307 Err(ebadf())
308 }
309 }
310
311 fn readdir(
312 &self,
313 _ctx: Context,
314 inode: Self::Inode,
315 _handle: Self::Handle,
316 size: u32,
317 offset: u64,
318 ) -> io::Result<Self::DirIter> {
319 let open_dirs = self.open_dirs.lock().unwrap();
320 let handle = inode as Handle;
321 let odb = open_dirs.get(&handle).ok_or_else(ebadf)?;
322 if odb.open_count == 0 {
323 return Err(ebadf());
324 }
325 let buf = &odb.buf;
326 let start = offset as usize;
327 let end = start + size as usize;
328 let end = std::cmp::min(end, buf.len());
329 let mut new_buf = Vec::with_capacity(end - start);
330 // The portion of `buf` is *copied* to the iterator. This is not ideal, but inevitable
331 // because the `name` field in `fuse::filesystem::DirEntry` is `&CStr` not `CString`.
332 new_buf.extend_from_slice(&buf[start..end]);
333 Ok(DirIter { inner: new_buf, offset, cur: 0 })
334 }
335}
336
/// Iterator over a copied slice of directory entries, as returned from `readdir`.
struct DirIter {
    inner: Vec<(CString, DirectoryEntry)>, // the (name, entry) pairs this iterator yields
    offset: u64, // the offset where this iterator begins. `next` doesn't change this.
    cur: usize,  // the current index in `inner`. `next` advances this.
}
342
343impl fuse::filesystem::DirectoryIterator for DirIter {
344 fn next(&mut self) -> Option<fuse::filesystem::DirEntry> {
345 if self.cur >= self.inner.len() {
346 return None;
347 }
348
349 let (name, entry) = &self.inner[self.cur];
350 self.cur += 1;
351 Some(fuse::filesystem::DirEntry {
352 ino: entry.inode as libc::ino64_t,
353 offset: self.offset + self.cur as u64,
354 type_: match entry.kind {
355 InodeKind::Directory => libc::DT_DIR.into(),
356 InodeKind::File => libc::DT_REG.into(),
357 },
358 name,
359 })
360 }
361}
362
#[cfg(test)]
mod tests {
    use anyhow::{bail, Result};
    use nix::sys::statfs::{statfs, FsType};
    use std::collections::HashSet;
    use std::fs;
    use std::fs::File;
    use std::io::Write;
    use std::path::{Path, PathBuf};
    use std::time::{Duration, Instant};
    use zip::write::FileOptions;

    /// Serves `zip_path` on `mnt_path` from a background thread of the test process.
    #[cfg(not(target_os = "android"))]
    fn start_fuse(zip_path: &Path, mnt_path: &Path) {
        let zip_path = PathBuf::from(zip_path);
        let mnt_path = PathBuf::from(mnt_path);
        std::thread::spawn(move || {
            crate::run_fuse(&zip_path, &mnt_path).unwrap();
        });
    }

    /// Serves `zip_path` on `mnt_path` from a separately spawned `zipfuse` process.
    #[cfg(target_os = "android")]
    fn start_fuse(zip_path: &Path, mnt_path: &Path) {
        // Note: for some unknown reason, running a thread to serve fuse doesn't work on Android.
        // Explicitly spawn a zipfuse process instead.
        // TODO(jiyong): fix this
        std::process::Command::new("sh")
            .arg("-c")
            .arg(format!("/data/local/tmp/zipfuse {} {}", zip_path.display(), mnt_path.display()))
            .spawn()
            .expect("failed to spawn the zipfuse process");
    }

    /// Polls `mount_path` until a FUSE filesystem shows up there, or fails after a timeout.
    fn wait_for_mount(mount_path: &Path) -> Result<()> {
        const POLL_INTERVAL: Duration = Duration::from_millis(50);
        const TIMEOUT: Duration = Duration::from_secs(10);
        const FUSE_SUPER_MAGIC: FsType = FsType(0x65735546);

        let start_time = Instant::now();
        while statfs(mount_path)?.filesystem_type() != FUSE_SUPER_MAGIC {
            if start_time.elapsed() > TIMEOUT {
                bail!("Time out mounting zipfuse");
            }
            std::thread::sleep(POLL_INTERVAL);
        }
        Ok(())
    }

    // Creates a zip file, adds some files to the zip file, mounts it using zipfuse, runs the check
    // routine, and finally unmounts. The unmount happens even when the check routine panics, so a
    // failing test does not leave a live mount behind inside the temporary directory.
    fn run_test(add: fn(&mut zip::ZipWriter<File>), check: fn(&std::path::Path)) {
        // Create an empty zip file
        let test_dir = tempfile::TempDir::new().unwrap();
        let zip_path = test_dir.path().join("test.zip");
        let zip_file = File::create(&zip_path).expect("failed to create the zip file");
        let mut zip = zip::ZipWriter::new(zip_file);

        // Let test users add files/dirs to the zip file
        add(&mut zip);
        zip.finish().expect("failed to finish the zip file");
        drop(zip);

        // Mount the zip file on the "mnt" dir using zipfuse.
        let mnt_path = test_dir.path().join("mnt");
        fs::create_dir(&mnt_path).expect("failed to create the mount point");

        start_fuse(&zip_path, &mnt_path);

        // Give some time for the fuse to boot up
        wait_for_mount(&mnt_path).expect("zipfuse did not get mounted");

        // Run the check routine, but hold on to a panic until the clean up is done.
        let outcome = std::panic::catch_unwind(|| check(&mnt_path));
        nix::mount::umount2(&mnt_path, nix::mount::MntFlags::empty())
            .expect("failed to unmount zipfuse");
        if let Err(panic) = outcome {
            std::panic::resume_unwind(panic);
        }
    }

    /// Asserts that `root/file` is a regular file whose size and contents match `content`.
    fn check_file(root: &Path, file: &str, content: &[u8]) {
        let path = root.join(file);
        assert!(path.exists());

        let metadata = fs::metadata(&path).expect("failed to stat the file");
        assert!(metadata.is_file());
        assert_eq!(content.len(), metadata.len() as usize);

        let read_data = fs::read(&path).expect("failed to read the file");
        assert_eq!(content, read_data.as_slice());
    }

    /// Asserts that `root/dir` is a directory containing exactly `files` and `dirs`.
    fn check_dir(root: &Path, dir: &str, files: &[&str], dirs: &[&str]) {
        let dir_path = root.join(dir);
        assert!(dir_path.exists());

        let metadata = fs::metadata(&dir_path).expect("failed to stat the directory");
        assert!(metadata.is_dir());

        let iter = fs::read_dir(&dir_path).expect("failed to read the directory");
        let mut actual_files = HashSet::new();
        let mut actual_dirs = HashSet::new();
        for de in iter {
            let path = de.unwrap().path();
            let name = path.strip_prefix(&dir_path).unwrap().to_path_buf();
            if path.is_dir() {
                actual_dirs.insert(name);
            } else {
                actual_files.insert(name);
            }
        }
        let expected_files: HashSet<PathBuf> = files.iter().map(|&s| PathBuf::from(s)).collect();
        let expected_dirs: HashSet<PathBuf> = dirs.iter().map(|&s| PathBuf::from(s)).collect();

        assert_eq!(expected_files, actual_files);
        assert_eq!(expected_dirs, actual_dirs);
    }

    #[test]
    fn empty() {
        run_test(
            |_| {},
            |root| {
                check_dir(root, "", &[], &[]);
            },
        );
    }

    #[test]
    fn single_file() {
        run_test(
            |zip| {
                zip.start_file("foo", FileOptions::default()).unwrap();
                zip.write_all(b"0123456789").unwrap();
            },
            |root| {
                check_dir(root, "", &["foo"], &[]);
                check_file(root, "foo", b"0123456789");
            },
        );
    }

    #[test]
    fn single_dir() {
        run_test(
            |zip| {
                zip.add_directory("dir", FileOptions::default()).unwrap();
            },
            |root| {
                check_dir(root, "", &[], &["dir"]);
                check_dir(root, "dir", &[], &[]);
            },
        );
    }

    #[test]
    fn complex_hierarchy() {
        // root/
        //   a/
        //     b1/
        //     b2/
        //       c1 (file)
        //       c2/
        //         d1 (file)
        //         d2 (file)
        //         d3 (file)
        //   x/
        //     y1 (file)
        //     y2 (file)
        //     y3/
        //
        //   foo (file)
        //   bar (file)
        run_test(
            |zip| {
                let opt = FileOptions::default();
                zip.add_directory("a/b1", opt).unwrap();

                zip.start_file("a/b2/c1", opt).unwrap();

                zip.start_file("a/b2/c2/d1", opt).unwrap();
                zip.start_file("a/b2/c2/d2", opt).unwrap();
                zip.start_file("a/b2/c2/d3", opt).unwrap();

                zip.start_file("x/y1", opt).unwrap();
                zip.start_file("x/y2", opt).unwrap();
                zip.add_directory("x/y3", opt).unwrap();

                zip.start_file("foo", opt).unwrap();
                zip.start_file("bar", opt).unwrap();
            },
            |root| {
                check_dir(root, "", &["foo", "bar"], &["a", "x"]);
                check_dir(root, "a", &[], &["b1", "b2"]);
                check_dir(root, "a/b1", &[], &[]);
                check_dir(root, "a/b2", &["c1"], &["c2"]);
                check_dir(root, "a/b2/c2", &["d1", "d2", "d3"], &[]);
                check_dir(root, "x", &["y1", "y2"], &["y3"]);
                check_dir(root, "x/y3", &[], &[]);
                check_file(root, "a/b2/c1", &[]);
                check_file(root, "a/b2/c2/d1", &[]);
                check_file(root, "a/b2/c2/d2", &[]);
                check_file(root, "a/b2/c2/d3", &[]);
                check_file(root, "x/y1", &[]);
                check_file(root, "x/y2", &[]);
                check_file(root, "foo", &[]);
                check_file(root, "bar", &[]);
            },
        );
    }

    #[test]
    fn large_file() {
        run_test(
            |zip| {
                let data = vec![10; 2 << 20];
                zip.start_file("foo", FileOptions::default()).unwrap();
                zip.write_all(&data).unwrap();
            },
            |root| {
                let data = vec![10; 2 << 20];
                check_file(root, "foo", &data);
            },
        );
    }
}