Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2021 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | use anyhow::{anyhow, bail, Result}; |
| 17 | use std::collections::HashMap; |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 18 | use std::ffi::{CStr, CString}; |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 19 | use std::io; |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 20 | use std::os::unix::ffi::OsStrExt; |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 21 | |
| 22 | /// `InodeTable` is a table of `InodeData` indexed by `Inode`. |
| 23 | #[derive(Debug)] |
| 24 | pub struct InodeTable { |
| 25 | table: Vec<InodeData>, |
| 26 | } |
| 27 | |
| 28 | /// `Inode` is the handle (or index in the table) to `InodeData` which represents an inode. |
| 29 | pub type Inode = u64; |
| 30 | |
| 31 | const INVALID: Inode = 0; |
| 32 | const ROOT: Inode = 1; |
| 33 | |
| 34 | /// `InodeData` represents an inode which has metadata about a file or a directory |
| 35 | #[derive(Debug)] |
| 36 | pub struct InodeData { |
| 37 | /// Size of the file that this inode represents. In case when the file is a directory, this |
| 38 | // is zero. |
| 39 | pub size: u64, |
| 40 | /// unix mode of this inode. It may not have `S_IFDIR` and `S_IFREG` in case the original zip |
| 41 | /// doesn't have the information in the external_attributes fields. To test if this inode |
| 42 | /// is for a regular file or a directory, use `is_dir`. |
| 43 | pub mode: u32, |
| 44 | data: InodeDataData, |
| 45 | } |
| 46 | |
| 47 | type ZipIndex = usize; |
| 48 | |
| 49 | /// `InodeDataData` is the actual data (or a means to access the data) of the file or the directory |
| 50 | /// that an inode is representing. In case of a directory, this data is the hash table of the |
| 51 | /// directory entries. In case of a file, this data is the index of the file in `ZipArchive` which |
| 52 | /// can be used to retrieve `ZipFile` that provides access to the content of the file. |
| 53 | #[derive(Debug)] |
| 54 | enum InodeDataData { |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 55 | Directory(HashMap<CString, DirectoryEntry>), |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 56 | File(ZipIndex), |
| 57 | } |
| 58 | |
| 59 | #[derive(Debug, Clone)] |
| 60 | pub struct DirectoryEntry { |
| 61 | pub inode: Inode, |
| 62 | pub kind: InodeKind, |
| 63 | } |
| 64 | |
| 65 | #[derive(Debug, Clone, PartialEq, Copy)] |
| 66 | pub enum InodeKind { |
| 67 | Directory, |
| 68 | File, |
| 69 | } |
| 70 | |
| 71 | impl InodeData { |
| 72 | pub fn is_dir(&self) -> bool { |
| 73 | matches!(&self.data, InodeDataData::Directory(_)) |
| 74 | } |
| 75 | |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 76 | pub fn get_directory(&self) -> Option<&HashMap<CString, DirectoryEntry>> { |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 77 | match &self.data { |
| 78 | InodeDataData::Directory(hash) => Some(hash), |
| 79 | _ => None, |
| 80 | } |
| 81 | } |
| 82 | |
| 83 | pub fn get_zip_index(&self) -> Option<ZipIndex> { |
| 84 | match &self.data { |
| 85 | InodeDataData::File(zip_index) => Some(*zip_index), |
| 86 | _ => None, |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | // Below methods are used to construct the inode table when initializing the filesystem. Once |
| 91 | // the initialization is done, these are not used because this is a read-only filesystem. |
| 92 | |
| 93 | fn new_dir(mode: u32) -> InodeData { |
| 94 | InodeData { mode, size: 0, data: InodeDataData::Directory(HashMap::new()) } |
| 95 | } |
| 96 | |
| 97 | fn new_file(zip_index: ZipIndex, zip_file: &zip::read::ZipFile) -> InodeData { |
| 98 | InodeData { |
| 99 | mode: zip_file.unix_mode().unwrap_or(0), |
| 100 | size: zip_file.size(), |
| 101 | data: InodeDataData::File(zip_index), |
| 102 | } |
| 103 | } |
| 104 | |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 105 | fn add_to_directory(&mut self, name: CString, entry: DirectoryEntry) { |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 106 | match &mut self.data { |
| 107 | InodeDataData::Directory(hashtable) => { |
| 108 | let existing = hashtable.insert(name, entry); |
| 109 | assert!(existing.is_none()); |
| 110 | } |
| 111 | _ => { |
| 112 | panic!("can't add a directory entry to a file inode"); |
| 113 | } |
| 114 | } |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | impl InodeTable { |
| 119 | /// Gets `InodeData` at a specific index. |
| 120 | pub fn get(&self, inode: Inode) -> Option<&InodeData> { |
| 121 | match inode { |
| 122 | INVALID => None, |
| 123 | _ => self.table.get(inode as usize), |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | fn get_mut(&mut self, inode: Inode) -> Option<&mut InodeData> { |
| 128 | match inode { |
| 129 | INVALID => None, |
| 130 | _ => self.table.get_mut(inode as usize), |
| 131 | } |
| 132 | } |
| 133 | |
| 134 | fn put(&mut self, data: InodeData) -> Inode { |
| 135 | let inode = self.table.len() as Inode; |
| 136 | self.table.push(data); |
| 137 | inode |
| 138 | } |
| 139 | |
| 140 | /// Finds the inode number of a file named `name` in the `parent` inode. The `parent` inode |
| 141 | /// must exist and be a directory. |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 142 | fn find(&self, parent: Inode, name: &CStr) -> Option<Inode> { |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 143 | let data = self.get(parent).unwrap(); |
| 144 | match data.get_directory().unwrap().get(name) { |
| 145 | Some(DirectoryEntry { inode, .. }) => Some(*inode), |
| 146 | _ => None, |
| 147 | } |
| 148 | } |
| 149 | |
| 150 | // Adds the inode `data` to the inode table and also links it to the `parent` inode as a file |
| 151 | // named `name`. The `parent` inode must exist and be a directory. |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 152 | fn add(&mut self, parent: Inode, name: CString, data: InodeData) -> Inode { |
| 153 | assert!(self.find(parent, &name).is_none()); |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 154 | |
| 155 | let kind = if data.is_dir() { InodeKind::Directory } else { InodeKind::File }; |
| 156 | // Add the inode to the table |
| 157 | let inode = self.put(data); |
| 158 | |
| 159 | // ... and then register it to the directory of the parent inode |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 160 | self.get_mut(parent).unwrap().add_to_directory(name, DirectoryEntry { inode, kind }); |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 161 | inode |
| 162 | } |
| 163 | |
| 164 | /// Constructs `InodeTable` from a zip archive `archive`. |
| 165 | pub fn from_zip<R: io::Read + io::Seek>( |
| 166 | archive: &mut zip::ZipArchive<R>, |
| 167 | ) -> Result<InodeTable> { |
| 168 | let mut table = InodeTable { table: Vec::new() }; |
| 169 | |
| 170 | // Add the inodes for the invalid and the root directory |
| 171 | assert_eq!(INVALID, table.put(InodeData::new_dir(0))); |
| 172 | assert_eq!(ROOT, table.put(InodeData::new_dir(0))); |
| 173 | |
| 174 | // For each zip file in the archive, create an inode and add it to the table. If the file's |
| 175 | // parent directories don't have corresponding inodes in the table, handle them too. |
| 176 | for i in 0..archive.len() { |
| 177 | let file = archive.by_index(i)?; |
| 178 | let path = file |
| 179 | .enclosed_name() |
| 180 | .ok_or_else(|| anyhow!("{} is an invalid name", file.name()))?; |
| 181 | // TODO(jiyong): normalize this (e.g. a/b/c/../d -> a/b/d). We can't use |
| 182 | // fs::canonicalize as this is a non-existing path yet. |
| 183 | |
| 184 | let mut parent = ROOT; |
| 185 | let mut iter = path.iter().peekable(); |
| 186 | while let Some(name) = iter.next() { |
| 187 | // TODO(jiyong): remove this check by canonicalizing `path` |
| 188 | if name == ".." { |
| 189 | bail!(".. is not allowed"); |
| 190 | } |
| 191 | |
| 192 | let is_leaf = iter.peek().is_none(); |
| 193 | let is_file = file.is_file() && is_leaf; |
| 194 | |
| 195 | // The happy path; the inode for `name` is already in the `parent` inode. Move on |
| 196 | // to the next path element. |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 197 | let name = CString::new(name.as_bytes()).unwrap(); |
| 198 | if let Some(found) = table.find(parent, &name) { |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 199 | parent = found; |
| 200 | // Update the mode if this is a directory leaf. |
| 201 | if !is_file && is_leaf { |
| 202 | let mut inode = table.get_mut(parent).unwrap(); |
| 203 | inode.mode = file.unix_mode().unwrap_or(0); |
| 204 | } |
| 205 | continue; |
| 206 | } |
| 207 | |
| 208 | const DEFAULT_DIR_MODE: u32 = libc::S_IRUSR | libc::S_IXUSR; |
| 209 | |
| 210 | // No inode found. Create a new inode and add it to the inode table. |
| 211 | let inode = if is_file { |
| 212 | InodeData::new_file(i, &file) |
| 213 | } else if is_leaf { |
| 214 | InodeData::new_dir(file.unix_mode().unwrap_or(DEFAULT_DIR_MODE)) |
| 215 | } else { |
| 216 | InodeData::new_dir(DEFAULT_DIR_MODE) |
| 217 | }; |
| 218 | let new = table.add(parent, name, inode); |
| 219 | parent = new; |
| 220 | } |
| 221 | } |
| 222 | Ok(table) |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | #[cfg(test)] |
| 227 | mod tests { |
| 228 | use crate::inode::*; |
| 229 | use std::io::{Cursor, Write}; |
| 230 | use zip::write::FileOptions; |
| 231 | |
| 232 | // Creates an in-memory zip buffer, adds some files to it, and converts it to InodeTable |
| 233 | fn setup(add: fn(&mut zip::ZipWriter<&mut std::io::Cursor<Vec<u8>>>)) -> InodeTable { |
| 234 | let mut buf: Cursor<Vec<u8>> = Cursor::new(Vec::new()); |
| 235 | let mut writer = zip::ZipWriter::new(&mut buf); |
| 236 | add(&mut writer); |
| 237 | assert!(writer.finish().is_ok()); |
| 238 | drop(writer); |
| 239 | |
| 240 | let zip = zip::ZipArchive::new(buf); |
| 241 | assert!(zip.is_ok()); |
| 242 | let it = InodeTable::from_zip(&mut zip.unwrap()); |
| 243 | assert!(it.is_ok()); |
| 244 | it.unwrap() |
| 245 | } |
| 246 | |
| 247 | fn check_dir(it: &InodeTable, parent: Inode, name: &str) -> Inode { |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 248 | let name = CString::new(name.as_bytes()).unwrap(); |
| 249 | let inode = it.find(parent, &name); |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 250 | assert!(inode.is_some()); |
| 251 | let inode = inode.unwrap(); |
| 252 | let inode_data = it.get(inode); |
| 253 | assert!(inode_data.is_some()); |
| 254 | let inode_data = inode_data.unwrap(); |
| 255 | assert_eq!(0, inode_data.size); |
| 256 | assert!(inode_data.is_dir()); |
| 257 | inode |
| 258 | } |
| 259 | |
| 260 | fn check_file<'a>(it: &'a InodeTable, parent: Inode, name: &str) -> &'a InodeData { |
Jiyong Park | 851f68a | 2021-05-11 21:41:25 +0900 | [diff] [blame] | 261 | let name = CString::new(name.as_bytes()).unwrap(); |
| 262 | let inode = it.find(parent, &name); |
Jiyong Park | 331d1ea | 2021-05-10 11:01:23 +0900 | [diff] [blame] | 263 | assert!(inode.is_some()); |
| 264 | let inode = inode.unwrap(); |
| 265 | let inode_data = it.get(inode); |
| 266 | assert!(inode_data.is_some()); |
| 267 | let inode_data = inode_data.unwrap(); |
| 268 | assert!(!inode_data.is_dir()); |
| 269 | inode_data |
| 270 | } |
| 271 | |
| 272 | #[test] |
| 273 | fn empty_zip_has_two_inodes() { |
| 274 | let it = setup(|_| {}); |
| 275 | assert_eq!(2, it.table.len()); |
| 276 | assert!(it.get(INVALID).is_none()); |
| 277 | assert!(it.get(ROOT).is_some()); |
| 278 | } |
| 279 | |
| 280 | #[test] |
| 281 | fn one_file() { |
| 282 | let it = setup(|zip| { |
| 283 | zip.start_file("foo", FileOptions::default()).unwrap(); |
| 284 | zip.write_all(b"0123456789").unwrap(); |
| 285 | }); |
| 286 | let inode_data = check_file(&it, ROOT, "foo"); |
| 287 | assert_eq!(b"0123456789".len() as u64, inode_data.size); |
| 288 | } |
| 289 | |
| 290 | #[test] |
| 291 | fn one_dir() { |
| 292 | let it = setup(|zip| { |
| 293 | zip.add_directory("foo", FileOptions::default()).unwrap(); |
| 294 | }); |
| 295 | let inode = check_dir(&it, ROOT, "foo"); |
| 296 | // The directory doesn't have any entries |
| 297 | assert_eq!(0, it.get(inode).unwrap().get_directory().unwrap().len()); |
| 298 | } |
| 299 | |
| 300 | #[test] |
| 301 | fn one_file_in_subdirs() { |
| 302 | let it = setup(|zip| { |
| 303 | zip.start_file("a/b/c/d", FileOptions::default()).unwrap(); |
| 304 | zip.write_all(b"0123456789").unwrap(); |
| 305 | }); |
| 306 | |
| 307 | assert_eq!(6, it.table.len()); |
| 308 | let a = check_dir(&it, ROOT, "a"); |
| 309 | let b = check_dir(&it, a, "b"); |
| 310 | let c = check_dir(&it, b, "c"); |
| 311 | let d = check_file(&it, c, "d"); |
| 312 | assert_eq!(10, d.size); |
| 313 | } |
| 314 | |
| 315 | #[test] |
| 316 | fn complex_hierarchy() { |
| 317 | // root/ |
| 318 | // a/ |
| 319 | // b1/ |
| 320 | // b2/ |
| 321 | // c1 (file) |
| 322 | // c2/ |
| 323 | // d1 (file) |
| 324 | // d2 (file) |
| 325 | // d3 (file) |
| 326 | // x/ |
| 327 | // y1 (file) |
| 328 | // y2 (file) |
| 329 | // y3/ |
| 330 | // |
| 331 | // foo (file) |
| 332 | // bar (file) |
| 333 | let it = setup(|zip| { |
| 334 | let opt = FileOptions::default(); |
| 335 | zip.add_directory("a/b1", opt).unwrap(); |
| 336 | |
| 337 | zip.start_file("a/b2/c1", opt).unwrap(); |
| 338 | |
| 339 | zip.start_file("a/b2/c2/d1", opt).unwrap(); |
| 340 | zip.start_file("a/b2/c2/d2", opt).unwrap(); |
| 341 | zip.start_file("a/b2/c2/d3", opt).unwrap(); |
| 342 | |
| 343 | zip.start_file("x/y1", opt).unwrap(); |
| 344 | zip.start_file("x/y2", opt).unwrap(); |
| 345 | zip.add_directory("x/y3", opt).unwrap(); |
| 346 | |
| 347 | zip.start_file("foo", opt).unwrap(); |
| 348 | zip.start_file("bar", opt).unwrap(); |
| 349 | }); |
| 350 | |
| 351 | assert_eq!(16, it.table.len()); // 8 files, 6 dirs, and 2 (for root and the invalid inode) |
| 352 | let a = check_dir(&it, ROOT, "a"); |
| 353 | let _b1 = check_dir(&it, a, "b1"); |
| 354 | let b2 = check_dir(&it, a, "b2"); |
| 355 | let _c1 = check_file(&it, b2, "c1"); |
| 356 | |
| 357 | let c2 = check_dir(&it, b2, "c2"); |
| 358 | let _d1 = check_file(&it, c2, "d1"); |
| 359 | let _d2 = check_file(&it, c2, "d3"); |
| 360 | let _d3 = check_file(&it, c2, "d3"); |
| 361 | |
| 362 | let x = check_dir(&it, ROOT, "x"); |
| 363 | let _y1 = check_file(&it, x, "y1"); |
| 364 | let _y2 = check_file(&it, x, "y2"); |
| 365 | let _y3 = check_dir(&it, x, "y3"); |
| 366 | |
| 367 | let _foo = check_file(&it, ROOT, "foo"); |
| 368 | let _bar = check_file(&it, ROOT, "bar"); |
| 369 | } |
| 370 | |
| 371 | #[test] |
| 372 | fn file_size() { |
| 373 | let it = setup(|zip| { |
| 374 | let opt = FileOptions::default(); |
| 375 | zip.start_file("empty", opt).unwrap(); |
| 376 | |
| 377 | zip.start_file("10bytes", opt).unwrap(); |
| 378 | zip.write_all(&[0; 10]).unwrap(); |
| 379 | |
| 380 | zip.start_file("1234bytes", opt).unwrap(); |
| 381 | zip.write_all(&[0; 1234]).unwrap(); |
| 382 | |
| 383 | zip.start_file("2^20bytes", opt).unwrap(); |
| 384 | zip.write_all(&[0; 2 << 20]).unwrap(); |
| 385 | }); |
| 386 | |
| 387 | let f = check_file(&it, ROOT, "empty"); |
| 388 | assert_eq!(0, f.size); |
| 389 | |
| 390 | let f = check_file(&it, ROOT, "10bytes"); |
| 391 | assert_eq!(10, f.size); |
| 392 | |
| 393 | let f = check_file(&it, ROOT, "1234bytes"); |
| 394 | assert_eq!(1234, f.size); |
| 395 | |
| 396 | let f = check_file(&it, ROOT, "2^20bytes"); |
| 397 | assert_eq!(2 << 20, f.size); |
| 398 | } |
| 399 | |
| 400 | #[test] |
| 401 | fn rejects_invalid_paths() { |
| 402 | let invalid_paths = [ |
| 403 | "a/../../b", // escapes the root |
| 404 | "a/..", // escapes the root |
| 405 | "a/../../b/c", // escape the root |
| 406 | "a/b/../c", // doesn't escape the root, but not normalized |
| 407 | ]; |
| 408 | for path in invalid_paths.iter() { |
| 409 | let mut buf: Cursor<Vec<u8>> = Cursor::new(Vec::new()); |
| 410 | let mut writer = zip::ZipWriter::new(&mut buf); |
| 411 | writer.start_file(*path, FileOptions::default()).unwrap(); |
| 412 | assert!(writer.finish().is_ok()); |
| 413 | drop(writer); |
| 414 | |
| 415 | let zip = zip::ZipArchive::new(buf); |
| 416 | assert!(zip.is_ok()); |
| 417 | let it = InodeTable::from_zip(&mut zip.unwrap()); |
| 418 | assert!(it.is_err()); |
| 419 | } |
| 420 | } |
| 421 | } |