blob: 484aad4df9440a14113f9842d761e2452021e043 [file] [log] [blame]
Victor Hsieh88ac6ca2020-11-13 15:20:24 -08001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17use anyhow::Result;
18use std::collections::BTreeMap;
19use std::convert::TryFrom;
20use std::ffi::CStr;
21use std::fs::OpenOptions;
22use std::io;
23use std::mem::MaybeUninit;
24use std::option::Option;
25use std::os::unix::io::AsRawFd;
26use std::path::Path;
27use std::time::Duration;
28
29use fuse::filesystem::{Context, DirEntry, DirectoryIterator, Entry, FileSystem, ZeroCopyWriter};
30use fuse::mount::MountOption;
31
32use crate::common::{divide_roundup, COMMON_PAGE_SIZE};
33use crate::fsverity::FsverityChunkedFileReader;
34use crate::reader::{ChunkedFileReader, ReadOnlyDataByChunk};
35
36// We're reading the backing file by chunk, so setting the block size to be the same.
37const BLOCK_SIZE: usize = COMMON_PAGE_SIZE as usize;
38
39const DEFAULT_METADATA_TIMEOUT: std::time::Duration = Duration::from_secs(5);
40
41pub type Inode = u64;
42type Handle = u64;
43
44// A debug only type where everything are stored as local files.
45type FileBackedFsverityChunkedFileReader =
46 FsverityChunkedFileReader<ChunkedFileReader, ChunkedFileReader>;
47
48pub enum FileConfig {
49 LocalVerifiedFile(FileBackedFsverityChunkedFileReader, u64),
50 LocalUnverifiedFile(ChunkedFileReader, u64),
51}
52
53struct AuthFs {
54 /// Store `FileConfig`s using the `Inode` number as the search index.
55 ///
56 /// For further optimization to minimize the search cost, since Inode is integer, we may
57 /// consider storing them in a Vec if we can guarantee that the numbers are small and
58 /// consecutive.
59 file_pool: BTreeMap<Inode, FileConfig>,
60
61 /// Maximum bytes in the write transaction to the FUSE device. This limits the maximum size to
62 /// a read request (including FUSE protocol overhead).
63 max_write: u32,
64}
65
66impl AuthFs {
67 pub fn new(file_pool: BTreeMap<Inode, FileConfig>, max_write: u32) -> AuthFs {
68 AuthFs { file_pool, max_write }
69 }
70
71 fn get_file_config(&self, inode: &Inode) -> io::Result<&FileConfig> {
72 self.file_pool.get(&inode).ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))
73 }
74}
75
76fn check_access_mode(flags: u32, mode: libc::c_int) -> io::Result<()> {
77 if (flags & libc::O_ACCMODE as u32) == mode as u32 {
78 Ok(())
79 } else {
80 Err(io::Error::from_raw_os_error(libc::EACCES))
81 }
82}
83
84cfg_if::cfg_if! {
85 if #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))] {
86 fn blk_size() -> libc::c_int { BLOCK_SIZE as libc::c_int }
87 } else {
88 fn blk_size() -> libc::c_long { BLOCK_SIZE as libc::c_long }
89 }
90}
91
92fn create_stat(ino: libc::ino_t, file_size: u64) -> io::Result<libc::stat64> {
93 let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
94
95 st.st_ino = ino;
96 st.st_mode = libc::S_IFREG | libc::S_IRUSR | libc::S_IRGRP | libc::S_IROTH;
97 st.st_dev = 0;
98 st.st_nlink = 1;
99 st.st_uid = 0;
100 st.st_gid = 0;
101 st.st_rdev = 0;
102 st.st_size = libc::off64_t::try_from(file_size)
103 .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
104 st.st_blksize = blk_size();
105 // Per man stat(2), st_blocks is "Number of 512B blocks allocated".
106 st.st_blocks = libc::c_longlong::try_from(divide_roundup(file_size, 512))
107 .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
108 Ok(st)
109}
110
/// Splits a read request into per-chunk pieces.
///
/// Each item is an `(offset, size)` pair, where `offset` is the global file offset at which the
/// piece starts and `size` is the number of bytes to read from that offset.
struct ChunkReadIter {
    /// Bytes of the request that have not been handed out yet.
    remaining: usize,
    /// Global file offset at which the next piece starts.
    offset: u64,
}

impl ChunkReadIter {
    /// Creates an iterator covering `remaining` bytes starting at file offset `offset`.
    pub fn new(remaining: usize, offset: u64) -> Self {
        Self { remaining, offset }
    }
}
123
124impl Iterator for ChunkReadIter {
125 type Item = (u64, usize);
126
127 fn next(&mut self) -> Option<Self::Item> {
128 if self.remaining == 0 {
129 return None;
130 }
131 let chunk_data_size =
132 std::cmp::min(self.remaining, BLOCK_SIZE - (self.offset % BLOCK_SIZE as u64) as usize);
133 let retval = (self.offset, chunk_data_size);
134 self.offset += chunk_data_size as u64;
135 self.remaining = self.remaining.saturating_sub(chunk_data_size);
136 Some(retval)
137 }
138}
139
140fn offset_to_chunk_index(offset: u64) -> u64 {
141 offset / BLOCK_SIZE as u64
142}
143
144fn read_chunks<W: io::Write, T: ReadOnlyDataByChunk>(
145 mut w: W,
146 file: &T,
147 file_size: u64,
148 offset: u64,
149 size: u32,
150) -> io::Result<usize> {
151 let remaining = file_size.saturating_sub(offset);
152 let size_to_read = std::cmp::min(size as usize, remaining as usize);
153 let total = ChunkReadIter::new(size_to_read, offset).try_fold(
154 0,
155 |total, (current_offset, planned_data_size)| {
156 // TODO(victorhsieh): There might be a non-trivial way to avoid this copy. For example,
157 // instead of accepting a buffer, the writer could expose the final destination buffer
158 // for the reader to write to. It might not be generally applicable though, e.g. with
159 // virtio transport, the buffer may not be continuous.
160 let mut buf = [0u8; BLOCK_SIZE];
161 let read_size = file.read_chunk(offset_to_chunk_index(current_offset), &mut buf)?;
162 if read_size < planned_data_size {
163 return Err(io::Error::from_raw_os_error(libc::ENODATA));
164 }
165
166 let begin = (current_offset % BLOCK_SIZE as u64) as usize;
167 let end = begin + planned_data_size;
168 let s = w.write(&buf[begin..end])?;
169 if s != planned_data_size {
170 return Err(io::Error::from_raw_os_error(libc::EIO));
171 }
172 Ok(total + s)
173 },
174 )?;
175
176 Ok(total)
177}
178
179// No need to support enumerating directory entries.
180struct EmptyDirectoryIterator {}
181
182impl DirectoryIterator for EmptyDirectoryIterator {
183 fn next(&mut self) -> Option<DirEntry> {
184 None
185 }
186}
187
188impl FileSystem for AuthFs {
189 type Inode = Inode;
190 type Handle = Handle;
191 type DirIter = EmptyDirectoryIterator;
192
193 fn max_buffer_size(&self) -> u32 {
194 self.max_write
195 }
196
197 fn lookup(&self, _ctx: Context, _parent: Inode, name: &CStr) -> io::Result<Entry> {
198 // Only accept file name that looks like an integrer. Files in the pool are simply exposed
199 // by their inode number. Also, there is currently no directory structure.
200 let num = name.to_str().map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?;
201 // Normally, `lookup` is required to increase a reference count for the inode (while
202 // `forget` will decrease it). It is not necessary here since the files are configured to
203 // be static.
204 let inode = num.parse::<Inode>().map_err(|_| io::Error::from_raw_os_error(libc::ENOENT))?;
205 let st = match self.get_file_config(&inode)? {
206 FileConfig::LocalVerifiedFile(_, file_size)
207 | FileConfig::LocalUnverifiedFile(_, file_size) => create_stat(inode, *file_size)?,
208 };
209 Ok(Entry {
210 inode,
211 generation: 0,
212 attr: st,
213 entry_timeout: DEFAULT_METADATA_TIMEOUT,
214 attr_timeout: DEFAULT_METADATA_TIMEOUT,
215 })
216 }
217
218 fn getattr(
219 &self,
220 _ctx: Context,
221 inode: Inode,
222 _handle: Option<Handle>,
223 ) -> io::Result<(libc::stat64, Duration)> {
224 Ok((
225 match self.get_file_config(&inode)? {
226 FileConfig::LocalVerifiedFile(_, file_size)
227 | FileConfig::LocalUnverifiedFile(_, file_size) => create_stat(inode, *file_size)?,
228 },
229 DEFAULT_METADATA_TIMEOUT,
230 ))
231 }
232
233 fn open(
234 &self,
235 _ctx: Context,
236 inode: Self::Inode,
237 flags: u32,
238 ) -> io::Result<(Option<Self::Handle>, fuse::sys::OpenOptions)> {
239 // Since file handle is not really used in later operations (which use Inode directly),
240 // return None as the handle..
241 match self.get_file_config(&inode)? {
242 FileConfig::LocalVerifiedFile(_, _) => {
243 check_access_mode(flags, libc::O_RDONLY)?;
244 // Once verified, and only if verified, the file content can be cached. This is not
245 // really needed for a local file, but is the behavior of RemoteVerifiedFile later.
246 Ok((None, fuse::sys::OpenOptions::KEEP_CACHE))
247 }
248 FileConfig::LocalUnverifiedFile(_, _) => {
249 check_access_mode(flags, libc::O_RDONLY)?;
250 // Do not cache the content. This type of file is supposed to be verified using
251 // dm-verity. The filesystem mount over dm-verity already is already cached, so use
252 // direct I/O here to avoid double cache.
253 Ok((None, fuse::sys::OpenOptions::DIRECT_IO))
254 }
255 }
256 }
257
258 fn read<W: io::Write + ZeroCopyWriter>(
259 &self,
260 _ctx: Context,
261 inode: Inode,
262 _handle: Handle,
263 w: W,
264 size: u32,
265 offset: u64,
266 _lock_owner: Option<u64>,
267 _flags: u32,
268 ) -> io::Result<usize> {
269 match self.get_file_config(&inode)? {
270 FileConfig::LocalVerifiedFile(file, file_size) => {
271 read_chunks(w, file, *file_size, offset, size)
272 }
273 FileConfig::LocalUnverifiedFile(file, file_size) => {
274 read_chunks(w, file, *file_size, offset, size)
275 }
276 }
277 }
278}
279
280/// Mount and start the FUSE instance. This requires CAP_SYS_ADMIN.
281pub fn loop_forever(
282 file_pool: BTreeMap<Inode, FileConfig>,
283 mountpoint: &Path,
284) -> Result<(), fuse::Error> {
285 let max_read: u32 = 65536;
286 let max_write: u32 = 65536;
287 let dev_fuse = OpenOptions::new()
288 .read(true)
289 .write(true)
290 .open("/dev/fuse")
291 .expect("Failed to open /dev/fuse");
292
293 fuse::mount(
294 mountpoint,
295 "authfs",
296 libc::MS_NOSUID | libc::MS_NODEV,
297 &[
298 MountOption::FD(dev_fuse.as_raw_fd()),
299 MountOption::RootMode(libc::S_IFDIR | libc::S_IXUSR | libc::S_IXGRP | libc::S_IXOTH),
300 MountOption::AllowOther,
301 MountOption::UserId(0),
302 MountOption::GroupId(0),
303 MountOption::MaxRead(max_read),
304 ],
305 )
306 .expect("Failed to mount fuse");
307
308 fuse::worker::start_message_loop(
309 dev_fuse,
310 max_write,
311 max_read,
312 AuthFs::new(file_pool, max_write),
313 )
314}
315
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a `ChunkReadIter` to completion and returns every `(offset, size)` piece it yields.
    fn pieces(remaining: usize, offset: u64) -> Vec<(u64, usize)> {
        ChunkReadIter::new(remaining, offset).collect()
    }

    #[test]
    fn test_chunk_read_iter() {
        // Block-aligned requests are cut into whole blocks.
        assert_eq!(pieces(4096, 0), [(0, 4096)]);
        assert_eq!(pieces(8192, 0), [(0, 4096), (4096, 4096)]);
        assert_eq!(pieces(8192, 4096), [(4096, 4096), (8192, 4096)]);

        // An unaligned request starts and ends with a partial block.
        assert_eq!(
            pieces(16384, 1),
            [(1, 4095), (4096, 4096), (8192, 4096), (12288, 4096), (16384, 1)]
        );

        // Zero-length requests yield nothing, whatever the offset.
        assert!(pieces(0, 0).is_empty());
        assert!(pieces(0, 100).is_empty());
    }
}