blob: 0dfd0afbb58b4bb132242d5195a533c44ec0d1b6 [file] [log] [blame]
/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17use anyhow::Result;
18use std::collections::BTreeMap;
19use std::convert::TryFrom;
20use std::ffi::CStr;
21use std::fs::OpenOptions;
22use std::io;
23use std::mem::MaybeUninit;
24use std::option::Option;
25use std::os::unix::io::AsRawFd;
26use std::path::Path;
27use std::time::Duration;
28
29use fuse::filesystem::{Context, DirEntry, DirectoryIterator, Entry, FileSystem, ZeroCopyWriter};
30use fuse::mount::MountOption;
31
32use crate::common::{divide_roundup, COMMON_PAGE_SIZE};
33use crate::fsverity::FsverityChunkedFileReader;
34use crate::reader::{ChunkedFileReader, ReadOnlyDataByChunk};
Victor Hsiehf01f3232020-12-11 13:31:31 -080035use crate::remote_file::{RemoteChunkedFileReader, RemoteFsverityMerkleTreeReader};
Victor Hsieh88ac6ca2020-11-13 15:20:24 -080036
37// We're reading the backing file by chunk, so setting the block size to be the same.
38const BLOCK_SIZE: usize = COMMON_PAGE_SIZE as usize;
39
40const DEFAULT_METADATA_TIMEOUT: std::time::Duration = Duration::from_secs(5);
41
42pub type Inode = u64;
43type Handle = u64;
44
Victor Hsiehf01f3232020-12-11 13:31:31 -080045type RemoteFsverityChunkedFileReader =
46 FsverityChunkedFileReader<RemoteChunkedFileReader, RemoteFsverityMerkleTreeReader>;
47
Victor Hsieh88ac6ca2020-11-13 15:20:24 -080048// A debug only type where everything are stored as local files.
49type FileBackedFsverityChunkedFileReader =
50 FsverityChunkedFileReader<ChunkedFileReader, ChunkedFileReader>;
51
52pub enum FileConfig {
53 LocalVerifiedFile(FileBackedFsverityChunkedFileReader, u64),
54 LocalUnverifiedFile(ChunkedFileReader, u64),
Victor Hsiehf01f3232020-12-11 13:31:31 -080055 RemoteVerifiedFile(RemoteFsverityChunkedFileReader, u64),
56 RemoteUnverifiedFile(RemoteChunkedFileReader, u64),
Victor Hsieh88ac6ca2020-11-13 15:20:24 -080057}
58
59struct AuthFs {
60 /// Store `FileConfig`s using the `Inode` number as the search index.
61 ///
62 /// For further optimization to minimize the search cost, since Inode is integer, we may
63 /// consider storing them in a Vec if we can guarantee that the numbers are small and
64 /// consecutive.
65 file_pool: BTreeMap<Inode, FileConfig>,
66
67 /// Maximum bytes in the write transaction to the FUSE device. This limits the maximum size to
68 /// a read request (including FUSE protocol overhead).
69 max_write: u32,
70}
71
72impl AuthFs {
73 pub fn new(file_pool: BTreeMap<Inode, FileConfig>, max_write: u32) -> AuthFs {
74 AuthFs { file_pool, max_write }
75 }
76
77 fn get_file_config(&self, inode: &Inode) -> io::Result<&FileConfig> {
78 self.file_pool.get(&inode).ok_or_else(|| io::Error::from_raw_os_error(libc::ENOENT))
79 }
80}
81
82fn check_access_mode(flags: u32, mode: libc::c_int) -> io::Result<()> {
83 if (flags & libc::O_ACCMODE as u32) == mode as u32 {
84 Ok(())
85 } else {
86 Err(io::Error::from_raw_os_error(libc::EACCES))
87 }
88}
89
90cfg_if::cfg_if! {
91 if #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))] {
92 fn blk_size() -> libc::c_int { BLOCK_SIZE as libc::c_int }
93 } else {
94 fn blk_size() -> libc::c_long { BLOCK_SIZE as libc::c_long }
95 }
96}
97
98fn create_stat(ino: libc::ino_t, file_size: u64) -> io::Result<libc::stat64> {
99 let mut st = unsafe { MaybeUninit::<libc::stat64>::zeroed().assume_init() };
100
101 st.st_ino = ino;
102 st.st_mode = libc::S_IFREG | libc::S_IRUSR | libc::S_IRGRP | libc::S_IROTH;
103 st.st_dev = 0;
104 st.st_nlink = 1;
105 st.st_uid = 0;
106 st.st_gid = 0;
107 st.st_rdev = 0;
108 st.st_size = libc::off64_t::try_from(file_size)
109 .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
110 st.st_blksize = blk_size();
111 // Per man stat(2), st_blocks is "Number of 512B blocks allocated".
112 st.st_blocks = libc::c_longlong::try_from(divide_roundup(file_size, 512))
113 .map_err(|_| io::Error::from_raw_os_error(libc::EFBIG))?;
114 Ok(st)
115}
116
117/// An iterator that generates (offset, size) for a chunked read operation, where offset is the
118/// global file offset, and size is the amount of read from the offset.
119struct ChunkReadIter {
120 remaining: usize,
121 offset: u64,
122}
123
124impl ChunkReadIter {
125 pub fn new(remaining: usize, offset: u64) -> Self {
126 ChunkReadIter { remaining, offset }
127 }
128}
129
130impl Iterator for ChunkReadIter {
131 type Item = (u64, usize);
132
133 fn next(&mut self) -> Option<Self::Item> {
134 if self.remaining == 0 {
135 return None;
136 }
137 let chunk_data_size =
138 std::cmp::min(self.remaining, BLOCK_SIZE - (self.offset % BLOCK_SIZE as u64) as usize);
139 let retval = (self.offset, chunk_data_size);
140 self.offset += chunk_data_size as u64;
141 self.remaining = self.remaining.saturating_sub(chunk_data_size);
142 Some(retval)
143 }
144}
145
146fn offset_to_chunk_index(offset: u64) -> u64 {
147 offset / BLOCK_SIZE as u64
148}
149
150fn read_chunks<W: io::Write, T: ReadOnlyDataByChunk>(
151 mut w: W,
152 file: &T,
153 file_size: u64,
154 offset: u64,
155 size: u32,
156) -> io::Result<usize> {
157 let remaining = file_size.saturating_sub(offset);
158 let size_to_read = std::cmp::min(size as usize, remaining as usize);
159 let total = ChunkReadIter::new(size_to_read, offset).try_fold(
160 0,
161 |total, (current_offset, planned_data_size)| {
162 // TODO(victorhsieh): There might be a non-trivial way to avoid this copy. For example,
163 // instead of accepting a buffer, the writer could expose the final destination buffer
164 // for the reader to write to. It might not be generally applicable though, e.g. with
165 // virtio transport, the buffer may not be continuous.
166 let mut buf = [0u8; BLOCK_SIZE];
167 let read_size = file.read_chunk(offset_to_chunk_index(current_offset), &mut buf)?;
168 if read_size < planned_data_size {
169 return Err(io::Error::from_raw_os_error(libc::ENODATA));
170 }
171
172 let begin = (current_offset % BLOCK_SIZE as u64) as usize;
173 let end = begin + planned_data_size;
174 let s = w.write(&buf[begin..end])?;
175 if s != planned_data_size {
176 return Err(io::Error::from_raw_os_error(libc::EIO));
177 }
178 Ok(total + s)
179 },
180 )?;
181
182 Ok(total)
183}
184
185// No need to support enumerating directory entries.
186struct EmptyDirectoryIterator {}
187
188impl DirectoryIterator for EmptyDirectoryIterator {
189 fn next(&mut self) -> Option<DirEntry> {
190 None
191 }
192}
193
194impl FileSystem for AuthFs {
195 type Inode = Inode;
196 type Handle = Handle;
197 type DirIter = EmptyDirectoryIterator;
198
199 fn max_buffer_size(&self) -> u32 {
200 self.max_write
201 }
202
203 fn lookup(&self, _ctx: Context, _parent: Inode, name: &CStr) -> io::Result<Entry> {
204 // Only accept file name that looks like an integrer. Files in the pool are simply exposed
205 // by their inode number. Also, there is currently no directory structure.
206 let num = name.to_str().map_err(|_| io::Error::from_raw_os_error(libc::EINVAL))?;
207 // Normally, `lookup` is required to increase a reference count for the inode (while
208 // `forget` will decrease it). It is not necessary here since the files are configured to
209 // be static.
210 let inode = num.parse::<Inode>().map_err(|_| io::Error::from_raw_os_error(libc::ENOENT))?;
211 let st = match self.get_file_config(&inode)? {
212 FileConfig::LocalVerifiedFile(_, file_size)
Victor Hsiehf01f3232020-12-11 13:31:31 -0800213 | FileConfig::LocalUnverifiedFile(_, file_size)
214 | FileConfig::RemoteUnverifiedFile(_, file_size)
215 | FileConfig::RemoteVerifiedFile(_, file_size) => create_stat(inode, *file_size)?,
Victor Hsieh88ac6ca2020-11-13 15:20:24 -0800216 };
217 Ok(Entry {
218 inode,
219 generation: 0,
220 attr: st,
221 entry_timeout: DEFAULT_METADATA_TIMEOUT,
222 attr_timeout: DEFAULT_METADATA_TIMEOUT,
223 })
224 }
225
226 fn getattr(
227 &self,
228 _ctx: Context,
229 inode: Inode,
230 _handle: Option<Handle>,
231 ) -> io::Result<(libc::stat64, Duration)> {
232 Ok((
233 match self.get_file_config(&inode)? {
234 FileConfig::LocalVerifiedFile(_, file_size)
Victor Hsiehf01f3232020-12-11 13:31:31 -0800235 | FileConfig::LocalUnverifiedFile(_, file_size)
236 | FileConfig::RemoteUnverifiedFile(_, file_size)
237 | FileConfig::RemoteVerifiedFile(_, file_size) => create_stat(inode, *file_size)?,
Victor Hsieh88ac6ca2020-11-13 15:20:24 -0800238 },
239 DEFAULT_METADATA_TIMEOUT,
240 ))
241 }
242
243 fn open(
244 &self,
245 _ctx: Context,
246 inode: Self::Inode,
247 flags: u32,
248 ) -> io::Result<(Option<Self::Handle>, fuse::sys::OpenOptions)> {
249 // Since file handle is not really used in later operations (which use Inode directly),
250 // return None as the handle..
251 match self.get_file_config(&inode)? {
Victor Hsiehf01f3232020-12-11 13:31:31 -0800252 FileConfig::LocalVerifiedFile(_, _) | FileConfig::RemoteVerifiedFile(_, _) => {
Victor Hsieh88ac6ca2020-11-13 15:20:24 -0800253 check_access_mode(flags, libc::O_RDONLY)?;
254 // Once verified, and only if verified, the file content can be cached. This is not
255 // really needed for a local file, but is the behavior of RemoteVerifiedFile later.
256 Ok((None, fuse::sys::OpenOptions::KEEP_CACHE))
257 }
Victor Hsiehf01f3232020-12-11 13:31:31 -0800258 FileConfig::LocalUnverifiedFile(_, _) | FileConfig::RemoteUnverifiedFile(_, _) => {
Victor Hsieh88ac6ca2020-11-13 15:20:24 -0800259 check_access_mode(flags, libc::O_RDONLY)?;
260 // Do not cache the content. This type of file is supposed to be verified using
261 // dm-verity. The filesystem mount over dm-verity already is already cached, so use
262 // direct I/O here to avoid double cache.
263 Ok((None, fuse::sys::OpenOptions::DIRECT_IO))
264 }
265 }
266 }
267
268 fn read<W: io::Write + ZeroCopyWriter>(
269 &self,
270 _ctx: Context,
271 inode: Inode,
272 _handle: Handle,
273 w: W,
274 size: u32,
275 offset: u64,
276 _lock_owner: Option<u64>,
277 _flags: u32,
278 ) -> io::Result<usize> {
279 match self.get_file_config(&inode)? {
280 FileConfig::LocalVerifiedFile(file, file_size) => {
281 read_chunks(w, file, *file_size, offset, size)
282 }
283 FileConfig::LocalUnverifiedFile(file, file_size) => {
284 read_chunks(w, file, *file_size, offset, size)
285 }
Victor Hsiehf01f3232020-12-11 13:31:31 -0800286 FileConfig::RemoteVerifiedFile(file, file_size) => {
287 read_chunks(w, file, *file_size, offset, size)
288 }
289 FileConfig::RemoteUnverifiedFile(file, file_size) => {
290 read_chunks(w, file, *file_size, offset, size)
291 }
Victor Hsieh88ac6ca2020-11-13 15:20:24 -0800292 }
293 }
294}
295
296/// Mount and start the FUSE instance. This requires CAP_SYS_ADMIN.
297pub fn loop_forever(
298 file_pool: BTreeMap<Inode, FileConfig>,
299 mountpoint: &Path,
300) -> Result<(), fuse::Error> {
301 let max_read: u32 = 65536;
302 let max_write: u32 = 65536;
303 let dev_fuse = OpenOptions::new()
304 .read(true)
305 .write(true)
306 .open("/dev/fuse")
307 .expect("Failed to open /dev/fuse");
308
309 fuse::mount(
310 mountpoint,
311 "authfs",
312 libc::MS_NOSUID | libc::MS_NODEV,
313 &[
314 MountOption::FD(dev_fuse.as_raw_fd()),
315 MountOption::RootMode(libc::S_IFDIR | libc::S_IXUSR | libc::S_IXGRP | libc::S_IXOTH),
316 MountOption::AllowOther,
317 MountOption::UserId(0),
318 MountOption::GroupId(0),
319 MountOption::MaxRead(max_read),
320 ],
321 )
322 .expect("Failed to mount fuse");
323
324 fuse::worker::start_message_loop(
325 dev_fuse,
326 max_write,
327 max_read,
328 AuthFs::new(file_pool, max_write),
329 )
330}
331
332#[cfg(test)]
333mod tests {
334 use super::*;
335
336 fn collect_chunk_read_iter(remaining: usize, offset: u64) -> Vec<(u64, usize)> {
337 ChunkReadIter::new(remaining, offset).collect::<Vec<_>>()
338 }
339
340 #[test]
341 fn test_chunk_read_iter() {
342 assert_eq!(collect_chunk_read_iter(4096, 0), [(0, 4096)]);
343 assert_eq!(collect_chunk_read_iter(8192, 0), [(0, 4096), (4096, 4096)]);
344 assert_eq!(collect_chunk_read_iter(8192, 4096), [(4096, 4096), (8192, 4096)]);
345
346 assert_eq!(
347 collect_chunk_read_iter(16384, 1),
348 [(1, 4095), (4096, 4096), (8192, 4096), (12288, 4096), (16384, 1)]
349 );
350
351 assert_eq!(collect_chunk_read_iter(0, 0), []);
352 assert_eq!(collect_chunk_read_iter(0, 100), []);
353 }
354}