David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 1 | // Copyright 2021, The Android Open Source Project |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | //! Implementation of the AIDL interface of the VirtualizationService. |
| 16 | |
| 17 | use crate::{get_calling_pid, get_calling_uid}; |
David Brazdil | 33a3102 | 2023-01-12 16:55:16 +0000 | [diff] [blame] | 18 | use crate::atom::{forward_vm_booted_atom, forward_vm_creation_atom, forward_vm_exited_atom}; |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 19 | use android_os_permissions_aidl::aidl::android::os::IPermissionController; |
| 20 | use android_system_virtualizationservice::aidl::android::system::virtualizationservice::VirtualMachineDebugInfo::VirtualMachineDebugInfo; |
| 21 | use android_system_virtualizationservice_internal::aidl::android::system::virtualizationservice_internal::{ |
| 22 | AtomVmBooted::AtomVmBooted, |
| 23 | AtomVmCreationRequested::AtomVmCreationRequested, |
| 24 | AtomVmExited::AtomVmExited, |
| 25 | IGlobalVmContext::{BnGlobalVmContext, IGlobalVmContext}, |
| 26 | IVirtualizationServiceInternal::IVirtualizationServiceInternal, |
| 27 | }; |
| 28 | use android_system_virtualmachineservice::aidl::android::system::virtualmachineservice::IVirtualMachineService::VM_TOMBSTONES_SERVICE_PORT; |
| 29 | use anyhow::{anyhow, bail, Context, Result}; |
| 30 | use binder::{self, BinderFeatures, ExceptionCode, Interface, LazyServiceGuard, Status, Strong}; |
| 31 | use libc::VMADDR_CID_HOST; |
| 32 | use log::{error, info, warn}; |
| 33 | use rustutils::system_properties; |
| 34 | use std::collections::HashMap; |
| 35 | use std::fs::{create_dir, read_dir, remove_dir, remove_file, set_permissions, Permissions}; |
| 36 | use std::io::{Read, Write}; |
| 37 | use std::os::unix::fs::PermissionsExt; |
| 38 | use std::os::unix::raw::{pid_t, uid_t}; |
| 39 | use std::path::PathBuf; |
| 40 | use std::sync::{Arc, Mutex, Weak}; |
| 41 | use tombstoned_client::{DebuggerdDumpType, TombstonedConnection}; |
| 42 | use vsock::{VsockListener, VsockStream}; |
| 43 | use nix::unistd::{chown, Uid}; |
| 44 | |
/// The unique ID of a VM used (together with a port number) for vsock communication.
pub type Cid = u32;

/// Identifier string of the VirtualizationService Binder service.
pub const BINDER_SERVICE_IDENTIFIER: &str = "android.system.virtualizationservice";

/// Directory in which to write disk image files used while running VMs.
pub const TEMPORARY_DIRECTORY: &str = "/data/misc/virtualizationservice";

/// The first CID to assign to a guest VM managed by the VirtualizationService. CIDs lower than this
/// are reserved for the host or other usage.
const GUEST_CID_MIN: Cid = 2048;
/// The last CID (inclusive) that may be assigned to a guest VM.
const GUEST_CID_MAX: Cid = 65535;

/// System property in which the most recently allocated CID is persisted.
const SYSPROP_LAST_CID: &str = "virtualizationservice.state.last_cid";

/// Size in bytes of the buffer used for each read of tombstone data from a vsock stream.
const CHUNK_RECV_MAX_LEN: usize = 1024;

/// Returns `true` when `cid` lies in the inclusive range `GUEST_CID_MIN..=GUEST_CID_MAX`.
fn is_valid_guest_cid(cid: Cid) -> bool {
    matches!(cid, GUEST_CID_MIN..=GUEST_CID_MAX)
}
| 65 | |
| 66 | /// Singleton service for allocating globally-unique VM resources, such as the CID, and running |
| 67 | /// singleton servers, like tombstone receiver. |
| 68 | #[derive(Debug, Default)] |
| 69 | pub struct VirtualizationServiceInternal { |
| 70 | state: Arc<Mutex<GlobalState>>, |
| 71 | } |
| 72 | |
| 73 | impl VirtualizationServiceInternal { |
| 74 | pub fn init() -> VirtualizationServiceInternal { |
| 75 | let service = VirtualizationServiceInternal::default(); |
| 76 | |
| 77 | std::thread::spawn(|| { |
| 78 | if let Err(e) = handle_stream_connection_tombstoned() { |
| 79 | warn!("Error receiving tombstone from guest or writing them. Error: {:?}", e); |
| 80 | } |
| 81 | }); |
| 82 | |
| 83 | service |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | impl Interface for VirtualizationServiceInternal {} |
| 88 | |
| 89 | impl IVirtualizationServiceInternal for VirtualizationServiceInternal { |
| 90 | fn removeMemlockRlimit(&self) -> binder::Result<()> { |
| 91 | let pid = get_calling_pid(); |
| 92 | let lim = libc::rlimit { rlim_cur: libc::RLIM_INFINITY, rlim_max: libc::RLIM_INFINITY }; |
| 93 | |
| 94 | // SAFETY - borrowing the new limit struct only |
| 95 | let ret = unsafe { libc::prlimit(pid, libc::RLIMIT_MEMLOCK, &lim, std::ptr::null_mut()) }; |
| 96 | |
| 97 | match ret { |
| 98 | 0 => Ok(()), |
| 99 | -1 => Err(Status::new_exception_str( |
| 100 | ExceptionCode::ILLEGAL_STATE, |
| 101 | Some(std::io::Error::last_os_error().to_string()), |
| 102 | )), |
| 103 | n => Err(Status::new_exception_str( |
| 104 | ExceptionCode::ILLEGAL_STATE, |
| 105 | Some(format!("Unexpected return value from prlimit(): {n}")), |
| 106 | )), |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | fn allocateGlobalVmContext( |
| 111 | &self, |
| 112 | requester_debug_pid: i32, |
| 113 | ) -> binder::Result<Strong<dyn IGlobalVmContext>> { |
| 114 | check_manage_access()?; |
| 115 | |
| 116 | let requester_uid = get_calling_uid(); |
| 117 | let requester_debug_pid = requester_debug_pid as pid_t; |
| 118 | let state = &mut *self.state.lock().unwrap(); |
| 119 | state.allocate_vm_context(requester_uid, requester_debug_pid).map_err(|e| { |
| 120 | Status::new_exception_str(ExceptionCode::ILLEGAL_STATE, Some(e.to_string())) |
| 121 | }) |
| 122 | } |
| 123 | |
| 124 | fn atomVmBooted(&self, atom: &AtomVmBooted) -> Result<(), Status> { |
| 125 | forward_vm_booted_atom(atom); |
| 126 | Ok(()) |
| 127 | } |
| 128 | |
| 129 | fn atomVmCreationRequested(&self, atom: &AtomVmCreationRequested) -> Result<(), Status> { |
| 130 | forward_vm_creation_atom(atom); |
| 131 | Ok(()) |
| 132 | } |
| 133 | |
| 134 | fn atomVmExited(&self, atom: &AtomVmExited) -> Result<(), Status> { |
| 135 | forward_vm_exited_atom(atom); |
| 136 | Ok(()) |
| 137 | } |
| 138 | |
| 139 | fn debugListVms(&self) -> binder::Result<Vec<VirtualMachineDebugInfo>> { |
| 140 | check_debug_access()?; |
| 141 | |
| 142 | let state = &mut *self.state.lock().unwrap(); |
| 143 | let cids = state |
| 144 | .held_contexts |
| 145 | .iter() |
| 146 | .filter_map(|(_, inst)| Weak::upgrade(inst)) |
| 147 | .map(|vm| VirtualMachineDebugInfo { |
| 148 | cid: vm.cid as i32, |
| 149 | temporaryDirectory: vm.get_temp_dir().to_string_lossy().to_string(), |
| 150 | requesterUid: vm.requester_uid as i32, |
Charisee | 96113f3 | 2023-01-26 09:00:42 +0000 | [diff] [blame^] | 151 | requesterPid: vm.requester_debug_pid, |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 152 | }) |
| 153 | .collect(); |
| 154 | Ok(cids) |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | #[derive(Debug, Default)] |
| 159 | struct GlobalVmInstance { |
| 160 | /// The unique CID assigned to the VM for vsock communication. |
| 161 | cid: Cid, |
| 162 | /// UID of the client who requested this VM instance. |
| 163 | requester_uid: uid_t, |
| 164 | /// PID of the client who requested this VM instance. |
| 165 | requester_debug_pid: pid_t, |
| 166 | } |
| 167 | |
| 168 | impl GlobalVmInstance { |
| 169 | fn get_temp_dir(&self) -> PathBuf { |
| 170 | let cid = self.cid; |
| 171 | format!("{TEMPORARY_DIRECTORY}/{cid}").into() |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | /// The mutable state of the VirtualizationServiceInternal. There should only be one instance |
| 176 | /// of this struct. |
| 177 | #[derive(Debug, Default)] |
| 178 | struct GlobalState { |
| 179 | /// VM contexts currently allocated to running VMs. A CID is never recycled as long |
| 180 | /// as there is a strong reference held by a GlobalVmContext. |
| 181 | held_contexts: HashMap<Cid, Weak<GlobalVmInstance>>, |
| 182 | } |
| 183 | |
| 184 | impl GlobalState { |
| 185 | /// Get the next available CID, or an error if we have run out. The last CID used is stored in |
| 186 | /// a system property so that restart of virtualizationservice doesn't reuse CID while the host |
| 187 | /// Android is up. |
| 188 | fn get_next_available_cid(&mut self) -> Result<Cid> { |
| 189 | // Start trying to find a CID from the last used CID + 1. This ensures |
| 190 | // that we do not eagerly recycle CIDs. It makes debugging easier but |
| 191 | // also means that retrying to allocate a CID, eg. because it is |
| 192 | // erroneously occupied by a process, will not recycle the same CID. |
| 193 | let last_cid_prop = |
| 194 | system_properties::read(SYSPROP_LAST_CID)?.and_then(|val| match val.parse::<Cid>() { |
| 195 | Ok(num) => { |
| 196 | if is_valid_guest_cid(num) { |
| 197 | Some(num) |
| 198 | } else { |
| 199 | error!("Invalid value '{}' of property '{}'", num, SYSPROP_LAST_CID); |
| 200 | None |
| 201 | } |
| 202 | } |
| 203 | Err(_) => { |
| 204 | error!("Invalid value '{}' of property '{}'", val, SYSPROP_LAST_CID); |
| 205 | None |
| 206 | } |
| 207 | }); |
| 208 | |
| 209 | let first_cid = if let Some(last_cid) = last_cid_prop { |
| 210 | if last_cid == GUEST_CID_MAX { |
| 211 | GUEST_CID_MIN |
| 212 | } else { |
| 213 | last_cid + 1 |
| 214 | } |
| 215 | } else { |
| 216 | GUEST_CID_MIN |
| 217 | }; |
| 218 | |
| 219 | let cid = self |
| 220 | .find_available_cid(first_cid..=GUEST_CID_MAX) |
| 221 | .or_else(|| self.find_available_cid(GUEST_CID_MIN..first_cid)) |
| 222 | .ok_or_else(|| anyhow!("Could not find an available CID."))?; |
| 223 | |
| 224 | system_properties::write(SYSPROP_LAST_CID, &format!("{}", cid))?; |
| 225 | Ok(cid) |
| 226 | } |
| 227 | |
| 228 | fn find_available_cid<I>(&self, mut range: I) -> Option<Cid> |
| 229 | where |
| 230 | I: Iterator<Item = Cid>, |
| 231 | { |
| 232 | range.find(|cid| !self.held_contexts.contains_key(cid)) |
| 233 | } |
| 234 | |
| 235 | fn allocate_vm_context( |
| 236 | &mut self, |
| 237 | requester_uid: uid_t, |
| 238 | requester_debug_pid: pid_t, |
| 239 | ) -> Result<Strong<dyn IGlobalVmContext>> { |
| 240 | // Garbage collect unused VM contexts. |
| 241 | self.held_contexts.retain(|_, instance| instance.strong_count() > 0); |
| 242 | |
| 243 | let cid = self.get_next_available_cid()?; |
| 244 | let instance = Arc::new(GlobalVmInstance { cid, requester_uid, requester_debug_pid }); |
| 245 | create_temporary_directory(&instance.get_temp_dir(), requester_uid)?; |
| 246 | |
| 247 | self.held_contexts.insert(cid, Arc::downgrade(&instance)); |
| 248 | let binder = GlobalVmContext { instance, ..Default::default() }; |
| 249 | Ok(BnGlobalVmContext::new_binder(binder, BinderFeatures::default())) |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | fn create_temporary_directory(path: &PathBuf, requester_uid: uid_t) -> Result<()> { |
| 254 | if path.as_path().exists() { |
| 255 | remove_temporary_dir(path).unwrap_or_else(|e| { |
| 256 | warn!("Could not delete temporary directory {:?}: {}", path, e); |
| 257 | }); |
| 258 | } |
| 259 | // Create a directory that is owned by client's UID but system's GID, and permissions 0700. |
| 260 | // If the chown() fails, this will leave behind an empty directory that will get removed |
| 261 | // at the next attempt, or if virtualizationservice is restarted. |
| 262 | create_dir(path).with_context(|| format!("Could not create temporary directory {:?}", path))?; |
| 263 | chown(path, Some(Uid::from_raw(requester_uid)), None) |
| 264 | .with_context(|| format!("Could not set ownership of temporary directory {:?}", path))?; |
| 265 | Ok(()) |
| 266 | } |
| 267 | |
| 268 | /// Removes a directory owned by a different user by first changing its owner back |
| 269 | /// to VirtualizationService. |
| 270 | pub fn remove_temporary_dir(path: &PathBuf) -> Result<()> { |
| 271 | if !path.as_path().is_dir() { |
| 272 | bail!("Path {:?} is not a directory", path); |
| 273 | } |
| 274 | chown(path, Some(Uid::current()), None)?; |
| 275 | set_permissions(path, Permissions::from_mode(0o700))?; |
| 276 | remove_temporary_files(path)?; |
| 277 | remove_dir(path)?; |
| 278 | Ok(()) |
| 279 | } |
| 280 | |
| 281 | pub fn remove_temporary_files(path: &PathBuf) -> Result<()> { |
| 282 | for dir_entry in read_dir(path)? { |
| 283 | remove_file(dir_entry?.path())?; |
| 284 | } |
| 285 | Ok(()) |
| 286 | } |
| 287 | |
| 288 | /// Implementation of the AIDL `IGlobalVmContext` interface. |
| 289 | #[derive(Debug, Default)] |
| 290 | struct GlobalVmContext { |
| 291 | /// Strong reference to the context's instance data structure. |
| 292 | instance: Arc<GlobalVmInstance>, |
| 293 | /// Keeps our service process running as long as this VM context exists. |
| 294 | #[allow(dead_code)] |
| 295 | lazy_service_guard: LazyServiceGuard, |
| 296 | } |
| 297 | |
| 298 | impl Interface for GlobalVmContext {} |
| 299 | |
| 300 | impl IGlobalVmContext for GlobalVmContext { |
| 301 | fn getCid(&self) -> binder::Result<i32> { |
| 302 | Ok(self.instance.cid as i32) |
| 303 | } |
| 304 | |
| 305 | fn getTemporaryDirectory(&self) -> binder::Result<String> { |
| 306 | Ok(self.instance.get_temp_dir().to_string_lossy().to_string()) |
| 307 | } |
| 308 | } |
| 309 | |
| 310 | fn handle_stream_connection_tombstoned() -> Result<()> { |
| 311 | // Should not listen for tombstones on a guest VM's port. |
| 312 | assert!(!is_valid_guest_cid(VM_TOMBSTONES_SERVICE_PORT as Cid)); |
| 313 | let listener = |
| 314 | VsockListener::bind_with_cid_port(VMADDR_CID_HOST, VM_TOMBSTONES_SERVICE_PORT as Cid)?; |
| 315 | for incoming_stream in listener.incoming() { |
| 316 | let mut incoming_stream = match incoming_stream { |
| 317 | Err(e) => { |
| 318 | warn!("invalid incoming connection: {:?}", e); |
| 319 | continue; |
| 320 | } |
| 321 | Ok(s) => s, |
| 322 | }; |
| 323 | std::thread::spawn(move || { |
| 324 | if let Err(e) = handle_tombstone(&mut incoming_stream) { |
| 325 | error!("Failed to write tombstone- {:?}", e); |
| 326 | } |
| 327 | }); |
| 328 | } |
| 329 | Ok(()) |
| 330 | } |
| 331 | |
| 332 | fn handle_tombstone(stream: &mut VsockStream) -> Result<()> { |
| 333 | if let Ok(addr) = stream.peer_addr() { |
| 334 | info!("Vsock Stream connected to cid={} for tombstones", addr.cid()); |
| 335 | } |
| 336 | let tb_connection = |
| 337 | TombstonedConnection::connect(std::process::id() as i32, DebuggerdDumpType::Tombstone) |
| 338 | .context("Failed to connect to tombstoned")?; |
| 339 | let mut text_output = tb_connection |
| 340 | .text_output |
| 341 | .as_ref() |
| 342 | .ok_or_else(|| anyhow!("Could not get file to write the tombstones on"))?; |
| 343 | let mut num_bytes_read = 0; |
| 344 | loop { |
| 345 | let mut chunk_recv = [0; CHUNK_RECV_MAX_LEN]; |
| 346 | let n = stream |
| 347 | .read(&mut chunk_recv) |
| 348 | .context("Failed to read tombstone data from Vsock stream")?; |
| 349 | if n == 0 { |
| 350 | break; |
| 351 | } |
| 352 | num_bytes_read += n; |
| 353 | text_output.write_all(&chunk_recv[0..n]).context("Failed to write guests tombstones")?; |
| 354 | } |
| 355 | info!("Received {} bytes from guest & wrote to tombstone file", num_bytes_read); |
| 356 | tb_connection.notify_completion()?; |
| 357 | Ok(()) |
| 358 | } |
| 359 | |
| 360 | /// Checks whether the caller has a specific permission |
| 361 | fn check_permission(perm: &str) -> binder::Result<()> { |
| 362 | let calling_pid = get_calling_pid(); |
| 363 | let calling_uid = get_calling_uid(); |
| 364 | // Root can do anything |
| 365 | if calling_uid == 0 { |
| 366 | return Ok(()); |
| 367 | } |
| 368 | let perm_svc: Strong<dyn IPermissionController::IPermissionController> = |
| 369 | binder::get_interface("permission")?; |
| 370 | if perm_svc.checkPermission(perm, calling_pid, calling_uid as i32)? { |
| 371 | Ok(()) |
| 372 | } else { |
| 373 | Err(Status::new_exception_str( |
| 374 | ExceptionCode::SECURITY, |
| 375 | Some(format!("does not have the {} permission", perm)), |
| 376 | )) |
| 377 | } |
| 378 | } |
| 379 | |
| 380 | /// Check whether the caller of the current Binder method is allowed to call debug methods. |
| 381 | fn check_debug_access() -> binder::Result<()> { |
| 382 | check_permission("android.permission.DEBUG_VIRTUAL_MACHINE") |
| 383 | } |
| 384 | |
| 385 | /// Check whether the caller of the current Binder method is allowed to manage VMs |
| 386 | fn check_manage_access() -> binder::Result<()> { |
| 387 | check_permission("android.permission.MANAGE_VIRTUAL_MACHINE") |
| 388 | } |