David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 1 | // Copyright 2021, The Android Open Source Project |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | //! Implementation of the AIDL interface of the VirtualizationService. |
| 16 | |
| 17 | use crate::{get_calling_pid, get_calling_uid}; |
David Brazdil | 33a3102 | 2023-01-12 16:55:16 +0000 | [diff] [blame] | 18 | use crate::atom::{forward_vm_booted_atom, forward_vm_creation_atom, forward_vm_exited_atom}; |
Alice Wang | c2fec93 | 2023-02-23 16:24:02 +0000 | [diff] [blame] | 19 | use crate::rkpvm::request_certificate; |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 20 | use android_os_permissions_aidl::aidl::android::os::IPermissionController; |
Alice Wang | c2fec93 | 2023-02-23 16:24:02 +0000 | [diff] [blame] | 21 | use android_system_virtualizationservice::{ |
Inseob Kim | 53d0b21 | 2023-07-20 16:58:37 +0900 | [diff] [blame^] | 22 | aidl::android::system::virtualizationservice::AssignableDevice::AssignableDevice, |
Alice Wang | c2fec93 | 2023-02-23 16:24:02 +0000 | [diff] [blame] | 23 | aidl::android::system::virtualizationservice::VirtualMachineDebugInfo::VirtualMachineDebugInfo, |
| 24 | binder::ParcelFileDescriptor, |
| 25 | }; |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 26 | use android_system_virtualizationservice_internal::aidl::android::system::virtualizationservice_internal::{ |
| 27 | AtomVmBooted::AtomVmBooted, |
| 28 | AtomVmCreationRequested::AtomVmCreationRequested, |
| 29 | AtomVmExited::AtomVmExited, |
| 30 | IGlobalVmContext::{BnGlobalVmContext, IGlobalVmContext}, |
| 31 | IVirtualizationServiceInternal::IVirtualizationServiceInternal, |
| 32 | }; |
| 33 | use android_system_virtualmachineservice::aidl::android::system::virtualmachineservice::IVirtualMachineService::VM_TOMBSTONES_SERVICE_PORT; |
Alice Wang | d1b11a0 | 2023-04-18 12:30:20 +0000 | [diff] [blame] | 34 | use anyhow::{anyhow, ensure, Context, Result}; |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 35 | use binder::{self, BinderFeatures, ExceptionCode, Interface, LazyServiceGuard, Status, Strong}; |
| 36 | use libc::VMADDR_CID_HOST; |
| 37 | use log::{error, info, warn}; |
| 38 | use rustutils::system_properties; |
| 39 | use std::collections::HashMap; |
Alice Wang | d1b11a0 | 2023-04-18 12:30:20 +0000 | [diff] [blame] | 40 | use std::fs::{create_dir, remove_dir_all, set_permissions, Permissions}; |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 41 | use std::io::{Read, Write}; |
| 42 | use std::os::unix::fs::PermissionsExt; |
| 43 | use std::os::unix::raw::{pid_t, uid_t}; |
| 44 | use std::path::PathBuf; |
| 45 | use std::sync::{Arc, Mutex, Weak}; |
| 46 | use tombstoned_client::{DebuggerdDumpType, TombstonedConnection}; |
| 47 | use vsock::{VsockListener, VsockStream}; |
| 48 | use nix::unistd::{chown, Uid}; |
| 49 | |
/// The unique ID of a VM used (together with a port number) for vsock communication.
pub type Cid = u32;

/// Identifier string of the VirtualizationService binder service.
// NOTE(review): presumably the name the service is registered/looked up under — confirm at the
// registration site, which is not in this file.
pub const BINDER_SERVICE_IDENTIFIER: &str = "android.system.virtualizationservice";

/// Directory in which to write disk image files used while running VMs.
/// Each VM context gets a sub-directory named after its CID (see `GlobalVmInstance::get_temp_dir`).
pub const TEMPORARY_DIRECTORY: &str = "/data/misc/virtualizationservice";

/// The first CID to assign to a guest VM managed by the VirtualizationService. CIDs lower than this
/// are reserved for the host or other usage.
const GUEST_CID_MIN: Cid = 2048;
/// The last CID (inclusive) that may be assigned to a guest VM; allocation wraps around to
/// `GUEST_CID_MIN` after this value.
const GUEST_CID_MAX: Cid = 65535;

/// System property in which the most recently allocated CID is stored, so that CID allocation
/// can continue from it.
const SYSPROP_LAST_CID: &str = "virtualizationservice.state.last_cid";

/// Size in bytes of the buffer used for each read of tombstone data from a vsock stream.
const CHUNK_RECV_MAX_LEN: usize = 1024;
| 66 | |
| 67 | fn is_valid_guest_cid(cid: Cid) -> bool { |
| 68 | (GUEST_CID_MIN..=GUEST_CID_MAX).contains(&cid) |
| 69 | } |
| 70 | |
/// Singleton service for allocating globally-unique VM resources, such as the CID, and running
/// singleton servers, like tombstone receiver.
#[derive(Debug, Default)]
pub struct VirtualizationServiceInternal {
    /// Global allocation state, shared behind a mutex so that every binder call locks it
    /// before reading or modifying the set of held VM contexts.
    state: Arc<Mutex<GlobalState>>,
}
| 77 | |
| 78 | impl VirtualizationServiceInternal { |
| 79 | pub fn init() -> VirtualizationServiceInternal { |
| 80 | let service = VirtualizationServiceInternal::default(); |
| 81 | |
| 82 | std::thread::spawn(|| { |
| 83 | if let Err(e) = handle_stream_connection_tombstoned() { |
| 84 | warn!("Error receiving tombstone from guest or writing them. Error: {:?}", e); |
| 85 | } |
| 86 | }); |
| 87 | |
| 88 | service |
| 89 | } |
| 90 | } |
| 91 | |
// Empty impl: the service relies on the default method implementations of binder's `Interface`.
impl Interface for VirtualizationServiceInternal {}
| 93 | |
| 94 | impl IVirtualizationServiceInternal for VirtualizationServiceInternal { |
| 95 | fn removeMemlockRlimit(&self) -> binder::Result<()> { |
| 96 | let pid = get_calling_pid(); |
| 97 | let lim = libc::rlimit { rlim_cur: libc::RLIM_INFINITY, rlim_max: libc::RLIM_INFINITY }; |
| 98 | |
Andrew Walbran | b58d1b4 | 2023-07-07 13:54:49 +0100 | [diff] [blame] | 99 | // SAFETY: borrowing the new limit struct only |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 100 | let ret = unsafe { libc::prlimit(pid, libc::RLIMIT_MEMLOCK, &lim, std::ptr::null_mut()) }; |
| 101 | |
| 102 | match ret { |
| 103 | 0 => Ok(()), |
| 104 | -1 => Err(Status::new_exception_str( |
| 105 | ExceptionCode::ILLEGAL_STATE, |
| 106 | Some(std::io::Error::last_os_error().to_string()), |
| 107 | )), |
| 108 | n => Err(Status::new_exception_str( |
| 109 | ExceptionCode::ILLEGAL_STATE, |
| 110 | Some(format!("Unexpected return value from prlimit(): {n}")), |
| 111 | )), |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | fn allocateGlobalVmContext( |
| 116 | &self, |
| 117 | requester_debug_pid: i32, |
| 118 | ) -> binder::Result<Strong<dyn IGlobalVmContext>> { |
| 119 | check_manage_access()?; |
| 120 | |
| 121 | let requester_uid = get_calling_uid(); |
| 122 | let requester_debug_pid = requester_debug_pid as pid_t; |
| 123 | let state = &mut *self.state.lock().unwrap(); |
| 124 | state.allocate_vm_context(requester_uid, requester_debug_pid).map_err(|e| { |
| 125 | Status::new_exception_str(ExceptionCode::ILLEGAL_STATE, Some(e.to_string())) |
| 126 | }) |
| 127 | } |
| 128 | |
| 129 | fn atomVmBooted(&self, atom: &AtomVmBooted) -> Result<(), Status> { |
| 130 | forward_vm_booted_atom(atom); |
| 131 | Ok(()) |
| 132 | } |
| 133 | |
| 134 | fn atomVmCreationRequested(&self, atom: &AtomVmCreationRequested) -> Result<(), Status> { |
| 135 | forward_vm_creation_atom(atom); |
| 136 | Ok(()) |
| 137 | } |
| 138 | |
| 139 | fn atomVmExited(&self, atom: &AtomVmExited) -> Result<(), Status> { |
| 140 | forward_vm_exited_atom(atom); |
| 141 | Ok(()) |
| 142 | } |
| 143 | |
| 144 | fn debugListVms(&self) -> binder::Result<Vec<VirtualMachineDebugInfo>> { |
| 145 | check_debug_access()?; |
| 146 | |
| 147 | let state = &mut *self.state.lock().unwrap(); |
| 148 | let cids = state |
| 149 | .held_contexts |
| 150 | .iter() |
| 151 | .filter_map(|(_, inst)| Weak::upgrade(inst)) |
| 152 | .map(|vm| VirtualMachineDebugInfo { |
| 153 | cid: vm.cid as i32, |
| 154 | temporaryDirectory: vm.get_temp_dir().to_string_lossy().to_string(), |
| 155 | requesterUid: vm.requester_uid as i32, |
Charisee | 96113f3 | 2023-01-26 09:00:42 +0000 | [diff] [blame] | 156 | requesterPid: vm.requester_debug_pid, |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 157 | }) |
| 158 | .collect(); |
| 159 | Ok(cids) |
| 160 | } |
Alice Wang | c2fec93 | 2023-02-23 16:24:02 +0000 | [diff] [blame] | 161 | |
| 162 | fn requestCertificate( |
| 163 | &self, |
| 164 | csr: &[u8], |
| 165 | instance_img_fd: &ParcelFileDescriptor, |
| 166 | ) -> binder::Result<Vec<u8>> { |
| 167 | check_manage_access()?; |
| 168 | info!("Received csr. Getting certificate..."); |
| 169 | request_certificate(csr, instance_img_fd).map_err(|e| { |
| 170 | error!("Failed to get certificate. Error: {e:?}"); |
| 171 | Status::new_exception_str(ExceptionCode::SERVICE_SPECIFIC, Some(e.to_string())) |
| 172 | }) |
| 173 | } |
Inseob Kim | 53d0b21 | 2023-07-20 16:58:37 +0900 | [diff] [blame^] | 174 | |
| 175 | fn getAssignableDevices(&self) -> binder::Result<Vec<AssignableDevice>> { |
| 176 | check_use_custom_virtual_machine()?; |
| 177 | |
| 178 | // TODO(b/291191362): read VM DTBO to find assignable devices. |
| 179 | Ok(vec![AssignableDevice { |
| 180 | kind: "eh".to_owned(), |
| 181 | node: "/sys/bus/platform/devices/16d00000.eh".to_owned(), |
| 182 | }]) |
| 183 | } |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 184 | } |
| 185 | |
/// Data recorded for one allocated VM context: its CID and the identity of the requester.
#[derive(Debug, Default)]
struct GlobalVmInstance {
    /// The unique CID assigned to the VM for vsock communication.
    cid: Cid,
    /// UID of the client who requested this VM instance.
    requester_uid: uid_t,
    /// PID of the client who requested this VM instance.
    /// This is the value passed in by the requester; it is only stored and reported for
    /// debugging.
    requester_debug_pid: pid_t,
}
| 195 | |
| 196 | impl GlobalVmInstance { |
| 197 | fn get_temp_dir(&self) -> PathBuf { |
| 198 | let cid = self.cid; |
| 199 | format!("{TEMPORARY_DIRECTORY}/{cid}").into() |
| 200 | } |
| 201 | } |
| 202 | |
/// The mutable state of the VirtualizationServiceInternal. There should only be one instance
/// of this struct.
#[derive(Debug, Default)]
struct GlobalState {
    /// VM contexts currently allocated to running VMs. A CID is never recycled as long
    /// as there is a strong reference held by a GlobalVmContext.
    ///
    /// Entries are keyed by CID and hold only weak references; entries whose instance has no
    /// strong references left are purged at the start of `allocate_vm_context`.
    held_contexts: HashMap<Cid, Weak<GlobalVmInstance>>,
}
| 211 | |
| 212 | impl GlobalState { |
| 213 | /// Get the next available CID, or an error if we have run out. The last CID used is stored in |
| 214 | /// a system property so that restart of virtualizationservice doesn't reuse CID while the host |
| 215 | /// Android is up. |
| 216 | fn get_next_available_cid(&mut self) -> Result<Cid> { |
| 217 | // Start trying to find a CID from the last used CID + 1. This ensures |
| 218 | // that we do not eagerly recycle CIDs. It makes debugging easier but |
| 219 | // also means that retrying to allocate a CID, eg. because it is |
| 220 | // erroneously occupied by a process, will not recycle the same CID. |
| 221 | let last_cid_prop = |
| 222 | system_properties::read(SYSPROP_LAST_CID)?.and_then(|val| match val.parse::<Cid>() { |
| 223 | Ok(num) => { |
| 224 | if is_valid_guest_cid(num) { |
| 225 | Some(num) |
| 226 | } else { |
| 227 | error!("Invalid value '{}' of property '{}'", num, SYSPROP_LAST_CID); |
| 228 | None |
| 229 | } |
| 230 | } |
| 231 | Err(_) => { |
| 232 | error!("Invalid value '{}' of property '{}'", val, SYSPROP_LAST_CID); |
| 233 | None |
| 234 | } |
| 235 | }); |
| 236 | |
| 237 | let first_cid = if let Some(last_cid) = last_cid_prop { |
| 238 | if last_cid == GUEST_CID_MAX { |
| 239 | GUEST_CID_MIN |
| 240 | } else { |
| 241 | last_cid + 1 |
| 242 | } |
| 243 | } else { |
| 244 | GUEST_CID_MIN |
| 245 | }; |
| 246 | |
| 247 | let cid = self |
| 248 | .find_available_cid(first_cid..=GUEST_CID_MAX) |
| 249 | .or_else(|| self.find_available_cid(GUEST_CID_MIN..first_cid)) |
| 250 | .ok_or_else(|| anyhow!("Could not find an available CID."))?; |
| 251 | |
| 252 | system_properties::write(SYSPROP_LAST_CID, &format!("{}", cid))?; |
| 253 | Ok(cid) |
| 254 | } |
| 255 | |
| 256 | fn find_available_cid<I>(&self, mut range: I) -> Option<Cid> |
| 257 | where |
| 258 | I: Iterator<Item = Cid>, |
| 259 | { |
| 260 | range.find(|cid| !self.held_contexts.contains_key(cid)) |
| 261 | } |
| 262 | |
| 263 | fn allocate_vm_context( |
| 264 | &mut self, |
| 265 | requester_uid: uid_t, |
| 266 | requester_debug_pid: pid_t, |
| 267 | ) -> Result<Strong<dyn IGlobalVmContext>> { |
| 268 | // Garbage collect unused VM contexts. |
| 269 | self.held_contexts.retain(|_, instance| instance.strong_count() > 0); |
| 270 | |
| 271 | let cid = self.get_next_available_cid()?; |
| 272 | let instance = Arc::new(GlobalVmInstance { cid, requester_uid, requester_debug_pid }); |
| 273 | create_temporary_directory(&instance.get_temp_dir(), requester_uid)?; |
| 274 | |
| 275 | self.held_contexts.insert(cid, Arc::downgrade(&instance)); |
| 276 | let binder = GlobalVmContext { instance, ..Default::default() }; |
| 277 | Ok(BnGlobalVmContext::new_binder(binder, BinderFeatures::default())) |
| 278 | } |
| 279 | } |
| 280 | |
| 281 | fn create_temporary_directory(path: &PathBuf, requester_uid: uid_t) -> Result<()> { |
| 282 | if path.as_path().exists() { |
| 283 | remove_temporary_dir(path).unwrap_or_else(|e| { |
| 284 | warn!("Could not delete temporary directory {:?}: {}", path, e); |
| 285 | }); |
| 286 | } |
| 287 | // Create a directory that is owned by client's UID but system's GID, and permissions 0700. |
| 288 | // If the chown() fails, this will leave behind an empty directory that will get removed |
| 289 | // at the next attempt, or if virtualizationservice is restarted. |
| 290 | create_dir(path).with_context(|| format!("Could not create temporary directory {:?}", path))?; |
| 291 | chown(path, Some(Uid::from_raw(requester_uid)), None) |
| 292 | .with_context(|| format!("Could not set ownership of temporary directory {:?}", path))?; |
| 293 | Ok(()) |
| 294 | } |
| 295 | |
| 296 | /// Removes a directory owned by a different user by first changing its owner back |
| 297 | /// to VirtualizationService. |
| 298 | pub fn remove_temporary_dir(path: &PathBuf) -> Result<()> { |
Alice Wang | d1b11a0 | 2023-04-18 12:30:20 +0000 | [diff] [blame] | 299 | ensure!(path.as_path().is_dir(), "Path {:?} is not a directory", path); |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 300 | chown(path, Some(Uid::current()), None)?; |
| 301 | set_permissions(path, Permissions::from_mode(0o700))?; |
Alice Wang | d1b11a0 | 2023-04-18 12:30:20 +0000 | [diff] [blame] | 302 | remove_dir_all(path)?; |
David Brazdil | afc9a9e | 2023-01-12 16:08:10 +0000 | [diff] [blame] | 303 | Ok(()) |
| 304 | } |
| 305 | |
/// Implementation of the AIDL `IGlobalVmContext` interface.
#[derive(Debug, Default)]
struct GlobalVmContext {
    /// Strong reference to the context's instance data structure.
    instance: Arc<GlobalVmInstance>,
    /// Keeps our service process running as long as this VM context exists.
    // The field is never read; it is held purely so that the guard lives as long as the context.
    #[allow(dead_code)]
    lazy_service_guard: LazyServiceGuard,
}
| 315 | |
// Empty impl: the context relies on the default method implementations of binder's `Interface`.
impl Interface for GlobalVmContext {}
| 317 | |
| 318 | impl IGlobalVmContext for GlobalVmContext { |
| 319 | fn getCid(&self) -> binder::Result<i32> { |
| 320 | Ok(self.instance.cid as i32) |
| 321 | } |
| 322 | |
| 323 | fn getTemporaryDirectory(&self) -> binder::Result<String> { |
| 324 | Ok(self.instance.get_temp_dir().to_string_lossy().to_string()) |
| 325 | } |
| 326 | } |
| 327 | |
| 328 | fn handle_stream_connection_tombstoned() -> Result<()> { |
| 329 | // Should not listen for tombstones on a guest VM's port. |
| 330 | assert!(!is_valid_guest_cid(VM_TOMBSTONES_SERVICE_PORT as Cid)); |
| 331 | let listener = |
| 332 | VsockListener::bind_with_cid_port(VMADDR_CID_HOST, VM_TOMBSTONES_SERVICE_PORT as Cid)?; |
| 333 | for incoming_stream in listener.incoming() { |
| 334 | let mut incoming_stream = match incoming_stream { |
| 335 | Err(e) => { |
| 336 | warn!("invalid incoming connection: {:?}", e); |
| 337 | continue; |
| 338 | } |
| 339 | Ok(s) => s, |
| 340 | }; |
| 341 | std::thread::spawn(move || { |
| 342 | if let Err(e) = handle_tombstone(&mut incoming_stream) { |
| 343 | error!("Failed to write tombstone- {:?}", e); |
| 344 | } |
| 345 | }); |
| 346 | } |
| 347 | Ok(()) |
| 348 | } |
| 349 | |
| 350 | fn handle_tombstone(stream: &mut VsockStream) -> Result<()> { |
| 351 | if let Ok(addr) = stream.peer_addr() { |
| 352 | info!("Vsock Stream connected to cid={} for tombstones", addr.cid()); |
| 353 | } |
| 354 | let tb_connection = |
| 355 | TombstonedConnection::connect(std::process::id() as i32, DebuggerdDumpType::Tombstone) |
| 356 | .context("Failed to connect to tombstoned")?; |
| 357 | let mut text_output = tb_connection |
| 358 | .text_output |
| 359 | .as_ref() |
| 360 | .ok_or_else(|| anyhow!("Could not get file to write the tombstones on"))?; |
| 361 | let mut num_bytes_read = 0; |
| 362 | loop { |
| 363 | let mut chunk_recv = [0; CHUNK_RECV_MAX_LEN]; |
| 364 | let n = stream |
| 365 | .read(&mut chunk_recv) |
| 366 | .context("Failed to read tombstone data from Vsock stream")?; |
| 367 | if n == 0 { |
| 368 | break; |
| 369 | } |
| 370 | num_bytes_read += n; |
| 371 | text_output.write_all(&chunk_recv[0..n]).context("Failed to write guests tombstones")?; |
| 372 | } |
| 373 | info!("Received {} bytes from guest & wrote to tombstone file", num_bytes_read); |
| 374 | tb_connection.notify_completion()?; |
| 375 | Ok(()) |
| 376 | } |
| 377 | |
| 378 | /// Checks whether the caller has a specific permission |
| 379 | fn check_permission(perm: &str) -> binder::Result<()> { |
| 380 | let calling_pid = get_calling_pid(); |
| 381 | let calling_uid = get_calling_uid(); |
| 382 | // Root can do anything |
| 383 | if calling_uid == 0 { |
| 384 | return Ok(()); |
| 385 | } |
| 386 | let perm_svc: Strong<dyn IPermissionController::IPermissionController> = |
| 387 | binder::get_interface("permission")?; |
| 388 | if perm_svc.checkPermission(perm, calling_pid, calling_uid as i32)? { |
| 389 | Ok(()) |
| 390 | } else { |
| 391 | Err(Status::new_exception_str( |
| 392 | ExceptionCode::SECURITY, |
| 393 | Some(format!("does not have the {} permission", perm)), |
| 394 | )) |
| 395 | } |
| 396 | } |
| 397 | |
| 398 | /// Check whether the caller of the current Binder method is allowed to call debug methods. |
| 399 | fn check_debug_access() -> binder::Result<()> { |
| 400 | check_permission("android.permission.DEBUG_VIRTUAL_MACHINE") |
| 401 | } |
| 402 | |
| 403 | /// Check whether the caller of the current Binder method is allowed to manage VMs |
| 404 | fn check_manage_access() -> binder::Result<()> { |
| 405 | check_permission("android.permission.MANAGE_VIRTUAL_MACHINE") |
| 406 | } |
Inseob Kim | 53d0b21 | 2023-07-20 16:58:37 +0900 | [diff] [blame^] | 407 | |
| 408 | /// Check whether the caller of the current Binder method is allowed to use custom VMs |
| 409 | fn check_use_custom_virtual_machine() -> binder::Result<()> { |
| 410 | check_permission("android.permission.USE_CUSTOM_VIRTUAL_MACHINE") |
| 411 | } |