blob: 4daa0cffececcafe3bd82ca0ae31daf9397796df [file] [log] [blame]
// Copyright 2021, The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//! Implementation of the AIDL interface of the VirtualizationService.
use crate::{get_calling_pid, get_calling_uid};
use crate::atom::{forward_vm_booted_atom, forward_vm_creation_atom, forward_vm_exited_atom};
use crate::rkpvm::request_attestation;
use android_os_permissions_aidl::aidl::android::os::IPermissionController;
use android_system_virtualizationservice::{
aidl::android::system::virtualizationservice::AssignableDevice::AssignableDevice,
aidl::android::system::virtualizationservice::VirtualMachineDebugInfo::VirtualMachineDebugInfo,
binder::ParcelFileDescriptor,
};
use android_system_virtualizationservice_internal::aidl::android::system::virtualizationservice_internal::{
AtomVmBooted::AtomVmBooted,
AtomVmCreationRequested::AtomVmCreationRequested,
AtomVmExited::AtomVmExited,
IGlobalVmContext::{BnGlobalVmContext, IGlobalVmContext},
IVirtualizationServiceInternal::BoundDevice::BoundDevice,
IVirtualizationServiceInternal::IVirtualizationServiceInternal,
IVfioHandler::{BpVfioHandler, IVfioHandler},
};
use android_system_virtualmachineservice::aidl::android::system::virtualmachineservice::IVirtualMachineService::VM_TOMBSTONES_SERVICE_PORT;
use anyhow::{anyhow, ensure, Context, Result};
use avflog::LogResult;
use binder::{self, wait_for_interface, BinderFeatures, ExceptionCode, Interface, LazyServiceGuard, Status, Strong, IntoBinderResult};
use libc::VMADDR_CID_HOST;
use log::{error, info, warn};
use rustutils::system_properties;
use serde::Deserialize;
use std::collections::{HashMap, HashSet};
use std::fs::{self, create_dir, remove_dir_all, set_permissions, File, Permissions};
use std::io::{Read, Write};
use std::os::unix::fs::PermissionsExt;
use std::os::unix::raw::{pid_t, uid_t};
use std::path::{Path, PathBuf};
use std::sync::{Arc, Mutex, Weak};
use tombstoned_client::{DebuggerdDumpType, TombstonedConnection};
use vsock::{VsockListener, VsockStream};
use nix::unistd::{chown, Uid};
/// The unique ID of a VM used (together with a port number) for vsock communication.
pub type Cid = u32;
/// Identifier string of the VirtualizationService binder service.
// NOTE(review): not referenced anywhere in this file; presumably the name under which the
// service is registered or looked up — confirm against the users of this constant.
pub const BINDER_SERVICE_IDENTIFIER: &str = "android.system.virtualizationservice";
/// Directory in which to write disk image files used while running VMs.
/// Each allocated VM gets its own subdirectory here, named after its CID.
pub const TEMPORARY_DIRECTORY: &str = "/data/misc/virtualizationservice";
/// The first CID to assign to a guest VM managed by the VirtualizationService. CIDs lower than this
/// are reserved for the host or other usage.
const GUEST_CID_MIN: Cid = 2048;
/// The last CID (inclusive) that may be assigned to a guest VM.
const GUEST_CID_MAX: Cid = 65535;
/// Name of the system property holding the CID that was handed out most recently. The search for
/// the next free CID starts just after the value stored here.
const SYSPROP_LAST_CID: &str = "virtualizationservice.state.last_cid";
/// Size, in bytes, of the buffer used for each read of tombstone data from a vsock stream.
const CHUNK_RECV_MAX_LEN: usize = 1024;
/// Returns `true` when `cid` lies within the inclusive range
/// `GUEST_CID_MIN..=GUEST_CID_MAX` from which guest VM CIDs are allocated.
fn is_valid_guest_cid(cid: Cid) -> bool {
    matches!(cid, GUEST_CID_MIN..=GUEST_CID_MAX)
}
/// Singleton service for allocating globally-unique VM resources, such as the CID, and running
/// singleton servers, like tombstone receiver.
#[derive(Debug, Default)]
pub struct VirtualizationServiceInternal {
/// Bookkeeping of the VM contexts handed out so far. The mutex is taken by the binder
/// methods that allocate a new context or list the existing ones.
state: Arc<Mutex<GlobalState>>,
}
impl VirtualizationServiceInternal {
    /// Creates the service with empty global state and starts the background thread that
    /// receives tombstones from guest VMs.
    ///
    /// If the tombstone listener fails, the error is only logged as a warning; the returned
    /// service is unaffected.
    pub fn init() -> VirtualizationServiceInternal {
        let service = Self::default();

        // The join handle is dropped, so the listener thread runs detached.
        std::thread::spawn(|| {
            handle_stream_connection_tombstoned().unwrap_or_else(|e| {
                warn!("Error receiving tombstone from guest or writing them. Error: {:?}", e);
            });
        });

        service
    }
}

impl Interface for VirtualizationServiceInternal {}
impl IVirtualizationServiceInternal for VirtualizationServiceInternal {
fn removeMemlockRlimit(&self) -> binder::Result<()> {
let pid = get_calling_pid();
let lim = libc::rlimit { rlim_cur: libc::RLIM_INFINITY, rlim_max: libc::RLIM_INFINITY };
// SAFETY: borrowing the new limit struct only
let ret = unsafe { libc::prlimit(pid, libc::RLIMIT_MEMLOCK, &lim, std::ptr::null_mut()) };
match ret {
0 => Ok(()),
-1 => Err(std::io::Error::last_os_error().into()),
n => Err(anyhow!("Unexpected return value from prlimit(): {n}")),
}
.or_binder_exception(ExceptionCode::ILLEGAL_STATE)
}
fn allocateGlobalVmContext(
&self,
requester_debug_pid: i32,
) -> binder::Result<Strong<dyn IGlobalVmContext>> {
check_manage_access()?;
let requester_uid = get_calling_uid();
let requester_debug_pid = requester_debug_pid as pid_t;
let state = &mut *self.state.lock().unwrap();
state
.allocate_vm_context(requester_uid, requester_debug_pid)
.or_binder_exception(ExceptionCode::ILLEGAL_STATE)
}
fn atomVmBooted(&self, atom: &AtomVmBooted) -> Result<(), Status> {
forward_vm_booted_atom(atom);
Ok(())
}
fn atomVmCreationRequested(&self, atom: &AtomVmCreationRequested) -> Result<(), Status> {
forward_vm_creation_atom(atom);
Ok(())
}
fn atomVmExited(&self, atom: &AtomVmExited) -> Result<(), Status> {
forward_vm_exited_atom(atom);
Ok(())
}
fn debugListVms(&self) -> binder::Result<Vec<VirtualMachineDebugInfo>> {
check_debug_access()?;
let state = &mut *self.state.lock().unwrap();
let cids = state
.held_contexts
.iter()
.filter_map(|(_, inst)| Weak::upgrade(inst))
.map(|vm| VirtualMachineDebugInfo {
cid: vm.cid as i32,
temporaryDirectory: vm.get_temp_dir().to_string_lossy().to_string(),
requesterUid: vm.requester_uid as i32,
requesterPid: vm.requester_debug_pid,
})
.collect();
Ok(cids)
}
fn requestAttestation(&self, csr: &[u8]) -> binder::Result<Vec<u8>> {
check_manage_access()?;
info!("Received csr. Requestting attestation...");
if cfg!(remote_attestation) {
request_attestation(csr)
.context("Failed to request attestation")
.with_log()
.or_service_specific_exception(-1)
} else {
Err(Status::new_exception_str(
ExceptionCode::UNSUPPORTED_OPERATION,
Some(
"requestAttestation is not supported with the remote_attestation feature \
disabled",
),
))
.with_log()
}
}
fn getAssignableDevices(&self) -> binder::Result<Vec<AssignableDevice>> {
check_use_custom_virtual_machine()?;
Ok(get_assignable_devices()?
.device
.into_iter()
.map(|x| AssignableDevice { node: x.sysfs_path, kind: x.kind })
.collect::<Vec<_>>())
}
fn bindDevicesToVfioDriver(&self, devices: &[String]) -> binder::Result<Vec<BoundDevice>> {
check_use_custom_virtual_machine()?;
let vfio_service: Strong<dyn IVfioHandler> =
wait_for_interface(<BpVfioHandler as IVfioHandler>::get_descriptor())?;
vfio_service.bindDevicesToVfioDriver(devices)?;
let dtbo_path = Path::new(TEMPORARY_DIRECTORY).join("common").join("dtbo");
if !dtbo_path.exists() {
// open a writable file descriptor for vfio_handler
let dtbo = File::create(&dtbo_path)
.context("Failed to create VM DTBO file")
.or_service_specific_exception(-1)?;
vfio_service.writeVmDtbo(&ParcelFileDescriptor::new(dtbo))?;
}
Ok(get_assignable_devices()?
.device
.into_iter()
.filter_map(|x| {
if devices.contains(&x.sysfs_path) {
Some(BoundDevice { sysfsPath: x.sysfs_path, dtboLabel: x.dtbo_label })
} else {
None
}
})
.collect::<Vec<_>>())
}
}
// KEEP IN SYNC WITH assignable_devices.xsd
/// One `device` entry deserialized from `assignable_devices.xml`.
#[derive(Debug, Deserialize)]
struct Device {
/// Kind of the device; reported to clients as `AssignableDevice::kind`.
kind: String,
/// Label reported to clients as `BoundDevice::dtboLabel`.
// NOTE(review): presumably identifies the device's node in the VM DTBO — confirm against
// assignable_devices.xsd.
dtbo_label: String,
/// Path of the device; reported to clients as `AssignableDevice::node` and
/// `BoundDevice::sysfsPath`. Entries whose path does not exist on the host, or repeats an
/// earlier entry, are dropped when the file is loaded.
sysfs_path: String,
}
/// Top-level structure deserialized from `assignable_devices.xml`.
/// The `Default` value (no devices) is what is used when the file does not exist.
#[derive(Debug, Default, Deserialize)]
struct Devices {
/// The deserialized `device` entries.
device: Vec<Device>,
}
fn get_assignable_devices() -> binder::Result<Devices> {
let xml_path = Path::new("/vendor/etc/avf/assignable_devices.xml");
if !xml_path.exists() {
return Ok(Devices { ..Default::default() });
}
let xml = fs::read(xml_path)
.context("Failed to read assignable_devices.xml")
.with_log()
.or_service_specific_exception(-1)?;
let xml = String::from_utf8(xml)
.context("assignable_devices.xml is not a valid UTF-8 file")
.with_log()
.or_service_specific_exception(-1)?;
let mut devices: Devices = serde_xml_rs::from_str(&xml)
.context("can't parse assignable_devices.xml")
.with_log()
.or_service_specific_exception(-1)?;
let mut device_set = HashSet::new();
devices.device.retain(move |device| {
if device_set.contains(&device.sysfs_path) {
warn!("duplicated assignable device {device:?}; ignoring...");
return false;
}
if !Path::new(&device.sysfs_path).exists() {
warn!("assignable device {device:?} doesn't exist; ignoring...");
return false;
}
device_set.insert(device.sysfs_path.clone());
true
});
Ok(devices)
}
/// Data recorded for one allocated VM context. `GlobalState` tracks it through a weak
/// reference, while the `GlobalVmContext` binder object holds the strong reference.
#[derive(Debug, Default)]
struct GlobalVmInstance {
/// The unique CID assigned to the VM for vsock communication.
cid: Cid,
/// UID of the client who requested this VM instance.
requester_uid: uid_t,
/// PID of the client who requested this VM instance.
/// This is the value the client passed to `allocateGlobalVmContext`; it is stored as given
/// and reported by `debugListVms`.
requester_debug_pid: pid_t,
}
impl GlobalVmInstance {
    /// Returns the path of this VM's temporary directory: a subdirectory of
    /// `TEMPORARY_DIRECTORY` named after the VM's CID.
    fn get_temp_dir(&self) -> PathBuf {
        PathBuf::from(format!("{}/{}", TEMPORARY_DIRECTORY, self.cid))
    }
}
/// The mutable state of the VirtualizationServiceInternal. There should only be one instance
/// of this struct.
#[derive(Debug, Default)]
struct GlobalState {
/// VM contexts currently allocated to running VMs. A CID is never recycled as long
/// as there is a strong reference held by a GlobalVmContext.
///
/// Entries whose instance has been dropped are not removed immediately; they are pruned
/// at the start of the next `allocate_vm_context` call.
held_contexts: HashMap<Cid, Weak<GlobalVmInstance>>,
}
impl GlobalState {
/// Get the next available CID, or an error if we have run out. The last CID used is stored in
/// a system property so that restart of virtualizationservice doesn't reuse CID while the host
/// Android is up.
fn get_next_available_cid(&mut self) -> Result<Cid> {
// Start trying to find a CID from the last used CID + 1. This ensures
// that we do not eagerly recycle CIDs. It makes debugging easier but
// also means that retrying to allocate a CID, eg. because it is
// erroneously occupied by a process, will not recycle the same CID.
let last_cid_prop =
system_properties::read(SYSPROP_LAST_CID)?.and_then(|val| match val.parse::<Cid>() {
Ok(num) => {
if is_valid_guest_cid(num) {
Some(num)
} else {
error!("Invalid value '{}' of property '{}'", num, SYSPROP_LAST_CID);
None
}
}
Err(_) => {
error!("Invalid value '{}' of property '{}'", val, SYSPROP_LAST_CID);
None
}
});
let first_cid = if let Some(last_cid) = last_cid_prop {
if last_cid == GUEST_CID_MAX {
GUEST_CID_MIN
} else {
last_cid + 1
}
} else {
GUEST_CID_MIN
};
let cid = self
.find_available_cid(first_cid..=GUEST_CID_MAX)
.or_else(|| self.find_available_cid(GUEST_CID_MIN..first_cid))
.ok_or_else(|| anyhow!("Could not find an available CID."))?;
system_properties::write(SYSPROP_LAST_CID, &format!("{}", cid))?;
Ok(cid)
}
fn find_available_cid<I>(&self, mut range: I) -> Option<Cid>
where
I: Iterator<Item = Cid>,
{
range.find(|cid| !self.held_contexts.contains_key(cid))
}
fn allocate_vm_context(
&mut self,
requester_uid: uid_t,
requester_debug_pid: pid_t,
) -> Result<Strong<dyn IGlobalVmContext>> {
// Garbage collect unused VM contexts.
self.held_contexts.retain(|_, instance| instance.strong_count() > 0);
let cid = self.get_next_available_cid()?;
let instance = Arc::new(GlobalVmInstance { cid, requester_uid, requester_debug_pid });
create_temporary_directory(&instance.get_temp_dir(), requester_uid)?;
self.held_contexts.insert(cid, Arc::downgrade(&instance));
let binder = GlobalVmContext { instance, ..Default::default() };
Ok(BnGlobalVmContext::new_binder(binder, BinderFeatures::default()))
}
}
/// Creates the directory `path` afresh and hands its ownership to `requester_uid`.
///
/// If `path` already exists it is removed first; a failure to remove it is only logged, and
/// the creation is attempted anyway.
///
/// Fails if the directory cannot be created or its owner cannot be changed.
fn create_temporary_directory(path: &PathBuf, requester_uid: uid_t) -> Result<()> {
    if path.exists() {
        if let Err(e) = remove_temporary_dir(path) {
            warn!("Could not delete temporary directory {:?}: {}", path, e);
        }
    }

    // Create a directory that is owned by client's UID but system's GID, and permissions 0700.
    // If the chown() fails, this will leave behind an empty directory that will get removed
    // at the next attempt, or if virtualizationservice is restarted.
    create_dir(path).with_context(|| format!("Could not create temporary directory {:?}", path))?;

    let owner = Uid::from_raw(requester_uid);
    chown(path, Some(owner), None)
        .with_context(|| format!("Could not set ownership of temporary directory {:?}", path))
}
/// Removes a directory owned by a different user by first changing its owner back
/// to VirtualizationService.
pub fn remove_temporary_dir(path: &PathBuf) -> Result<()> {
ensure!(path.as_path().is_dir(), "Path {:?} is not a directory", path);
chown(path, Some(Uid::current()), None)?;
set_permissions(path, Permissions::from_mode(0o700))?;
remove_dir_all(path)?;
Ok(())
}
/// Implementation of the AIDL `IGlobalVmContext` interface.
#[derive(Debug, Default)]
struct GlobalVmContext {
/// Strong reference to the context's instance data structure.
/// While it is alive, the weak reference stored in `GlobalState::held_contexts` still has a
/// strong count, so the entry survives garbage collection and the CID is not handed out again.
instance: Arc<GlobalVmInstance>,
/// Keeps our service process running as long as this VM context exists.
// The field is never read; it is held only for its lifetime, hence the `dead_code` allowance.
#[allow(dead_code)]
lazy_service_guard: LazyServiceGuard,
}
impl Interface for GlobalVmContext {}

impl IGlobalVmContext for GlobalVmContext {
    /// Returns the CID allocated to this VM context.
    fn getCid(&self) -> binder::Result<i32> {
        let cid = self.instance.cid;
        Ok(cid as i32)
    }

    /// Returns the path of this VM context's temporary directory as a string (converted
    /// lossily if the path is not valid UTF-8).
    fn getTemporaryDirectory(&self) -> binder::Result<String> {
        let dir = self.instance.get_temp_dir();
        Ok(dir.to_string_lossy().into_owned())
    }
}
/// Listens on the host's vsock tombstone port and spawns one thread per incoming connection
/// to receive and store the tombstone.
///
/// Connections that fail to be accepted are logged and skipped. Fails if the listener cannot
/// be bound.
///
/// # Panics
/// Panics if `VM_TOMBSTONES_SERVICE_PORT` falls within the guest CID range.
fn handle_stream_connection_tombstoned() -> Result<()> {
    let port = VM_TOMBSTONES_SERVICE_PORT as Cid;
    // Should not listen for tombstones on a guest VM's port.
    assert!(!is_valid_guest_cid(port));

    let listener = VsockListener::bind_with_cid_port(VMADDR_CID_HOST, port)?;
    for connection in listener.incoming() {
        match connection {
            Ok(mut stream) => {
                // Each connection gets its own detached thread.
                std::thread::spawn(move || {
                    handle_tombstone(&mut stream).unwrap_or_else(|e| {
                        error!("Failed to write tombstone- {:?}", e);
                    });
                });
            }
            Err(e) => {
                warn!("invalid incoming connection: {:?}", e);
            }
        }
    }
    Ok(())
}
/// Copies a tombstone from the guest's vsock `stream` into the text output provided by
/// tombstoned, then notifies tombstoned that the dump is complete.
///
/// Data is read until the stream reports end-of-file. Fails if tombstoned cannot be reached,
/// provides no text output, or if reading, writing or the completion notice fails.
fn handle_tombstone(stream: &mut VsockStream) -> Result<()> {
    if let Ok(peer) = stream.peer_addr() {
        info!("Vsock Stream connected to cid={} for tombstones", peer.cid());
    }

    let tb_connection =
        TombstonedConnection::connect(std::process::id() as i32, DebuggerdDumpType::Tombstone)
            .context("Failed to connect to tombstoned")?;
    let mut text_output = tb_connection
        .text_output
        .as_ref()
        .ok_or_else(|| anyhow!("Could not get file to write the tombstones on"))?;

    // One buffer is reused for every read; only the bytes actually received are written out.
    let mut buffer = [0; CHUNK_RECV_MAX_LEN];
    let mut total_bytes = 0;
    loop {
        match stream.read(&mut buffer).context("Failed to read tombstone data from Vsock stream")? {
            // A zero-length read means the guest has closed its side of the stream.
            0 => break,
            len => {
                total_bytes += len;
                text_output
                    .write_all(&buffer[..len])
                    .context("Failed to write guests tombstones")?;
            }
        }
    }

    info!("Received {} bytes from guest & wrote to tombstone file", total_bytes);
    tb_connection.notify_completion()?;
    Ok(())
}
/// Verifies that the caller of the current Binder method holds the permission `perm`.
///
/// Callers with UID 0 always pass. Everyone else is checked against the `permission`
/// service.
///
/// # Errors
/// Returns a `SECURITY` exception if the permission is not granted, or the underlying binder
/// error if the permission service cannot be reached or queried.
fn check_permission(perm: &str) -> binder::Result<()> {
    let calling_pid = get_calling_pid();
    let calling_uid = get_calling_uid();

    // Root can do anything
    if calling_uid == 0 {
        return Ok(());
    }

    let perm_svc: Strong<dyn IPermissionController::IPermissionController> =
        binder::get_interface("permission")?;
    let granted = perm_svc.checkPermission(perm, calling_pid, calling_uid as i32)?;
    if granted {
        return Ok(());
    }

    Err(anyhow!("does not have the {} permission", perm))
        .or_binder_exception(ExceptionCode::SECURITY)
}
/// Verifies that the caller of the current Binder method may call debug methods, i.e. that
/// it holds the `DEBUG_VIRTUAL_MACHINE` permission.
fn check_debug_access() -> binder::Result<()> {
    const PERMISSION: &str = "android.permission.DEBUG_VIRTUAL_MACHINE";
    check_permission(PERMISSION)
}
/// Verifies that the caller of the current Binder method may manage VMs, i.e. that it holds
/// the `MANAGE_VIRTUAL_MACHINE` permission.
fn check_manage_access() -> binder::Result<()> {
    const PERMISSION: &str = "android.permission.MANAGE_VIRTUAL_MACHINE";
    check_permission(PERMISSION)
}
/// Verifies that the caller of the current Binder method may use custom VMs, i.e. that it
/// holds the `USE_CUSTOM_VIRTUAL_MACHINE` permission.
fn check_use_custom_virtual_machine() -> binder::Result<()> {
    const PERMISSION: &str = "android.permission.USE_CUSTOM_VIRTUAL_MACHINE";
    check_permission(PERMISSION)
}