Wait for crosvm in a separate thread, and keep track of when it dies.
This lets us tell whether a VM is still running or has finished. Also
add callback so clients can get notified when crosvm dies.
Bug: 171277638
Test: Ran on VIM3L
Test: atest VirtualizationTestCases
Change-Id: I52c1625af45cfcfe7aa0be465ea08f427ec5bc43
diff --git a/virtmanager/src/aidl.rs b/virtmanager/src/aidl.rs
index 96ba04f..5a4eedc 100644
--- a/virtmanager/src/aidl.rs
+++ b/virtmanager/src/aidl.rs
@@ -21,11 +21,12 @@
use android_system_virtmanager::aidl::android::system::virtmanager::IVirtualMachine::{
BnVirtualMachine, IVirtualMachine,
};
+use android_system_virtmanager::aidl::android::system::virtmanager::IVirtualMachineCallback::IVirtualMachineCallback;
use android_system_virtmanager::aidl::android::system::virtmanager::VirtualMachineDebugInfo::VirtualMachineDebugInfo;
use android_system_virtmanager::binder::{
self, Interface, ParcelFileDescriptor, StatusCode, Strong, ThreadState,
};
-use log::error;
+use log::{debug, error};
use std::ffi::CStr;
use std::fs::File;
use std::sync::{Arc, Mutex, Weak};
@@ -54,7 +55,6 @@
log_fd: Option<&ParcelFileDescriptor>,
) -> binder::Result<Strong<dyn IVirtualMachine>> {
let state = &mut *self.state.lock().unwrap();
- let cid = state.next_cid;
let log_fd = log_fd
.map(|fd| fd.as_ref().try_clone().map_err(|_| StatusCode::UNKNOWN_ERROR))
.transpose()?;
@@ -69,16 +69,9 @@
})
});
let requester_pid = ThreadState::get_calling_pid();
- let instance = Arc::new(start_vm(
- config_fd.as_ref(),
- cid,
- log_fd,
- requester_uid,
- requester_sid,
- requester_pid,
- )?);
- // TODO(qwandor): keep track of which CIDs are currently in use so that we can reuse them.
- state.next_cid = state.next_cid.checked_add(1).ok_or(StatusCode::UNKNOWN_ERROR)?;
+ let cid = state.allocate_cid()?;
+ let instance =
+ start_vm(config_fd.as_ref(), cid, log_fd, requester_uid, requester_sid, requester_pid)?;
state.add_vm(Arc::downgrade(&instance));
Ok(VirtualMachine::create(instance))
}
@@ -99,6 +92,7 @@
requesterUid: vm.requester_uid as i32,
requesterSid: vm.requester_sid.clone(),
requesterPid: vm.requester_pid,
+ running: vm.running(),
})
.collect();
Ok(cids)
@@ -155,6 +149,48 @@
fn getCid(&self) -> binder::Result<i32> {
Ok(self.instance.cid as i32)
}
+
+ fn isRunning(&self) -> binder::Result<bool> {
+ Ok(self.instance.running())
+ }
+
+ fn registerCallback(
+ &self,
+ callback: &Strong<dyn IVirtualMachineCallback>,
+ ) -> binder::Result<()> {
+ // TODO: Should this give an error if the VM is already dead?
+ self.instance.callbacks.add(callback.clone());
+ Ok(())
+ }
+}
+
+impl Drop for VirtualMachine {
+ fn drop(&mut self) {
+ debug!("Dropping {:?}", self);
+ self.instance.kill();
+ }
+}
+
+/// A set of Binders to be called back in response to various events on the VM, such as when it
+/// dies.
+#[derive(Debug, Default)]
+pub struct VirtualMachineCallbacks(Mutex<Vec<Strong<dyn IVirtualMachineCallback>>>);
+
+impl VirtualMachineCallbacks {
+ /// Call all registered callbacks to say that the VM has died.
+ pub fn callback_on_died(&self, cid: Cid) {
+ let callbacks = &*self.0.lock().unwrap();
+ for callback in callbacks {
+ if let Err(e) = callback.onDied(cid as i32) {
+ error!("Error calling callback: {}", e);
+ }
+ }
+ }
+
+ /// Add a new callback to the set.
+ fn add(&self, callback: Strong<dyn IVirtualMachineCallback>) {
+ self.0.lock().unwrap().push(callback);
+ }
}
/// The mutable state of the Virt Manager. There should only be one instance of this struct.
@@ -175,7 +211,7 @@
}
impl State {
- /// Get a list of VMs which are currently running.
+ /// Get a list of VMs which still have Binder references to them.
fn vms(&self) -> Vec<Arc<VmInstance>> {
// Attempt to upgrade the weak pointers to strong pointers.
self.vms.iter().filter_map(Weak::upgrade).collect()
@@ -200,6 +236,14 @@
let pos = self.debug_held_vms.iter().position(|vm| vm.getCid() == Ok(cid))?;
Some(self.debug_held_vms.swap_remove(pos))
}
+
+ /// Get the next available CID, or an error if we have run out.
+ fn allocate_cid(&mut self) -> binder::Result<Cid> {
+ // TODO(qwandor): keep track of which CIDs are currently in use so that we can reuse them.
+ let cid = self.next_cid;
+ self.next_cid = self.next_cid.checked_add(1).ok_or(StatusCode::UNKNOWN_ERROR)?;
+ Ok(cid)
+ }
}
impl Default for State {
@@ -217,7 +261,7 @@
requester_uid: u32,
requester_sid: Option<String>,
requester_pid: i32,
-) -> binder::Result<VmInstance> {
+) -> binder::Result<Arc<VmInstance>> {
let config = VmConfig::load(config_file).map_err(|e| {
error!("Failed to load VM config from {:?}: {:?}", config_file, e);
StatusCode::BAD_VALUE
diff --git a/virtmanager/src/crosvm.rs b/virtmanager/src/crosvm.rs
index bef9982..5e6f658 100644
--- a/virtmanager/src/crosvm.rs
+++ b/virtmanager/src/crosvm.rs
@@ -14,12 +14,17 @@
//! Functions for running instances of `crosvm`.
+use crate::aidl::VirtualMachineCallbacks;
use crate::config::VmConfig;
use crate::Cid;
use anyhow::Error;
-use log::{debug, error, info};
+use log::{error, info};
+use shared_child::SharedChild;
use std::fs::File;
-use std::process::{Child, Command};
+use std::process::Command;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+use std::thread;
const CROSVM_PATH: &str = "/apex/com.android.virt/bin/crosvm";
@@ -27,7 +32,7 @@
#[derive(Debug)]
pub struct VmInstance {
/// The crosvm child process.
- child: Child,
+ child: SharedChild,
/// The CID assigned to the VM for vsock communication.
pub cid: Cid,
/// The UID of the process which requested the VM.
@@ -37,18 +42,30 @@
/// The PID of the process which requested the VM. Note that this process may no longer exist
/// and the PID may have been reused for a different process, so this should not be trusted.
pub requester_pid: i32,
+ /// Whether the VM is still running.
+ running: AtomicBool,
+ /// Callbacks to clients of the VM.
+ pub callbacks: VirtualMachineCallbacks,
}
impl VmInstance {
/// Create a new `VmInstance` for the given process.
fn new(
- child: Child,
+ child: SharedChild,
cid: Cid,
requester_uid: u32,
requester_sid: Option<String>,
requester_pid: i32,
) -> VmInstance {
- VmInstance { child, cid, requester_uid, requester_sid, requester_pid }
+ VmInstance {
+ child,
+ cid,
+ requester_uid,
+ requester_sid,
+ requester_pid,
+ running: AtomicBool::new(true),
+ callbacks: Default::default(),
+ }
}
/// Start an instance of `crosvm` to manage a new VM. The `crosvm` instance will be killed when
@@ -60,29 +77,46 @@
requester_uid: u32,
requester_sid: Option<String>,
requester_pid: i32,
- ) -> Result<VmInstance, Error> {
+ ) -> Result<Arc<VmInstance>, Error> {
let child = run_vm(config, cid, log_fd)?;
- Ok(VmInstance::new(child, cid, requester_uid, requester_sid, requester_pid))
- }
-}
+ let instance =
+ Arc::new(VmInstance::new(child, cid, requester_uid, requester_sid, requester_pid));
-impl Drop for VmInstance {
- fn drop(&mut self) {
- debug!("Dropping {:?}", self);
+ let instance_clone = instance.clone();
+ thread::spawn(move || {
+ instance_clone.monitor();
+ });
+
+ Ok(instance)
+ }
+
+ /// Wait for the crosvm child process to finish, then mark the VM as no longer running and call
+ /// any callbacks.
+ fn monitor(&self) {
+ match self.child.wait() {
+ Err(e) => error!("Error waiting for crosvm instance to die: {}", e),
+ Ok(status) => info!("crosvm exited with status {}", status),
+ }
+ self.running.store(false, Ordering::Release);
+ self.callbacks.callback_on_died(self.cid);
+ }
+
+ /// Return whether `crosvm` is still running the VM.
+ pub fn running(&self) -> bool {
+ self.running.load(Ordering::Acquire)
+ }
+
+ /// Kill the crosvm instance.
+ pub fn kill(&self) {
// TODO: Talk to crosvm to shutdown cleanly.
if let Err(e) = self.child.kill() {
error!("Error killing crosvm instance: {}", e);
}
- // We need to wait on the process after killing it to avoid zombies.
- match self.child.wait() {
- Err(e) => error!("Error waiting for crosvm instance to die: {}", e),
- Ok(status) => info!("Crosvm exited with status {}", status),
- }
}
}
/// Start an instance of `crosvm` to manage a new VM.
-fn run_vm(config: &VmConfig, cid: Cid, log_fd: Option<File>) -> Result<Child, Error> {
+fn run_vm(config: &VmConfig, cid: Cid, log_fd: Option<File>) -> Result<SharedChild, Error> {
config.validate()?;
let mut command = Command::new(CROSVM_PATH);
@@ -110,6 +144,5 @@
command.arg(kernel);
}
info!("Running {:?}", command);
- // TODO: Monitor child process, and remove from VM map if it dies.
- Ok(command.spawn()?)
+ Ok(SharedChild::spawn(&mut command)?)
}