Wait for crosvm in a separate thread, and keep track of when it dies.
This lets us tell whether a VM is still running or has finished. Also
add a callback so clients can be notified when crosvm dies.
Bug: 171277638
Test: Ran on VIM3L
Test: atest VirtualizationTestCases
Change-Id: I52c1625af45cfcfe7aa0be465ea08f427ec5bc43
diff --git a/virtmanager/Android.bp b/virtmanager/Android.bp
index 83ac3d6..f1971dc 100644
--- a/virtmanager/Android.bp
+++ b/virtmanager/Android.bp
@@ -14,6 +14,7 @@
"liblog_rust",
"libserde_json",
"libserde",
+ "libshared_child",
"libanyhow",
],
apex_available: ["com.android.virt"],
diff --git a/virtmanager/aidl/android/system/virtmanager/IVirtualMachine.aidl b/virtmanager/aidl/android/system/virtmanager/IVirtualMachine.aidl
index 0358bfd..26aad0c 100644
--- a/virtmanager/aidl/android/system/virtmanager/IVirtualMachine.aidl
+++ b/virtmanager/aidl/android/system/virtmanager/IVirtualMachine.aidl
@@ -15,7 +15,18 @@
*/
package android.system.virtmanager;
+import android.system.virtmanager.IVirtualMachineCallback;
+
interface IVirtualMachine {
/** Get the CID allocated to the VM. */
int getCid();
+
+ /** Returns true if the VM is still running, or false if it has exited for any reason. */
+ boolean isRunning();
+
+ /**
+ * Register a Binder object to get callbacks when the state of the VM changes, such as if it
+ * dies.
+ */
+ void registerCallback(IVirtualMachineCallback callback);
}
diff --git a/virtmanager/aidl/android/system/virtmanager/IVirtualMachineCallback.aidl b/virtmanager/aidl/android/system/virtmanager/IVirtualMachineCallback.aidl
new file mode 100644
index 0000000..65a685d
--- /dev/null
+++ b/virtmanager/aidl/android/system/virtmanager/IVirtualMachineCallback.aidl
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package android.system.virtmanager;
+
+import android.system.virtmanager.IVirtualMachine;
+
+/**
+ * An object which a client may register with the Virt Manager to get callbacks about the state of
+ * a particular VM.
+ */
+oneway interface IVirtualMachineCallback {
+ /**
+ * Called when the VM dies; `cid` is the CID of the VM which died.
+ *
+ * Note that this will not be called if the Virt Manager itself dies, so you should also use
+ * `link_to_death` to handle that.
+ */
+ void onDied(int cid);
+}
diff --git a/virtmanager/aidl/android/system/virtmanager/VirtualMachineDebugInfo.aidl b/virtmanager/aidl/android/system/virtmanager/VirtualMachineDebugInfo.aidl
index 3a271cb..7bb77ce 100644
--- a/virtmanager/aidl/android/system/virtmanager/VirtualMachineDebugInfo.aidl
+++ b/virtmanager/aidl/android/system/virtmanager/VirtualMachineDebugInfo.aidl
@@ -31,4 +31,7 @@
* the PID may have been reused for a different process, so this should not be trusted.
*/
int requesterPid;
+
+ /** Whether the VM is still running. */
+ boolean running;
}
diff --git a/virtmanager/src/aidl.rs b/virtmanager/src/aidl.rs
index 96ba04f..5a4eedc 100644
--- a/virtmanager/src/aidl.rs
+++ b/virtmanager/src/aidl.rs
@@ -21,11 +21,12 @@
use android_system_virtmanager::aidl::android::system::virtmanager::IVirtualMachine::{
BnVirtualMachine, IVirtualMachine,
};
+use android_system_virtmanager::aidl::android::system::virtmanager::IVirtualMachineCallback::IVirtualMachineCallback;
use android_system_virtmanager::aidl::android::system::virtmanager::VirtualMachineDebugInfo::VirtualMachineDebugInfo;
use android_system_virtmanager::binder::{
self, Interface, ParcelFileDescriptor, StatusCode, Strong, ThreadState,
};
-use log::error;
+use log::{debug, error};
use std::ffi::CStr;
use std::fs::File;
use std::sync::{Arc, Mutex, Weak};
@@ -54,7 +55,6 @@
log_fd: Option<&ParcelFileDescriptor>,
) -> binder::Result<Strong<dyn IVirtualMachine>> {
let state = &mut *self.state.lock().unwrap();
- let cid = state.next_cid;
let log_fd = log_fd
.map(|fd| fd.as_ref().try_clone().map_err(|_| StatusCode::UNKNOWN_ERROR))
.transpose()?;
@@ -69,16 +69,9 @@
})
});
let requester_pid = ThreadState::get_calling_pid();
- let instance = Arc::new(start_vm(
- config_fd.as_ref(),
- cid,
- log_fd,
- requester_uid,
- requester_sid,
- requester_pid,
- )?);
- // TODO(qwandor): keep track of which CIDs are currently in use so that we can reuse them.
- state.next_cid = state.next_cid.checked_add(1).ok_or(StatusCode::UNKNOWN_ERROR)?;
+ let cid = state.allocate_cid()?;
+ let instance =
+ start_vm(config_fd.as_ref(), cid, log_fd, requester_uid, requester_sid, requester_pid)?;
state.add_vm(Arc::downgrade(&instance));
Ok(VirtualMachine::create(instance))
}
@@ -99,6 +92,7 @@
requesterUid: vm.requester_uid as i32,
requesterSid: vm.requester_sid.clone(),
requesterPid: vm.requester_pid,
+ running: vm.running(),
})
.collect();
Ok(cids)
@@ -155,6 +149,48 @@
fn getCid(&self) -> binder::Result<i32> {
Ok(self.instance.cid as i32)
}
+
+ fn isRunning(&self) -> binder::Result<bool> {
+ Ok(self.instance.running())
+ }
+
+ fn registerCallback(
+ &self,
+ callback: &Strong<dyn IVirtualMachineCallback>,
+ ) -> binder::Result<()> {
+ // TODO: Error if the VM is already dead? A callback added after the monitor has fired is not called.
+ self.instance.callbacks.add(callback.clone());
+ Ok(())
+ }
+}
+
+impl Drop for VirtualMachine {
+ fn drop(&mut self) {
+ debug!("Dropping {:?}", self);
+ self.instance.kill();
+ }
+}
+
+/// A list of Binders to be called back in response to various events on the VM, such as when it
+/// dies. Callbacks are kept in registration order and duplicates are not removed.
+#[derive(Debug, Default)]
+pub struct VirtualMachineCallbacks(Mutex<Vec<Strong<dyn IVirtualMachineCallback>>>);
+
+impl VirtualMachineCallbacks {
+ /// Tell every registered callback that the VM with `cid` has died; failed calls are only logged.
+ pub fn callback_on_died(&self, cid: Cid) {
+ let callbacks = &*self.0.lock().unwrap(); // The lock stays held while the callbacks are called.
+ for callback in callbacks {
+ if let Err(e) = callback.onDied(cid as i32) {
+ error!("Error calling callback: {}", e);
+ }
+ }
+ }
+
+ /// Append a new callback to the list; duplicates are not filtered out.
+ fn add(&self, callback: Strong<dyn IVirtualMachineCallback>) {
+ self.0.lock().unwrap().push(callback);
+ }
}
/// The mutable state of the Virt Manager. There should only be one instance of this struct.
@@ -175,7 +211,7 @@
}
impl State {
- /// Get a list of VMs which are currently running.
+ /// Get a list of VMs which still have Binder references to them.
fn vms(&self) -> Vec<Arc<VmInstance>> {
// Attempt to upgrade the weak pointers to strong pointers.
self.vms.iter().filter_map(Weak::upgrade).collect()
@@ -200,6 +236,14 @@
let pos = self.debug_held_vms.iter().position(|vm| vm.getCid() == Ok(cid))?;
Some(self.debug_held_vms.swap_remove(pos))
}
+
+ /// Hand out `next_cid` and advance it; fails with `UNKNOWN_ERROR` if the increment would overflow.
+ fn allocate_cid(&mut self) -> binder::Result<Cid> {
+ // TODO(qwandor): keep track of which CIDs are currently in use so that we can reuse them.
+ let cid = self.next_cid;
+ self.next_cid = self.next_cid.checked_add(1).ok_or(StatusCode::UNKNOWN_ERROR)?;
+ Ok(cid)
+ }
}
impl Default for State {
@@ -217,7 +261,7 @@
requester_uid: u32,
requester_sid: Option<String>,
requester_pid: i32,
-) -> binder::Result<VmInstance> {
+) -> binder::Result<Arc<VmInstance>> {
let config = VmConfig::load(config_file).map_err(|e| {
error!("Failed to load VM config from {:?}: {:?}", config_file, e);
StatusCode::BAD_VALUE
diff --git a/virtmanager/src/crosvm.rs b/virtmanager/src/crosvm.rs
index bef9982..5e6f658 100644
--- a/virtmanager/src/crosvm.rs
+++ b/virtmanager/src/crosvm.rs
@@ -14,12 +14,17 @@
//! Functions for running instances of `crosvm`.
+use crate::aidl::VirtualMachineCallbacks;
use crate::config::VmConfig;
use crate::Cid;
use anyhow::Error;
-use log::{debug, error, info};
+use log::{error, info};
+use shared_child::SharedChild;
use std::fs::File;
-use std::process::{Child, Command};
+use std::process::Command;
+use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::Arc;
+use std::thread;
const CROSVM_PATH: &str = "/apex/com.android.virt/bin/crosvm";
@@ -27,7 +32,7 @@
#[derive(Debug)]
pub struct VmInstance {
/// The crosvm child process.
- child: Child,
+ child: SharedChild,
/// The CID assigned to the VM for vsock communication.
pub cid: Cid,
/// The UID of the process which requested the VM.
@@ -37,18 +42,30 @@
/// The PID of the process which requested the VM. Note that this process may no longer exist
/// and the PID may have been reused for a different process, so this should not be trusted.
pub requester_pid: i32,
+ /// Whether the VM is still running.
+ running: AtomicBool,
+ /// Callbacks to clients of the VM.
+ pub callbacks: VirtualMachineCallbacks,
}
impl VmInstance {
/// Create a new `VmInstance` for the given process.
fn new(
- child: Child,
+ child: SharedChild,
cid: Cid,
requester_uid: u32,
requester_sid: Option<String>,
requester_pid: i32,
) -> VmInstance {
- VmInstance { child, cid, requester_uid, requester_sid, requester_pid }
+ VmInstance {
+ child,
+ cid,
+ requester_uid,
+ requester_sid,
+ requester_pid,
+ running: AtomicBool::new(true),
+ callbacks: Default::default(),
+ }
}
/// Start an instance of `crosvm` to manage a new VM. The `crosvm` instance will be killed when
@@ -60,29 +77,46 @@
requester_uid: u32,
requester_sid: Option<String>,
requester_pid: i32,
- ) -> Result<VmInstance, Error> {
+ ) -> Result<Arc<VmInstance>, Error> {
let child = run_vm(config, cid, log_fd)?;
- Ok(VmInstance::new(child, cid, requester_uid, requester_sid, requester_pid))
- }
-}
+ let instance =
+ Arc::new(VmInstance::new(child, cid, requester_uid, requester_sid, requester_pid));
-impl Drop for VmInstance {
- fn drop(&mut self) {
- debug!("Dropping {:?}", self);
+ let instance_clone = instance.clone();
+ thread::spawn(move || {
+ instance_clone.monitor();
+ });
+
+ Ok(instance)
+ }
+
+ /// Block until the crosvm child process exits (or waiting on it fails), then mark the VM as no
+ /// longer running and notify any registered callbacks.
+ fn monitor(&self) {
+ match self.child.wait() {
+ Err(e) => error!("Error waiting for crosvm instance to die: {}", e),
+ Ok(status) => info!("crosvm exited with status {}", status),
+ }
+ self.running.store(false, Ordering::Release);
+ self.callbacks.callback_on_died(self.cid);
+ }
+
+ /// Return whether `crosvm` is still running the VM, i.e. `monitor` has not yet seen it finish.
+ pub fn running(&self) -> bool {
+ self.running.load(Ordering::Acquire)
+ }
+
+ /// Kill the crosvm instance. This does not wait for it; the thread spawned by `start` does that.
+ pub fn kill(&self) {
// TODO: Talk to crosvm to shutdown cleanly.
if let Err(e) = self.child.kill() {
error!("Error killing crosvm instance: {}", e);
}
- // We need to wait on the process after killing it to avoid zombies.
- match self.child.wait() {
- Err(e) => error!("Error waiting for crosvm instance to die: {}", e),
- Ok(status) => info!("Crosvm exited with status {}", status),
- }
}
}
/// Start an instance of `crosvm` to manage a new VM.
-fn run_vm(config: &VmConfig, cid: Cid, log_fd: Option<File>) -> Result<Child, Error> {
+fn run_vm(config: &VmConfig, cid: Cid, log_fd: Option<File>) -> Result<SharedChild, Error> {
config.validate()?;
let mut command = Command::new(CROSVM_PATH);
@@ -110,6 +144,5 @@
command.arg(kernel);
}
info!("Running {:?}", command);
- // TODO: Monitor child process, and remove from VM map if it dies.
- Ok(command.spawn()?)
+ Ok(SharedChild::spawn(&mut command)?)
}