virtualizationservice: Fix race in monitor_vm_exit
monitor_vm_exit() runs in a detached thread, waiting for an instance of
crosvm to exit. When it does, it cleans up after it and submits
telemetry to statsd.
However, there is currently no guarantee that the thread will get to
complete its task. If exit() is called, eg. because there are no
remaining clients of the service, the thread is interrupted and
terminated together with the rest of the process. Whether it completes
its job or not therefore can depend on the delay between the last
VirtualMachine reference is dropped and LazyServiceGuard terminating
the process.
Fix this race by joining the monitor_vm_exit as the VirtualMachine
object is dropped. This prevents the process from exiting until the
thread has completed.
Bug: 245727626
Test: atest -p packages/modules/Virtualization:avf-presubmit
Change-Id: Iefbd4181a8ca95bbb3a4fed0f85d9929d081ff73
diff --git a/virtualizationservice/src/crosvm.rs b/virtualizationservice/src/crosvm.rs
index 5125f19..2c6e370 100644
--- a/virtualizationservice/src/crosvm.rs
+++ b/virtualizationservice/src/crosvm.rs
@@ -39,7 +39,7 @@
use std::process::{Command, ExitStatus};
use std::sync::{Arc, Condvar, Mutex};
use std::time::{Duration, SystemTime};
-use std::thread;
+use std::thread::{self, JoinHandle};
use android_system_virtualizationservice::aidl::android::system::virtualizationservice::{
DeathReason::DeathReason,
MemoryTrimLevel::MemoryTrimLevel,
@@ -140,6 +140,8 @@
Running {
/// The crosvm child process.
child: Arc<SharedChild>,
+ /// The thread waiting for crosvm to finish.
+ monitor_vm_exit_thread: Option<JoinHandle<()>>,
},
/// The VM died or was killed.
Dead,
@@ -187,9 +189,9 @@
let child_clone = child.clone();
let instance_clone = instance.clone();
- thread::spawn(move || {
+ let monitor_vm_exit_thread = Some(thread::spawn(move || {
instance_clone.monitor_vm_exit(child_clone, failure_pipe_read);
- });
+ }));
if detect_hangup {
let child_clone = child.clone();
@@ -199,7 +201,7 @@
}
// If it started correctly, update the state.
- *self = VmState::Running { child };
+ *self = VmState::Running { child, monitor_vm_exit_thread };
Ok(())
} else {
*self = state;
@@ -465,19 +467,24 @@
/// Kills the crosvm instance, if it is running.
pub fn kill(&self) -> Result<(), Error> {
- let vm_state = &*self.vm_state.lock().unwrap();
- if let VmState::Running { child } = vm_state {
- let id = child.id();
- debug!("Killing crosvm({})", id);
- // TODO: Talk to crosvm to shutdown cleanly.
- if let Err(e) = child.kill() {
- bail!("Error killing crosvm({}) instance: {}", id, e);
+ let monitor_vm_exit_thread = {
+ let vm_state = &mut *self.vm_state.lock().unwrap();
+ if let VmState::Running { child, monitor_vm_exit_thread } = vm_state {
+ let id = child.id();
+ debug!("Killing crosvm({})", id);
+ // TODO: Talk to crosvm to shutdown cleanly.
+ child.kill().with_context(|| format!("Error killing crosvm({id}) instance"))?;
+ monitor_vm_exit_thread.take()
} else {
- Ok(())
+ bail!("VM is not running")
}
- } else {
- bail!("VM is not running")
- }
+ };
+
+ // Wait for monitor_vm_exit() to finish. Must release vm_state lock
+ // first, as monitor_vm_exit() takes it as well.
+ monitor_vm_exit_thread.map(JoinHandle::join);
+
+ Ok(())
}
/// Responds to memory-trimming notifications by inflating the virtio