Shut down CompOS VM after compilation completes
Add a method to CompOsInstance to consume the instance and shut it
down cleanly, allowing logs to be written. Call this when compilation
ends, before calling back to our client (since they might then drop
their reference to us, which could tear everything down prematurely).
I also created a method to combind the calls to quit() and
wait_for_shutdown() to reduce duplication.
I also increased the shutdown timeout, since my initial value doesn't
seem to be enough for CompOS (see b/236588647#comment38).
Bug: 236581575
Test: composd_cmd test-compile, observe logs
Change-Id: I34a227db0e377cb9aded57461379e17d0574ea0c
diff --git a/compos/common/compos_client.rs b/compos/common/compos_client.rs
index 30c55b3..cd1ece4 100644
--- a/compos/common/compos_client.rs
+++ b/compos/common/compos_client.rs
@@ -148,11 +148,19 @@
self.0.connect_service(COMPOS_VSOCK_PORT).context("Connecting to CompOS service")
}
+ /// Shut down the VM cleanly, by sending a quit request to the service, giving time for any
+ /// relevant logs to be written.
+ pub fn shutdown(self, service: Strong<dyn ICompOsService>) {
+ info!("Requesting CompOS VM to shutdown");
+ let _ = service.quit(); // If this fails, the VM is probably dying anyway
+ self.wait_for_shutdown();
+ }
+
/// Wait for the instance to shut down. If it fails to shutdown within a reasonable time the
/// instance is dropped, which forcibly terminates it.
/// This should only be called when the instance has been requested to quit, or we believe that
/// it is already in the process of exiting due to some failure.
- pub fn wait_for_shutdown(self) {
+ fn wait_for_shutdown(self) {
let death_reason = self.0.wait_for_death_with_timeout(TIMEOUTS.vm_max_time_to_exit);
match death_reason {
Some(vmclient::DeathReason::Shutdown) => info!("VM has exited normally"),
diff --git a/compos/common/timeouts.rs b/compos/common/timeouts.rs
index 952be0a..7bd7679 100644
--- a/compos/common/timeouts.rs
+++ b/compos/common/timeouts.rs
@@ -46,7 +46,7 @@
// Note: the source of truth for this odrefresh timeout is art/odrefresh/odrefresh.cc.
odrefresh_max_execution_time: Duration::from_secs(300),
vm_max_time_to_ready: Duration::from_secs(15),
- vm_max_time_to_exit: Duration::from_secs(3),
+ vm_max_time_to_exit: Duration::from_secs(5),
};
/// The timeouts that we use when running under nested virtualization.
@@ -54,5 +54,5 @@
// Note: the source of truth for this odrefresh timeout is art/odrefresh/odrefresh.cc.
odrefresh_max_execution_time: Duration::from_secs(480),
vm_max_time_to_ready: Duration::from_secs(120),
- vm_max_time_to_exit: Duration::from_secs(10),
+ vm_max_time_to_exit: Duration::from_secs(20),
};
diff --git a/compos/composd/src/instance_starter.rs b/compos/composd/src/instance_starter.rs
index 6e6253e..e51db13 100644
--- a/compos/composd/src/instance_starter.rs
+++ b/compos/composd/src/instance_starter.rs
@@ -42,6 +42,14 @@
pub fn get_service(&self) -> Strong<dyn ICompOsService> {
self.service.clone()
}
+
+ // Attempt to shut down the VM cleanly, giving time for any relevant logs to be written.
+ pub fn shutdown(self) -> LazyServiceGuard {
+ self.vm_instance.shutdown(self.service);
+ // Return the guard to the caller, since we might be terminated at any point after it is
+ // dropped, and there might still be things to do.
+ self.lazy_service_guard
+ }
}
pub struct InstanceStarter {
diff --git a/compos/composd/src/odrefresh_task.rs b/compos/composd/src/odrefresh_task.rs
index 51e866f..07ede3c 100644
--- a/compos/composd/src/odrefresh_task.rs
+++ b/compos/composd/src/odrefresh_task.rs
@@ -49,7 +49,9 @@
impl ICompilationTask for OdrefreshTask {
fn cancel(&self) -> BinderResult<()> {
let task = self.take();
- // Drop the VM, which should end compilation - and cause our thread to exit
+ // Drop the VM, which should end compilation - and cause our thread to exit.
+ // Note that we don't do a graceful shutdown here; we've been asked to give up our resources
+ // ASAP, and the VM has not failed so we don't need to ensure VM logs are written.
drop(task);
Ok(())
}
@@ -95,27 +97,33 @@
let task = self.take();
// We don't do the callback if cancel has already happened.
- if let Some(task) = task {
+ if let Some(RunningTask { callback, comp_os }) = task {
+ // If we are the last owners of the instance (and we probably are), then we
+ // shut it down now, so that logs get written.
+ let comp_os = Arc::try_unwrap(comp_os);
+ // Make sure we keep our service alive until we have called the callback.
+ let lazy_service_guard = comp_os.map(CompOsInstance::shutdown);
+
let result = match exit_code {
Ok(ExitCode::CompilationSuccess) => {
info!("CompilationSuccess");
- task.callback.onSuccess()
+ callback.onSuccess()
}
Ok(exit_code) => {
let message = format!("Unexpected odrefresh result: {:?}", exit_code);
error!("{}", message);
- task.callback
- .onFailure(FailureReason::UnexpectedCompilationResult, &message)
+ callback.onFailure(FailureReason::UnexpectedCompilationResult, &message)
}
Err(e) => {
let message = format!("Running odrefresh failed: {:?}", e);
error!("{}", message);
- task.callback.onFailure(FailureReason::CompilationFailed, &message)
+ callback.onFailure(FailureReason::CompilationFailed, &message)
}
};
if let Err(e) = result {
warn!("Failed to deliver callback: {:?}", e);
}
+ drop(lazy_service_guard);
}
});
}
diff --git a/compos/src/compsvc.rs b/compos/src/compsvc.rs
index 9fa68d6..5d58221 100644
--- a/compos/src/compsvc.rs
+++ b/compos/src/compsvc.rs
@@ -20,7 +20,7 @@
use anyhow::{bail, Context, Result};
use binder_common::new_binder_exception;
-use log::error;
+use log::{error, info};
use rustutils::system_properties;
use std::default::Default;
use std::fs::read_dir;
@@ -153,8 +153,8 @@
}
fn quit(&self) -> BinderResult<()> {
- // TODO(b/236581575) Consider shutting down the binder server a bit more gracefully.
// When our process exits, Microdroid will shut down the VM.
+ info!("Received quit request, exiting");
std::process::exit(0);
}
}
diff --git a/compos/verify/verify.rs b/compos/verify/verify.rs
index e6848c7..224cde7 100644
--- a/compos/verify/verify.rs
+++ b/compos/verify/verify.rs
@@ -110,9 +110,7 @@
let service = vm_instance.connect_service()?;
let public_key = service.getPublicKey().context("Getting public key");
- // Shut down the VM cleanly, giving time for any relevant logs to be written
- let _ = service.quit(); // If this fails, the VM is probably dying anyway
- vm_instance.wait_for_shutdown();
+ vm_instance.shutdown(service);
if !compos_verify_native::verify(&public_key?, &signature, &info) {
bail!("Signature verification failed");