Retry if the VM's RpcServer cannot be started
If virtualizationservice cannot start an RpcServer on the vsock port
equal to the VM's CID, request a new CID and retry. This is a
precaution in case the port is already occupied by another process.
Bug: 245727626
Test: atest -p packages/modules/Virtualization:avf-presubmit
Change-Id: Iacbc5886833abf0b90b90583c5aeef6d66d215ad
diff --git a/virtualizationservice/src/aidl.rs b/virtualizationservice/src/aidl.rs
index 297cf68..040c0d8 100644
--- a/virtualizationservice/src/aidl.rs
+++ b/virtualizationservice/src/aidl.rs
@@ -16,7 +16,7 @@
use crate::atom::{write_vm_booted_stats, write_vm_creation_stats};
use crate::composite::make_composite_image;
-use crate::crosvm::{CrosvmConfig, DiskFile, PayloadState, VmInstance, VmState};
+use crate::crosvm::{CrosvmConfig, DiskFile, PayloadState, VmContext, VmInstance, VmState};
use crate::payload::{add_microdroid_payload_images, add_microdroid_system_images};
use crate::selinux::{getfilecon, SeContext};
use android_os_permissions_aidl::aidl::android::os::IPermissionController;
@@ -165,7 +165,9 @@
self.held_cids.retain(|_, cid| cid.strong_count() > 0);
// Start trying to find a CID from the last used CID + 1. This ensures
- // that we do not eagerly recycle CIDs, which makes debugging easier.
+ // that we do not eagerly recycle CIDs. This makes debugging easier but
+ // also means that retrying CID allocation, e.g. because the CID's port
+ // is erroneously occupied by another process, will not reuse the same CID.
let last_cid_prop =
system_properties::read(SYSPROP_LAST_CID)?.and_then(|val| match val.parse::<Cid>() {
Ok(num) => {
@@ -451,6 +453,30 @@
VirtualizationService { global_service, state: Default::default() }
}
+ fn create_vm_context(&self) -> Result<(VmContext, Cid)> {
+ const NUM_ATTEMPTS: usize = 5;
+
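+ // Each attempt allocates a fresh CID; because CIDs are not eagerly
+ // recycled, a retry binds the RpcServer to a different vsock port.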
+ for _ in 0..NUM_ATTEMPTS {
+ let global_context = self.global_service.allocateGlobalVmContext()?;
+ let cid = global_context.getCid()? as Cid;
+ let service = VirtualMachineService::new_binder(self.state.clone(), cid).as_binder();
+
+ // Start VM service listening for connections from the new CID on port=CID.
+ // TODO(b/245727626): Only accept connections from the new VM.
+ let port = cid;
+ match RpcServer::new_vsock(service, port) {
+ Ok(vm_server) => {
+ vm_server.start();
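+ // Bundle the running server with its global context so that both
+ // are kept alive for the lifetime of the VM.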
+ return Ok((VmContext::new(global_context, vm_server), cid));
+ }
+ Err(err) => {
+ warn!("Could not start RpcServer on port {}: {}", port, err);
+ }
+ }
+ }
+ bail!("Failed to create VM context after {} attempts", NUM_ATTEMPTS);
+ }
+
fn create_vm_internal(
&self,
config: &VirtualMachineConfig,
@@ -474,8 +500,13 @@
check_use_custom_virtual_machine()?;
}
- let vm_context = self.global_service.allocateGlobalVmContext()?;
- let cid = vm_context.getCid()? as Cid;
+ let (vm_context, cid) = self.create_vm_context().map_err(|e| {
+ error!("Failed to create VmContext: {:?}", e);
+ Status::new_service_specific_error_str(
+ -1,
+ Some(format!("Failed to create VmContext: {:?}", e)),
+ )
+ })?;
let state = &mut *self.state.lock().unwrap();
let console_fd = console_fd.map(clone_file).transpose()?;
@@ -577,18 +608,6 @@
)
})?;
- // Start VM service listening for connections from the new CID on port=CID.
- // TODO(b/245727626): Only accept connections from the new VM.
- let vm_service = VirtualMachineService::new_binder(self.state.clone(), cid).as_binder();
- let vm_server = RpcServer::new_vsock(vm_service, cid).map_err(|e| {
- error!("Failed to start VirtualMachineService: {:?}", e);
- Status::new_service_specific_error_str(
- -1,
- Some(format!("Failed to start VirtualMachineService: {:?}", e)),
- )
- })?;
- vm_server.start();
-
// Actually start the VM.
let crosvm_config = CrosvmConfig {
cid,
@@ -616,7 +635,6 @@
requester_uid,
requester_debug_pid,
vm_context,
- vm_server,
)
.map_err(|e| {
error!("Failed to create VM with config {:?}: {:?}", config, e);
diff --git a/virtualizationservice/src/crosvm.rs b/virtualizationservice/src/crosvm.rs
index 33ebd8b..76e18db 100644
--- a/virtualizationservice/src/crosvm.rs
+++ b/virtualizationservice/src/crosvm.rs
@@ -199,17 +199,30 @@
}
}
+/// Internal struct that holds the handles to globally unique resources of a VM.
+#[derive(Debug)]
+pub struct VmContext {
+ #[allow(dead_code)] // Keeps the global context alive
+ global_context: Strong<dyn IGlobalVmContext>,
+ #[allow(dead_code)] // Keeps the server alive
+ vm_server: RpcServer,
+}
+
+impl VmContext {
+ /// Constructs a new VmContext.
+ pub fn new(global_context: Strong<dyn IGlobalVmContext>, vm_server: RpcServer) -> VmContext {
+ VmContext { global_context, vm_server }
+ }
+}
+
/// Information about a particular instance of a VM which may be running.
#[derive(Debug)]
pub struct VmInstance {
/// The current state of the VM.
pub vm_state: Mutex<VmState>,
- /// Handle to global resources allocated for this VM.
- #[allow(dead_code)] // The handle is never read, we only need to hold it.
- vm_context: Strong<dyn IGlobalVmContext>,
- /// Handle to global resources allocated for this VM.
- #[allow(dead_code)] // The handle is never read, we only need to hold it.
- vm_server: RpcServer,
+ /// Global resources allocated for this VM.
+ #[allow(dead_code)] // Keeps the context alive
+ vm_context: VmContext,
/// The CID assigned to the VM for vsock communication.
pub cid: Cid,
/// The name of the VM.
@@ -242,8 +255,7 @@
temporary_directory: PathBuf,
requester_uid: u32,
requester_debug_pid: i32,
- vm_context: Strong<dyn IGlobalVmContext>,
- vm_server: RpcServer,
+ vm_context: VmContext,
) -> Result<VmInstance, Error> {
validate_config(&config)?;
let cid = config.cid;
@@ -252,7 +264,6 @@
Ok(VmInstance {
vm_state: Mutex::new(VmState::NotStarted { config }),
vm_context,
- vm_server,
cid,
name,
protected,