Retry if the VM's RpcServer cannot be started

If virtualizationservice cannot start an RpcServer on the port==CID,
request a new CID and retry. This is a precaution in case a port is
occupied by another process.

Bug: 245727626
Test: atest -p packages/modules/Virtualization:avf-presubmit
Change-Id: Iacbc5886833abf0b90b90583c5aeef6d66d215ad
diff --git a/virtualizationservice/src/aidl.rs b/virtualizationservice/src/aidl.rs
index 297cf68..040c0d8 100644
--- a/virtualizationservice/src/aidl.rs
+++ b/virtualizationservice/src/aidl.rs
@@ -16,7 +16,7 @@
 
 use crate::atom::{write_vm_booted_stats, write_vm_creation_stats};
 use crate::composite::make_composite_image;
-use crate::crosvm::{CrosvmConfig, DiskFile, PayloadState, VmInstance, VmState};
+use crate::crosvm::{CrosvmConfig, DiskFile, PayloadState, VmContext, VmInstance, VmState};
 use crate::payload::{add_microdroid_payload_images, add_microdroid_system_images};
 use crate::selinux::{getfilecon, SeContext};
 use android_os_permissions_aidl::aidl::android::os::IPermissionController;
@@ -165,7 +165,9 @@
         self.held_cids.retain(|_, cid| cid.strong_count() > 0);
 
         // Start trying to find a CID from the last used CID + 1. This ensures
-        // that we do not eagerly recycle CIDs, which makes debugging easier.
+        // that we do not eagerly recycle CIDs. It makes debugging easier but
+        // also means that retrying to allocate a CID, eg. because it is
+        // erroneously occupied by a process, will not recycle the same CID.
         let last_cid_prop =
             system_properties::read(SYSPROP_LAST_CID)?.and_then(|val| match val.parse::<Cid>() {
                 Ok(num) => {
@@ -451,6 +453,30 @@
         VirtualizationService { global_service, state: Default::default() }
     }
 
+    fn create_vm_context(&self) -> Result<(VmContext, Cid)> {
+        const NUM_ATTEMPTS: usize = 5;
+
+        for _ in 0..NUM_ATTEMPTS {
+            let global_context = self.global_service.allocateGlobalVmContext()?;
+            let cid = global_context.getCid()? as Cid;
+            let service = VirtualMachineService::new_binder(self.state.clone(), cid).as_binder();
+
+            // Start VM service listening for connections from the new CID on port=CID.
+            // TODO(b/245727626): Only accept connections from the new VM.
+            let port = cid;
+            match RpcServer::new_vsock(service, port) {
+                Ok(vm_server) => {
+                    vm_server.start();
+                    return Ok((VmContext::new(global_context, vm_server), cid));
+                }
+                Err(err) => {
+                    warn!("Could not start RpcServer on port {}: {}", port, err);
+                }
+            }
+        }
+        bail!("Too many attempts to create VM context failed.");
+    }
+
     fn create_vm_internal(
         &self,
         config: &VirtualMachineConfig,
@@ -474,8 +500,13 @@
             check_use_custom_virtual_machine()?;
         }
 
-        let vm_context = self.global_service.allocateGlobalVmContext()?;
-        let cid = vm_context.getCid()? as Cid;
+        let (vm_context, cid) = self.create_vm_context().map_err(|e| {
+            error!("Failed to create VmContext: {:?}", e);
+            Status::new_service_specific_error_str(
+                -1,
+                Some(format!("Failed to create VmContext: {:?}", e)),
+            )
+        })?;
 
         let state = &mut *self.state.lock().unwrap();
         let console_fd = console_fd.map(clone_file).transpose()?;
@@ -577,18 +608,6 @@
             )
         })?;
 
-        // Start VM service listening for connections from the new CID on port=CID.
-        // TODO(b/245727626): Only accept connections from the new VM.
-        let vm_service = VirtualMachineService::new_binder(self.state.clone(), cid).as_binder();
-        let vm_server = RpcServer::new_vsock(vm_service, cid).map_err(|e| {
-            error!("Failed to start VirtualMachineService: {:?}", e);
-            Status::new_service_specific_error_str(
-                -1,
-                Some(format!("Failed to start VirtualMachineService: {:?}", e)),
-            )
-        })?;
-        vm_server.start();
-
         // Actually start the VM.
         let crosvm_config = CrosvmConfig {
             cid,
@@ -616,7 +635,6 @@
                 requester_uid,
                 requester_debug_pid,
                 vm_context,
-                vm_server,
             )
             .map_err(|e| {
                 error!("Failed to create VM with config {:?}: {:?}", config, e);
diff --git a/virtualizationservice/src/crosvm.rs b/virtualizationservice/src/crosvm.rs
index 33ebd8b..76e18db 100644
--- a/virtualizationservice/src/crosvm.rs
+++ b/virtualizationservice/src/crosvm.rs
@@ -199,17 +199,30 @@
     }
 }
 
+/// Internal struct that holds the handles to globally unique resources of a VM.
+#[derive(Debug)]
+pub struct VmContext {
+    #[allow(dead_code)] // Keeps the global context alive
+    global_context: Strong<dyn IGlobalVmContext>,
+    #[allow(dead_code)] // Keeps the server alive
+    vm_server: RpcServer,
+}
+
+impl VmContext {
+    /// Construct new VmContext.
+    pub fn new(global_context: Strong<dyn IGlobalVmContext>, vm_server: RpcServer) -> VmContext {
+        VmContext { global_context, vm_server }
+    }
+}
+
 /// Information about a particular instance of a VM which may be running.
 #[derive(Debug)]
 pub struct VmInstance {
     /// The current state of the VM.
     pub vm_state: Mutex<VmState>,
-    /// Handle to global resources allocated for this VM.
-    #[allow(dead_code)] // The handle is never read, we only need to hold it.
-    vm_context: Strong<dyn IGlobalVmContext>,
-    /// Handle to global resources allocated for this VM.
-    #[allow(dead_code)] // The handle is never read, we only need to hold it.
-    vm_server: RpcServer,
+    /// Global resources allocated for this VM.
+    #[allow(dead_code)] // Keeps the context alive
+    vm_context: VmContext,
     /// The CID assigned to the VM for vsock communication.
     pub cid: Cid,
     /// The name of the VM.
@@ -242,8 +255,7 @@
         temporary_directory: PathBuf,
         requester_uid: u32,
         requester_debug_pid: i32,
-        vm_context: Strong<dyn IGlobalVmContext>,
-        vm_server: RpcServer,
+        vm_context: VmContext,
     ) -> Result<VmInstance, Error> {
         validate_config(&config)?;
         let cid = config.cid;
@@ -252,7 +264,6 @@
         Ok(VmInstance {
             vm_state: Mutex::new(VmState::NotStarted { config }),
             vm_context,
-            vm_server,
             cid,
             name,
             protected,