Merge "Retry mechanism for VM secrets" into main
diff --git a/guest/microdroid_manager/src/vm_secret.rs b/guest/microdroid_manager/src/vm_secret.rs
index f031859..674f010 100644
--- a/guest/microdroid_manager/src/vm_secret.rs
+++ b/guest/microdroid_manager/src/vm_secret.rs
@@ -35,6 +35,9 @@
     StoreSecretRequest, GetSecretResponse, GetSecretRequest};
 use secretkeeper_comm::data_types::error::SecretkeeperError;
 use std::fs;
+use std::thread;
+use rand::Rng;
+use std::time::Duration;
 use zeroize::Zeroizing;
 use std::sync::Mutex;
 use std::sync::Arc;
@@ -63,6 +66,8 @@
     0x55, 0xF8, 0x08, 0x23, 0x81, 0x5F, 0xF5, 0x16, 0x20, 0x3E, 0xBE, 0xBA, 0xB7, 0xA8, 0x43, 0x92,
 ];
 
+const BACKOFF_SK_ACCESS_MS: u64 = 100;
+
 pub enum VmSecret {
     // V2 secrets are derived from 2 independently secured secrets:
     //      1. Secretkeeper protected secrets (skp secret).
@@ -118,15 +123,19 @@
             .map_err(|e| anyhow!("Failed to build a sealing_policy: {e}"))?;
         let session = SkVmSession::new(vm_service, &explicit_dice, policy)?;
         let mut skp_secret = Zeroizing::new([0u8; SECRET_SIZE]);
-        if let Some(secret) = session.get_secret(id)? {
-            *skp_secret = secret;
-            *state = VmInstanceState::PreviouslySeen;
-        } else {
-            log::warn!("No entry found in Secretkeeper for this VM instance, creating new secret.");
-            *skp_secret = rand::random();
-            session.store_secret(id, skp_secret.clone())?;
-            *state = VmInstanceState::NewlyCreated;
-        }
+        get_or_create_sk_secret(&session, id, &mut skp_secret, state).or_else(|e| {
+            // TODO(b/399304956): Secretkeeper rejects requests when overloaded with
+            // connections from multiple clients. Backoff & retry again, hoping it is
+            // less busy then. Secretkeeper changes are required for more robust solutions.
+            log::info!(
+                "get_or_create_sk_secret failed with {e:?}. Refreshing connection & retrying!"
+            );
+            let mut rng = rand::thread_rng();
+            let backoff = rng.gen_range(BACKOFF_SK_ACCESS_MS..2 * BACKOFF_SK_ACCESS_MS);
+            thread::sleep(Duration::from_millis(backoff));
+            session.refresh()?;
+            get_or_create_sk_secret(&session, id, &mut skp_secret, state)
+        })?;
         Ok(Self::V2 {
             instance_id: id,
             dice_artifacts: explicit_dice,
@@ -283,8 +292,6 @@
     sealing_policy: Vec<u8>,
 }
 
-// TODO(b/378911776): This get_secret/store_secret fails on expired session.
-// Introduce retry after refreshing the session
 impl SkVmSession {
     fn new(
         vm_service: &Strong<dyn IVirtualMachineService>,
@@ -366,3 +373,21 @@
             ))
         })?)
 }
+
+fn get_or_create_sk_secret(
+    session: &SkVmSession,
+    id: [u8; ID_SIZE],
+    skp_secret: &mut Zeroizing<[u8; SECRET_SIZE]>,
+    state: &mut VmInstanceState,
+) -> Result<()> {
+    if let Some(secret) = session.get_secret(id)? {
+        **skp_secret = secret;
+        *state = VmInstanceState::PreviouslySeen;
+    } else {
+        log::warn!("No entry found in Secretkeeper for this VM instance, creating new secret.");
+        **skp_secret = rand::random();
+        session.store_secret(id, skp_secret.clone())?;
+        *state = VmInstanceState::NewlyCreated;
+    }
+    Ok(())
+}