Merge "Await boot_completed before allowing GC" into main
diff --git a/keystore2/src/database.rs b/keystore2/src/database.rs
index 66b123e..626a1c0 100644
--- a/keystore2/src/database.rs
+++ b/keystore2/src/database.rs
@@ -2440,8 +2440,10 @@
             .context("Trying to delete grants.")?;
         // The associated blobentry rows are not immediately deleted when the owning keyentry is
         // removed, because a KeyMint `deleteKey()` invocation is needed (specifically for the
-        // `KEY_BLOB`).  Mark the affected rows with `state=Orphaned` so a subsequent garbage
-        // collection can do this.
+        // `KEY_BLOB`).  That should not be done from within the database transaction.  Also, calls
+        // to `deleteKey()` need to be delayed until the boot has completed, to avoid making
+        // permanent changes during an OTA before the point of no return.  Mark the affected rows
+        // with `state=Orphaned` so a subsequent garbage collection can do the `deleteKey()`.
         tx.execute(
             "UPDATE persistent.blobentry SET state = ? WHERE keyentryid = ?",
             params![BlobState::Orphaned, key_id],
diff --git a/keystore2/src/gc.rs b/keystore2/src/gc.rs
index f2341e3..9741671 100644
--- a/keystore2/src/gc.rs
+++ b/keystore2/src/gc.rs
@@ -22,6 +22,7 @@
 use crate::{
     async_task,
     database::{KeystoreDB, SupersededBlob, Uuid},
+    globals,
     super_key::SuperKeyManager,
 };
 use anyhow::{Context, Result};
@@ -135,6 +136,17 @@
     /// Processes one key and then schedules another attempt until it runs out of blobs to delete.
     fn step(&mut self) {
         self.notified.store(0, Ordering::Relaxed);
+        if !globals::boot_completed() {
+            // Garbage collection involves a operation (`IKeyMintDevice::deleteKey()`) that cannot
+            // be rolled back in some cases (specifically, when the key is rollback-resistant), even
+            // if the Keystore database is restored to the version of an earlier userdata filesystem
+            // checkpoint.
+            //
+            // This means that we should not perform GC until boot has fully completed, and any
+            // in-progress OTA is definitely not going to be rolled back.
+            log::info!("skip GC as boot not completed");
+            return;
+        }
         if let Err(e) = self.process_one_key() {
             log::error!("Error trying to delete blob entry. {:?}", e);
         }
diff --git a/keystore2/src/globals.rs b/keystore2/src/globals.rs
index 3b9c631..9ee2a1e 100644
--- a/keystore2/src/globals.rs
+++ b/keystore2/src/globals.rs
@@ -46,7 +46,11 @@
 use anyhow::{Context, Result};
 use binder::FromIBinder;
 use binder::{get_declared_instances, is_declared};
-use std::sync::{Arc, LazyLock, Mutex, RwLock};
+use rustutils::system_properties::PropertyWatcher;
+use std::sync::{
+    atomic::{AtomicBool, Ordering},
+    Arc, LazyLock, Mutex, RwLock,
+};
 use std::{cell::RefCell, sync::Once};
 use std::{collections::HashMap, path::Path, path::PathBuf};
 
@@ -449,3 +453,40 @@
     .ok_or(Error::Km(ErrorCode::HARDWARE_TYPE_UNAVAILABLE))
     .context(ks_err!("Failed to get rpc for sec level {:?}", *security_level))
 }
+
+/// Whether boot is complete.
+static BOOT_COMPLETED: AtomicBool = AtomicBool::new(false);
+
+/// Indicate whether boot is complete.
+///
+/// This in turn indicates whether it is safe to make permanent changes to state.
+pub fn boot_completed() -> bool {
+    BOOT_COMPLETED.load(Ordering::Acquire)
+}
+
+/// Monitor the system property for boot complete.  This blocks and so needs to be run in a separate
+/// thread.
+pub fn await_boot_completed() {
+    // Use a fairly long watchdog timeout of 5 minutes here. This blocks until the device
+    // boots, which on a very slow device (e.g., emulator for a non-native architecture) can
+    // take minutes. Blocking here would be unexpected only if it never finishes.
+    let _wp = wd::watch_millis("await_boot_completed", 300_000);
+    log::info!("monitoring for sys.boot_completed=1");
+    while let Err(e) = watch_for_boot_completed() {
+        log::error!("failed to watch for boot_completed: {e:?}");
+        std::thread::sleep(std::time::Duration::from_secs(5));
+    }
+
+    BOOT_COMPLETED.store(true, Ordering::Release);
+    log::info!("wait_for_boot_completed done, triggering GC");
+
+    // Garbage collection may have been skipped until now, so trigger a check.
+    GC.notify_gc();
+}
+
+fn watch_for_boot_completed() -> Result<()> {
+    let mut w = PropertyWatcher::new("sys.boot_completed")
+        .context(ks_err!("PropertyWatcher::new failed"))?;
+    w.wait_for_value("1", None).context(ks_err!("Failed to wait for sys.boot_completed"))?;
+    Ok(())
+}
diff --git a/keystore2/src/keystore2_main.rs b/keystore2/src/keystore2_main.rs
index 178b36c..008e6fe 100644
--- a/keystore2/src/keystore2_main.rs
+++ b/keystore2/src/keystore2_main.rs
@@ -93,6 +93,7 @@
 
     ENFORCEMENTS.install_confirmation_token_receiver(confirmation_token_receiver);
 
+    std::thread::spawn(keystore2::globals::await_boot_completed);
     entropy::register_feeder();
     shared_secret_negotiation::perform_shared_secret_negotiation();