[refactor] Split watchdog into a standalone library for reuse
This simplifies the later task of turning rkpd_client into an
independent library.
Test: atest keystore2_test
Bug: 241428146
Change-Id: I2834c9be9f5100d52829e6392f0dd48e7c76beb1
diff --git a/keystore2/src/lib.rs b/keystore2/src/lib.rs
index 3233017..8c08d3e 100644
--- a/keystore2/src/lib.rs
+++ b/keystore2/src/lib.rs
@@ -50,6 +50,4 @@
mod km_compat;
mod super_key;
mod sw_keyblob;
-
-#[cfg(feature = "watchdog")]
-mod watchdog;
+mod watchdog_helper;
diff --git a/keystore2/src/rkpd_client.rs b/keystore2/src/rkpd_client.rs
index 7b4131d..5009278 100644
--- a/keystore2/src/rkpd_client.rs
+++ b/keystore2/src/rkpd_client.rs
@@ -17,7 +17,7 @@
use crate::error::{map_binder_status_code, Error, ResponseCode};
use crate::globals::get_remotely_provisioned_component_name;
use crate::ks_err;
-use crate::utils::watchdog as wd;
+use crate::watchdog_helper::watchdog as wd;
use android_hardware_security_keymint::aidl::android::hardware::security::keymint::SecurityLevel::SecurityLevel;
use android_security_rkp_aidl::aidl::android::security::rkp::{
IGetKeyCallback::BnGetKeyCallback, IGetKeyCallback::ErrorCode::ErrorCode as GetKeyErrorCode,
diff --git a/keystore2/src/utils.rs b/keystore2/src/utils.rs
index 80aa7c3..f028491 100644
--- a/keystore2/src/utils.rs
+++ b/keystore2/src/utils.rs
@@ -20,6 +20,7 @@
use crate::ks_err;
use crate::permission;
use crate::permission::{KeyPerm, KeyPermSet, KeystorePerm};
+pub use crate::watchdog_helper::watchdog;
use crate::{
database::{KeyType, KeystoreDB},
globals::LEGACY_IMPORTER,
@@ -421,36 +422,6 @@
Ok((legacy_keys.len() + num_keys_in_db) as i32)
}
-/// This module provides helpers for simplified use of the watchdog module.
-#[cfg(feature = "watchdog")]
-pub mod watchdog {
- pub use crate::watchdog::WatchPoint;
- use crate::watchdog::Watchdog;
- use lazy_static::lazy_static;
- use std::sync::Arc;
- use std::time::Duration;
-
- lazy_static! {
- /// A Watchdog thread, that can be used to create watch points.
- static ref WD: Arc<Watchdog> = Watchdog::new(Duration::from_secs(10));
- }
-
- /// Sets a watch point with `id` and a timeout of `millis` milliseconds.
- pub fn watch_millis(id: &'static str, millis: u64) -> Option<WatchPoint> {
- Watchdog::watch(&WD, id, Duration::from_millis(millis))
- }
-
- /// Like `watch_millis` but with a callback that is called every time a report
- /// is printed about this watch point.
- pub fn watch_millis_with(
- id: &'static str,
- millis: u64,
- callback: impl Fn() -> String + Send + 'static,
- ) -> Option<WatchPoint> {
- Watchdog::watch_with(&WD, id, Duration::from_millis(millis), callback)
- }
-}
-
/// Trait implemented by objects that can be used to decrypt cipher text using AES-GCM.
pub trait AesGcm {
/// Deciphers `data` using the initialization vector `iv` and AEAD tag `tag`
@@ -480,25 +451,6 @@
}
}
-/// This module provides empty/noop implementations of the watch dog utility functions.
-#[cfg(not(feature = "watchdog"))]
-pub mod watchdog {
- /// Noop watch point.
- pub struct WatchPoint();
- /// Sets a Noop watch point.
- fn watch_millis(_: &'static str, _: u64) -> Option<WatchPoint> {
- None
- }
-
- pub fn watch_millis_with(
- _: &'static str,
- _: u64,
- _: impl Fn() -> String + Send + 'static,
- ) -> Option<WatchPoint> {
- None
- }
-}
-
#[cfg(test)]
mod tests {
use super::*;
diff --git a/keystore2/src/watchdog.rs b/keystore2/src/watchdog.rs
deleted file mode 100644
index 01043c5..0000000
--- a/keystore2/src/watchdog.rs
+++ /dev/null
@@ -1,360 +0,0 @@
-// Copyright 2021, The Android Open Source Project
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-// Can be removed when instrumentations are added to keystore.
-#![allow(dead_code)]
-
-//! This module implements a watchdog thread.
-
-use std::{
- cmp::min,
- collections::HashMap,
- sync::Arc,
- sync::{Condvar, Mutex, MutexGuard},
- thread,
-};
-use std::{
- marker::PhantomData,
- time::{Duration, Instant},
-};
-
-/// Represents a Watchdog record. It can be created with `Watchdog::watch` or
-/// `Watchdog::watch_with`. It disarms the record when dropped.
-pub struct WatchPoint {
- id: &'static str,
- wd: Arc<Watchdog>,
- not_send: PhantomData<*mut ()>, // WatchPoint must not be Send.
-}
-
-impl Drop for WatchPoint {
- fn drop(&mut self) {
- self.wd.disarm(self.id)
- }
-}
-
-#[derive(Debug, PartialEq, Eq)]
-enum State {
- NotRunning,
- Running,
-}
-
-#[derive(Debug, Clone, Hash, PartialEq, Eq)]
-struct Index {
- tid: thread::ThreadId,
- id: &'static str,
-}
-
-struct Record {
- started: Instant,
- deadline: Instant,
- callback: Option<Box<dyn Fn() -> String + Send + 'static>>,
-}
-
-struct WatchdogState {
- state: State,
- thread: Option<thread::JoinHandle<()>>,
- timeout: Duration,
- records: HashMap<Index, Record>,
- last_report: Instant,
- has_overdue: bool,
-}
-
-impl WatchdogState {
- fn update_overdue_and_find_next_timeout(&mut self) -> (bool, Option<Duration>) {
- let now = Instant::now();
- let mut next_timeout: Option<Duration> = None;
- let mut has_overdue = false;
- for (_, r) in self.records.iter() {
- let timeout = r.deadline.saturating_duration_since(now);
- if timeout == Duration::new(0, 0) {
- has_overdue = true;
- continue;
- }
- next_timeout = match next_timeout {
- Some(nt) => {
- if timeout < nt {
- Some(timeout)
- } else {
- Some(nt)
- }
- }
- None => Some(timeout),
- };
- }
- (has_overdue, next_timeout)
- }
-
- fn log_report(&mut self, has_overdue: bool) -> bool {
- match (self.has_overdue, has_overdue) {
- (true, true) => {
- if self.last_report.elapsed() < Watchdog::NOISY_REPORT_TIMEOUT {
- self.has_overdue = false;
- return false;
- }
- }
- (_, false) => {
- self.has_overdue = false;
- return false;
- }
- (false, true) => {}
- }
- self.last_report = Instant::now();
- self.has_overdue = has_overdue;
- log::warn!("### Keystore Watchdog report - BEGIN ###");
-
- let now = Instant::now();
- let mut overdue_records: Vec<(&Index, &Record)> = self
- .records
- .iter()
- .filter(|(_, r)| r.deadline.saturating_duration_since(now) == Duration::new(0, 0))
- .collect();
-
- log::warn!("When extracting from a bug report, please include this header");
- log::warn!("and all {} records below.", overdue_records.len());
-
- // Watch points can be nested, i.e., a single thread may have multiple armed
- // watch points. And the most recent on each thread (thread recent) is closest to the point
- // where something is blocked. Furthermore, keystore2 has various critical section
- // and common backend resources KeyMint that can only be entered serialized. So if one
- // thread hangs, the others will soon follow suite. Thus the oldest "thread recent" watch
- // point is most likely pointing toward the culprit.
- // Thus, sort by start time first.
- overdue_records.sort_unstable_by(|(_, r1), (_, r2)| r1.started.cmp(&r2.started));
- // Then we groups all of the watch points per thread preserving the order within
- // groups.
- let groups = overdue_records.iter().fold(
- HashMap::<thread::ThreadId, Vec<(&Index, &Record)>>::new(),
- |mut acc, (i, r)| {
- acc.entry(i.tid).or_default().push((i, r));
- acc
- },
- );
- // Put the groups back into a vector.
- let mut groups: Vec<Vec<(&Index, &Record)>> = groups.into_values().collect();
- // Sort the groups by start time of the most recent (.last()) of each group.
- // It is panic safe to use unwrap() here because we never add empty vectors to
- // the map.
- groups.sort_by(|v1, v2| v1.last().unwrap().1.started.cmp(&v2.last().unwrap().1.started));
-
- for g in groups.iter() {
- for (i, r) in g.iter() {
- match &r.callback {
- Some(cb) => {
- log::warn!(
- "{:?} {} Pending: {:?} Overdue {:?}: {}",
- i.tid,
- i.id,
- r.started.elapsed(),
- r.deadline.elapsed(),
- (cb)()
- );
- }
- None => {
- log::warn!(
- "{:?} {} Pending: {:?} Overdue {:?}",
- i.tid,
- i.id,
- r.started.elapsed(),
- r.deadline.elapsed()
- );
- }
- }
- }
- }
- log::warn!("### Keystore Watchdog report - END ###");
- true
- }
-
- fn disarm(&mut self, index: Index) {
- self.records.remove(&index);
- }
-
- fn arm(&mut self, index: Index, record: Record) {
- if self.records.insert(index.clone(), record).is_some() {
- log::warn!("Recursive watchdog record at \"{:?}\" replaces previous record.", index);
- }
- }
-}
-
-/// Watchdog spawns a thread that logs records of all overdue watch points when a deadline
-/// is missed and at least every second as long as overdue watch points exist.
-/// The thread terminates when idle for a given period of time.
-pub struct Watchdog {
- state: Arc<(Condvar, Mutex<WatchdogState>)>,
-}
-
-impl Watchdog {
- /// If we have overdue records, we want to be noisy about it and log a report
- /// at least every `NOISY_REPORT_TIMEOUT` interval.
- const NOISY_REPORT_TIMEOUT: Duration = Duration::from_secs(1);
-
- /// Construct a [`Watchdog`]. When `timeout` has elapsed since the watchdog thread became
- /// idle, i.e., there are no more active or overdue watch points, the watchdog thread
- /// terminates.
- pub fn new(timeout: Duration) -> Arc<Self> {
- Arc::new(Self {
- state: Arc::new((
- Condvar::new(),
- Mutex::new(WatchdogState {
- state: State::NotRunning,
- thread: None,
- timeout,
- records: HashMap::new(),
- last_report: Instant::now(),
- has_overdue: false,
- }),
- )),
- })
- }
-
- fn watch_with_optional(
- wd: &Arc<Self>,
- callback: Option<Box<dyn Fn() -> String + Send + 'static>>,
- id: &'static str,
- timeout: Duration,
- ) -> Option<WatchPoint> {
- let deadline = Instant::now().checked_add(timeout);
- if deadline.is_none() {
- log::warn!("Deadline computation failed for WatchPoint \"{}\"", id);
- log::warn!("WatchPoint not armed.");
- return None;
- }
- wd.arm(callback, id, deadline.unwrap());
- Some(WatchPoint { id, wd: wd.clone(), not_send: Default::default() })
- }
-
- /// Create a new watch point. If the WatchPoint is not dropped before the timeout
- /// expires, a report is logged at least every second, which includes the id string
- /// and whatever string the callback returns.
- pub fn watch_with(
- wd: &Arc<Self>,
- id: &'static str,
- timeout: Duration,
- callback: impl Fn() -> String + Send + 'static,
- ) -> Option<WatchPoint> {
- Self::watch_with_optional(wd, Some(Box::new(callback)), id, timeout)
- }
-
- /// Like `watch_with`, but without a callback.
- pub fn watch(wd: &Arc<Self>, id: &'static str, timeout: Duration) -> Option<WatchPoint> {
- Self::watch_with_optional(wd, None, id, timeout)
- }
-
- fn arm(
- &self,
- callback: Option<Box<dyn Fn() -> String + Send + 'static>>,
- id: &'static str,
- deadline: Instant,
- ) {
- let tid = thread::current().id();
- let index = Index { tid, id };
- let record = Record { started: Instant::now(), deadline, callback };
-
- let (ref condvar, ref state) = *self.state;
-
- let mut state = state.lock().unwrap();
- state.arm(index, record);
-
- if state.state != State::Running {
- self.spawn_thread(&mut state);
- }
- drop(state);
- condvar.notify_all();
- }
-
- fn disarm(&self, id: &'static str) {
- let tid = thread::current().id();
- let index = Index { tid, id };
- let (_, ref state) = *self.state;
-
- let mut state = state.lock().unwrap();
- state.disarm(index);
- // There is no need to notify condvar. There is no action required for the
- // watchdog thread before the next deadline.
- }
-
- fn spawn_thread(&self, state: &mut MutexGuard<WatchdogState>) {
- if let Some(t) = state.thread.take() {
- t.join().expect("Watchdog thread panicked.");
- }
-
- let cloned_state = self.state.clone();
-
- state.thread = Some(thread::spawn(move || {
- let (ref condvar, ref state) = *cloned_state;
-
- let mut state = state.lock().unwrap();
-
- loop {
- let (has_overdue, next_timeout) = state.update_overdue_and_find_next_timeout();
- state.log_report(has_overdue);
- let (next_timeout, idle) = match (has_overdue, next_timeout) {
- (true, Some(next_timeout)) => {
- (min(next_timeout, Self::NOISY_REPORT_TIMEOUT), false)
- }
- (false, Some(next_timeout)) => (next_timeout, false),
- (true, None) => (Self::NOISY_REPORT_TIMEOUT, false),
- (false, None) => (state.timeout, true),
- };
-
- let (s, timeout) = condvar.wait_timeout(state, next_timeout).unwrap();
- state = s;
-
- if idle && timeout.timed_out() && state.records.is_empty() {
- state.state = State::NotRunning;
- break;
- }
- }
- log::info!("Watchdog thread idle -> terminating. Have a great day.");
- }));
- state.state = State::Running;
- }
-}
-
-#[cfg(test)]
-mod tests {
-
- use super::*;
- use std::sync::atomic;
- use std::thread;
- use std::time::Duration;
-
- #[test]
- fn test_watchdog() {
- android_logger::init_once(
- android_logger::Config::default()
- .with_tag("keystore2_watchdog_tests")
- .with_min_level(log::Level::Debug),
- );
-
- let wd = Watchdog::new(Watchdog::NOISY_REPORT_TIMEOUT.checked_mul(3).unwrap());
- let hit_count = Arc::new(atomic::AtomicU8::new(0));
- let hit_count_clone = hit_count.clone();
- let wp =
- Watchdog::watch_with(&wd, "test_watchdog", Duration::from_millis(100), move || {
- format!("hit_count: {}", hit_count_clone.fetch_add(1, atomic::Ordering::Relaxed))
- });
- assert_eq!(0, hit_count.load(atomic::Ordering::Relaxed));
- thread::sleep(Duration::from_millis(500));
- assert_eq!(1, hit_count.load(atomic::Ordering::Relaxed));
- thread::sleep(Watchdog::NOISY_REPORT_TIMEOUT);
- assert_eq!(2, hit_count.load(atomic::Ordering::Relaxed));
- drop(wp);
- thread::sleep(Watchdog::NOISY_REPORT_TIMEOUT.checked_mul(4).unwrap());
- assert_eq!(2, hit_count.load(atomic::Ordering::Relaxed));
- let (_, ref state) = *wd.state;
- let state = state.lock().unwrap();
- assert_eq!(state.state, State::NotRunning);
- }
-}
diff --git a/keystore2/src/watchdog_helper.rs b/keystore2/src/watchdog_helper.rs
new file mode 100644
index 0000000..92a0abc
--- /dev/null
+++ b/keystore2/src/watchdog_helper.rs
@@ -0,0 +1,82 @@
+// Copyright 2023, The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! Convenience wrappers around the `watchdog_rs` crate.
+//!
+//! With the `watchdog` feature enabled, the helpers forward to a shared `Watchdog`.
+//! Without it, no-op stand-ins with the same public names are compiled instead.
+
+/// This module provides helpers for simplified use of the watchdog module.
+#[cfg(feature = "watchdog")]
+pub mod watchdog {
+    use lazy_static::lazy_static;
+    use std::sync::Arc;
+    use std::time::Duration;
+    pub use watchdog_rs::WatchPoint;
+    use watchdog_rs::Watchdog;
+
+    lazy_static! {
+        /// A Watchdog thread, that can be used to create watch points.
+        ///
+        /// NOTE(review): the 10 second argument is presumably the idle timeout after which the
+        /// watchdog thread terminates, as in the former in-tree `Watchdog::new` - confirm
+        /// against `watchdog_rs`.
+        static ref WD: Arc<Watchdog> = Watchdog::new(Duration::from_secs(10));
+    }
+
+    /// Sets a watch point with `id` and a timeout of `millis` milliseconds.
+    ///
+    /// Returns whatever `Watchdog::watch` returns for the shared watchdog.
+    /// NOTE(review): the former in-tree implementation disarmed the watch point when the
+    /// returned `WatchPoint` was dropped and returned `None` if the deadline could not be
+    /// computed - presumably unchanged in `watchdog_rs`; confirm.
+    pub fn watch_millis(id: &'static str, millis: u64) -> Option<WatchPoint> {
+        Watchdog::watch(&WD, id, Duration::from_millis(millis))
+    }
+
+    /// Like `watch_millis` but with a callback that is called every time a report
+    /// is printed about this watch point.
+    pub fn watch_millis_with(
+        id: &'static str,
+        millis: u64,
+        callback: impl Fn() -> String + Send + 'static,
+    ) -> Option<WatchPoint> {
+        Watchdog::watch_with(&WD, id, Duration::from_millis(millis), callback)
+    }
+}
+
+/// This module provides empty/noop implementations of the watch dog utility functions.
+///
+/// It must expose the same public names as the feature-enabled module above, because callers
+/// (e.g. `rkpd_client`) import it unconditionally.
+#[cfg(not(feature = "watchdog"))]
+pub mod watchdog {
+    /// Noop watch point.
+    pub struct WatchPoint();
+
+    /// Sets a Noop watch point: the arguments are ignored and `None` is always returned.
+    pub fn watch_millis(_: &'static str, _: u64) -> Option<WatchPoint> {
+        None
+    }
+
+    /// Noop counterpart of the callback variant: the callback is never invoked and `None` is
+    /// always returned.
+    pub fn watch_millis_with(
+        _: &'static str,
+        _: u64,
+        _: impl Fn() -> String + Send + 'static,
+    ) -> Option<WatchPoint> {
+        None
+    }
+}