Blame - keystore2/src/operation.rs - android_system_security

blob: 7c3d2013bc86ac4f6cd45ca59f051545c2faa1de [file] [log] [blame]

Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	1	// Copyright 2020, The Android Open Source Project
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// http://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
				15	//! This crate implements the `IKeystoreOperation` AIDL interface, which represents
				16	//! an ongoing key operation, as well as the operation database, which is mainly
				17	//! required for tracking operations for the purpose of pruning.
				18	//! This crate also implements an operation pruning strategy.
				19	//!
				20	//! Operations implement the API calls update, finish, and abort.
				21	//! Additionally, an operation can be dropped and pruned. The former
				22	//! happens if the client deletes a binder to the operation object.
				23	//! An existing operation may get pruned when running out of operation
				24	//! slots and a new operation takes precedence.
				25	//!
				26	//! ## Operation Lifecycle
				27	//! An operation gets created when the client calls `IKeystoreSecurityLevel::create`.
				28	//! It may receive zero or more update requests. The lifecycle ends when:
				29	//! * `update` yields an error.
				30	//! * `finish` is called.
				31	//! * `abort` is called.
				32	//! * The operation gets dropped.
				33	//! * The operation gets pruned.
				34	//! `Operation` has an `Outcome` member. While the outcome is `Outcome::Unknown`,
				35	//! the operation is active and in a good state. Any of the above conditions may
				36	//! change the outcome to one of the defined outcomes Success, Abort, Dropped,
				37	//! Pruned, or ErrorCode. The latter is chosen in the case of an unexpected error, during
				38	//! `update` or `finish`. `Success` is chosen iff `finish` completes without error.
				39	//! Note that all operations get dropped eventually in the sense that they lose
				40	//! their last reference and get destroyed. At that point, the fate of the operation
				41	//! gets logged. However, an operation will transition to `Outcome::Dropped` iff
				42	//! the operation was still active (`Outcome::Unknown`) at that time.
				43	//!
				44	//! ## Operation Dropping
				45	//! To observe the dropping of an operation, we have to make sure that there
				46	//! are no strong references to the IBinder representing this operation.
				47	//! This would be simple enough if the operation object would need to be accessed
				48	//! only by transactions. But to perform pruning, we have to retain a reference to the
				49	//! original operation object.
				50	//!
				51	//! ## Operation Pruning
				52	//! Pruning an operation happens during the creation of a new operation.
				53	//! We have to iterate through the operation database to find a suitable
				54	//! candidate. Then we abort and finalize this operation setting its outcome to
				55	//! `Outcome::Pruned`. The corresponding KeyMint operation slot will have been freed
				56	//! up at this point, but the `Operation` object lingers. When the client
				57	//! attempts to use the operation again they will receive
				58	//! ErrorCode::INVALID_OPERATION_HANDLE indicating that the operation no longer
				59	//! exists. This should be the cue for the client to destroy its binder.
				60	//! At that point the operation gets dropped.
				61	//!
				62	//! ## Architecture
				63	//! The `IKeystoreOperation` trait is implemented by `KeystoreOperation`.
				64	//! This acts as a proxy object holding a strong reference to actual operation
				65	//! implementation `Operation`.
				66	//!
				67	//! ```
				68	//! struct KeystoreOperation {
				69	//! operation: Mutex<Option<Arc<Operation>>>,
				70	//! }
				71	//! ```
				72	//!
				73	//! The `Mutex` serves two purposes. It provides interior mutability allowing
				74	//! us to set the Option to None. We do this when the life cycle ends during
				75	//! a call to `update`, `finish`, or `abort`. As a result most of the Operation
				76	//! related resources are freed. The `KeystoreOperation` proxy object still
				77	//! lingers until dropped by the client.
				78	//! The second purpose is to protect operations against concurrent usage.
				79	//! Failing to lock this mutex yields `ResponseCode::OPERATION_BUSY` and indicates
				80	//! a programming error in the client.
				81	//!
				82	//! Note that the Mutex only protects the operation against concurrent client calls.
				83	//! We still retain weak references to the operation in the operation database:
				84	//!
				85	//! ```
				86	//! struct OperationDb {
				87	//! operations: Mutex<Vec<Weak<Operation>>>
				88	//! }
				89	//! ```
				90	//!
				91	//! This allows us to access the operations for the purpose of pruning.
				92	//! We do this in three phases.
				93	//! 1. We gather the pruning information. Besides non mutable information,
				94	//! we access `last_usage` which is protected by a mutex.
				95	//! We only lock this mutex for single statements at a time. During
				96	//! this phase we hold the operation db lock.
				97	//! 2. We choose a pruning candidate by computing the pruning resistance
				98	//! of each operation. We do this entirely with information we now
				99	//! have on the stack without holding any locks.
				100	//! (See `OperationDb::prune` for more details on the pruning strategy.)
				101	//! 3. During pruning we briefly lock the operation database again to get the
				102	//! pruning candidate by index. We then attempt to abort the candidate.
				103	//! If the candidate was touched in the meantime or is currently fulfilling
				104	//! a request (i.e., the client calls update, finish, or abort),
				105	//! we go back to 1 and try again.
				106	//!
				107	//! So the outer Mutex in `KeystoreOperation::operation` only protects
				108	//! operations against concurrent client calls but not against concurrent
				109	//! pruning attempts. This is what the `Operation::outcome` mutex is used for.
				110	//!
				111	//! ```
				112	//! struct Operation {
				113	//! ...
				114	//! outcome: Mutex<Outcome>,
				115	//! ...
				116	//! }
				117	//! ```
				118	//!
				119	//! Any request that can change the outcome, i.e., `update`, `finish`, `abort`,
				120	//! `drop`, and `prune` has to take the outcome lock and check if the outcome
				121	//! is still `Outcome::Unknown` before entering. `prune` is special in that
				122	//! it will `try_lock`, because we don't want to be blocked on a potentially
				123	//! long running request at another operation. If it fails to get the lock
				124	//! the operation is either being touched, which changes its pruning resistance,
				125	//! or it transitions to its end-of-life, which means we may get a free slot.
				126	//! Either way, we have to re-evaluate the pruning scores.
				127
				128	use std::{
				129	collections::HashMap,
				130	sync::{Arc, Mutex, MutexGuard, Weak},
				131	time::Duration,
				132	time::Instant,
				133	};
				134
				135	use crate::error::{map_km_error, map_or_log_err, Error, ErrorCode, ResponseCode};
				136	use crate::utils::Asp;
Shawn Willden	708744a	2020-12-11 13:05:27 +0000	[diff] [blame]	137	use android_hardware_security_keymint::aidl::android::hardware::security::keymint::{
Janis Danisevskis	85d4793	2020-10-23 16:12:59 -0700	[diff] [blame]	138	ByteArray::ByteArray, IKeyMintOperation::IKeyMintOperation,
				139	KeyParameter::KeyParameter as KmParam, KeyParameterArray::KeyParameterArray, Tag::Tag,
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	140	};
				141	use android_system_keystore2::aidl::android::system::keystore2::{
				142	IKeystoreOperation::BnKeystoreOperation, IKeystoreOperation::IKeystoreOperation,
				143	};
				144	use anyhow::{anyhow, Context, Result};
				145	use binder::{IBinder, Interface};
				146
				147	/// Operations have `Outcome::Unknown` as long as they are active. They transition
				148	/// to one of the other variants exactly once. The distinction in outcome is mainly
				149	/// for the statistic.
				150	#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
				151	enum Outcome {
				152	Unknown,
				153	Success,
				154	Abort,
				155	Dropped,
				156	Pruned,
				157	ErrorCode(ErrorCode),
				158	}
				159
				160	/// Operation bundles all of the operation related resources and tracks the operation's
				161	/// outcome.
				162	#[derive(Debug)]
				163	pub struct Operation {
				164	// The index of this operation in the OperationDb.
				165	index: usize,
				166	km_op: Asp,
				167	last_usage: Mutex<Instant>,
				168	outcome: Mutex<Outcome>,
				169	owner: u32, // Uid of the operation's owner.
				170	}
				171
				172	struct PruningInfo {
				173	last_usage: Instant,
				174	owner: u32,
				175	index: usize,
				176	}
				177
// We don't accept more than 32KiB of data in `update`, `updateAad`, and `finish`.
const MAX_RECEIVE_DATA: usize = 32 * 1024;
				180
				181	impl Operation {
				182	/// Constructor
				183	pub fn new(index: usize, km_op: Box<dyn IKeyMintOperation>, owner: u32) -> Self {
				184	Self {
				185	index,
				186	km_op: Asp::new(km_op.as_binder()),
				187	last_usage: Mutex::new(Instant::now()),
				188	outcome: Mutex::new(Outcome::Unknown),
				189	owner,
				190	}
				191	}
				192
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	193	fn get_pruning_info(&self) -> Option<PruningInfo> {
				194	// An operation may be finalized.
				195	if let Ok(guard) = self.outcome.try_lock() {
				196	match *guard {
				197	Outcome::Unknown => {}
				198	// If the outcome is any other than unknown, it has been finalized,
				199	// and we can no longer consider it for pruning.
				200	_ => return None,
				201	}
				202	}
				203	// Else: If we could not grab the lock, this means that the operation is currently
				204	// being used and it may be transitioning to finalized or it was simply updated.
				205	// In any case it is fair game to consider it for pruning. If the operation
				206	// transitioned to a final state, we will notice when we attempt to prune, and
				207	// a subsequent attempt to create a new operation will succeed.
				208	Some(PruningInfo {
				209	// Expect safety:
				210	// `last_usage` is locked only for primitive single line statements.
				211	// There is no chance to panic and poison the mutex.
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	212	last_usage: *self.last_usage.lock().expect("In get_pruning_info."),
				213	owner: self.owner,
				214	index: self.index,
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	215	})
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	216	}
				217
				218	fn prune(&self, last_usage: Instant) -> Result<(), Error> {
				219	let mut locked_outcome = match self.outcome.try_lock() {
				220	Ok(guard) => match *guard {
				221	Outcome::Unknown => guard,
				222	_ => return Err(Error::Km(ErrorCode::INVALID_OPERATION_HANDLE)),
				223	},
				224	Err(_) => return Err(Error::Rc(ResponseCode::OPERATION_BUSY)),
				225	};
				226
				227	// In `OperationDb::prune`, which is our caller, we first gather the pruning
				228	// information including the last usage. When we select a candidate
				229	// we call `prune` on that candidate passing the last_usage
				230	// that we gathered earlier. If the actual last usage
				231	// has changed since than, it means the operation was busy in the
				232	// meantime, which means that we have to reevaluate the pruning score.
				233	//
				234	// Expect safety:
				235	// `last_usage` is locked only for primitive single line statements.
				236	// There is no chance to panic and poison the mutex.
				237	if *self.last_usage.lock().expect("In Operation::prune()") != last_usage {
				238	return Err(Error::Rc(ResponseCode::OPERATION_BUSY));
				239	}
				240	*locked_outcome = Outcome::Pruned;
				241
				242	let km_op: Box<dyn IKeyMintOperation> = match self.km_op.get_interface() {
				243	Ok(km_op) => km_op,
				244	Err(e) => {
				245	log::error!("In prune: Failed to get KeyMintOperation interface.\n {:?}", e);
				246	return Err(Error::sys());
				247	}
				248	};
				249
				250	// We abort the operation. If there was an error we log it but ignore it.
				251	if let Err(e) = map_km_error(km_op.abort()) {
				252	log::error!("In prune: KeyMint::abort failed with {:?}.", e);
				253	}
				254
				255	Ok(())
				256	}
				257
				258	// This function takes a Result from a KeyMint call and inspects it for errors.
				259	// If an error was found it updates the given `locked_outcome` accordingly.
				260	// It forwards the Result unmodified.
				261	// The precondition to this call must be *locked_outcome == Outcome::Unknown.
				262	// Ideally the `locked_outcome` came from a successful call to `check_active`
				263	// see below.
				264	fn update_outcome<T>(
				265	&self,
				266	locked_outcome: &mut Outcome,
				267	err: Result<T, Error>,
				268	) -> Result<T, Error> {
				269	match &err {
				270	Err(Error::Km(e)) => locked_outcome = Outcome::ErrorCode(e),
				271	Err(_) => *locked_outcome = Outcome::ErrorCode(ErrorCode::UNKNOWN_ERROR),
				272	Ok(_) => (),
				273	}
				274	err
				275	}
				276
				277	// This function grabs the outcome lock and checks the current outcome state.
				278	// If the outcome is still `Outcome::Unknown`, this function returns
				279	// the locked outcome for further updates. In any other case it returns
				280	// ErrorCode::INVALID_OPERATION_HANDLE indicating that this operation has
				281	// been finalized and is no longer active.
				282	fn check_active(&self) -> Result<MutexGuard<Outcome>> {
				283	let guard = self.outcome.lock().expect("In check_active.");
				284	match *guard {
				285	Outcome::Unknown => Ok(guard),
				286	_ => Err(Error::Km(ErrorCode::INVALID_OPERATION_HANDLE)).context(format!(
				287	"In check_active: Call on finalized operation with outcome: {:?}.",
				288	*guard
				289	)),
				290	}
				291	}
				292
				293	// This function checks the amount of input data sent to us. We reject any buffer
				294	// exceeding MAX_RECEIVE_DATA bytes as input to `update`, `update_aad`, and `finish`
				295	// in order to force clients into using reasonable limits.
				296	fn check_input_length(data: &[u8]) -> Result<()> {
				297	if data.len() > MAX_RECEIVE_DATA {
				298	// This error code is unique, no context required here.
				299	return Err(anyhow!(Error::Rc(ResponseCode::TOO_MUCH_DATA)));
				300	}
				301	Ok(())
				302	}
				303
				304	// Update the last usage to now.
				305	fn touch(&self) {
				306	// Expect safety:
				307	// `last_usage` is locked only for primitive single line statements.
				308	// There is no chance to panic and poison the mutex.
				309	*self.last_usage.lock().expect("In touch.") = Instant::now();
				310	}
				311
				312	/// Implementation of `IKeystoreOperation::updateAad`.
				313	/// Refer to the AIDL spec at system/hardware/interfaces/keystore2 for details.
				314	fn update_aad(&self, aad_input: &[u8]) -> Result<()> {
				315	let mut outcome = self.check_active().context("In update_aad")?;
				316	Self::check_input_length(aad_input).context("In update_aad")?;
				317	self.touch();
				318
Janis Danisevskis	85d4793	2020-10-23 16:12:59 -0700	[diff] [blame]	319	let params = KeyParameterArray {
				320	params: vec![KmParam {
				321	tag: Tag::ASSOCIATED_DATA,
				322	blob: aad_input.into(),
				323	..Default::default()
				324	}],
				325	};
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	326
Janis Danisevskis	85d4793	2020-10-23 16:12:59 -0700	[diff] [blame]	327	let mut out_params: Option<KeyParameterArray> = None;
				328	let mut output: Option<ByteArray> = None;
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	329
				330	let km_op: Box<dyn IKeyMintOperation> =
				331	self.km_op.get_interface().context("In update: Failed to get KeyMintOperation.")?;
				332
				333	self.update_outcome(
				334	&mut *outcome,
				335	map_km_error(km_op.update(
Janis Danisevskis	85d4793	2020-10-23 16:12:59 -0700	[diff] [blame]	336	Some(&params),
				337	None,
				338	// TODO Get auth token from enforcement module if required.
				339	None,
				340	// TODO Get verification token from enforcement module if required.
				341	None,
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	342	&mut out_params,
				343	&mut output,
				344	)),
				345	)
				346	.context("In update_aad: KeyMint::update failed.")?;
				347
				348	Ok(())
				349	}
				350
				351	/// Implementation of `IKeystoreOperation::update`.
				352	/// Refer to the AIDL spec at system/hardware/interfaces/keystore2 for details.
				353	fn update(&self, input: &[u8]) -> Result<Option<Vec<u8>>> {
				354	let mut outcome = self.check_active().context("In update")?;
				355	Self::check_input_length(input).context("In update")?;
				356	self.touch();
				357
Janis Danisevskis	85d4793	2020-10-23 16:12:59 -0700	[diff] [blame]	358	let mut out_params: Option<KeyParameterArray> = None;
				359	let mut output: Option<ByteArray> = None;
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	360
				361	let km_op: Box<dyn IKeyMintOperation> =
				362	self.km_op.get_interface().context("In update: Failed to get KeyMintOperation.")?;
				363
				364	self.update_outcome(
				365	&mut *outcome,
				366	map_km_error(km_op.update(
Janis Danisevskis	85d4793	2020-10-23 16:12:59 -0700	[diff] [blame]	367	None,
				368	Some(input),
				369	// TODO Get auth token from enforcement module if required.
				370	None,
				371	// TODO Get verification token from enforcement module if required.
				372	None,
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	373	&mut out_params,
				374	&mut output,
				375	)),
				376	)
				377	.context("In update: KeyMint::update failed.")?;
				378
Janis Danisevskis	85d4793	2020-10-23 16:12:59 -0700	[diff] [blame]	379	match output {
Janis Danisevskis	3cfd4a4	2020-11-23 13:42:38 -0800	[diff] [blame^]	380	Some(blob) => {
				381	if blob.data.is_empty() {
				382	Ok(None)
				383	} else {
				384	Ok(Some(blob.data))
				385	}
				386	}
Janis Danisevskis	85d4793	2020-10-23 16:12:59 -0700	[diff] [blame]	387	None => Ok(None),
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	388	}
				389	}
				390
				391	/// Implementation of `IKeystoreOperation::finish`.
				392	/// Refer to the AIDL spec at system/hardware/interfaces/keystore2 for details.
				393	fn finish(&self, input: Option<&[u8]>, signature: Option<&[u8]>) -> Result<Option<Vec<u8>>> {
				394	let mut outcome = self.check_active().context("In finish")?;
				395	if let Some(input) = input {
				396	Self::check_input_length(input).context("In finish")?;
				397	}
				398	self.touch();
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	399
Janis Danisevskis	85d4793	2020-10-23 16:12:59 -0700	[diff] [blame]	400	let mut out_params: Option<KeyParameterArray> = None;
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	401
				402	let km_op: Box<dyn IKeyMintOperation> =
				403	self.km_op.get_interface().context("In finish: Failed to get KeyMintOperation.")?;
				404
Janis Danisevskis	85d4793	2020-10-23 16:12:59 -0700	[diff] [blame]	405	let output = self
				406	.update_outcome(
				407	&mut *outcome,
				408	map_km_error(km_op.finish(
				409	None,
				410	input,
				411	signature,
				412	// TODO Get auth token from enforcement module if required.
				413	None,
				414	// TODO Get verification token from enforcement module if required.
				415	None,
				416	&mut out_params,
				417	)),
				418	)
				419	.context("In finish: KeyMint::finish failed.")?;
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	420
				421	// At this point the operation concluded successfully.
				422	*outcome = Outcome::Success;
				423
				424	if output.is_empty() {
				425	Ok(None)
				426	} else {
				427	Ok(Some(output))
				428	}
				429	}
				430
				431	/// Aborts the operation if it is active. IFF the operation is aborted the outcome is
				432	/// set to `outcome`. `outcome` must reflect the reason for the abort. Since the operation
				433	/// gets aborted `outcome` must not be `Operation::Success` or `Operation::Unknown`.
				434	fn abort(&self, outcome: Outcome) -> Result<()> {
				435	let mut locked_outcome = self.check_active().context("In abort")?;
				436	*locked_outcome = outcome;
				437	let km_op: Box<dyn IKeyMintOperation> =
				438	self.km_op.get_interface().context("In abort: Failed to get KeyMintOperation.")?;
				439
				440	map_km_error(km_op.abort()).context("In abort: KeyMint::abort failed.")
				441	}
				442	}
				443
				444	impl Drop for Operation {
				445	fn drop(&mut self) {
				446	if let Ok(Outcome::Unknown) = self.outcome.get_mut() {
				447	// If the operation was still active we call abort, setting
				448	// the outcome to `Outcome::Dropped`
				449	if let Err(e) = self.abort(Outcome::Dropped) {
				450	log::error!("While dropping Operation: abort failed:\n {:?}", e);
				451	}
				452	}
				453	}
				454	}
				455
				456	/// The OperationDb holds weak references to all ongoing operations.
				457	/// Its main purpose is to facilitate operation pruning.
				458	#[derive(Debug, Default)]
				459	pub struct OperationDb {
				460	// TODO replace Vec with WeakTable when the weak_table crate becomes
				461	// available.
				462	operations: Mutex<Vec<Weak<Operation>>>,
				463	}
				464
				465	impl OperationDb {
				466	/// Creates a new OperationDb.
				467	pub fn new() -> Self {
				468	Self { operations: Mutex::new(Vec::new()) }
				469	}
				470
				471	/// Creates a new operation.
				472	/// This function takes a KeyMint operation and an associated
				473	/// owner uid and returns a new Operation wrapped in a `std::sync::Arc`.
				474	pub fn create_operation(
				475	&self,
				476	km_op: Box<dyn IKeyMintOperation>,
				477	owner: u32,
				478	) -> Arc<Operation> {
				479	// We use unwrap because we don't allow code that can panic while locked.
				480	let mut operations = self.operations.lock().expect("In create_operation.");
				481
				482	let mut index: usize = 0;
				483	// First we iterate through the operation slots to try and find an unused
				484	// slot. If we don't find one, we append the new entry instead.
				485	match (*operations).iter_mut().find(\|s\| {
				486	index += 1;
				487	s.upgrade().is_none()
				488	}) {
				489	Some(free_slot) => {
				490	let new_op = Arc::new(Operation::new(index - 1, km_op, owner));
				491	*free_slot = Arc::downgrade(&new_op);
				492	new_op
				493	}
				494	None => {
				495	let new_op = Arc::new(Operation::new(operations.len(), km_op, owner));
				496	operations.push(Arc::downgrade(&new_op));
				497	new_op
				498	}
				499	}
				500	}
				501
				502	fn get(&self, index: usize) -> Option<Arc<Operation>> {
				503	self.operations.lock().expect("In OperationDb::get.").get(index).and_then(\|op\| op.upgrade())
				504	}
				505
				506	/// Attempts to prune an operation.
				507	///
				508	/// This function is used during operation creation, i.e., by
				509	/// `KeystoreSecurityLevel::create_operation`, to try and free up an operation slot
				510	/// if it got `ErrorCode::TOO_MANY_OPERATIONS` from the KeyMint backend. It is not
				511	/// guaranteed that an operation slot is available after this call successfully
				512	/// returned for various reasons. E.g., another thread may have snatched up the newly
				513	/// available slot. Callers may have to call prune multiple times before they get a
				514	/// free operation slot. Prune may also return `Err(Error::Rc(ResponseCode::BACKEND_BUSY))`
				515	/// which indicates that no prunable operation was found.
				516	///
				517	/// To find a suitable candidate we compute the malus for the caller and each existing
				518	/// operation. The malus is the inverse of the pruning power (caller) or pruning
				519	/// resistance (existing operation).
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	520	///
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	521	/// The malus is based on the number of sibling operations and age. Sibling
				522	/// operations are operations that have the same owner (UID).
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	523	///
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	524	/// Every operation, existing or new, starts with a malus of 1. Every sibling
				525	/// increases the malus by one. The age is the time since an operation was last touched.
				526	/// It increases the malus by log6(<age in seconds> + 1) rounded down to the next
				527	/// integer. So the malus increases stepwise after 5s, 35s, 215s, ...
				528	/// Of two operations with the same malus the least recently used one is considered
				529	/// weaker.
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	530	///
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	531	/// For the caller to be able to prune an operation it must find an operation
				532	/// with a malus higher than its own.
				533	///
				534	/// The malus can be expressed as
				535	/// ```
				536	/// malus = 1 + no_of_siblings + floor(log6(age_in_seconds + 1))
				537	/// ```
				538	/// where the constant `1` accounts for the operation under consideration.
				539	/// In reality we compute it as
				540	/// ```
				541	/// caller_malus = 1 + running_siblings
				542	/// ```
				543	/// because the new operation has no age and is not included in the `running_siblings`,
				544	/// and
				545	/// ```
				546	/// running_malus = running_siblings + floor(log6(age_in_seconds + 1))
				547	/// ```
				548	/// because a running operation is included in the `running_siblings` and it has
				549	/// an age.
				550	///
				551	/// ## Example
				552	/// A caller with no running operations has a malus of 1. Young (age < 5s) operations
				553	/// also with no siblings have a malus of one and cannot be pruned by the caller.
				554	/// We have to find an operation that has at least one sibling or is older than 5s.
				555	///
				556	/// A caller with one running operation has a malus of 2. Now even young siblings
				557	/// or single child aging (5s <= age < 35s) operations are off limit. An aging
				558	/// sibling of two, however, would have a malus of 3 and would be fair game.
				559	///
				560	/// ## Rationale
				561	/// Due to the limitation of KeyMint operation slots, we cannot get around pruning or
				562	/// a single app could easily DoS KeyMint.
				563	/// Keystore 1.0 used to always prune the least recently used operation. This at least
				564	/// guaranteed that new operations can always be started. With the increased usage
				565	/// of Keystore we saw increased pruning activity which can lead to a livelock
				566	/// situation in the worst case.
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	567	///
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	568	/// With the new pruning strategy we want to provide well behaved clients with
				569	/// progress assurances while punishing DoS attempts. As a result of this
				570	/// strategy we can be in the situation where no operation can be pruned and the
				571	/// creation of a new operation fails. This allows single child operations which
				572	/// are frequently updated to complete, thereby breaking up livelock situations
				573	/// and facilitating system wide progress.
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	574	///
				575	/// ## Update
				576	/// We also allow callers to cannibalize their own sibling operations if no other
				577	/// slot can be found. In this case the least recently used sibling is pruned.
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	578	pub fn prune(&self, caller: u32) -> Result<(), Error> {
				579	loop {
				580	// Maps the uid of the owner to the number of operations that owner has
				581	// (running_siblings). More operations per owner lowers the pruning
				582	// resistance of the operations of that owner. Whereas the number of
				583	// ongoing operations of the caller lowers the pruning power of the caller.
				584	let mut owners: HashMap<u32, u64> = HashMap::new();
				585	let mut pruning_info: Vec<PruningInfo> = Vec::new();
				586
				587	let now = Instant::now();
				588	self.operations
				589	.lock()
				590	.expect("In OperationDb::prune: Trying to lock self.operations.")
				591	.iter()
				592	.for_each(\|op\| {
				593	if let Some(op) = op.upgrade() {
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	594	if let Some(p_info) = op.get_pruning_info() {
				595	let owner = p_info.owner;
				596	pruning_info.push(p_info);
				597	// Count operations per owner.
				598	*owners.entry(owner).or_insert(0) += 1;
				599	}
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	600	}
				601	});
				602
				603	let caller_malus = 1u64 + *owners.entry(caller).or_default();
				604
				605	// We iterate through all operations computing the malus and finding
				606	// the candidate with the highest malus which must also be higher
				607	// than the caller_malus.
				608	struct CandidateInfo {
				609	index: usize,
				610	malus: u64,
				611	last_usage: Instant,
				612	age: Duration,
				613	}
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	614	let mut oldest_caller_op: Option<CandidateInfo> = None;
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	615	let candidate = pruning_info.iter().fold(
				616	None,
				617	\|acc: Option<CandidateInfo>, &PruningInfo { last_usage, owner, index }\| {
				618	// Compute the age of the current operation.
				619	let age = now
				620	.checked_duration_since(last_usage)
				621	.unwrap_or_else(\|\| Duration::new(0, 0));
				622
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	623	// Find the least recently used sibling as an alternative pruning candidate.
				624	if owner == caller {
				625	if let Some(CandidateInfo { age: a, .. }) = oldest_caller_op {
				626	if age > a {
				627	oldest_caller_op =
				628	Some(CandidateInfo { index, malus: 0, last_usage, age });
				629	}
				630	} else {
				631	oldest_caller_op =
				632	Some(CandidateInfo { index, malus: 0, last_usage, age });
				633	}
				634	}
				635
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	636	// Compute the malus of the current operation.
				637	// Expect safety: Every owner in pruning_info was counted in
				638	// the owners map. So this unwrap cannot panic.
				639	let malus = *owners
				640	.get(&owner)
				641	.expect("This is odd. We should have counted every owner in pruning_info.")
				642	+ ((age.as_secs() + 1) as f64).log(6.0).floor() as u64;
				643
				644	// Now check if the current operation is a viable/better candidate
				645	// the one currently stored in the accumulator.
				646	match acc {
				647	// First we have to find any operation that is prunable by the caller.
				648	None => {
				649	if caller_malus < malus {
				650	Some(CandidateInfo { index, malus, last_usage, age })
				651	} else {
				652	None
				653	}
				654	}
				655	// If we have found one we look for the operation with the worst score.
				656	// If there is a tie, the older operation is considered weaker.
				657	Some(CandidateInfo { index: i, malus: m, last_usage: l, age: a }) => {
				658	if malus > m \|\| (malus == m && age > a) {
				659	Some(CandidateInfo { index, malus, last_usage, age })
				660	} else {
				661	Some(CandidateInfo { index: i, malus: m, last_usage: l, age: a })
				662	}
				663	}
				664	}
				665	},
				666	);
				667
Janis Danisevskis	45c5c97	2020-10-26 09:35:16 -0700	[diff] [blame]	668	// If we did not find a suitable candidate we may cannibalize our oldest sibling.
				669	let candidate = candidate.or(oldest_caller_op);
				670
Janis Danisevskis	1af9126	2020-08-10 14:58:08 -0700	[diff] [blame]	671	match candidate {
				672	Some(CandidateInfo { index, malus: _, last_usage, age: _ }) => {
				673	match self.get(index) {
				674	Some(op) => {
				675	match op.prune(last_usage) {
				676	// We successfully freed up a slot.
				677	Ok(()) => break Ok(()),
				678	// This means the operation we tried to prune was on its way
				679	// out. It also means that the slot it had occupied was freed up.
				680	Err(Error::Km(ErrorCode::INVALID_OPERATION_HANDLE)) => break Ok(()),
				681	// This means the operation we tried to prune was currently
				682	// servicing a request. There are two options.
				683	// * Assume that it was touched, which means that its
				684	// pruning resistance increased. In that case we have
				685	// to start over and find another candidate.
				686	// * Assume that the operation is transitioning to end-of-life.
				687	// which means that we got a free slot for free.
				688	// If we assume the first but the second is true, we prune
				689	// a good operation without need (aggressive approach).
				690	// If we assume the second but the first is true, our
				691	// caller will attempt to create a new KeyMint operation,
				692	// fail with `ErrorCode::TOO_MANY_OPERATIONS`, and call
				693	// us again (conservative approach).
				694	Err(Error::Rc(ResponseCode::OPERATION_BUSY)) => {
				695	// We choose the conservative approach, because
				696	// every needlessly pruned operation can impact
				697	// the user experience.
				698	// To switch to the aggressive approach replace
				699	// the following line with `continue`.
				700	break Ok(());
				701	}
				702
				703	// The candidate may have been touched so the score
				704	// has changed since our evaluation.
				705	_ => continue,
				706	}
				707	}
				708	// This index does not exist any more. The operation
				709	// in this slot was dropped. Good news, a slot
				710	// has freed up.
				711	None => break Ok(()),
				712	}
				713	}
				714	// We did not get a pruning candidate.
				715	None => break Err(Error::Rc(ResponseCode::BACKEND_BUSY)),
				716	}
				717	}
				718	}
				719	}
				720
				721	/// Implementation of IKeystoreOperation.
				722	pub struct KeystoreOperation {
				723	operation: Mutex<Option<Arc<Operation>>>,
				724	}
				725
				726	impl KeystoreOperation {
				727	/// Creates a new operation instance wrapped in a
				728	/// BnKeystoreOperation proxy object. It also
				729	/// calls `IBinder::set_requesting_sid` on the new interface, because
				730	/// we need it for checking Keystore permissions.
				731	pub fn new_native_binder(operation: Arc<Operation>) -> impl IKeystoreOperation + Send {
				732	let result =
				733	BnKeystoreOperation::new_binder(Self { operation: Mutex::new(Some(operation)) });
				734	result.as_binder().set_requesting_sid(true);
				735	result
				736	}
				737
				738	/// Grabs the outer operation mutex and calls `f` on the locked operation.
				739	/// The function also deletes the operation if it returns with an error or if
				740	/// `delete_op` is true.
				741	fn with_locked_operation<T, F>(&self, f: F, delete_op: bool) -> Result<T>
				742	where
				743	for<'a> F: FnOnce(&'a Operation) -> Result<T>,
				744	{
				745	let mut delete_op: bool = delete_op;
				746	match self.operation.try_lock() {
				747	Ok(mut mutex_guard) => {
				748	let result = match &*mutex_guard {
				749	Some(op) => {
				750	let result = f(&*op);
				751	// Any error here means we can discard the operation.
				752	if result.is_err() {
				753	delete_op = true;
				754	}
				755	result
				756	}
				757	None => Err(Error::Km(ErrorCode::INVALID_OPERATION_HANDLE))
				758	.context("In KeystoreOperation::with_locked_operation"),
				759	};
				760
				761	if delete_op {
				762	// We give up our reference to the Operation, thereby freeing up our
				763	// internal resources and ending the wrapped KeyMint operation.
				764	// This KeystoreOperation object will still be owned by an SpIBinder
				765	// until the client drops its remote reference.
				766	*mutex_guard = None;
				767	}
				768	result
				769	}
				770	Err(_) => Err(Error::Rc(ResponseCode::OPERATION_BUSY))
				771	.context("In KeystoreOperation::with_locked_operation"),
				772	}
				773	}
				774	}
				775
				776	impl binder::Interface for KeystoreOperation {}
				777
				778	impl IKeystoreOperation for KeystoreOperation {
				779	fn updateAad(&self, aad_input: &[u8]) -> binder::public_api::Result<()> {
				780	map_or_log_err(
				781	self.with_locked_operation(
				782	\|op\| op.update_aad(aad_input).context("In KeystoreOperation::updateAad"),
				783	false,
				784	),
				785	Ok,
				786	)
				787	}
				788
				789	fn update(&self, input: &[u8]) -> binder::public_api::Result<Option<Vec<u8>>> {
				790	map_or_log_err(
				791	self.with_locked_operation(
				792	\|op\| op.update(input).context("In KeystoreOperation::update"),
				793	false,
				794	),
				795	Ok,
				796	)
				797	}
				798	fn finish(
				799	&self,
				800	input: Option<&[u8]>,
				801	signature: Option<&[u8]>,
				802	) -> binder::public_api::Result<Option<Vec<u8>>> {
				803	map_or_log_err(
				804	self.with_locked_operation(
				805	\|op\| op.finish(input, signature).context("In KeystoreOperation::finish"),
				806	true,
				807	),
				808	Ok,
				809	)
				810	}
				811
				812	fn abort(&self) -> binder::public_api::Result<()> {
				813	map_or_log_err(
				814	self.with_locked_operation(
				815	\|op\| op.abort(Outcome::Abort).context("In KeystoreOperation::abort"),
				816	true,
				817	),
				818	Ok,
				819	)
				820	}
				821	}