Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 1 | // Copyright 2020, The Android Open Source Project |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | //! This crate implements the `IKeystoreOperation` AIDL interface, which represents |
| 16 | //! an ongoing key operation, as well as the operation database, which is mainly |
| 17 | //! required for tracking operations for the purpose of pruning. |
| 18 | //! This crate also implements an operation pruning strategy. |
| 19 | //! |
| 20 | //! Operations implement the API calls update, finish, and abort. |
| 21 | //! Additionally, an operation can be dropped and pruned. The former |
| 22 | //! happens if the client deletes a binder to the operation object. |
| 23 | //! An existing operation may get pruned when running out of operation |
| 24 | //! slots and a new operation takes precedence. |
| 25 | //! |
| 26 | //! ## Operation Lifecycle |
| 27 | //! An operation gets created when the client calls `IKeystoreSecurityLevel::create`. |
| 28 | //! It may receive zero or more update request. The lifecycle ends when: |
| 29 | //! * `update` yields an error. |
| 30 | //! * `finish` is called. |
| 31 | //! * `abort` is called. |
| 32 | //! * The operation gets dropped. |
| 33 | //! * The operation gets pruned. |
| 34 | //! `Operation` has an `Outcome` member. While the outcome is `Outcome::Unknown`, |
| 35 | //! the operation is active and in a good state. Any of the above conditions may |
| 36 | //! change the outcome to one of the defined outcomes Success, Abort, Dropped, |
| 37 | //! Pruned, or ErrorCode. The latter is chosen in the case of an unexpected error, during |
| 38 | //! `update` or `finish`. `Success` is chosen iff `finish` completes without error. |
| 39 | //! Note that all operations get dropped eventually in the sense that they lose |
| 40 | //! their last reference and get destroyed. At that point, the fate of the operation |
| 41 | //! gets logged. However, an operation will transition to `Outcome::Dropped` iff |
| 42 | //! the operation was still active (`Outcome::Unknown`) at that time. |
| 43 | //! |
| 44 | //! ## Operation Dropping |
| 45 | //! To observe the dropping of an operation, we have to make sure that there |
| 46 | //! are no strong references to the IBinder representing this operation. |
| 47 | //! This would be simple enough if the operation object would need to be accessed |
| 48 | //! only by transactions. But to perform pruning, we have to retain a reference to the |
| 49 | //! original operation object. |
| 50 | //! |
| 51 | //! ## Operation Pruning |
| 52 | //! Pruning an operation happens during the creation of a new operation. |
| 53 | //! We have to iterate through the operation database to find a suitable |
| 54 | //! candidate. Then we abort and finalize this operation setting its outcome to |
| 55 | //! `Outcome::Pruned`. The corresponding KeyMint operation slot will have been freed |
| 56 | //! up at this point, but the `Operation` object lingers. When the client |
| 57 | //! attempts to use the operation again they will receive |
| 58 | //! ErrorCode::INVALID_OPERATION_HANDLE indicating that the operation no longer |
| 59 | //! exits. This should be the cue for the client to destroy its binder. |
| 60 | //! At that point the operation gets dropped. |
| 61 | //! |
| 62 | //! ## Architecture |
| 63 | //! The `IKeystoreOperation` trait is implemented by `KeystoreOperation`. |
| 64 | //! This acts as a proxy object holding a strong reference to actual operation |
| 65 | //! implementation `Operation`. |
| 66 | //! |
| 67 | //! ``` |
| 68 | //! struct KeystoreOperation { |
| 69 | //! operation: Mutex<Option<Arc<Operation>>>, |
| 70 | //! } |
| 71 | //! ``` |
| 72 | //! |
| 73 | //! The `Mutex` serves two purposes. It provides interior mutability allowing |
| 74 | //! us to set the Option to None. We do this when the life cycle ends during |
| 75 | //! a call to `update`, `finish`, or `abort`. As a result most of the Operation |
| 76 | //! related resources are freed. The `KeystoreOperation` proxy object still |
| 77 | //! lingers until dropped by the client. |
| 78 | //! The second purpose is to protect operations against concurrent usage. |
| 79 | //! Failing to lock this mutex yields `ResponseCode::OPERATION_BUSY` and indicates |
| 80 | //! a programming error in the client. |
| 81 | //! |
| 82 | //! Note that the Mutex only protects the operation against concurrent client calls. |
| 83 | //! We still retain weak references to the operation in the operation database: |
| 84 | //! |
| 85 | //! ``` |
| 86 | //! struct OperationDb { |
| 87 | //! operations: Mutex<Vec<Weak<Operation>>> |
| 88 | //! } |
| 89 | //! ``` |
| 90 | //! |
| 91 | //! This allows us to access the operations for the purpose of pruning. |
| 92 | //! We do this in three phases. |
| 93 | //! 1. We gather the pruning information. Besides non mutable information, |
| 94 | //! we access `last_usage` which is protected by a mutex. |
| 95 | //! We only lock this mutex for single statements at a time. During |
| 96 | //! this phase we hold the operation db lock. |
| 97 | //! 2. We choose a pruning candidate by computing the pruning resistance |
| 98 | //! of each operation. We do this entirely with information we now |
| 99 | //! have on the stack without holding any locks. |
| 100 | //! (See `OperationDb::prune` for more details on the pruning strategy.) |
| 101 | //! 3. During pruning we briefly lock the operation database again to get the |
| 102 | //! the pruning candidate by index. We then attempt to abort the candidate. |
| 103 | //! If the candidate was touched in the meantime or is currently fulfilling |
| 104 | //! a request (i.e., the client calls update, finish, or abort), |
| 105 | //! we go back to 1 and try again. |
| 106 | //! |
| 107 | //! So the outer Mutex in `KeystoreOperation::operation` only protects |
| 108 | //! operations against concurrent client calls but not against concurrent |
| 109 | //! pruning attempts. This is what the `Operation::outcome` mutex is used for. |
| 110 | //! |
| 111 | //! ``` |
| 112 | //! struct Operation { |
| 113 | //! ... |
| 114 | //! outcome: Mutex<Outcome>, |
| 115 | //! ... |
| 116 | //! } |
| 117 | //! ``` |
| 118 | //! |
| 119 | //! Any request that can change the outcome, i.e., `update`, `finish`, `abort`, |
| 120 | //! `drop`, and `prune` has to take the outcome lock and check if the outcome |
| 121 | //! is still `Outcome::Unknown` before entering. `prune` is special in that |
| 122 | //! it will `try_lock`, because we don't want to be blocked on a potentially |
| 123 | //! long running request at another operation. If it fails to get the lock |
| 124 | //! the operation is either being touched, which changes its pruning resistance, |
| 125 | //! or it transitions to its end-of-life, which means we may get a free slot. |
| 126 | //! Either way, we have to revaluate the pruning scores. |
| 127 | |
| 128 | use std::{ |
| 129 | collections::HashMap, |
| 130 | sync::{Arc, Mutex, MutexGuard, Weak}, |
| 131 | time::Duration, |
| 132 | time::Instant, |
| 133 | }; |
| 134 | |
| 135 | use crate::error::{map_km_error, map_or_log_err, Error, ErrorCode, ResponseCode}; |
| 136 | use crate::utils::Asp; |
| 137 | use android_hardware_keymint::aidl::android::hardware::keymint::{ |
Janis Danisevskis | 85d4793 | 2020-10-23 16:12:59 -0700 | [diff] [blame^] | 138 | ByteArray::ByteArray, IKeyMintOperation::IKeyMintOperation, |
| 139 | KeyParameter::KeyParameter as KmParam, KeyParameterArray::KeyParameterArray, Tag::Tag, |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 140 | }; |
| 141 | use android_system_keystore2::aidl::android::system::keystore2::{ |
| 142 | IKeystoreOperation::BnKeystoreOperation, IKeystoreOperation::IKeystoreOperation, |
| 143 | }; |
| 144 | use anyhow::{anyhow, Context, Result}; |
| 145 | use binder::{IBinder, Interface}; |
| 146 | |
| 147 | /// Operations have `Outcome::Unknown` as long as they are active. They transition |
| 148 | /// to one of the other variants exactly once. The distinction in outcome is mainly |
| 149 | /// for the statistic. |
| 150 | #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)] |
| 151 | enum Outcome { |
| 152 | Unknown, |
| 153 | Success, |
| 154 | Abort, |
| 155 | Dropped, |
| 156 | Pruned, |
| 157 | ErrorCode(ErrorCode), |
| 158 | } |
| 159 | |
| 160 | /// Operation bundles all of the operation related resources and tracks the operation's |
| 161 | /// outcome. |
| 162 | #[derive(Debug)] |
| 163 | pub struct Operation { |
| 164 | // The index of this operation in the OperationDb. |
| 165 | index: usize, |
| 166 | km_op: Asp, |
| 167 | last_usage: Mutex<Instant>, |
| 168 | outcome: Mutex<Outcome>, |
| 169 | owner: u32, // Uid of the operation's owner. |
| 170 | } |
| 171 | |
| 172 | struct PruningInfo { |
| 173 | last_usage: Instant, |
| 174 | owner: u32, |
| 175 | index: usize, |
| 176 | } |
| 177 | |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 178 | // We don't except more than 32KiB of data in `update`, `updateAad`, and `finish`. |
| 179 | const MAX_RECEIVE_DATA: usize = 0x8000; |
| 180 | |
| 181 | impl Operation { |
| 182 | /// Constructor |
| 183 | pub fn new(index: usize, km_op: Box<dyn IKeyMintOperation>, owner: u32) -> Self { |
| 184 | Self { |
| 185 | index, |
| 186 | km_op: Asp::new(km_op.as_binder()), |
| 187 | last_usage: Mutex::new(Instant::now()), |
| 188 | outcome: Mutex::new(Outcome::Unknown), |
| 189 | owner, |
| 190 | } |
| 191 | } |
| 192 | |
| 193 | fn get_pruning_info(&self) -> PruningInfo { |
| 194 | // Expect safety: |
| 195 | // `last_usage` is locked only for primitive single line statements. |
| 196 | // There is no chance to panic and poison the mutex. |
| 197 | PruningInfo { |
| 198 | last_usage: *self.last_usage.lock().expect("In get_pruning_info."), |
| 199 | owner: self.owner, |
| 200 | index: self.index, |
| 201 | } |
| 202 | } |
| 203 | |
| 204 | fn prune(&self, last_usage: Instant) -> Result<(), Error> { |
| 205 | let mut locked_outcome = match self.outcome.try_lock() { |
| 206 | Ok(guard) => match *guard { |
| 207 | Outcome::Unknown => guard, |
| 208 | _ => return Err(Error::Km(ErrorCode::INVALID_OPERATION_HANDLE)), |
| 209 | }, |
| 210 | Err(_) => return Err(Error::Rc(ResponseCode::OPERATION_BUSY)), |
| 211 | }; |
| 212 | |
| 213 | // In `OperationDb::prune`, which is our caller, we first gather the pruning |
| 214 | // information including the last usage. When we select a candidate |
| 215 | // we call `prune` on that candidate passing the last_usage |
| 216 | // that we gathered earlier. If the actual last usage |
| 217 | // has changed since than, it means the operation was busy in the |
| 218 | // meantime, which means that we have to reevaluate the pruning score. |
| 219 | // |
| 220 | // Expect safety: |
| 221 | // `last_usage` is locked only for primitive single line statements. |
| 222 | // There is no chance to panic and poison the mutex. |
| 223 | if *self.last_usage.lock().expect("In Operation::prune()") != last_usage { |
| 224 | return Err(Error::Rc(ResponseCode::OPERATION_BUSY)); |
| 225 | } |
| 226 | *locked_outcome = Outcome::Pruned; |
| 227 | |
| 228 | let km_op: Box<dyn IKeyMintOperation> = match self.km_op.get_interface() { |
| 229 | Ok(km_op) => km_op, |
| 230 | Err(e) => { |
| 231 | log::error!("In prune: Failed to get KeyMintOperation interface.\n {:?}", e); |
| 232 | return Err(Error::sys()); |
| 233 | } |
| 234 | }; |
| 235 | |
| 236 | // We abort the operation. If there was an error we log it but ignore it. |
| 237 | if let Err(e) = map_km_error(km_op.abort()) { |
| 238 | log::error!("In prune: KeyMint::abort failed with {:?}.", e); |
| 239 | } |
| 240 | |
| 241 | Ok(()) |
| 242 | } |
| 243 | |
| 244 | // This function takes a Result from a KeyMint call and inspects it for errors. |
| 245 | // If an error was found it updates the given `locked_outcome` accordingly. |
| 246 | // It forwards the Result unmodified. |
| 247 | // The precondition to this call must be *locked_outcome == Outcome::Unknown. |
| 248 | // Ideally the `locked_outcome` came from a successful call to `check_active` |
| 249 | // see below. |
| 250 | fn update_outcome<T>( |
| 251 | &self, |
| 252 | locked_outcome: &mut Outcome, |
| 253 | err: Result<T, Error>, |
| 254 | ) -> Result<T, Error> { |
| 255 | match &err { |
| 256 | Err(Error::Km(e)) => *locked_outcome = Outcome::ErrorCode(*e), |
| 257 | Err(_) => *locked_outcome = Outcome::ErrorCode(ErrorCode::UNKNOWN_ERROR), |
| 258 | Ok(_) => (), |
| 259 | } |
| 260 | err |
| 261 | } |
| 262 | |
| 263 | // This function grabs the outcome lock and checks the current outcome state. |
| 264 | // If the outcome is still `Outcome::Unknown`, this function returns |
| 265 | // the locked outcome for further updates. In any other case it returns |
| 266 | // ErrorCode::INVALID_OPERATION_HANDLE indicating that this operation has |
| 267 | // been finalized and is no longer active. |
| 268 | fn check_active(&self) -> Result<MutexGuard<Outcome>> { |
| 269 | let guard = self.outcome.lock().expect("In check_active."); |
| 270 | match *guard { |
| 271 | Outcome::Unknown => Ok(guard), |
| 272 | _ => Err(Error::Km(ErrorCode::INVALID_OPERATION_HANDLE)).context(format!( |
| 273 | "In check_active: Call on finalized operation with outcome: {:?}.", |
| 274 | *guard |
| 275 | )), |
| 276 | } |
| 277 | } |
| 278 | |
| 279 | // This function checks the amount of input data sent to us. We reject any buffer |
| 280 | // exceeding MAX_RECEIVE_DATA bytes as input to `update`, `update_aad`, and `finish` |
| 281 | // in order to force clients into using reasonable limits. |
| 282 | fn check_input_length(data: &[u8]) -> Result<()> { |
| 283 | if data.len() > MAX_RECEIVE_DATA { |
| 284 | // This error code is unique, no context required here. |
| 285 | return Err(anyhow!(Error::Rc(ResponseCode::TOO_MUCH_DATA))); |
| 286 | } |
| 287 | Ok(()) |
| 288 | } |
| 289 | |
| 290 | // Update the last usage to now. |
| 291 | fn touch(&self) { |
| 292 | // Expect safety: |
| 293 | // `last_usage` is locked only for primitive single line statements. |
| 294 | // There is no chance to panic and poison the mutex. |
| 295 | *self.last_usage.lock().expect("In touch.") = Instant::now(); |
| 296 | } |
| 297 | |
| 298 | /// Implementation of `IKeystoreOperation::updateAad`. |
| 299 | /// Refer to the AIDL spec at system/hardware/interfaces/keystore2 for details. |
| 300 | fn update_aad(&self, aad_input: &[u8]) -> Result<()> { |
| 301 | let mut outcome = self.check_active().context("In update_aad")?; |
| 302 | Self::check_input_length(aad_input).context("In update_aad")?; |
| 303 | self.touch(); |
| 304 | |
Janis Danisevskis | 85d4793 | 2020-10-23 16:12:59 -0700 | [diff] [blame^] | 305 | let params = KeyParameterArray { |
| 306 | params: vec![KmParam { |
| 307 | tag: Tag::ASSOCIATED_DATA, |
| 308 | blob: aad_input.into(), |
| 309 | ..Default::default() |
| 310 | }], |
| 311 | }; |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 312 | |
Janis Danisevskis | 85d4793 | 2020-10-23 16:12:59 -0700 | [diff] [blame^] | 313 | let mut out_params: Option<KeyParameterArray> = None; |
| 314 | let mut output: Option<ByteArray> = None; |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 315 | |
| 316 | let km_op: Box<dyn IKeyMintOperation> = |
| 317 | self.km_op.get_interface().context("In update: Failed to get KeyMintOperation.")?; |
| 318 | |
| 319 | self.update_outcome( |
| 320 | &mut *outcome, |
| 321 | map_km_error(km_op.update( |
Janis Danisevskis | 85d4793 | 2020-10-23 16:12:59 -0700 | [diff] [blame^] | 322 | Some(¶ms), |
| 323 | None, |
| 324 | // TODO Get auth token from enforcement module if required. |
| 325 | None, |
| 326 | // TODO Get verification token from enforcement module if required. |
| 327 | None, |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 328 | &mut out_params, |
| 329 | &mut output, |
| 330 | )), |
| 331 | ) |
| 332 | .context("In update_aad: KeyMint::update failed.")?; |
| 333 | |
| 334 | Ok(()) |
| 335 | } |
| 336 | |
| 337 | /// Implementation of `IKeystoreOperation::update`. |
| 338 | /// Refer to the AIDL spec at system/hardware/interfaces/keystore2 for details. |
| 339 | fn update(&self, input: &[u8]) -> Result<Option<Vec<u8>>> { |
| 340 | let mut outcome = self.check_active().context("In update")?; |
| 341 | Self::check_input_length(input).context("In update")?; |
| 342 | self.touch(); |
| 343 | |
Janis Danisevskis | 85d4793 | 2020-10-23 16:12:59 -0700 | [diff] [blame^] | 344 | let mut out_params: Option<KeyParameterArray> = None; |
| 345 | let mut output: Option<ByteArray> = None; |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 346 | |
| 347 | let km_op: Box<dyn IKeyMintOperation> = |
| 348 | self.km_op.get_interface().context("In update: Failed to get KeyMintOperation.")?; |
| 349 | |
| 350 | self.update_outcome( |
| 351 | &mut *outcome, |
| 352 | map_km_error(km_op.update( |
Janis Danisevskis | 85d4793 | 2020-10-23 16:12:59 -0700 | [diff] [blame^] | 353 | None, |
| 354 | Some(input), |
| 355 | // TODO Get auth token from enforcement module if required. |
| 356 | None, |
| 357 | // TODO Get verification token from enforcement module if required. |
| 358 | None, |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 359 | &mut out_params, |
| 360 | &mut output, |
| 361 | )), |
| 362 | ) |
| 363 | .context("In update: KeyMint::update failed.")?; |
| 364 | |
Janis Danisevskis | 85d4793 | 2020-10-23 16:12:59 -0700 | [diff] [blame^] | 365 | match output { |
| 366 | Some(blob) => Ok(Some(blob.data)), |
| 367 | None => Ok(None), |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 368 | } |
| 369 | } |
| 370 | |
| 371 | /// Implementation of `IKeystoreOperation::finish`. |
| 372 | /// Refer to the AIDL spec at system/hardware/interfaces/keystore2 for details. |
| 373 | fn finish(&self, input: Option<&[u8]>, signature: Option<&[u8]>) -> Result<Option<Vec<u8>>> { |
| 374 | let mut outcome = self.check_active().context("In finish")?; |
| 375 | if let Some(input) = input { |
| 376 | Self::check_input_length(input).context("In finish")?; |
| 377 | } |
| 378 | self.touch(); |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 379 | |
Janis Danisevskis | 85d4793 | 2020-10-23 16:12:59 -0700 | [diff] [blame^] | 380 | let mut out_params: Option<KeyParameterArray> = None; |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 381 | |
| 382 | let km_op: Box<dyn IKeyMintOperation> = |
| 383 | self.km_op.get_interface().context("In finish: Failed to get KeyMintOperation.")?; |
| 384 | |
Janis Danisevskis | 85d4793 | 2020-10-23 16:12:59 -0700 | [diff] [blame^] | 385 | let output = self |
| 386 | .update_outcome( |
| 387 | &mut *outcome, |
| 388 | map_km_error(km_op.finish( |
| 389 | None, |
| 390 | input, |
| 391 | signature, |
| 392 | // TODO Get auth token from enforcement module if required. |
| 393 | None, |
| 394 | // TODO Get verification token from enforcement module if required. |
| 395 | None, |
| 396 | &mut out_params, |
| 397 | )), |
| 398 | ) |
| 399 | .context("In finish: KeyMint::finish failed.")?; |
Janis Danisevskis | 1af9126 | 2020-08-10 14:58:08 -0700 | [diff] [blame] | 400 | |
| 401 | // At this point the operation concluded successfully. |
| 402 | *outcome = Outcome::Success; |
| 403 | |
| 404 | if output.is_empty() { |
| 405 | Ok(None) |
| 406 | } else { |
| 407 | Ok(Some(output)) |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | /// Aborts the operation if it is active. IFF the operation is aborted the outcome is |
| 412 | /// set to `outcome`. `outcome` must reflect the reason for the abort. Since the operation |
| 413 | /// gets aborted `outcome` must not be `Operation::Success` or `Operation::Unknown`. |
| 414 | fn abort(&self, outcome: Outcome) -> Result<()> { |
| 415 | let mut locked_outcome = self.check_active().context("In abort")?; |
| 416 | *locked_outcome = outcome; |
| 417 | let km_op: Box<dyn IKeyMintOperation> = |
| 418 | self.km_op.get_interface().context("In abort: Failed to get KeyMintOperation.")?; |
| 419 | |
| 420 | map_km_error(km_op.abort()).context("In abort: KeyMint::abort failed.") |
| 421 | } |
| 422 | } |
| 423 | |
| 424 | impl Drop for Operation { |
| 425 | fn drop(&mut self) { |
| 426 | if let Ok(Outcome::Unknown) = self.outcome.get_mut() { |
| 427 | // If the operation was still active we call abort, setting |
| 428 | // the outcome to `Outcome::Dropped` |
| 429 | if let Err(e) = self.abort(Outcome::Dropped) { |
| 430 | log::error!("While dropping Operation: abort failed:\n {:?}", e); |
| 431 | } |
| 432 | } |
| 433 | } |
| 434 | } |
| 435 | |
| 436 | /// The OperationDb holds weak references to all ongoing operations. |
| 437 | /// Its main purpose is to facilitate operation pruning. |
| 438 | #[derive(Debug, Default)] |
| 439 | pub struct OperationDb { |
| 440 | // TODO replace Vec with WeakTable when the weak_table crate becomes |
| 441 | // available. |
| 442 | operations: Mutex<Vec<Weak<Operation>>>, |
| 443 | } |
| 444 | |
| 445 | impl OperationDb { |
| 446 | /// Creates a new OperationDb. |
| 447 | pub fn new() -> Self { |
| 448 | Self { operations: Mutex::new(Vec::new()) } |
| 449 | } |
| 450 | |
| 451 | /// Creates a new operation. |
| 452 | /// This function takes a KeyMint operation and an associated |
| 453 | /// owner uid and returns a new Operation wrapped in a `std::sync::Arc`. |
| 454 | pub fn create_operation( |
| 455 | &self, |
| 456 | km_op: Box<dyn IKeyMintOperation>, |
| 457 | owner: u32, |
| 458 | ) -> Arc<Operation> { |
| 459 | // We use unwrap because we don't allow code that can panic while locked. |
| 460 | let mut operations = self.operations.lock().expect("In create_operation."); |
| 461 | |
| 462 | let mut index: usize = 0; |
| 463 | // First we iterate through the operation slots to try and find an unused |
| 464 | // slot. If we don't find one, we append the new entry instead. |
| 465 | match (*operations).iter_mut().find(|s| { |
| 466 | index += 1; |
| 467 | s.upgrade().is_none() |
| 468 | }) { |
| 469 | Some(free_slot) => { |
| 470 | let new_op = Arc::new(Operation::new(index - 1, km_op, owner)); |
| 471 | *free_slot = Arc::downgrade(&new_op); |
| 472 | new_op |
| 473 | } |
| 474 | None => { |
| 475 | let new_op = Arc::new(Operation::new(operations.len(), km_op, owner)); |
| 476 | operations.push(Arc::downgrade(&new_op)); |
| 477 | new_op |
| 478 | } |
| 479 | } |
| 480 | } |
| 481 | |
| 482 | fn get(&self, index: usize) -> Option<Arc<Operation>> { |
| 483 | self.operations.lock().expect("In OperationDb::get.").get(index).and_then(|op| op.upgrade()) |
| 484 | } |
| 485 | |
| 486 | /// Attempts to prune an operation. |
| 487 | /// |
| 488 | /// This function is used during operation creation, i.e., by |
| 489 | /// `KeystoreSecurityLevel::create_operation`, to try and free up an operation slot |
| 490 | /// if it got `ErrorCode::TOO_MANY_OPERATIONS` from the KeyMint backend. It is not |
| 491 | /// guaranteed that an operation slot is available after this call successfully |
| 492 | /// returned for various reasons. E.g., another thread may have snatched up the newly |
| 493 | /// available slot. Callers may have to call prune multiple times before they get a |
| 494 | /// free operation slot. Prune may also return `Err(Error::Rc(ResponseCode::BACKEND_BUSY))` |
| 495 | /// which indicates that no prunable operation was found. |
| 496 | /// |
| 497 | /// To find a suitable candidate we compute the malus for the caller and each existing |
| 498 | /// operation. The malus is the inverse of the pruning power (caller) or pruning |
| 499 | /// resistance (existing operation). |
| 500 | /// The malus is based on the number of sibling operations and age. Sibling |
| 501 | /// operations are operations that have the same owner (UID). |
| 502 | /// Every operation, existing or new, starts with a malus of 1. Every sibling |
| 503 | /// increases the malus by one. The age is the time since an operation was last touched. |
| 504 | /// It increases the malus by log6(<age in seconds> + 1) rounded down to the next |
| 505 | /// integer. So the malus increases stepwise after 5s, 35s, 215s, ... |
| 506 | /// Of two operations with the same malus the least recently used one is considered |
| 507 | /// weaker. |
| 508 | /// For the caller to be able to prune an operation it must find an operation |
| 509 | /// with a malus higher than its own. |
| 510 | /// |
| 511 | /// The malus can be expressed as |
| 512 | /// ``` |
| 513 | /// malus = 1 + no_of_siblings + floor(log6(age_in_seconds + 1)) |
| 514 | /// ``` |
| 515 | /// where the constant `1` accounts for the operation under consideration. |
| 516 | /// In reality we compute it as |
| 517 | /// ``` |
| 518 | /// caller_malus = 1 + running_siblings |
| 519 | /// ``` |
| 520 | /// because the new operation has no age and is not included in the `running_siblings`, |
| 521 | /// and |
| 522 | /// ``` |
| 523 | /// running_malus = running_siblings + floor(log6(age_in_seconds + 1)) |
| 524 | /// ``` |
| 525 | /// because a running operation is included in the `running_siblings` and it has |
| 526 | /// an age. |
| 527 | /// |
| 528 | /// ## Example |
| 529 | /// A caller with no running operations has a malus of 1. Young (age < 5s) operations |
| 530 | /// also with no siblings have a malus of one and cannot be pruned by the caller. |
| 531 | /// We have to find an operation that has at least one sibling or is older than 5s. |
| 532 | /// |
| 533 | /// A caller with one running operation has a malus of 2. Now even young siblings |
| 534 | /// or single child aging (5s <= age < 35s) operations are off limit. An aging |
| 535 | /// sibling of two, however, would have a malus of 3 and would be fair game. |
| 536 | /// |
| 537 | /// ## Rationale |
| 538 | /// Due to the limitation of KeyMint operation slots, we cannot get around pruning or |
| 539 | /// a single app could easily DoS KeyMint. |
| 540 | /// Keystore 1.0 used to always prune the least recently used operation. This at least |
| 541 | /// guaranteed that new operations can always be started. With the increased usage |
| 542 | /// of Keystore we saw increased pruning activity which can lead to a livelock |
| 543 | /// situation in the worst case. |
| 544 | /// With the new pruning strategy we want to provide well behaved clients with |
| 545 | /// progress assurances while punishing DoS attempts. As a result of this |
| 546 | /// strategy we can be in the situation where no operation can be pruned and the |
| 547 | /// creation of a new operation fails. This allows single child operations which |
| 548 | /// are frequently updated to complete, thereby breaking up livelock situations |
| 549 | /// and facilitating system wide progress. |
| 550 | pub fn prune(&self, caller: u32) -> Result<(), Error> { |
| 551 | loop { |
| 552 | // Maps the uid of the owner to the number of operations that owner has |
| 553 | // (running_siblings). More operations per owner lowers the pruning |
| 554 | // resistance of the operations of that owner. Whereas the number of |
| 555 | // ongoing operations of the caller lowers the pruning power of the caller. |
| 556 | let mut owners: HashMap<u32, u64> = HashMap::new(); |
| 557 | let mut pruning_info: Vec<PruningInfo> = Vec::new(); |
| 558 | |
| 559 | let now = Instant::now(); |
| 560 | self.operations |
| 561 | .lock() |
| 562 | .expect("In OperationDb::prune: Trying to lock self.operations.") |
| 563 | .iter() |
| 564 | .for_each(|op| { |
| 565 | if let Some(op) = op.upgrade() { |
| 566 | let p_info = op.get_pruning_info(); |
| 567 | let owner = p_info.owner; |
| 568 | pruning_info.push(p_info); |
| 569 | // Count operations per owner. |
| 570 | *owners.entry(owner).or_insert(0) += 1; |
| 571 | } |
| 572 | }); |
| 573 | |
| 574 | let caller_malus = 1u64 + *owners.entry(caller).or_default(); |
| 575 | |
| 576 | // We iterate through all operations computing the malus and finding |
| 577 | // the candidate with the highest malus which must also be higher |
| 578 | // than the caller_malus. |
| 579 | struct CandidateInfo { |
| 580 | index: usize, |
| 581 | malus: u64, |
| 582 | last_usage: Instant, |
| 583 | age: Duration, |
| 584 | } |
| 585 | let candidate = pruning_info.iter().fold( |
| 586 | None, |
| 587 | |acc: Option<CandidateInfo>, &PruningInfo { last_usage, owner, index }| { |
| 588 | // Compute the age of the current operation. |
| 589 | let age = now |
| 590 | .checked_duration_since(last_usage) |
| 591 | .unwrap_or_else(|| Duration::new(0, 0)); |
| 592 | |
| 593 | // Compute the malus of the current operation. |
| 594 | // Expect safety: Every owner in pruning_info was counted in |
| 595 | // the owners map. So this unwrap cannot panic. |
| 596 | let malus = *owners |
| 597 | .get(&owner) |
| 598 | .expect("This is odd. We should have counted every owner in pruning_info.") |
| 599 | + ((age.as_secs() + 1) as f64).log(6.0).floor() as u64; |
| 600 | |
| 601 | // Now check if the current operation is a viable/better candidate |
| 602 | // the one currently stored in the accumulator. |
| 603 | match acc { |
| 604 | // First we have to find any operation that is prunable by the caller. |
| 605 | None => { |
| 606 | if caller_malus < malus { |
| 607 | Some(CandidateInfo { index, malus, last_usage, age }) |
| 608 | } else { |
| 609 | None |
| 610 | } |
| 611 | } |
| 612 | // If we have found one we look for the operation with the worst score. |
| 613 | // If there is a tie, the older operation is considered weaker. |
| 614 | Some(CandidateInfo { index: i, malus: m, last_usage: l, age: a }) => { |
| 615 | if malus > m || (malus == m && age > a) { |
| 616 | Some(CandidateInfo { index, malus, last_usage, age }) |
| 617 | } else { |
| 618 | Some(CandidateInfo { index: i, malus: m, last_usage: l, age: a }) |
| 619 | } |
| 620 | } |
| 621 | } |
| 622 | }, |
| 623 | ); |
| 624 | |
| 625 | match candidate { |
| 626 | Some(CandidateInfo { index, malus: _, last_usage, age: _ }) => { |
| 627 | match self.get(index) { |
| 628 | Some(op) => { |
| 629 | match op.prune(last_usage) { |
| 630 | // We successfully freed up a slot. |
| 631 | Ok(()) => break Ok(()), |
| 632 | // This means the operation we tried to prune was on its way |
| 633 | // out. It also means that the slot it had occupied was freed up. |
| 634 | Err(Error::Km(ErrorCode::INVALID_OPERATION_HANDLE)) => break Ok(()), |
| 635 | // This means the operation we tried to prune was currently |
| 636 | // servicing a request. There are two options. |
| 637 | // * Assume that it was touched, which means that its |
| 638 | // pruning resistance increased. In that case we have |
| 639 | // to start over and find another candidate. |
| 640 | // * Assume that the operation is transitioning to end-of-life. |
| 641 | // which means that we got a free slot for free. |
| 642 | // If we assume the first but the second is true, we prune |
| 643 | // a good operation without need (aggressive approach). |
| 644 | // If we assume the second but the first is true, our |
| 645 | // caller will attempt to create a new KeyMint operation, |
| 646 | // fail with `ErrorCode::TOO_MANY_OPERATIONS`, and call |
| 647 | // us again (conservative approach). |
| 648 | Err(Error::Rc(ResponseCode::OPERATION_BUSY)) => { |
| 649 | // We choose the conservative approach, because |
| 650 | // every needlessly pruned operation can impact |
| 651 | // the user experience. |
| 652 | // To switch to the aggressive approach replace |
| 653 | // the following line with `continue`. |
| 654 | break Ok(()); |
| 655 | } |
| 656 | |
| 657 | // The candidate may have been touched so the score |
| 658 | // has changed since our evaluation. |
| 659 | _ => continue, |
| 660 | } |
| 661 | } |
| 662 | // This index does not exist any more. The operation |
| 663 | // in this slot was dropped. Good news, a slot |
| 664 | // has freed up. |
| 665 | None => break Ok(()), |
| 666 | } |
| 667 | } |
| 668 | // We did not get a pruning candidate. |
| 669 | None => break Err(Error::Rc(ResponseCode::BACKEND_BUSY)), |
| 670 | } |
| 671 | } |
| 672 | } |
| 673 | } |
| 674 | |
| 675 | /// Implementation of IKeystoreOperation. |
| 676 | pub struct KeystoreOperation { |
| 677 | operation: Mutex<Option<Arc<Operation>>>, |
| 678 | } |
| 679 | |
| 680 | impl KeystoreOperation { |
| 681 | /// Creates a new operation instance wrapped in a |
| 682 | /// BnKeystoreOperation proxy object. It also |
| 683 | /// calls `IBinder::set_requesting_sid` on the new interface, because |
| 684 | /// we need it for checking Keystore permissions. |
| 685 | pub fn new_native_binder(operation: Arc<Operation>) -> impl IKeystoreOperation + Send { |
| 686 | let result = |
| 687 | BnKeystoreOperation::new_binder(Self { operation: Mutex::new(Some(operation)) }); |
| 688 | result.as_binder().set_requesting_sid(true); |
| 689 | result |
| 690 | } |
| 691 | |
| 692 | /// Grabs the outer operation mutex and calls `f` on the locked operation. |
| 693 | /// The function also deletes the operation if it returns with an error or if |
| 694 | /// `delete_op` is true. |
| 695 | fn with_locked_operation<T, F>(&self, f: F, delete_op: bool) -> Result<T> |
| 696 | where |
| 697 | for<'a> F: FnOnce(&'a Operation) -> Result<T>, |
| 698 | { |
| 699 | let mut delete_op: bool = delete_op; |
| 700 | match self.operation.try_lock() { |
| 701 | Ok(mut mutex_guard) => { |
| 702 | let result = match &*mutex_guard { |
| 703 | Some(op) => { |
| 704 | let result = f(&*op); |
| 705 | // Any error here means we can discard the operation. |
| 706 | if result.is_err() { |
| 707 | delete_op = true; |
| 708 | } |
| 709 | result |
| 710 | } |
| 711 | None => Err(Error::Km(ErrorCode::INVALID_OPERATION_HANDLE)) |
| 712 | .context("In KeystoreOperation::with_locked_operation"), |
| 713 | }; |
| 714 | |
| 715 | if delete_op { |
| 716 | // We give up our reference to the Operation, thereby freeing up our |
| 717 | // internal resources and ending the wrapped KeyMint operation. |
| 718 | // This KeystoreOperation object will still be owned by an SpIBinder |
| 719 | // until the client drops its remote reference. |
| 720 | *mutex_guard = None; |
| 721 | } |
| 722 | result |
| 723 | } |
| 724 | Err(_) => Err(Error::Rc(ResponseCode::OPERATION_BUSY)) |
| 725 | .context("In KeystoreOperation::with_locked_operation"), |
| 726 | } |
| 727 | } |
| 728 | } |
| 729 | |
| 730 | impl binder::Interface for KeystoreOperation {} |
| 731 | |
| 732 | impl IKeystoreOperation for KeystoreOperation { |
| 733 | fn updateAad(&self, aad_input: &[u8]) -> binder::public_api::Result<()> { |
| 734 | map_or_log_err( |
| 735 | self.with_locked_operation( |
| 736 | |op| op.update_aad(aad_input).context("In KeystoreOperation::updateAad"), |
| 737 | false, |
| 738 | ), |
| 739 | Ok, |
| 740 | ) |
| 741 | } |
| 742 | |
| 743 | fn update(&self, input: &[u8]) -> binder::public_api::Result<Option<Vec<u8>>> { |
| 744 | map_or_log_err( |
| 745 | self.with_locked_operation( |
| 746 | |op| op.update(input).context("In KeystoreOperation::update"), |
| 747 | false, |
| 748 | ), |
| 749 | Ok, |
| 750 | ) |
| 751 | } |
| 752 | fn finish( |
| 753 | &self, |
| 754 | input: Option<&[u8]>, |
| 755 | signature: Option<&[u8]>, |
| 756 | ) -> binder::public_api::Result<Option<Vec<u8>>> { |
| 757 | map_or_log_err( |
| 758 | self.with_locked_operation( |
| 759 | |op| op.finish(input, signature).context("In KeystoreOperation::finish"), |
| 760 | true, |
| 761 | ), |
| 762 | Ok, |
| 763 | ) |
| 764 | } |
| 765 | |
| 766 | fn abort(&self) -> binder::public_api::Result<()> { |
| 767 | map_or_log_err( |
| 768 | self.with_locked_operation( |
| 769 | |op| op.abort(Outcome::Abort).context("In KeystoreOperation::abort"), |
| 770 | true, |
| 771 | ), |
| 772 | Ok, |
| 773 | ) |
| 774 | } |
| 775 | } |