Blame - keystore2/src/operation.rs - android_system_security

2020-08-10 14:58:08 -0700

[diff] [blame]

1

2

//

3

// Licensed under the Apache License, Version 2.0 (the "License");

4

// you may not use this file except in compliance with the License.

5

// You may obtain a copy of the License at

6

//

7

// http://www.apache.org/licenses/LICENSE-2.0

8

//

9

// Unless required by applicable law or agreed to in writing, software

10

// distributed under the License is distributed on an "AS IS" BASIS,

11

// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12

// See the License for the specific language governing permissions and

13

// limitations under the License.

14

15

//! This crate implements the `IKeystoreOperation` AIDL interface, which represents

16

//! an ongoing key operation, as well as the operation database, which is mainly

17

//! required for tracking operations for the purpose of pruning.

18

//! This crate also implements an operation pruning strategy.

19

//!

20

//! Operations implement the API calls update, finish, and abort.

21

//! Additionally, an operation can be dropped and pruned. The former

22

//! happens if the client deletes a binder to the operation object.

23

//! An existing operation may get pruned when running out of operation

24

//! slots and a new operation takes precedence.

25

//!

26

//! ## Operation Lifecycle

27

//! An operation gets created when the client calls `IKeystoreSecurityLevel::create`.

28

//! It may receive zero or more update request. The lifecycle ends when:

29

//! * `update` yields an error.

30

//! * `finish` is called.

31

//! * `abort` is called.

32

//! * The operation gets dropped.

33

//! * The operation gets pruned.

34

//! `Operation` has an `Outcome` member. While the outcome is `Outcome::Unknown`,

35

//! the operation is active and in a good state. Any of the above conditions may

36

//! change the outcome to one of the defined outcomes Success, Abort, Dropped,

37

//! Pruned, or ErrorCode. The latter is chosen in the case of an unexpected error, during

38

//! `update` or `finish`. `Success` is chosen iff `finish` completes without error.

39

//! Note that all operations get dropped eventually in the sense that they lose

40

//! their last reference and get destroyed. At that point, the fate of the operation

41

//! gets logged. However, an operation will transition to `Outcome::Dropped` iff

42

//! the operation was still active (`Outcome::Unknown`) at that time.

43

//!

44

//! ## Operation Dropping

45

//! To observe the dropping of an operation, we have to make sure that there

46

//! are no strong references to the IBinder representing this operation.

47

//! This would be simple enough if the operation object would need to be accessed

48

//! only by transactions. But to perform pruning, we have to retain a reference to the

49

//! original operation object.

50

//!

51

//! ## Operation Pruning

52

//! Pruning an operation happens during the creation of a new operation.

53

//! We have to iterate through the operation database to find a suitable

54

//! candidate. Then we abort and finalize this operation setting its outcome to

55

//! `Outcome::Pruned`. The corresponding KeyMint operation slot will have been freed

56

//! up at this point, but the `Operation` object lingers. When the client

57

//! attempts to use the operation again they will receive

58

//! ErrorCode::INVALID_OPERATION_HANDLE indicating that the operation no longer

59

//! exits. This should be the cue for the client to destroy its binder.

60

//! At that point the operation gets dropped.

61

//!

62

//! ## Architecture

63

//! The `IKeystoreOperation` trait is implemented by `KeystoreOperation`.

64

//! This acts as a proxy object holding a strong reference to actual operation

65

//! implementation `Operation`.

66

//!

67

//! ```

68

//! struct KeystoreOperation {

69

//! operation: Mutex<Option<Arc<Operation>>>,

//! }

//! ```

//!

//! The `Mutex` serves two purposes. It provides interior mutability allowing

74

//! us to set the Option to None. We do this when the life cycle ends during

75

//! a call to `update`, `finish`, or `abort`. As a result most of the Operation

76

//! related resources are freed. The `KeystoreOperation` proxy object still

77

//! lingers until dropped by the client.

78

//! The second purpose is to protect operations against concurrent usage.

79

//! Failing to lock this mutex yields `ResponseCode::OPERATION_BUSY` and indicates

80

//! a programming error in the client.

81

//!

82

//! Note that the Mutex only protects the operation against concurrent client calls.

83

//! We still retain weak references to the operation in the operation database:

84

//!

85

//! ```

86

//! struct OperationDb {

87

//! operations: Mutex<Vec<Weak<Operation>>>

//! }

//! ```

//!

//! This allows us to access the operations for the purpose of pruning.

92

//! We do this in three phases.

93

//! 1. We gather the pruning information. Besides non mutable information,

94

//! we access `last_usage` which is protected by a mutex.

95

//! We only lock this mutex for single statements at a time. During

96

//! this phase we hold the operation db lock.

97

//! 2. We choose a pruning candidate by computing the pruning resistance

98

//! of each operation. We do this entirely with information we now

99

//! have on the stack without holding any locks.

100

//! (See `OperationDb::prune` for more details on the pruning strategy.)

101

//! 3. During pruning we briefly lock the operation database again to get the

102

//! the pruning candidate by index. We then attempt to abort the candidate.

103

//! If the candidate was touched in the meantime or is currently fulfilling

104

//! a request (i.e., the client calls update, finish, or abort),

105

//! we go back to 1 and try again.

106

//!

107

//! So the outer Mutex in `KeystoreOperation::operation` only protects

108

//! operations against concurrent client calls but not against concurrent

109

//! pruning attempts. This is what the `Operation::outcome` mutex is used for.

110

//!

111

//! ```

112

//! struct Operation {

113

//! ...

114

//! outcome: Mutex<Outcome>,

//! ...

//! }

//! ```

//!

//! Any request that can change the outcome, i.e., `update`, `finish`, `abort`,

120

//! `drop`, and `prune` has to take the outcome lock and check if the outcome

121

//! is still `Outcome::Unknown` before entering. `prune` is special in that

122

//! it will `try_lock`, because we don't want to be blocked on a potentially

123

//! long running request at another operation. If it fails to get the lock

124

//! the operation is either being touched, which changes its pruning resistance,

125

//! or it transitions to its end-of-life, which means we may get a free slot.

126

//! Either way, we have to revaluate the pruning scores.

127

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

128

use crate::auth_token_handler::AuthTokenHandler;

129

use crate::error::{map_km_error, map_or_log_err, Error, ErrorCode, ResponseCode};

130

use crate::globals::ENFORCEMENTS;

131

use crate::key_parameter::KeyParameter;

132

use crate::utils::Asp;

133

use android_hardware_security_keymint::aidl::android::hardware::security::keymint::{

134

ByteArray::ByteArray, HardwareAuthToken::HardwareAuthToken,

135

IKeyMintOperation::IKeyMintOperation, KeyParameter::KeyParameter as KmParam,

136

KeyParameterArray::KeyParameterArray, KeyParameterValue::KeyParameterValue as KmParamValue,

Janis Danisevskis

2021-01-05 10:37:22 -0800

[diff] [blame]

137

Tag::Tag,

138

};

139

use android_hardware_security_secureclock::aidl::android::hardware::security::secureclock::{

140

TimeStampToken::TimeStampToken,

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

141

};

142

use android_system_keystore2::aidl::android::system::keystore2::{

143

IKeystoreOperation::BnKeystoreOperation, IKeystoreOperation::IKeystoreOperation,

144

OperationChallenge::OperationChallenge,

145

};

146

use anyhow::{anyhow, Context, Result};

147

use binder::{IBinder, Interface};

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

148

use std::{

149

collections::HashMap,

150

sync::{Arc, Mutex, MutexGuard, Weak},

time::Duration,

time::Instant,

};

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

155

/// Operations have `Outcome::Unknown` as long as they are active. They transition

156

/// to one of the other variants exactly once. The distinction in outcome is mainly

157

/// for the statistic.

158

#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]

enum Outcome {

Unknown,

Success,

Abort,

Dropped,

Pruned,

ErrorCode(ErrorCode),

166

}

167

168

/// Operation bundles all of the operation related resources and tracks the operation's

169

/// outcome.

170

#[derive(Debug)]

171

pub struct Operation {

172

// The index of this operation in the OperationDb.

173

index: usize,

174

km_op: Asp,

175

last_usage: Mutex<Instant>,

176

outcome: Mutex<Outcome>,

177

owner: u32, // Uid of the operation's owner.

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

178

auth_token_handler: Mutex<AuthTokenHandler>,

179

// optional because in create_operation, there is a case in which we might not load

180

// key parameters

181

key_params: Option<Vec<KeyParameter>>,

182

op_challenge: Option<OperationChallenge>,

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

}

struct PruningInfo {

last_usage: Instant,

owner: u32,

index: usize,

}

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

191

// We don't except more than 32KiB of data in `update`, `updateAad`, and `finish`.

192

const MAX_RECEIVE_DATA: usize = 0x8000;

193

194

impl Operation {

195

/// Constructor

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

196

pub fn new(

197

index: usize,

198

km_op: Box<dyn IKeyMintOperation>,

199

owner: u32,

200

auth_token_handler: AuthTokenHandler,

201

key_params: Option<Vec<KeyParameter>>,

202

op_challenge: Option<OperationChallenge>,

203

) -> Self {

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

204

Self {

205

index,

206

km_op: Asp::new(km_op.as_binder()),

207

last_usage: Mutex::new(Instant::now()),

208

outcome: Mutex::new(Outcome::Unknown),

209

owner,

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

210

auth_token_handler: Mutex::new(auth_token_handler),

211

key_params,

212

op_challenge,

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

}

}

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

216

fn get_pruning_info(&self) -> Option<PruningInfo> {

217

// An operation may be finalized.

218

if let Ok(guard) = self.outcome.try_lock() {

219

match *guard {

220

Outcome::Unknown => {}

221

// If the outcome is any other than unknown, it has been finalized,

222

// and we can no longer consider it for pruning.

_ => return None,

}

}

// Else: If we could not grab the lock, this means that the operation is currently

227

// being used and it may be transitioning to finalized or it was simply updated.

228

// In any case it is fair game to consider it for pruning. If the operation

229

// transitioned to a final state, we will notice when we attempt to prune, and

230

// a subsequent attempt to create a new operation will succeed.

231

Some(PruningInfo {

232

// Expect safety:

233

// `last_usage` is locked only for primitive single line statements.

234

// There is no chance to panic and poison the mutex.

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

235

last_usage: *self.last_usage.lock().expect("In get_pruning_info."),

236

owner: self.owner,

237

index: self.index,

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

238

})

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

239

}

240

241

fn prune(&self, last_usage: Instant) -> Result<(), Error> {

242

let mut locked_outcome = match self.outcome.try_lock() {

243

Ok(guard) => match *guard {

244

Outcome::Unknown => guard,

245

_ => return Err(Error::Km(ErrorCode::INVALID_OPERATION_HANDLE)),

246

},

247

Err(_) => return Err(Error::Rc(ResponseCode::OPERATION_BUSY)),

248

};

249

250

// In `OperationDb::prune`, which is our caller, we first gather the pruning

251

// information including the last usage. When we select a candidate

252

// we call `prune` on that candidate passing the last_usage

253

// that we gathered earlier. If the actual last usage

254

// has changed since than, it means the operation was busy in the

255

// meantime, which means that we have to reevaluate the pruning score.

256

//

257

// Expect safety:

258

// `last_usage` is locked only for primitive single line statements.

259

// There is no chance to panic and poison the mutex.

260

if *self.last_usage.lock().expect("In Operation::prune()") != last_usage {

261

return Err(Error::Rc(ResponseCode::OPERATION_BUSY));

262

}

263

*locked_outcome = Outcome::Pruned;

264

265

let km_op: Box<dyn IKeyMintOperation> = match self.km_op.get_interface() {

266

Ok(km_op) => km_op,

267

Err(e) => {

268

log::error!("In prune: Failed to get KeyMintOperation interface.\n {:?}", e);

269

return Err(Error::sys());

}

};

// We abort the operation. If there was an error we log it but ignore it.

274

if let Err(e) = map_km_error(km_op.abort()) {

275

log::error!("In prune: KeyMint::abort failed with {:?}.", e);

}

Ok(())

}

// This function takes a Result from a KeyMint call and inspects it for errors.

282

// If an error was found it updates the given `locked_outcome` accordingly.

283

// It forwards the Result unmodified.

284

// The precondition to this call must be *locked_outcome == Outcome::Unknown.

285

// Ideally the `locked_outcome` came from a successful call to `check_active`

286

// see below.

287

fn update_outcome<T>(

288

&self,

289

locked_outcome: &mut Outcome,

290

err: Result<T, Error>,

291

) -> Result<T, Error> {

292

match &err {

293

Err(Error::Km(e)) => *locked_outcome = Outcome::ErrorCode(*e),

294

Err(_) => *locked_outcome = Outcome::ErrorCode(ErrorCode::UNKNOWN_ERROR),

Ok(_) => (),

}

err

}

// This function grabs the outcome lock and checks the current outcome state.

301

// If the outcome is still `Outcome::Unknown`, this function returns

302

// the locked outcome for further updates. In any other case it returns

303

// ErrorCode::INVALID_OPERATION_HANDLE indicating that this operation has

304

// been finalized and is no longer active.

305

fn check_active(&self) -> Result<MutexGuard<Outcome>> {

306

let guard = self.outcome.lock().expect("In check_active.");

307

match *guard {

308

Outcome::Unknown => Ok(guard),

309

_ => Err(Error::Km(ErrorCode::INVALID_OPERATION_HANDLE)).context(format!(

310

"In check_active: Call on finalized operation with outcome: {:?}.",

*guard

)),

}

}

// This function checks the amount of input data sent to us. We reject any buffer

317

// exceeding MAX_RECEIVE_DATA bytes as input to `update`, `update_aad`, and `finish`

318

// in order to force clients into using reasonable limits.

319

fn check_input_length(data: &[u8]) -> Result<()> {

320

if data.len() > MAX_RECEIVE_DATA {

321

// This error code is unique, no context required here.

322

return Err(anyhow!(Error::Rc(ResponseCode::TOO_MUCH_DATA)));

}

Ok(())

}

// Update the last usage to now.

328

fn touch(&self) {

329

// Expect safety:

330

// `last_usage` is locked only for primitive single line statements.

331

// There is no chance to panic and poison the mutex.

332

*self.last_usage.lock().expect("In touch.") = Instant::now();

333

}

334

335

/// Implementation of `IKeystoreOperation::updateAad`.

336

/// Refer to the AIDL spec at system/hardware/interfaces/keystore2 for details.

337

fn update_aad(&self, aad_input: &[u8]) -> Result<()> {

338

let mut outcome = self.check_active().context("In update_aad")?;

339

Self::check_input_length(aad_input).context("In update_aad")?;

340

self.touch();

341

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

342

let params = KeyParameterArray {

343

params: vec![KmParam {

344

tag: Tag::ASSOCIATED_DATA,

Janis Danisevskis

398e6be

2020-12-17 09:29:25 -0800

[diff] [blame]

345

value: KmParamValue::Blob(aad_input.into()),

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

346

}],

347

};

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

348

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

349

let mut out_params: Option<KeyParameterArray> = None;

350

let mut output: Option<ByteArray> = None;

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

351

352

let km_op: Box<dyn IKeyMintOperation> =

353

self.km_op.get_interface().context("In update: Failed to get KeyMintOperation.")?;

self.update_outcome(

&mut *outcome,

map_km_error(km_op.update(

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

358

Some(&params),

359

None,

360

// TODO Get auth token from enforcement module if required.

361

None,

Janis Danisevskis

2021-01-05 10:37:22 -0800

[diff] [blame]

362

// TODO Get timestamp token from enforcement module if required.

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

363

None,

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

&mut out_params,

&mut output,

)),

)

.context("In update_aad: KeyMint::update failed.")?;

Ok(())

}

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

373

/// Based on the authorization information stored in the operation during create_operation(),

374

/// and any previous calls to update(), this function returns appropriate auth token and

Janis Danisevskis

2021-01-05 10:37:22 -0800

[diff] [blame]

375

/// timestamp token to be passed to keymint.

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

376

/// Note that the call to the global enforcement object happens only during the first call to

377

/// update or if finish() is called right after create_opertation.

378

fn handle_authorization<'a>(

379

auth_token_handler: &'a mut AuthTokenHandler,

380

key_params: Option<&Vec<KeyParameter>>,

381

op_challenge: Option<&OperationChallenge>,

Janis Danisevskis

2021-01-05 10:37:22 -0800

[diff] [blame]

382

) -> Result<(Option<&'a HardwareAuthToken>, Option<&'a TimeStampToken>)> {

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

383

// keystore performs authorization only if key parameters have been loaded during

384

// create_operation()

385

if let Some(key_parameters) = key_params {

386

match *auth_token_handler {

387

// this variant is found only in a first call to update or if finish is called

388

// right after create_operation.

389

AuthTokenHandler::OpAuthRequired => {

390

*auth_token_handler = ENFORCEMENTS

391

.authorize_update_or_finish(key_parameters.as_slice(), op_challenge)

392

.context("In handle_authorization.")?;

393

Ok((auth_token_handler.get_auth_token(), None))

394

}

395

// this variant is found only in a first call to update or if finish is called

396

// right after create_operation.

397

AuthTokenHandler::Channel(_)|

398

// this variant is found in every subsequent call to update/finish,

399

// unless the authorization is not required for the key

400

AuthTokenHandler::Token(_, _) => {

Janis Danisevskis

2021-01-05 10:37:22 -0800

[diff] [blame]

401

auth_token_handler.retrieve_auth_and_timestamp_tokens()

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

402

}

403

_ => Ok((None, None))

}

} else {

Ok((None, None))

}

}

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

410

/// Implementation of `IKeystoreOperation::update`.

411

/// Refer to the AIDL spec at system/hardware/interfaces/keystore2 for details.

412

fn update(&self, input: &[u8]) -> Result<Option<Vec<u8>>> {

413

let mut outcome = self.check_active().context("In update")?;

414

Self::check_input_length(input).context("In update")?;

415

self.touch();

416

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

417

let mut out_params: Option<KeyParameterArray> = None;

418

let mut output: Option<ByteArray> = None;

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

419

420

let km_op: Box<dyn IKeyMintOperation> =

421

self.km_op.get_interface().context("In update: Failed to get KeyMintOperation.")?;

422

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

423

let mut auth_handler = self.auth_token_handler.lock().unwrap();

Janis Danisevskis

2021-01-05 10:37:22 -0800

[diff] [blame]

424

let (auth_token_for_km, timestamp_token_for_km) = Self::handle_authorization(

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

425

&mut auth_handler,

426

self.key_params.as_ref(),

427

self.op_challenge.as_ref(),

428

)

429

.context("In update.")?;

430

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

431

self.update_outcome(

432

&mut *outcome,

433

map_km_error(km_op.update(

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

434

None,

435

Some(input),

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

436

auth_token_for_km,

Janis Danisevskis

2021-01-05 10:37:22 -0800

[diff] [blame]

437

timestamp_token_for_km,

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

&mut out_params,

&mut output,

)),

)

.context("In update: KeyMint::update failed.")?;

443

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

444

match output {

Janis Danisevskis

3cfd4a4

2020-11-23 13:42:38 -0800

[diff] [blame]

445

Some(blob) => {

446

if blob.data.is_empty() {

Ok(None)

} else {

Ok(Some(blob.data))

}

}

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

452

None => Ok(None),

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

}

}

/// Implementation of `IKeystoreOperation::finish`.

457

/// Refer to the AIDL spec at system/hardware/interfaces/keystore2 for details.

458

fn finish(&self, input: Option<&[u8]>, signature: Option<&[u8]>) -> Result<Option<Vec<u8>>> {

459

let mut outcome = self.check_active().context("In finish")?;

460

if let Some(input) = input {

461

Self::check_input_length(input).context("In finish")?;

462

}

463

self.touch();

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

464

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

465

let mut out_params: Option<KeyParameterArray> = None;

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

466

467

let km_op: Box<dyn IKeyMintOperation> =

468

self.km_op.get_interface().context("In finish: Failed to get KeyMintOperation.")?;

469

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

470

let mut auth_handler = self.auth_token_handler.lock().unwrap();

Janis Danisevskis

2021-01-05 10:37:22 -0800

[diff] [blame]

471

let (auth_token_for_km, timestamp_token_for_km) = Self::handle_authorization(

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

472

&mut auth_handler,

473

self.key_params.as_ref(),

474

self.op_challenge.as_ref(),

475

)

476

.context("In finish.")?;

477

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

let output = self

.update_outcome(

&mut *outcome,

map_km_error(km_op.finish(

482

None,

483

input,

484

signature,

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

485

auth_token_for_km,

Janis Danisevskis

2021-01-05 10:37:22 -0800

[diff] [blame]

486

timestamp_token_for_km,

Janis Danisevskis

2020-10-23 16:12:59 -0700

[diff] [blame]

&mut out_params,

)),

)

.context("In finish: KeyMint::finish failed.")?;

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

491

492

// At this point the operation concluded successfully.

493

*outcome = Outcome::Success;

494

495

if output.is_empty() {

Ok(None)

} else {

Ok(Some(output))

}

}

/// Aborts the operation if it is active. IFF the operation is aborted the outcome is

503

/// set to `outcome`. `outcome` must reflect the reason for the abort. Since the operation

504

/// gets aborted `outcome` must not be `Operation::Success` or `Operation::Unknown`.

505

fn abort(&self, outcome: Outcome) -> Result<()> {

506

let mut locked_outcome = self.check_active().context("In abort")?;

507

*locked_outcome = outcome;

508

let km_op: Box<dyn IKeyMintOperation> =

509

self.km_op.get_interface().context("In abort: Failed to get KeyMintOperation.")?;

510

511

map_km_error(km_op.abort()).context("In abort: KeyMint::abort failed.")

}

}

impl Drop for Operation {

516

fn drop(&mut self) {

517

if let Ok(Outcome::Unknown) = self.outcome.get_mut() {

518

// If the operation was still active we call abort, setting

519

// the outcome to `Outcome::Dropped`

520

if let Err(e) = self.abort(Outcome::Dropped) {

521

log::error!("While dropping Operation: abort failed:\n {:?}", e);

}

}

}

}

/// The OperationDb holds weak references to all ongoing operations.

528

/// Its main purpose is to facilitate operation pruning.

529

#[derive(Debug, Default)]

530

pub struct OperationDb {

531

// TODO replace Vec with WeakTable when the weak_table crate becomes

532

// available.

533

operations: Mutex<Vec<Weak<Operation>>>,

}

impl OperationDb {

/// Creates a new OperationDb.

538

pub fn new() -> Self {

539

Self { operations: Mutex::new(Vec::new()) }

540

}

541

542

/// Creates a new operation.

543

/// This function takes a KeyMint operation and an associated

544

/// owner uid and returns a new Operation wrapped in a `std::sync::Arc`.

545

pub fn create_operation(

546

&self,

547

km_op: Box<dyn IKeyMintOperation>,

548

owner: u32,

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

549

auth_token_handler: AuthTokenHandler,

550

key_params: Option<Vec<KeyParameter>>,

551

op_challenge: Option<OperationChallenge>,

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

552

) -> Arc<Operation> {

553

// We use unwrap because we don't allow code that can panic while locked.

554

let mut operations = self.operations.lock().expect("In create_operation.");

555

556

let mut index: usize = 0;

557

// First we iterate through the operation slots to try and find an unused

558

// slot. If we don't find one, we append the new entry instead.

559

match (*operations).iter_mut().find(|s| {

560

index += 1;

561

s.upgrade().is_none()

562

}) {

563

Some(free_slot) => {

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

564

let new_op = Arc::new(Operation::new(

index - 1,

km_op,

owner,

auth_token_handler,

key_params,

op_challenge,

));

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

572

*free_slot = Arc::downgrade(&new_op);

573

new_op

574

}

575

None => {

Hasini Gunasinghe

2020-11-17 23:08:39 +0000

[diff] [blame]

576

let new_op = Arc::new(Operation::new(

operations.len(),

km_op,

owner,

auth_token_handler,

key_params,

op_challenge,

));

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

584

operations.push(Arc::downgrade(&new_op));

new_op

}

}

}

fn get(&self, index: usize) -> Option<Arc<Operation>> {

591

self.operations.lock().expect("In OperationDb::get.").get(index).and_then(|op| op.upgrade())

592

}

593

594

/// Attempts to prune an operation.

595

///

596

/// This function is used during operation creation, i.e., by

597

/// `KeystoreSecurityLevel::create_operation`, to try and free up an operation slot

598

/// if it got `ErrorCode::TOO_MANY_OPERATIONS` from the KeyMint backend. It is not

599

/// guaranteed that an operation slot is available after this call successfully

600

/// returned for various reasons. E.g., another thread may have snatched up the newly

601

/// available slot. Callers may have to call prune multiple times before they get a

602

/// free operation slot. Prune may also return `Err(Error::Rc(ResponseCode::BACKEND_BUSY))`

603

/// which indicates that no prunable operation was found.

604

///

605

/// To find a suitable candidate we compute the malus for the caller and each existing

606

/// operation. The malus is the inverse of the pruning power (caller) or pruning

607

/// resistance (existing operation).

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

608

///

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

609

/// The malus is based on the number of sibling operations and age. Sibling

610

/// operations are operations that have the same owner (UID).

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

611

///

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

612

/// Every operation, existing or new, starts with a malus of 1. Every sibling

613

/// increases the malus by one. The age is the time since an operation was last touched.

614

/// It increases the malus by log6(<age in seconds> + 1) rounded down to the next

615

/// integer. So the malus increases stepwise after 5s, 35s, 215s, ...

616

/// Of two operations with the same malus the least recently used one is considered

617

/// weaker.

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

618

///

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

619

/// For the caller to be able to prune an operation it must find an operation

620

/// with a malus higher than its own.

621

///

622

/// The malus can be expressed as

623

/// ```

624

/// malus = 1 + no_of_siblings + floor(log6(age_in_seconds + 1))

625

/// ```

626

/// where the constant `1` accounts for the operation under consideration.

627

/// In reality we compute it as

628

/// ```

629

/// caller_malus = 1 + running_siblings

630

/// ```

631

/// because the new operation has no age and is not included in the `running_siblings`,

632

/// and

633

/// ```

634

/// running_malus = running_siblings + floor(log6(age_in_seconds + 1))

635

/// ```

636

/// because a running operation is included in the `running_siblings` and it has

/// an age.

///

/// ## Example

/// A caller with no running operations has a malus of 1. Young (age < 5s) operations

641

/// also with no siblings have a malus of one and cannot be pruned by the caller.

642

/// We have to find an operation that has at least one sibling or is older than 5s.

643

///

644

/// A caller with one running operation has a malus of 2. Now even young siblings

645

/// or single child aging (5s <= age < 35s) operations are off limit. An aging

646

/// sibling of two, however, would have a malus of 3 and would be fair game.

647

///

648

/// ## Rationale

649

/// Due to the limitation of KeyMint operation slots, we cannot get around pruning or

650

/// a single app could easily DoS KeyMint.

651

/// Keystore 1.0 used to always prune the least recently used operation. This at least

652

/// guaranteed that new operations can always be started. With the increased usage

653

/// of Keystore we saw increased pruning activity which can lead to a livelock

654

/// situation in the worst case.

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

655

///

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

656

/// With the new pruning strategy we want to provide well behaved clients with

657

/// progress assurances while punishing DoS attempts. As a result of this

658

/// strategy we can be in the situation where no operation can be pruned and the

659

/// creation of a new operation fails. This allows single child operations which

660

/// are frequently updated to complete, thereby breaking up livelock situations

661

/// and facilitating system wide progress.

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

662

///

663

/// ## Update

664

/// We also allow callers to cannibalize their own sibling operations if no other

665

/// slot can be found. In this case the least recently used sibling is pruned.

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

666

pub fn prune(&self, caller: u32) -> Result<(), Error> {

667

loop {

668

// Maps the uid of the owner to the number of operations that owner has

669

// (running_siblings). More operations per owner lowers the pruning

670

// resistance of the operations of that owner. Whereas the number of

671

// ongoing operations of the caller lowers the pruning power of the caller.

672

let mut owners: HashMap<u32, u64> = HashMap::new();

673

let mut pruning_info: Vec<PruningInfo> = Vec::new();

674

675

let now = Instant::now();

676

self.operations

677

.lock()

678

.expect("In OperationDb::prune: Trying to lock self.operations.")

679

.iter()

680

.for_each(|op| {

681

if let Some(op) = op.upgrade() {

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

682

if let Some(p_info) = op.get_pruning_info() {

683

let owner = p_info.owner;

684

pruning_info.push(p_info);

685

// Count operations per owner.

686

*owners.entry(owner).or_insert(0) += 1;

687

}

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

}

});

let caller_malus = 1u64 + *owners.entry(caller).or_default();

692

693

// We iterate through all operations computing the malus and finding

694

// the candidate with the highest malus which must also be higher

695

// than the caller_malus.

696

struct CandidateInfo {

index: usize,

malus: u64,

last_usage: Instant,

age: Duration,

}

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

702

let mut oldest_caller_op: Option<CandidateInfo> = None;

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

703

let candidate = pruning_info.iter().fold(

704

None,

705

|acc: Option<CandidateInfo>, &PruningInfo { last_usage, owner, index }| {

706

// Compute the age of the current operation.

707

let age = now

708

.checked_duration_since(last_usage)

709

.unwrap_or_else(|| Duration::new(0, 0));

710

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

711

// Find the least recently used sibling as an alternative pruning candidate.

712

if owner == caller {

713

if let Some(CandidateInfo { age: a, .. }) = oldest_caller_op {

714

if age > a {

715

oldest_caller_op =

716

Some(CandidateInfo { index, malus: 0, last_usage, age });

}

} else {

oldest_caller_op =

Some(CandidateInfo { index, malus: 0, last_usage, age });

}

}

Janis Danisevskis

2020-08-10 14:58:08 -0700

[diff] [blame]

724

// Compute the malus of the current operation.

725

// Expect safety: Every owner in pruning_info was counted in

726

// the owners map. So this unwrap cannot panic.

727

let malus = *owners

728

.get(&owner)

729

.expect("This is odd. We should have counted every owner in pruning_info.")

730

+ ((age.as_secs() + 1) as f64).log(6.0).floor() as u64;

731

732

// Now check if the current operation is a viable/better candidate

733

// the one currently stored in the accumulator.

734

match acc {

735

// First we have to find any operation that is prunable by the caller.

736

None => {

737

if caller_malus < malus {

738

Some(CandidateInfo { index, malus, last_usage, age })

} else {

None

}

}

// If we have found one we look for the operation with the worst score.

744

// If there is a tie, the older operation is considered weaker.

745

Some(CandidateInfo { index: i, malus: m, last_usage: l, age: a }) => {

746

if malus > m || (malus == m && age > a) {

747

Some(CandidateInfo { index, malus, last_usage, age })

748

} else {

749

Some(CandidateInfo { index: i, malus: m, last_usage: l, age: a })

}

}

}

},

);

Janis Danisevskis

2020-10-26 09:35:16 -0700

[diff] [blame]

756

// If we did not find a suitable candidate we may cannibalize our oldest sibling.

757

let candidate = candidate.or(oldest_caller_op);

758

Janis Danisevskis