blob: 89ed6f82548a57a1c117d6bce9ab37ec6e075b98 [file] [log] [blame]
Yifan Hongdad0af82020-02-19 17:19:49 -08001//
2// Copyright (C) 2020 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16#include "update_engine/cleanup_previous_update_action.h"
17
Yifan Hongd976cc52020-02-25 14:51:42 -080018#include <chrono> // NOLINT(build/c++11) -- for merge times
Yifan Hongdad0af82020-02-19 17:19:49 -080019#include <functional>
20#include <string>
Yifan Hong24031712020-03-19 19:25:38 -070021#include <type_traits>
Yifan Hongdad0af82020-02-19 17:19:49 -080022
23#include <android-base/properties.h>
24#include <base/bind.h>
25
Yifan Hongd976cc52020-02-25 14:51:42 -080026#ifndef __ANDROID_RECOVERY__
27#include <statslog.h>
28#endif
29
Yifan Hongdad0af82020-02-19 17:19:49 -080030#include "update_engine/common/utils.h"
31#include "update_engine/payload_consumer/delta_performer.h"
32
Howard Chen224aed92020-04-17 11:22:13 +080033using android::base::GetBoolProperty;
Yifan Hongf9cb4492020-04-15 13:00:20 -070034using android::snapshot::ISnapshotManager;
Yifan Hongd976cc52020-02-25 14:51:42 -080035using android::snapshot::SnapshotMergeStats;
Yifan Hongdad0af82020-02-19 17:19:49 -080036using android::snapshot::UpdateState;
37using brillo::MessageLoop;
38
39constexpr char kBootCompletedProp[] = "sys.boot_completed";
40// Interval to check sys.boot_completed.
41constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
42// Interval to check IBootControl::isSlotMarkedSuccessful
43constexpr auto kCheckSlotMarkedSuccessfulInterval =
44 base::TimeDelta::FromSeconds(2);
45// Interval to call SnapshotManager::ProcessUpdateState
46constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
47
Yifan Hong5cd63fa2020-03-16 12:31:16 -070048#ifdef __ANDROID_RECOVERY__
49static constexpr bool kIsRecovery = true;
50#else
51static constexpr bool kIsRecovery = false;
52#endif
53
Yifan Hongdad0af82020-02-19 17:19:49 -080054namespace chromeos_update_engine {
55
56CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
57 PrefsInterface* prefs,
58 BootControlInterface* boot_control,
Yifan Hongf9cb4492020-04-15 13:00:20 -070059 android::snapshot::ISnapshotManager* snapshot,
Yifan Hongdad0af82020-02-19 17:19:49 -080060 CleanupPreviousUpdateActionDelegateInterface* delegate)
61 : prefs_(prefs),
62 boot_control_(boot_control),
63 snapshot_(snapshot),
64 delegate_(delegate),
65 running_(false),
66 cancel_failed_(false),
Yifan Hongd976cc52020-02-25 14:51:42 -080067 last_percentage_(0),
Yifan Hongf9cb4492020-04-15 13:00:20 -070068 merge_stats_(nullptr) {}
Yifan Hongdad0af82020-02-19 17:19:49 -080069
Yifan Hongd1d52a02020-09-25 15:09:07 -070070CleanupPreviousUpdateAction::~CleanupPreviousUpdateAction() {
71 StopActionInternal();
72}
73
Yifan Hongdad0af82020-02-19 17:19:49 -080074void CleanupPreviousUpdateAction::PerformAction() {
Yifan Hongd506dee2020-09-25 15:08:19 -070075 StartActionInternal();
Yifan Hongdad0af82020-02-19 17:19:49 -080076}
77
78void CleanupPreviousUpdateAction::TerminateProcessing() {
Yifan Hongd506dee2020-09-25 15:08:19 -070079 StopActionInternal();
Yifan Hongdad0af82020-02-19 17:19:49 -080080}
81
82void CleanupPreviousUpdateAction::ResumeAction() {
Yifan Hongdad0af82020-02-19 17:19:49 -080083 StartActionInternal();
84}
85
86void CleanupPreviousUpdateAction::SuspendAction() {
Yifan Hongd506dee2020-09-25 15:08:19 -070087 StopActionInternal();
Yifan Hongdad0af82020-02-19 17:19:49 -080088}
89
90void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
Yifan Hongd506dee2020-09-25 15:08:19 -070091 StopActionInternal();
Yifan Hongd976cc52020-02-25 14:51:42 -080092 ReportMergeStats();
Yifan Hong5cd63fa2020-03-16 12:31:16 -070093 metadata_device_ = nullptr;
Yifan Hongdad0af82020-02-19 17:19:49 -080094}
95
96std::string CleanupPreviousUpdateAction::Type() const {
97 return StaticType();
98}
99
100std::string CleanupPreviousUpdateAction::StaticType() {
101 return "CleanupPreviousUpdateAction";
102}
103
Yifan Hongd1d52a02020-09-25 15:09:07 -0700104// This function is called at the beginning of all delayed functions. By
105// resetting |scheduled_task_|, the delayed function acknowledges that the task
106// has already been executed, therefore there's no need to cancel it in the
107// future. This avoids StopActionInternal() from resetting task IDs in an
108// unexpected way because task IDs could be reused.
109void CleanupPreviousUpdateAction::AcknowledgeTaskExecuted() {
110 if (scheduled_task_ != MessageLoop::kTaskIdNull) {
111 LOG(INFO) << "Executing task " << scheduled_task_;
112 }
113 scheduled_task_ = MessageLoop::kTaskIdNull;
114}
115
116// Check that scheduled_task_ is a valid task ID. Otherwise, terminate the
117// action.
118void CleanupPreviousUpdateAction::CheckTaskScheduled(std::string_view name) {
119 if (scheduled_task_ == MessageLoop::kTaskIdNull) {
120 LOG(ERROR) << "Unable to schedule " << name;
121 processor_->ActionComplete(this, ErrorCode::kError);
122 } else {
123 LOG(INFO) << "CleanupPreviousUpdateAction scheduled task ID "
124 << scheduled_task_ << " for " << name;
125 }
126}
127
Yifan Hongd506dee2020-09-25 15:08:19 -0700128void CleanupPreviousUpdateAction::StopActionInternal() {
129 LOG(INFO) << "Stopping/suspending/completing CleanupPreviousUpdateAction";
130 running_ = false;
Yifan Hongd1d52a02020-09-25 15:09:07 -0700131
132 if (scheduled_task_ != MessageLoop::kTaskIdNull) {
133 if (MessageLoop::current()->CancelTask(scheduled_task_)) {
134 LOG(INFO) << "CleanupPreviousUpdateAction cancelled pending task ID "
135 << scheduled_task_;
136 } else {
137 LOG(ERROR) << "CleanupPreviousUpdateAction unable to cancel task ID "
138 << scheduled_task_;
139 }
140 }
141 scheduled_task_ = MessageLoop::kTaskIdNull;
Yifan Hongd506dee2020-09-25 15:08:19 -0700142}
143
Yifan Hongdad0af82020-02-19 17:19:49 -0800144void CleanupPreviousUpdateAction::StartActionInternal() {
Yifan Hongd506dee2020-09-25 15:08:19 -0700145 CHECK(prefs_);
146 CHECK(boot_control_);
147
148 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
149 running_ = true;
Yifan Hongdad0af82020-02-19 17:19:49 -0800150 // Do nothing on non-VAB device.
151 if (!boot_control_->GetDynamicPartitionControl()
152 ->GetVirtualAbFeatureFlag()
153 .IsEnabled()) {
154 processor_->ActionComplete(this, ErrorCode::kSuccess);
155 return;
156 }
Yifan Hongf9cb4492020-04-15 13:00:20 -0700157 // SnapshotManager must be available on VAB devices.
158 CHECK(snapshot_ != nullptr);
159 merge_stats_ = snapshot_->GetSnapshotMergeStatsInstance();
160 CHECK(merge_stats_ != nullptr);
Yifan Hongdad0af82020-02-19 17:19:49 -0800161 WaitBootCompletedOrSchedule();
162}
163
164void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
165 TEST_AND_RETURN(running_);
Yifan Hongd1d52a02020-09-25 15:09:07 -0700166 scheduled_task_ = MessageLoop::current()->PostDelayedTask(
Yifan Hongdad0af82020-02-19 17:19:49 -0800167 FROM_HERE,
168 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
169 base::Unretained(this)),
170 kCheckBootCompletedInterval);
Yifan Hongd1d52a02020-09-25 15:09:07 -0700171 CheckTaskScheduled("WaitBootCompleted");
Yifan Hongdad0af82020-02-19 17:19:49 -0800172}
173
174void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
Yifan Hongd1d52a02020-09-25 15:09:07 -0700175 AcknowledgeTaskExecuted();
Yifan Hongdad0af82020-02-19 17:19:49 -0800176 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700177 if (!kIsRecovery &&
178 !android::base::GetBoolProperty(kBootCompletedProp, false)) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800179 // repeat
180 ScheduleWaitBootCompleted();
181 return;
182 }
183
184 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
185 CheckSlotMarkedSuccessfulOrSchedule();
186}
187
188void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
189 TEST_AND_RETURN(running_);
Yifan Hongd1d52a02020-09-25 15:09:07 -0700190 scheduled_task_ = MessageLoop::current()->PostDelayedTask(
Yifan Hongdad0af82020-02-19 17:19:49 -0800191 FROM_HERE,
192 base::Bind(
193 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
194 base::Unretained(this)),
195 kCheckSlotMarkedSuccessfulInterval);
Yifan Hongd1d52a02020-09-25 15:09:07 -0700196 CheckTaskScheduled("WaitMarkBootSuccessful");
Yifan Hongdad0af82020-02-19 17:19:49 -0800197}
198
199void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
Yifan Hongd1d52a02020-09-25 15:09:07 -0700200 AcknowledgeTaskExecuted();
Yifan Hongdad0af82020-02-19 17:19:49 -0800201 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700202 if (!kIsRecovery &&
203 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800204 ScheduleWaitMarkBootSuccessful();
205 }
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700206
207 if (metadata_device_ == nullptr) {
208 metadata_device_ = snapshot_->EnsureMetadataMounted();
209 }
210
211 if (metadata_device_ == nullptr) {
212 LOG(ERROR) << "Failed to mount /metadata.";
Yifan Hong4d7c5eb2020-04-03 11:31:50 -0700213 // If metadata is erased but not formatted, it is possible to not mount
214 // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
215 processor_->ActionComplete(
216 this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700217 return;
218 }
219
Yifan Hong24031712020-03-19 19:25:38 -0700220 if (kIsRecovery) {
221 auto snapshots_created =
222 snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
223 switch (snapshots_created) {
224 case android::snapshot::CreateResult::CREATED: {
225 // If previous update has not finished merging, snapshots exists and are
226 // created here so that ProcessUpdateState can proceed.
227 LOG(INFO) << "Snapshot devices are created";
228 break;
229 }
230 case android::snapshot::CreateResult::NOT_CREATED: {
231 // If there is no previous update, no snapshot devices are created and
232 // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
233 // considered an error.
234 LOG(INFO) << "Snapshot devices are not created";
235 break;
236 }
237 case android::snapshot::CreateResult::ERROR:
238 default: {
239 LOG(ERROR)
240 << "Failed to create snapshot devices (CreateResult = "
241 << static_cast<
242 std::underlying_type_t<android::snapshot::CreateResult>>(
243 snapshots_created);
244 processor_->ActionComplete(this, ErrorCode::kError);
245 return;
246 }
247 }
248 }
249
Yifan Hongd976cc52020-02-25 14:51:42 -0800250 if (!merge_stats_->Start()) {
251 // Not an error because CleanupPreviousUpdateAction may be paused and
252 // resumed while kernel continues merging snapshots in the background.
253 LOG(WARNING) << "SnapshotMergeStats::Start failed.";
254 }
Yifan Hongdad0af82020-02-19 17:19:49 -0800255 LOG(INFO) << "Waiting for any previous merge request to complete. "
256 << "This can take up to several minutes.";
257 WaitForMergeOrSchedule();
258}
259
260void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
261 TEST_AND_RETURN(running_);
Yifan Hongd1d52a02020-09-25 15:09:07 -0700262 scheduled_task_ = MessageLoop::current()->PostDelayedTask(
Yifan Hongdad0af82020-02-19 17:19:49 -0800263 FROM_HERE,
264 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
265 base::Unretained(this)),
266 kWaitForMergeInterval);
Yifan Hongd1d52a02020-09-25 15:09:07 -0700267 CheckTaskScheduled("WaitForMerge");
Yifan Hongdad0af82020-02-19 17:19:49 -0800268}
269
270void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
Yifan Hongd1d52a02020-09-25 15:09:07 -0700271 AcknowledgeTaskExecuted();
Yifan Hongdad0af82020-02-19 17:19:49 -0800272 TEST_AND_RETURN(running_);
273 auto state = snapshot_->ProcessUpdateState(
274 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
275 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
Yifan Hongd976cc52020-02-25 14:51:42 -0800276 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800277
Yifan Hongdad0af82020-02-19 17:19:49 -0800278 switch (state) {
279 case UpdateState::None: {
280 LOG(INFO) << "Can't find any snapshot to merge.";
Yifan Hong16b594a2020-03-05 21:02:36 -0800281 ErrorCode error_code = ErrorCode::kSuccess;
282 if (!snapshot_->CancelUpdate()) {
283 error_code = ErrorCode::kError;
284 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
285 }
286 processor_->ActionComplete(this, error_code);
Yifan Hongdad0af82020-02-19 17:19:49 -0800287 return;
288 }
289
290 case UpdateState::Initiated: {
291 LOG(ERROR) << "Previous update has not been completed, not cleaning up";
292 processor_->ActionComplete(this, ErrorCode::kSuccess);
293 return;
294 }
295
296 case UpdateState::Unverified: {
297 InitiateMergeAndWait();
298 return;
299 }
300
301 case UpdateState::Merging: {
302 ScheduleWaitForMerge();
303 return;
304 }
305
306 case UpdateState::MergeNeedsReboot: {
307 LOG(ERROR) << "Need reboot to finish merging.";
308 processor_->ActionComplete(this, ErrorCode::kError);
309 return;
310 }
311
312 case UpdateState::MergeCompleted: {
313 LOG(INFO) << "Merge finished with state MergeCompleted.";
314 processor_->ActionComplete(this, ErrorCode::kSuccess);
315 return;
316 }
317
318 case UpdateState::MergeFailed: {
319 LOG(ERROR) << "Merge failed. Device may be corrupted.";
320 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
321 return;
322 }
323
324 case UpdateState::Cancelled: {
325 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
326 // not deleted to avoid inconsistency.
327 // Nothing can be done here; just try next time.
328 ErrorCode error_code =
329 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
330 processor_->ActionComplete(this, error_code);
331 return;
332 }
333
334 default: {
335 // Protobuf has some reserved enum values, so a default case is needed.
336 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
337 << static_cast<int32_t>(state);
338 }
339 }
340}
341
342bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
343 double percentage = 0.0;
344 snapshot_->GetUpdateState(&percentage);
345 if (delegate_) {
346 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
347 delegate_->OnCleanupProgressUpdate(percentage / 100);
348 }
349
350 // Log if percentage increments by at least 1.
351 if (last_percentage_ < static_cast<unsigned int>(percentage)) {
352 last_percentage_ = percentage;
353 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
354 }
355
356 // Do not continue to wait for merge. Instead, let ProcessUpdateState
357 // return Merging directly so that we can ScheduleWaitForMerge() in
358 // MessageLoop.
359 return false;
360}
361
362bool CleanupPreviousUpdateAction::BeforeCancel() {
363 if (DeltaPerformer::ResetUpdateProgress(
364 prefs_,
365 false /* quick */,
366 false /* skip dynamic partitions metadata*/)) {
367 return true;
368 }
369
370 // ResetUpdateProgress might not work on stub prefs. Do additional checks.
371 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
372
373 std::string val;
374 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
375 if (val.empty()) {
376 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
377 << " is empty, assuming successful cleanup";
378 return true;
379 }
380 LOG(WARNING)
381 << kPrefsDynamicPartitionMetadataUpdated << " is " << val
382 << ", not deleting snapshots even though UpdateState is Cancelled.";
383 cancel_failed_ = true;
384 return false;
385}
386
387void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
388 TEST_AND_RETURN(running_);
389 LOG(INFO) << "Attempting to initiate merge.";
Howard Chen224aed92020-04-17 11:22:13 +0800390 // suspend the VAB merge when running a DSU
391 if (GetBoolProperty("ro.gsid.image_running", false)) {
392 LOG(WARNING) << "Suspend the VAB merge when running a DSU.";
393 processor_->ActionComplete(this, ErrorCode::kError);
394 return;
395 }
Yifan Hongdad0af82020-02-19 17:19:49 -0800396
Alessio Balsini4ed05122020-05-26 22:17:03 +0100397 uint64_t cow_file_size;
398 if (snapshot_->InitiateMerge(&cow_file_size)) {
399 merge_stats_->set_cow_file_size(cow_file_size);
Yifan Hongdad0af82020-02-19 17:19:49 -0800400 WaitForMergeOrSchedule();
401 return;
402 }
403
404 LOG(WARNING) << "InitiateMerge failed.";
405 auto state = snapshot_->GetUpdateState();
Yifan Hongd976cc52020-02-25 14:51:42 -0800406 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800407 if (state == UpdateState::Unverified) {
408 // We are stuck at unverified state. This can happen if the update has
409 // been applied, but it has not even been attempted yet (in libsnapshot,
410 // rollback indicator does not exist); for example, if update_engine
411 // restarts before the device reboots, then this state may be reached.
412 // Nothing should be done here.
413 LOG(WARNING) << "InitiateMerge leaves the device at "
414 << "UpdateState::Unverified. (Did update_engine "
415 << "restarted?)";
416 processor_->ActionComplete(this, ErrorCode::kSuccess);
417 return;
418 }
419
420 // State does seems to be advanced.
421 // It is possibly racy. For example, on a userdebug build, the user may
422 // manually initiate a merge with snapshotctl between last time
423 // update_engine checks UpdateState. Hence, just call
424 // WaitForMergeOrSchedule one more time.
425 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
426 << android::snapshot::UpdateState_Name(state)
427 << ", try to wait for merge again.";
428 WaitForMergeOrSchedule();
429 return;
430}
431
Yifan Hongd976cc52020-02-25 14:51:42 -0800432void CleanupPreviousUpdateAction::ReportMergeStats() {
433 auto result = merge_stats_->Finish();
434 if (result == nullptr) {
435 LOG(WARNING) << "Not reporting merge stats because "
436 "SnapshotMergeStats::Finish failed.";
437 return;
438 }
439
440#ifdef __ANDROID_RECOVERY__
441 LOG(INFO) << "Skip reporting merge stats in recovery.";
442#else
443 const auto& report = result->report();
444
445 if (report.state() == UpdateState::None ||
446 report.state() == UpdateState::Initiated ||
447 report.state() == UpdateState::Unverified) {
448 LOG(INFO) << "Not reporting merge stats because state is "
449 << android::snapshot::UpdateState_Name(report.state());
450 return;
451 }
452
453 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
454 result->merge_time());
Alessio Balsini4ed05122020-05-26 22:17:03 +0100455
456 bool vab_retrofit = boot_control_->GetDynamicPartitionControl()
457 ->GetVirtualAbFeatureFlag()
458 .IsRetrofit();
459
Yifan Hongd976cc52020-02-25 14:51:42 -0800460 LOG(INFO) << "Reporting merge stats: "
461 << android::snapshot::UpdateState_Name(report.state()) << " in "
462 << passed_ms.count() << "ms (resumed " << report.resume_count()
Alessio Balsini4ed05122020-05-26 22:17:03 +0100463 << " times), using " << report.cow_file_size()
464 << " bytes of COW image.";
Yifan Hongbe59a002020-03-02 15:45:14 -0800465 android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
466 static_cast<int32_t>(report.state()),
467 static_cast<int64_t>(passed_ms.count()),
Alessio Balsini4ed05122020-05-26 22:17:03 +0100468 static_cast<int32_t>(report.resume_count()),
469 vab_retrofit,
470 static_cast<int64_t>(report.cow_file_size()));
Yifan Hongd976cc52020-02-25 14:51:42 -0800471#endif
472}
473
Yifan Hongdad0af82020-02-19 17:19:49 -0800474} // namespace chromeos_update_engine