blob: 3a7a7502e95fd0f645dffce7f8a30898d52d151c [file] [log] [blame]
Yifan Hongdad0af82020-02-19 17:19:49 -08001//
2// Copyright (C) 2020 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16#include "update_engine/cleanup_previous_update_action.h"
17
Yifan Hongd976cc52020-02-25 14:51:42 -080018#include <chrono> // NOLINT(build/c++11) -- for merge times
Yifan Hongdad0af82020-02-19 17:19:49 -080019#include <functional>
20#include <string>
Yifan Hong24031712020-03-19 19:25:38 -070021#include <type_traits>
Yifan Hongdad0af82020-02-19 17:19:49 -080022
23#include <android-base/properties.h>
24#include <base/bind.h>
25
Yifan Hongd976cc52020-02-25 14:51:42 -080026#ifndef __ANDROID_RECOVERY__
27#include <statslog.h>
28#endif
29
Yifan Hongdad0af82020-02-19 17:19:49 -080030#include "update_engine/common/utils.h"
31#include "update_engine/payload_consumer/delta_performer.h"
32
Howard Chen224aed92020-04-17 11:22:13 +080033using android::base::GetBoolProperty;
Yifan Hongf9cb4492020-04-15 13:00:20 -070034using android::snapshot::ISnapshotManager;
Yifan Hongd976cc52020-02-25 14:51:42 -080035using android::snapshot::SnapshotMergeStats;
Yifan Hongdad0af82020-02-19 17:19:49 -080036using android::snapshot::UpdateState;
37using brillo::MessageLoop;
38
39constexpr char kBootCompletedProp[] = "sys.boot_completed";
40// Interval to check sys.boot_completed.
41constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
42// Interval to check IBootControl::isSlotMarkedSuccessful
43constexpr auto kCheckSlotMarkedSuccessfulInterval =
44 base::TimeDelta::FromSeconds(2);
45// Interval to call SnapshotManager::ProcessUpdateState
46constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
47
Yifan Hong5cd63fa2020-03-16 12:31:16 -070048#ifdef __ANDROID_RECOVERY__
49static constexpr bool kIsRecovery = true;
50#else
51static constexpr bool kIsRecovery = false;
52#endif
53
Yifan Hongdad0af82020-02-19 17:19:49 -080054namespace chromeos_update_engine {
55
56CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
57 PrefsInterface* prefs,
58 BootControlInterface* boot_control,
Yifan Hongf9cb4492020-04-15 13:00:20 -070059 android::snapshot::ISnapshotManager* snapshot,
Yifan Hongdad0af82020-02-19 17:19:49 -080060 CleanupPreviousUpdateActionDelegateInterface* delegate)
61 : prefs_(prefs),
62 boot_control_(boot_control),
63 snapshot_(snapshot),
64 delegate_(delegate),
65 running_(false),
66 cancel_failed_(false),
Yifan Hongd976cc52020-02-25 14:51:42 -080067 last_percentage_(0),
Yifan Hongf9cb4492020-04-15 13:00:20 -070068 merge_stats_(nullptr) {}
Yifan Hongdad0af82020-02-19 17:19:49 -080069
70void CleanupPreviousUpdateAction::PerformAction() {
Yifan Hongd506dee2020-09-25 15:08:19 -070071 StartActionInternal();
Yifan Hongdad0af82020-02-19 17:19:49 -080072}
73
74void CleanupPreviousUpdateAction::TerminateProcessing() {
Yifan Hongd506dee2020-09-25 15:08:19 -070075 StopActionInternal();
Yifan Hongdad0af82020-02-19 17:19:49 -080076}
77
78void CleanupPreviousUpdateAction::ResumeAction() {
Yifan Hongdad0af82020-02-19 17:19:49 -080079 StartActionInternal();
80}
81
82void CleanupPreviousUpdateAction::SuspendAction() {
Yifan Hongd506dee2020-09-25 15:08:19 -070083 StopActionInternal();
Yifan Hongdad0af82020-02-19 17:19:49 -080084}
85
86void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
Yifan Hongd506dee2020-09-25 15:08:19 -070087 StopActionInternal();
Yifan Hongd976cc52020-02-25 14:51:42 -080088 ReportMergeStats();
Yifan Hong5cd63fa2020-03-16 12:31:16 -070089 metadata_device_ = nullptr;
Yifan Hongdad0af82020-02-19 17:19:49 -080090}
91
92std::string CleanupPreviousUpdateAction::Type() const {
93 return StaticType();
94}
95
96std::string CleanupPreviousUpdateAction::StaticType() {
97 return "CleanupPreviousUpdateAction";
98}
99
Yifan Hongd506dee2020-09-25 15:08:19 -0700100void CleanupPreviousUpdateAction::StopActionInternal() {
101 LOG(INFO) << "Stopping/suspending/completing CleanupPreviousUpdateAction";
102 running_ = false;
103}
104
Yifan Hongdad0af82020-02-19 17:19:49 -0800105void CleanupPreviousUpdateAction::StartActionInternal() {
Yifan Hongd506dee2020-09-25 15:08:19 -0700106 CHECK(prefs_);
107 CHECK(boot_control_);
108
109 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
110 running_ = true;
Yifan Hongdad0af82020-02-19 17:19:49 -0800111 // Do nothing on non-VAB device.
112 if (!boot_control_->GetDynamicPartitionControl()
113 ->GetVirtualAbFeatureFlag()
114 .IsEnabled()) {
115 processor_->ActionComplete(this, ErrorCode::kSuccess);
116 return;
117 }
Yifan Hongf9cb4492020-04-15 13:00:20 -0700118 // SnapshotManager must be available on VAB devices.
119 CHECK(snapshot_ != nullptr);
120 merge_stats_ = snapshot_->GetSnapshotMergeStatsInstance();
121 CHECK(merge_stats_ != nullptr);
Yifan Hongdad0af82020-02-19 17:19:49 -0800122 WaitBootCompletedOrSchedule();
123}
124
125void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
126 TEST_AND_RETURN(running_);
127 MessageLoop::current()->PostDelayedTask(
128 FROM_HERE,
129 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
130 base::Unretained(this)),
131 kCheckBootCompletedInterval);
132}
133
134void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
135 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700136 if (!kIsRecovery &&
137 !android::base::GetBoolProperty(kBootCompletedProp, false)) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800138 // repeat
139 ScheduleWaitBootCompleted();
140 return;
141 }
142
143 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
144 CheckSlotMarkedSuccessfulOrSchedule();
145}
146
147void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
148 TEST_AND_RETURN(running_);
149 MessageLoop::current()->PostDelayedTask(
150 FROM_HERE,
151 base::Bind(
152 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
153 base::Unretained(this)),
154 kCheckSlotMarkedSuccessfulInterval);
155}
156
157void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
158 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700159 if (!kIsRecovery &&
160 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800161 ScheduleWaitMarkBootSuccessful();
162 }
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700163
164 if (metadata_device_ == nullptr) {
165 metadata_device_ = snapshot_->EnsureMetadataMounted();
166 }
167
168 if (metadata_device_ == nullptr) {
169 LOG(ERROR) << "Failed to mount /metadata.";
Yifan Hong4d7c5eb2020-04-03 11:31:50 -0700170 // If metadata is erased but not formatted, it is possible to not mount
171 // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
172 processor_->ActionComplete(
173 this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700174 return;
175 }
176
Yifan Hong24031712020-03-19 19:25:38 -0700177 if (kIsRecovery) {
178 auto snapshots_created =
179 snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
180 switch (snapshots_created) {
181 case android::snapshot::CreateResult::CREATED: {
182 // If previous update has not finished merging, snapshots exists and are
183 // created here so that ProcessUpdateState can proceed.
184 LOG(INFO) << "Snapshot devices are created";
185 break;
186 }
187 case android::snapshot::CreateResult::NOT_CREATED: {
188 // If there is no previous update, no snapshot devices are created and
189 // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
190 // considered an error.
191 LOG(INFO) << "Snapshot devices are not created";
192 break;
193 }
194 case android::snapshot::CreateResult::ERROR:
195 default: {
196 LOG(ERROR)
197 << "Failed to create snapshot devices (CreateResult = "
198 << static_cast<
199 std::underlying_type_t<android::snapshot::CreateResult>>(
200 snapshots_created);
201 processor_->ActionComplete(this, ErrorCode::kError);
202 return;
203 }
204 }
205 }
206
Yifan Hongd976cc52020-02-25 14:51:42 -0800207 if (!merge_stats_->Start()) {
208 // Not an error because CleanupPreviousUpdateAction may be paused and
209 // resumed while kernel continues merging snapshots in the background.
210 LOG(WARNING) << "SnapshotMergeStats::Start failed.";
211 }
Yifan Hongdad0af82020-02-19 17:19:49 -0800212 LOG(INFO) << "Waiting for any previous merge request to complete. "
213 << "This can take up to several minutes.";
214 WaitForMergeOrSchedule();
215}
216
217void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
218 TEST_AND_RETURN(running_);
219 MessageLoop::current()->PostDelayedTask(
220 FROM_HERE,
221 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
222 base::Unretained(this)),
223 kWaitForMergeInterval);
224}
225
226void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
227 TEST_AND_RETURN(running_);
228 auto state = snapshot_->ProcessUpdateState(
229 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
230 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
Yifan Hongd976cc52020-02-25 14:51:42 -0800231 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800232
Yifan Hongdad0af82020-02-19 17:19:49 -0800233 switch (state) {
234 case UpdateState::None: {
235 LOG(INFO) << "Can't find any snapshot to merge.";
Yifan Hong16b594a2020-03-05 21:02:36 -0800236 ErrorCode error_code = ErrorCode::kSuccess;
237 if (!snapshot_->CancelUpdate()) {
238 error_code = ErrorCode::kError;
239 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
240 }
241 processor_->ActionComplete(this, error_code);
Yifan Hongdad0af82020-02-19 17:19:49 -0800242 return;
243 }
244
245 case UpdateState::Initiated: {
246 LOG(ERROR) << "Previous update has not been completed, not cleaning up";
247 processor_->ActionComplete(this, ErrorCode::kSuccess);
248 return;
249 }
250
251 case UpdateState::Unverified: {
252 InitiateMergeAndWait();
253 return;
254 }
255
256 case UpdateState::Merging: {
257 ScheduleWaitForMerge();
258 return;
259 }
260
261 case UpdateState::MergeNeedsReboot: {
262 LOG(ERROR) << "Need reboot to finish merging.";
263 processor_->ActionComplete(this, ErrorCode::kError);
264 return;
265 }
266
267 case UpdateState::MergeCompleted: {
268 LOG(INFO) << "Merge finished with state MergeCompleted.";
269 processor_->ActionComplete(this, ErrorCode::kSuccess);
270 return;
271 }
272
273 case UpdateState::MergeFailed: {
274 LOG(ERROR) << "Merge failed. Device may be corrupted.";
275 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
276 return;
277 }
278
279 case UpdateState::Cancelled: {
280 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
281 // not deleted to avoid inconsistency.
282 // Nothing can be done here; just try next time.
283 ErrorCode error_code =
284 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
285 processor_->ActionComplete(this, error_code);
286 return;
287 }
288
289 default: {
290 // Protobuf has some reserved enum values, so a default case is needed.
291 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
292 << static_cast<int32_t>(state);
293 }
294 }
295}
296
297bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
298 double percentage = 0.0;
299 snapshot_->GetUpdateState(&percentage);
300 if (delegate_) {
301 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
302 delegate_->OnCleanupProgressUpdate(percentage / 100);
303 }
304
305 // Log if percentage increments by at least 1.
306 if (last_percentage_ < static_cast<unsigned int>(percentage)) {
307 last_percentage_ = percentage;
308 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
309 }
310
311 // Do not continue to wait for merge. Instead, let ProcessUpdateState
312 // return Merging directly so that we can ScheduleWaitForMerge() in
313 // MessageLoop.
314 return false;
315}
316
317bool CleanupPreviousUpdateAction::BeforeCancel() {
318 if (DeltaPerformer::ResetUpdateProgress(
319 prefs_,
320 false /* quick */,
321 false /* skip dynamic partitions metadata*/)) {
322 return true;
323 }
324
325 // ResetUpdateProgress might not work on stub prefs. Do additional checks.
326 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
327
328 std::string val;
329 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
330 if (val.empty()) {
331 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
332 << " is empty, assuming successful cleanup";
333 return true;
334 }
335 LOG(WARNING)
336 << kPrefsDynamicPartitionMetadataUpdated << " is " << val
337 << ", not deleting snapshots even though UpdateState is Cancelled.";
338 cancel_failed_ = true;
339 return false;
340}
341
342void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
343 TEST_AND_RETURN(running_);
344 LOG(INFO) << "Attempting to initiate merge.";
Howard Chen224aed92020-04-17 11:22:13 +0800345 // suspend the VAB merge when running a DSU
346 if (GetBoolProperty("ro.gsid.image_running", false)) {
347 LOG(WARNING) << "Suspend the VAB merge when running a DSU.";
348 processor_->ActionComplete(this, ErrorCode::kError);
349 return;
350 }
Yifan Hongdad0af82020-02-19 17:19:49 -0800351
Alessio Balsini4ed05122020-05-26 22:17:03 +0100352 uint64_t cow_file_size;
353 if (snapshot_->InitiateMerge(&cow_file_size)) {
354 merge_stats_->set_cow_file_size(cow_file_size);
Yifan Hongdad0af82020-02-19 17:19:49 -0800355 WaitForMergeOrSchedule();
356 return;
357 }
358
359 LOG(WARNING) << "InitiateMerge failed.";
360 auto state = snapshot_->GetUpdateState();
Yifan Hongd976cc52020-02-25 14:51:42 -0800361 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800362 if (state == UpdateState::Unverified) {
363 // We are stuck at unverified state. This can happen if the update has
364 // been applied, but it has not even been attempted yet (in libsnapshot,
365 // rollback indicator does not exist); for example, if update_engine
366 // restarts before the device reboots, then this state may be reached.
367 // Nothing should be done here.
368 LOG(WARNING) << "InitiateMerge leaves the device at "
369 << "UpdateState::Unverified. (Did update_engine "
370 << "restarted?)";
371 processor_->ActionComplete(this, ErrorCode::kSuccess);
372 return;
373 }
374
375 // State does seems to be advanced.
376 // It is possibly racy. For example, on a userdebug build, the user may
377 // manually initiate a merge with snapshotctl between last time
378 // update_engine checks UpdateState. Hence, just call
379 // WaitForMergeOrSchedule one more time.
380 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
381 << android::snapshot::UpdateState_Name(state)
382 << ", try to wait for merge again.";
383 WaitForMergeOrSchedule();
384 return;
385}
386
Yifan Hongd976cc52020-02-25 14:51:42 -0800387void CleanupPreviousUpdateAction::ReportMergeStats() {
388 auto result = merge_stats_->Finish();
389 if (result == nullptr) {
390 LOG(WARNING) << "Not reporting merge stats because "
391 "SnapshotMergeStats::Finish failed.";
392 return;
393 }
394
395#ifdef __ANDROID_RECOVERY__
396 LOG(INFO) << "Skip reporting merge stats in recovery.";
397#else
398 const auto& report = result->report();
399
400 if (report.state() == UpdateState::None ||
401 report.state() == UpdateState::Initiated ||
402 report.state() == UpdateState::Unverified) {
403 LOG(INFO) << "Not reporting merge stats because state is "
404 << android::snapshot::UpdateState_Name(report.state());
405 return;
406 }
407
408 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
409 result->merge_time());
Alessio Balsini4ed05122020-05-26 22:17:03 +0100410
411 bool vab_retrofit = boot_control_->GetDynamicPartitionControl()
412 ->GetVirtualAbFeatureFlag()
413 .IsRetrofit();
414
Yifan Hongd976cc52020-02-25 14:51:42 -0800415 LOG(INFO) << "Reporting merge stats: "
416 << android::snapshot::UpdateState_Name(report.state()) << " in "
417 << passed_ms.count() << "ms (resumed " << report.resume_count()
Alessio Balsini4ed05122020-05-26 22:17:03 +0100418 << " times), using " << report.cow_file_size()
419 << " bytes of COW image.";
Yifan Hongbe59a002020-03-02 15:45:14 -0800420 android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
421 static_cast<int32_t>(report.state()),
422 static_cast<int64_t>(passed_ms.count()),
Alessio Balsini4ed05122020-05-26 22:17:03 +0100423 static_cast<int32_t>(report.resume_count()),
424 vab_retrofit,
425 static_cast<int64_t>(report.cow_file_size()));
Yifan Hongd976cc52020-02-25 14:51:42 -0800426#endif
427}
428
Yifan Hongdad0af82020-02-19 17:19:49 -0800429} // namespace chromeos_update_engine