blob: 26cc6be98784a1d68eea9e5f03efcd6499c45d18 [file] [log] [blame]
Yifan Hongdad0af82020-02-19 17:19:49 -08001//
2// Copyright (C) 2020 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16#include "update_engine/cleanup_previous_update_action.h"
17
Yifan Hongd976cc52020-02-25 14:51:42 -080018#include <chrono> // NOLINT(build/c++11) -- for merge times
Yifan Hongdad0af82020-02-19 17:19:49 -080019#include <functional>
20#include <string>
Yifan Hong24031712020-03-19 19:25:38 -070021#include <type_traits>
Yifan Hongdad0af82020-02-19 17:19:49 -080022
23#include <android-base/properties.h>
24#include <base/bind.h>
25
Yifan Hongd976cc52020-02-25 14:51:42 -080026#ifndef __ANDROID_RECOVERY__
27#include <statslog.h>
28#endif
29
Yifan Hongdad0af82020-02-19 17:19:49 -080030#include "update_engine/common/utils.h"
31#include "update_engine/payload_consumer/delta_performer.h"
32
33using android::snapshot::SnapshotManager;
Yifan Hongd976cc52020-02-25 14:51:42 -080034using android::snapshot::SnapshotMergeStats;
Yifan Hongdad0af82020-02-19 17:19:49 -080035using android::snapshot::UpdateState;
36using brillo::MessageLoop;
37
38constexpr char kBootCompletedProp[] = "sys.boot_completed";
39// Interval to check sys.boot_completed.
40constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
41// Interval to check IBootControl::isSlotMarkedSuccessful
42constexpr auto kCheckSlotMarkedSuccessfulInterval =
43 base::TimeDelta::FromSeconds(2);
44// Interval to call SnapshotManager::ProcessUpdateState
45constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
46
Yifan Hong5cd63fa2020-03-16 12:31:16 -070047#ifdef __ANDROID_RECOVERY__
48static constexpr bool kIsRecovery = true;
49#else
50static constexpr bool kIsRecovery = false;
51#endif
52
Yifan Hongdad0af82020-02-19 17:19:49 -080053namespace chromeos_update_engine {
54
55CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
56 PrefsInterface* prefs,
57 BootControlInterface* boot_control,
58 android::snapshot::SnapshotManager* snapshot,
59 CleanupPreviousUpdateActionDelegateInterface* delegate)
60 : prefs_(prefs),
61 boot_control_(boot_control),
62 snapshot_(snapshot),
63 delegate_(delegate),
64 running_(false),
65 cancel_failed_(false),
Yifan Hongd976cc52020-02-25 14:51:42 -080066 last_percentage_(0),
67 merge_stats_(SnapshotMergeStats::GetInstance(*snapshot)) {}
Yifan Hongdad0af82020-02-19 17:19:49 -080068
69void CleanupPreviousUpdateAction::PerformAction() {
70 ResumeAction();
71}
72
73void CleanupPreviousUpdateAction::TerminateProcessing() {
74 SuspendAction();
75}
76
77void CleanupPreviousUpdateAction::ResumeAction() {
78 CHECK(prefs_);
79 CHECK(boot_control_);
80
81 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
82 running_ = true;
83 StartActionInternal();
84}
85
86void CleanupPreviousUpdateAction::SuspendAction() {
87 LOG(INFO) << "Stopping/suspending CleanupPreviousUpdateAction";
88 running_ = false;
89}
90
91void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
92 running_ = false;
Yifan Hongd976cc52020-02-25 14:51:42 -080093 ReportMergeStats();
Yifan Hong5cd63fa2020-03-16 12:31:16 -070094 metadata_device_ = nullptr;
Yifan Hongdad0af82020-02-19 17:19:49 -080095}
96
97std::string CleanupPreviousUpdateAction::Type() const {
98 return StaticType();
99}
100
101std::string CleanupPreviousUpdateAction::StaticType() {
102 return "CleanupPreviousUpdateAction";
103}
104
105void CleanupPreviousUpdateAction::StartActionInternal() {
106 // Do nothing on non-VAB device.
107 if (!boot_control_->GetDynamicPartitionControl()
108 ->GetVirtualAbFeatureFlag()
109 .IsEnabled()) {
110 processor_->ActionComplete(this, ErrorCode::kSuccess);
111 return;
112 }
113 // SnapshotManager is only available on VAB devices.
114 CHECK(snapshot_);
115 WaitBootCompletedOrSchedule();
116}
117
118void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
119 TEST_AND_RETURN(running_);
120 MessageLoop::current()->PostDelayedTask(
121 FROM_HERE,
122 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
123 base::Unretained(this)),
124 kCheckBootCompletedInterval);
125}
126
127void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
128 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700129 if (!kIsRecovery &&
130 !android::base::GetBoolProperty(kBootCompletedProp, false)) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800131 // repeat
132 ScheduleWaitBootCompleted();
133 return;
134 }
135
136 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
137 CheckSlotMarkedSuccessfulOrSchedule();
138}
139
140void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
141 TEST_AND_RETURN(running_);
142 MessageLoop::current()->PostDelayedTask(
143 FROM_HERE,
144 base::Bind(
145 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
146 base::Unretained(this)),
147 kCheckSlotMarkedSuccessfulInterval);
148}
149
150void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
151 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700152 if (!kIsRecovery &&
153 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800154 ScheduleWaitMarkBootSuccessful();
155 }
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700156
157 if (metadata_device_ == nullptr) {
158 metadata_device_ = snapshot_->EnsureMetadataMounted();
159 }
160
161 if (metadata_device_ == nullptr) {
162 LOG(ERROR) << "Failed to mount /metadata.";
Yifan Hong4d7c5eb2020-04-03 11:31:50 -0700163 // If metadata is erased but not formatted, it is possible to not mount
164 // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
165 processor_->ActionComplete(
166 this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700167 return;
168 }
169
Yifan Hong24031712020-03-19 19:25:38 -0700170 if (kIsRecovery) {
171 auto snapshots_created =
172 snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
173 switch (snapshots_created) {
174 case android::snapshot::CreateResult::CREATED: {
175 // If previous update has not finished merging, snapshots exists and are
176 // created here so that ProcessUpdateState can proceed.
177 LOG(INFO) << "Snapshot devices are created";
178 break;
179 }
180 case android::snapshot::CreateResult::NOT_CREATED: {
181 // If there is no previous update, no snapshot devices are created and
182 // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
183 // considered an error.
184 LOG(INFO) << "Snapshot devices are not created";
185 break;
186 }
187 case android::snapshot::CreateResult::ERROR:
188 default: {
189 LOG(ERROR)
190 << "Failed to create snapshot devices (CreateResult = "
191 << static_cast<
192 std::underlying_type_t<android::snapshot::CreateResult>>(
193 snapshots_created);
194 processor_->ActionComplete(this, ErrorCode::kError);
195 return;
196 }
197 }
198 }
199
Yifan Hongd976cc52020-02-25 14:51:42 -0800200 if (!merge_stats_->Start()) {
201 // Not an error because CleanupPreviousUpdateAction may be paused and
202 // resumed while kernel continues merging snapshots in the background.
203 LOG(WARNING) << "SnapshotMergeStats::Start failed.";
204 }
Yifan Hongdad0af82020-02-19 17:19:49 -0800205 LOG(INFO) << "Waiting for any previous merge request to complete. "
206 << "This can take up to several minutes.";
207 WaitForMergeOrSchedule();
208}
209
210void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
211 TEST_AND_RETURN(running_);
212 MessageLoop::current()->PostDelayedTask(
213 FROM_HERE,
214 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
215 base::Unretained(this)),
216 kWaitForMergeInterval);
217}
218
219void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
220 TEST_AND_RETURN(running_);
221 auto state = snapshot_->ProcessUpdateState(
222 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
223 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
Yifan Hongd976cc52020-02-25 14:51:42 -0800224 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800225
Yifan Hongdad0af82020-02-19 17:19:49 -0800226 switch (state) {
227 case UpdateState::None: {
228 LOG(INFO) << "Can't find any snapshot to merge.";
Yifan Hong16b594a2020-03-05 21:02:36 -0800229 ErrorCode error_code = ErrorCode::kSuccess;
230 if (!snapshot_->CancelUpdate()) {
231 error_code = ErrorCode::kError;
232 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
233 }
234 processor_->ActionComplete(this, error_code);
Yifan Hongdad0af82020-02-19 17:19:49 -0800235 return;
236 }
237
238 case UpdateState::Initiated: {
239 LOG(ERROR) << "Previous update has not been completed, not cleaning up";
240 processor_->ActionComplete(this, ErrorCode::kSuccess);
241 return;
242 }
243
244 case UpdateState::Unverified: {
245 InitiateMergeAndWait();
246 return;
247 }
248
249 case UpdateState::Merging: {
250 ScheduleWaitForMerge();
251 return;
252 }
253
254 case UpdateState::MergeNeedsReboot: {
255 LOG(ERROR) << "Need reboot to finish merging.";
256 processor_->ActionComplete(this, ErrorCode::kError);
257 return;
258 }
259
260 case UpdateState::MergeCompleted: {
261 LOG(INFO) << "Merge finished with state MergeCompleted.";
262 processor_->ActionComplete(this, ErrorCode::kSuccess);
263 return;
264 }
265
266 case UpdateState::MergeFailed: {
267 LOG(ERROR) << "Merge failed. Device may be corrupted.";
268 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
269 return;
270 }
271
272 case UpdateState::Cancelled: {
273 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
274 // not deleted to avoid inconsistency.
275 // Nothing can be done here; just try next time.
276 ErrorCode error_code =
277 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
278 processor_->ActionComplete(this, error_code);
279 return;
280 }
281
282 default: {
283 // Protobuf has some reserved enum values, so a default case is needed.
284 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
285 << static_cast<int32_t>(state);
286 }
287 }
288}
289
290bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
291 double percentage = 0.0;
292 snapshot_->GetUpdateState(&percentage);
293 if (delegate_) {
294 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
295 delegate_->OnCleanupProgressUpdate(percentage / 100);
296 }
297
298 // Log if percentage increments by at least 1.
299 if (last_percentage_ < static_cast<unsigned int>(percentage)) {
300 last_percentage_ = percentage;
301 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
302 }
303
304 // Do not continue to wait for merge. Instead, let ProcessUpdateState
305 // return Merging directly so that we can ScheduleWaitForMerge() in
306 // MessageLoop.
307 return false;
308}
309
310bool CleanupPreviousUpdateAction::BeforeCancel() {
311 if (DeltaPerformer::ResetUpdateProgress(
312 prefs_,
313 false /* quick */,
314 false /* skip dynamic partitions metadata*/)) {
315 return true;
316 }
317
318 // ResetUpdateProgress might not work on stub prefs. Do additional checks.
319 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
320
321 std::string val;
322 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
323 if (val.empty()) {
324 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
325 << " is empty, assuming successful cleanup";
326 return true;
327 }
328 LOG(WARNING)
329 << kPrefsDynamicPartitionMetadataUpdated << " is " << val
330 << ", not deleting snapshots even though UpdateState is Cancelled.";
331 cancel_failed_ = true;
332 return false;
333}
334
335void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
336 TEST_AND_RETURN(running_);
337 LOG(INFO) << "Attempting to initiate merge.";
338
339 if (snapshot_->InitiateMerge()) {
340 WaitForMergeOrSchedule();
341 return;
342 }
343
344 LOG(WARNING) << "InitiateMerge failed.";
345 auto state = snapshot_->GetUpdateState();
Yifan Hongd976cc52020-02-25 14:51:42 -0800346 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800347 if (state == UpdateState::Unverified) {
348 // We are stuck at unverified state. This can happen if the update has
349 // been applied, but it has not even been attempted yet (in libsnapshot,
350 // rollback indicator does not exist); for example, if update_engine
351 // restarts before the device reboots, then this state may be reached.
352 // Nothing should be done here.
353 LOG(WARNING) << "InitiateMerge leaves the device at "
354 << "UpdateState::Unverified. (Did update_engine "
355 << "restarted?)";
356 processor_->ActionComplete(this, ErrorCode::kSuccess);
357 return;
358 }
359
360 // State does seems to be advanced.
361 // It is possibly racy. For example, on a userdebug build, the user may
362 // manually initiate a merge with snapshotctl between last time
363 // update_engine checks UpdateState. Hence, just call
364 // WaitForMergeOrSchedule one more time.
365 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
366 << android::snapshot::UpdateState_Name(state)
367 << ", try to wait for merge again.";
368 WaitForMergeOrSchedule();
369 return;
370}
371
Yifan Hongd976cc52020-02-25 14:51:42 -0800372void CleanupPreviousUpdateAction::ReportMergeStats() {
373 auto result = merge_stats_->Finish();
374 if (result == nullptr) {
375 LOG(WARNING) << "Not reporting merge stats because "
376 "SnapshotMergeStats::Finish failed.";
377 return;
378 }
379
380#ifdef __ANDROID_RECOVERY__
381 LOG(INFO) << "Skip reporting merge stats in recovery.";
382#else
383 const auto& report = result->report();
384
385 if (report.state() == UpdateState::None ||
386 report.state() == UpdateState::Initiated ||
387 report.state() == UpdateState::Unverified) {
388 LOG(INFO) << "Not reporting merge stats because state is "
389 << android::snapshot::UpdateState_Name(report.state());
390 return;
391 }
392
393 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
394 result->merge_time());
395 LOG(INFO) << "Reporting merge stats: "
396 << android::snapshot::UpdateState_Name(report.state()) << " in "
397 << passed_ms.count() << "ms (resumed " << report.resume_count()
398 << " times)";
Yifan Hongbe59a002020-03-02 15:45:14 -0800399 android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
400 static_cast<int32_t>(report.state()),
401 static_cast<int64_t>(passed_ms.count()),
402 static_cast<int32_t>(report.resume_count()));
Yifan Hongd976cc52020-02-25 14:51:42 -0800403#endif
404}
405
Yifan Hongdad0af82020-02-19 17:19:49 -0800406} // namespace chromeos_update_engine