blob: ee689472f37664cb61cda46ec42f9c5d8b23a532 [file] [log] [blame]
Yifan Hongdad0af82020-02-19 17:19:49 -08001//
2// Copyright (C) 2020 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16#include "update_engine/cleanup_previous_update_action.h"
17
Yifan Hongd976cc52020-02-25 14:51:42 -080018#include <chrono> // NOLINT(build/c++11) -- for merge times
Yifan Hongdad0af82020-02-19 17:19:49 -080019#include <functional>
20#include <string>
Yifan Hong24031712020-03-19 19:25:38 -070021#include <type_traits>
Yifan Hongdad0af82020-02-19 17:19:49 -080022
23#include <android-base/properties.h>
24#include <base/bind.h>
25
Yifan Hongd976cc52020-02-25 14:51:42 -080026#ifndef __ANDROID_RECOVERY__
27#include <statslog.h>
28#endif
29
Yifan Hongdad0af82020-02-19 17:19:49 -080030#include "update_engine/common/utils.h"
31#include "update_engine/payload_consumer/delta_performer.h"
32
33using android::snapshot::SnapshotManager;
Yifan Hongd976cc52020-02-25 14:51:42 -080034using android::snapshot::SnapshotMergeStats;
Yifan Hongdad0af82020-02-19 17:19:49 -080035using android::snapshot::UpdateState;
36using brillo::MessageLoop;
37
38constexpr char kBootCompletedProp[] = "sys.boot_completed";
39// Interval to check sys.boot_completed.
40constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
41// Interval to check IBootControl::isSlotMarkedSuccessful
42constexpr auto kCheckSlotMarkedSuccessfulInterval =
43 base::TimeDelta::FromSeconds(2);
44// Interval to call SnapshotManager::ProcessUpdateState
45constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
46
Yifan Hong5cd63fa2020-03-16 12:31:16 -070047#ifdef __ANDROID_RECOVERY__
48static constexpr bool kIsRecovery = true;
49#else
50static constexpr bool kIsRecovery = false;
51#endif
52
Yifan Hongdad0af82020-02-19 17:19:49 -080053namespace chromeos_update_engine {
54
55CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
56 PrefsInterface* prefs,
57 BootControlInterface* boot_control,
58 android::snapshot::SnapshotManager* snapshot,
59 CleanupPreviousUpdateActionDelegateInterface* delegate)
60 : prefs_(prefs),
61 boot_control_(boot_control),
62 snapshot_(snapshot),
63 delegate_(delegate),
64 running_(false),
65 cancel_failed_(false),
Yifan Hongd976cc52020-02-25 14:51:42 -080066 last_percentage_(0),
67 merge_stats_(SnapshotMergeStats::GetInstance(*snapshot)) {}
Yifan Hongdad0af82020-02-19 17:19:49 -080068
69void CleanupPreviousUpdateAction::PerformAction() {
70 ResumeAction();
71}
72
73void CleanupPreviousUpdateAction::TerminateProcessing() {
74 SuspendAction();
75}
76
77void CleanupPreviousUpdateAction::ResumeAction() {
78 CHECK(prefs_);
79 CHECK(boot_control_);
80
81 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
82 running_ = true;
83 StartActionInternal();
84}
85
86void CleanupPreviousUpdateAction::SuspendAction() {
87 LOG(INFO) << "Stopping/suspending CleanupPreviousUpdateAction";
88 running_ = false;
89}
90
91void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
92 running_ = false;
Yifan Hongd976cc52020-02-25 14:51:42 -080093 ReportMergeStats();
Yifan Hong5cd63fa2020-03-16 12:31:16 -070094 metadata_device_ = nullptr;
Yifan Hongdad0af82020-02-19 17:19:49 -080095}
96
97std::string CleanupPreviousUpdateAction::Type() const {
98 return StaticType();
99}
100
101std::string CleanupPreviousUpdateAction::StaticType() {
102 return "CleanupPreviousUpdateAction";
103}
104
105void CleanupPreviousUpdateAction::StartActionInternal() {
106 // Do nothing on non-VAB device.
107 if (!boot_control_->GetDynamicPartitionControl()
108 ->GetVirtualAbFeatureFlag()
109 .IsEnabled()) {
110 processor_->ActionComplete(this, ErrorCode::kSuccess);
111 return;
112 }
113 // SnapshotManager is only available on VAB devices.
114 CHECK(snapshot_);
115 WaitBootCompletedOrSchedule();
116}
117
118void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
119 TEST_AND_RETURN(running_);
120 MessageLoop::current()->PostDelayedTask(
121 FROM_HERE,
122 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
123 base::Unretained(this)),
124 kCheckBootCompletedInterval);
125}
126
127void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
128 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700129 if (!kIsRecovery &&
130 !android::base::GetBoolProperty(kBootCompletedProp, false)) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800131 // repeat
132 ScheduleWaitBootCompleted();
133 return;
134 }
135
136 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
137 CheckSlotMarkedSuccessfulOrSchedule();
138}
139
140void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
141 TEST_AND_RETURN(running_);
142 MessageLoop::current()->PostDelayedTask(
143 FROM_HERE,
144 base::Bind(
145 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
146 base::Unretained(this)),
147 kCheckSlotMarkedSuccessfulInterval);
148}
149
150void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
151 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700152 if (!kIsRecovery &&
153 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800154 ScheduleWaitMarkBootSuccessful();
155 }
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700156
157 if (metadata_device_ == nullptr) {
158 metadata_device_ = snapshot_->EnsureMetadataMounted();
159 }
160
161 if (metadata_device_ == nullptr) {
162 LOG(ERROR) << "Failed to mount /metadata.";
163 processor_->ActionComplete(this, ErrorCode::kError);
164 return;
165 }
166
Yifan Hong24031712020-03-19 19:25:38 -0700167 if (kIsRecovery) {
168 auto snapshots_created =
169 snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
170 switch (snapshots_created) {
171 case android::snapshot::CreateResult::CREATED: {
172 // If previous update has not finished merging, snapshots exists and are
173 // created here so that ProcessUpdateState can proceed.
174 LOG(INFO) << "Snapshot devices are created";
175 break;
176 }
177 case android::snapshot::CreateResult::NOT_CREATED: {
178 // If there is no previous update, no snapshot devices are created and
179 // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
180 // considered an error.
181 LOG(INFO) << "Snapshot devices are not created";
182 break;
183 }
184 case android::snapshot::CreateResult::ERROR:
185 default: {
186 LOG(ERROR)
187 << "Failed to create snapshot devices (CreateResult = "
188 << static_cast<
189 std::underlying_type_t<android::snapshot::CreateResult>>(
190 snapshots_created);
191 processor_->ActionComplete(this, ErrorCode::kError);
192 return;
193 }
194 }
195 }
196
Yifan Hongd976cc52020-02-25 14:51:42 -0800197 if (!merge_stats_->Start()) {
198 // Not an error because CleanupPreviousUpdateAction may be paused and
199 // resumed while kernel continues merging snapshots in the background.
200 LOG(WARNING) << "SnapshotMergeStats::Start failed.";
201 }
Yifan Hongdad0af82020-02-19 17:19:49 -0800202 LOG(INFO) << "Waiting for any previous merge request to complete. "
203 << "This can take up to several minutes.";
204 WaitForMergeOrSchedule();
205}
206
207void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
208 TEST_AND_RETURN(running_);
209 MessageLoop::current()->PostDelayedTask(
210 FROM_HERE,
211 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
212 base::Unretained(this)),
213 kWaitForMergeInterval);
214}
215
216void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
217 TEST_AND_RETURN(running_);
218 auto state = snapshot_->ProcessUpdateState(
219 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
220 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
Yifan Hongd976cc52020-02-25 14:51:42 -0800221 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800222
Yifan Hongdad0af82020-02-19 17:19:49 -0800223 switch (state) {
224 case UpdateState::None: {
225 LOG(INFO) << "Can't find any snapshot to merge.";
Yifan Hong16b594a2020-03-05 21:02:36 -0800226 ErrorCode error_code = ErrorCode::kSuccess;
227 if (!snapshot_->CancelUpdate()) {
228 error_code = ErrorCode::kError;
229 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
230 }
231 processor_->ActionComplete(this, error_code);
Yifan Hongdad0af82020-02-19 17:19:49 -0800232 return;
233 }
234
235 case UpdateState::Initiated: {
236 LOG(ERROR) << "Previous update has not been completed, not cleaning up";
237 processor_->ActionComplete(this, ErrorCode::kSuccess);
238 return;
239 }
240
241 case UpdateState::Unverified: {
242 InitiateMergeAndWait();
243 return;
244 }
245
246 case UpdateState::Merging: {
247 ScheduleWaitForMerge();
248 return;
249 }
250
251 case UpdateState::MergeNeedsReboot: {
252 LOG(ERROR) << "Need reboot to finish merging.";
253 processor_->ActionComplete(this, ErrorCode::kError);
254 return;
255 }
256
257 case UpdateState::MergeCompleted: {
258 LOG(INFO) << "Merge finished with state MergeCompleted.";
259 processor_->ActionComplete(this, ErrorCode::kSuccess);
260 return;
261 }
262
263 case UpdateState::MergeFailed: {
264 LOG(ERROR) << "Merge failed. Device may be corrupted.";
265 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
266 return;
267 }
268
269 case UpdateState::Cancelled: {
270 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
271 // not deleted to avoid inconsistency.
272 // Nothing can be done here; just try next time.
273 ErrorCode error_code =
274 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
275 processor_->ActionComplete(this, error_code);
276 return;
277 }
278
279 default: {
280 // Protobuf has some reserved enum values, so a default case is needed.
281 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
282 << static_cast<int32_t>(state);
283 }
284 }
285}
286
287bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
288 double percentage = 0.0;
289 snapshot_->GetUpdateState(&percentage);
290 if (delegate_) {
291 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
292 delegate_->OnCleanupProgressUpdate(percentage / 100);
293 }
294
295 // Log if percentage increments by at least 1.
296 if (last_percentage_ < static_cast<unsigned int>(percentage)) {
297 last_percentage_ = percentage;
298 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
299 }
300
301 // Do not continue to wait for merge. Instead, let ProcessUpdateState
302 // return Merging directly so that we can ScheduleWaitForMerge() in
303 // MessageLoop.
304 return false;
305}
306
307bool CleanupPreviousUpdateAction::BeforeCancel() {
308 if (DeltaPerformer::ResetUpdateProgress(
309 prefs_,
310 false /* quick */,
311 false /* skip dynamic partitions metadata*/)) {
312 return true;
313 }
314
315 // ResetUpdateProgress might not work on stub prefs. Do additional checks.
316 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
317
318 std::string val;
319 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
320 if (val.empty()) {
321 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
322 << " is empty, assuming successful cleanup";
323 return true;
324 }
325 LOG(WARNING)
326 << kPrefsDynamicPartitionMetadataUpdated << " is " << val
327 << ", not deleting snapshots even though UpdateState is Cancelled.";
328 cancel_failed_ = true;
329 return false;
330}
331
332void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
333 TEST_AND_RETURN(running_);
334 LOG(INFO) << "Attempting to initiate merge.";
335
336 if (snapshot_->InitiateMerge()) {
337 WaitForMergeOrSchedule();
338 return;
339 }
340
341 LOG(WARNING) << "InitiateMerge failed.";
342 auto state = snapshot_->GetUpdateState();
Yifan Hongd976cc52020-02-25 14:51:42 -0800343 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800344 if (state == UpdateState::Unverified) {
345 // We are stuck at unverified state. This can happen if the update has
346 // been applied, but it has not even been attempted yet (in libsnapshot,
347 // rollback indicator does not exist); for example, if update_engine
348 // restarts before the device reboots, then this state may be reached.
349 // Nothing should be done here.
350 LOG(WARNING) << "InitiateMerge leaves the device at "
351 << "UpdateState::Unverified. (Did update_engine "
352 << "restarted?)";
353 processor_->ActionComplete(this, ErrorCode::kSuccess);
354 return;
355 }
356
357 // State does seems to be advanced.
358 // It is possibly racy. For example, on a userdebug build, the user may
359 // manually initiate a merge with snapshotctl between last time
360 // update_engine checks UpdateState. Hence, just call
361 // WaitForMergeOrSchedule one more time.
362 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
363 << android::snapshot::UpdateState_Name(state)
364 << ", try to wait for merge again.";
365 WaitForMergeOrSchedule();
366 return;
367}
368
Yifan Hongd976cc52020-02-25 14:51:42 -0800369void CleanupPreviousUpdateAction::ReportMergeStats() {
370 auto result = merge_stats_->Finish();
371 if (result == nullptr) {
372 LOG(WARNING) << "Not reporting merge stats because "
373 "SnapshotMergeStats::Finish failed.";
374 return;
375 }
376
377#ifdef __ANDROID_RECOVERY__
378 LOG(INFO) << "Skip reporting merge stats in recovery.";
379#else
380 const auto& report = result->report();
381
382 if (report.state() == UpdateState::None ||
383 report.state() == UpdateState::Initiated ||
384 report.state() == UpdateState::Unverified) {
385 LOG(INFO) << "Not reporting merge stats because state is "
386 << android::snapshot::UpdateState_Name(report.state());
387 return;
388 }
389
390 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
391 result->merge_time());
392 LOG(INFO) << "Reporting merge stats: "
393 << android::snapshot::UpdateState_Name(report.state()) << " in "
394 << passed_ms.count() << "ms (resumed " << report.resume_count()
395 << " times)";
Yifan Hongbe59a002020-03-02 15:45:14 -0800396 android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
397 static_cast<int32_t>(report.state()),
398 static_cast<int64_t>(passed_ms.count()),
399 static_cast<int32_t>(report.resume_count()));
Yifan Hongd976cc52020-02-25 14:51:42 -0800400#endif
401}
402
Yifan Hongdad0af82020-02-19 17:19:49 -0800403} // namespace chromeos_update_engine