blob: 6490ca8cbf8d12c3aa756b041e9826190bdade48 [file] [log] [blame]
Yifan Hongdad0af82020-02-19 17:19:49 -08001//
2// Copyright (C) 2020 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16#include "update_engine/cleanup_previous_update_action.h"
17
Yifan Hongd976cc52020-02-25 14:51:42 -080018#include <chrono> // NOLINT(build/c++11) -- for merge times
Yifan Hongdad0af82020-02-19 17:19:49 -080019#include <functional>
20#include <string>
Yifan Hong24031712020-03-19 19:25:38 -070021#include <type_traits>
Yifan Hongdad0af82020-02-19 17:19:49 -080022
23#include <android-base/properties.h>
24#include <base/bind.h>
25
Yifan Hongd976cc52020-02-25 14:51:42 -080026#ifndef __ANDROID_RECOVERY__
27#include <statslog.h>
28#endif
29
Yifan Hongdad0af82020-02-19 17:19:49 -080030#include "update_engine/common/utils.h"
31#include "update_engine/payload_consumer/delta_performer.h"
32
Yifan Hongf9cb4492020-04-15 13:00:20 -070033using android::snapshot::ISnapshotManager;
Yifan Hongd976cc52020-02-25 14:51:42 -080034using android::snapshot::SnapshotMergeStats;
Yifan Hongdad0af82020-02-19 17:19:49 -080035using android::snapshot::UpdateState;
36using brillo::MessageLoop;
37
38constexpr char kBootCompletedProp[] = "sys.boot_completed";
39// Interval to check sys.boot_completed.
40constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
41// Interval to check IBootControl::isSlotMarkedSuccessful
42constexpr auto kCheckSlotMarkedSuccessfulInterval =
43 base::TimeDelta::FromSeconds(2);
44// Interval to call SnapshotManager::ProcessUpdateState
45constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
46
// Compile-time flag: true only when this translation unit is built with
// __ANDROID_RECOVERY__ defined. Used below to skip checks that only make
// sense in a normally booted system.
#if defined(__ANDROID_RECOVERY__)
static constexpr bool kIsRecovery = true;
#else
static constexpr bool kIsRecovery = false;
#endif
52
Yifan Hongdad0af82020-02-19 17:19:49 -080053namespace chromeos_update_engine {
54
55CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
56 PrefsInterface* prefs,
57 BootControlInterface* boot_control,
Yifan Hongf9cb4492020-04-15 13:00:20 -070058 android::snapshot::ISnapshotManager* snapshot,
Yifan Hongdad0af82020-02-19 17:19:49 -080059 CleanupPreviousUpdateActionDelegateInterface* delegate)
60 : prefs_(prefs),
61 boot_control_(boot_control),
62 snapshot_(snapshot),
63 delegate_(delegate),
64 running_(false),
65 cancel_failed_(false),
Yifan Hongd976cc52020-02-25 14:51:42 -080066 last_percentage_(0),
Yifan Hongf9cb4492020-04-15 13:00:20 -070067 merge_stats_(nullptr) {}
Yifan Hongdad0af82020-02-19 17:19:49 -080068
69void CleanupPreviousUpdateAction::PerformAction() {
70 ResumeAction();
71}
72
73void CleanupPreviousUpdateAction::TerminateProcessing() {
74 SuspendAction();
75}
76
77void CleanupPreviousUpdateAction::ResumeAction() {
78 CHECK(prefs_);
79 CHECK(boot_control_);
80
81 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
82 running_ = true;
83 StartActionInternal();
84}
85
86void CleanupPreviousUpdateAction::SuspendAction() {
87 LOG(INFO) << "Stopping/suspending CleanupPreviousUpdateAction";
88 running_ = false;
89}
90
91void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
92 running_ = false;
Yifan Hongd976cc52020-02-25 14:51:42 -080093 ReportMergeStats();
Yifan Hong5cd63fa2020-03-16 12:31:16 -070094 metadata_device_ = nullptr;
Yifan Hongdad0af82020-02-19 17:19:49 -080095}
96
97std::string CleanupPreviousUpdateAction::Type() const {
98 return StaticType();
99}
100
101std::string CleanupPreviousUpdateAction::StaticType() {
102 return "CleanupPreviousUpdateAction";
103}
104
105void CleanupPreviousUpdateAction::StartActionInternal() {
106 // Do nothing on non-VAB device.
107 if (!boot_control_->GetDynamicPartitionControl()
108 ->GetVirtualAbFeatureFlag()
109 .IsEnabled()) {
110 processor_->ActionComplete(this, ErrorCode::kSuccess);
111 return;
112 }
Yifan Hongf9cb4492020-04-15 13:00:20 -0700113 // SnapshotManager must be available on VAB devices.
114 CHECK(snapshot_ != nullptr);
115 merge_stats_ = snapshot_->GetSnapshotMergeStatsInstance();
116 CHECK(merge_stats_ != nullptr);
Yifan Hongdad0af82020-02-19 17:19:49 -0800117 WaitBootCompletedOrSchedule();
118}
119
120void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
121 TEST_AND_RETURN(running_);
122 MessageLoop::current()->PostDelayedTask(
123 FROM_HERE,
124 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
125 base::Unretained(this)),
126 kCheckBootCompletedInterval);
127}
128
129void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
130 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700131 if (!kIsRecovery &&
132 !android::base::GetBoolProperty(kBootCompletedProp, false)) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800133 // repeat
134 ScheduleWaitBootCompleted();
135 return;
136 }
137
138 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
139 CheckSlotMarkedSuccessfulOrSchedule();
140}
141
142void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
143 TEST_AND_RETURN(running_);
144 MessageLoop::current()->PostDelayedTask(
145 FROM_HERE,
146 base::Bind(
147 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
148 base::Unretained(this)),
149 kCheckSlotMarkedSuccessfulInterval);
150}
151
152void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
153 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700154 if (!kIsRecovery &&
155 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800156 ScheduleWaitMarkBootSuccessful();
157 }
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700158
159 if (metadata_device_ == nullptr) {
160 metadata_device_ = snapshot_->EnsureMetadataMounted();
161 }
162
163 if (metadata_device_ == nullptr) {
164 LOG(ERROR) << "Failed to mount /metadata.";
Yifan Hong4d7c5eb2020-04-03 11:31:50 -0700165 // If metadata is erased but not formatted, it is possible to not mount
166 // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
167 processor_->ActionComplete(
168 this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700169 return;
170 }
171
Yifan Hong24031712020-03-19 19:25:38 -0700172 if (kIsRecovery) {
173 auto snapshots_created =
174 snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
175 switch (snapshots_created) {
176 case android::snapshot::CreateResult::CREATED: {
177 // If previous update has not finished merging, snapshots exists and are
178 // created here so that ProcessUpdateState can proceed.
179 LOG(INFO) << "Snapshot devices are created";
180 break;
181 }
182 case android::snapshot::CreateResult::NOT_CREATED: {
183 // If there is no previous update, no snapshot devices are created and
184 // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
185 // considered an error.
186 LOG(INFO) << "Snapshot devices are not created";
187 break;
188 }
189 case android::snapshot::CreateResult::ERROR:
190 default: {
191 LOG(ERROR)
192 << "Failed to create snapshot devices (CreateResult = "
193 << static_cast<
194 std::underlying_type_t<android::snapshot::CreateResult>>(
195 snapshots_created);
196 processor_->ActionComplete(this, ErrorCode::kError);
197 return;
198 }
199 }
200 }
201
Yifan Hongd976cc52020-02-25 14:51:42 -0800202 if (!merge_stats_->Start()) {
203 // Not an error because CleanupPreviousUpdateAction may be paused and
204 // resumed while kernel continues merging snapshots in the background.
205 LOG(WARNING) << "SnapshotMergeStats::Start failed.";
206 }
Yifan Hongdad0af82020-02-19 17:19:49 -0800207 LOG(INFO) << "Waiting for any previous merge request to complete. "
208 << "This can take up to several minutes.";
209 WaitForMergeOrSchedule();
210}
211
212void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
213 TEST_AND_RETURN(running_);
214 MessageLoop::current()->PostDelayedTask(
215 FROM_HERE,
216 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
217 base::Unretained(this)),
218 kWaitForMergeInterval);
219}
220
221void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
222 TEST_AND_RETURN(running_);
223 auto state = snapshot_->ProcessUpdateState(
224 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
225 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
Yifan Hongd976cc52020-02-25 14:51:42 -0800226 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800227
Yifan Hongdad0af82020-02-19 17:19:49 -0800228 switch (state) {
229 case UpdateState::None: {
230 LOG(INFO) << "Can't find any snapshot to merge.";
Yifan Hong16b594a2020-03-05 21:02:36 -0800231 ErrorCode error_code = ErrorCode::kSuccess;
232 if (!snapshot_->CancelUpdate()) {
233 error_code = ErrorCode::kError;
234 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
235 }
236 processor_->ActionComplete(this, error_code);
Yifan Hongdad0af82020-02-19 17:19:49 -0800237 return;
238 }
239
240 case UpdateState::Initiated: {
241 LOG(ERROR) << "Previous update has not been completed, not cleaning up";
242 processor_->ActionComplete(this, ErrorCode::kSuccess);
243 return;
244 }
245
246 case UpdateState::Unverified: {
247 InitiateMergeAndWait();
248 return;
249 }
250
251 case UpdateState::Merging: {
252 ScheduleWaitForMerge();
253 return;
254 }
255
256 case UpdateState::MergeNeedsReboot: {
257 LOG(ERROR) << "Need reboot to finish merging.";
258 processor_->ActionComplete(this, ErrorCode::kError);
259 return;
260 }
261
262 case UpdateState::MergeCompleted: {
263 LOG(INFO) << "Merge finished with state MergeCompleted.";
264 processor_->ActionComplete(this, ErrorCode::kSuccess);
265 return;
266 }
267
268 case UpdateState::MergeFailed: {
269 LOG(ERROR) << "Merge failed. Device may be corrupted.";
270 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
271 return;
272 }
273
274 case UpdateState::Cancelled: {
275 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
276 // not deleted to avoid inconsistency.
277 // Nothing can be done here; just try next time.
278 ErrorCode error_code =
279 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
280 processor_->ActionComplete(this, error_code);
281 return;
282 }
283
284 default: {
285 // Protobuf has some reserved enum values, so a default case is needed.
286 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
287 << static_cast<int32_t>(state);
288 }
289 }
290}
291
292bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
293 double percentage = 0.0;
294 snapshot_->GetUpdateState(&percentage);
295 if (delegate_) {
296 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
297 delegate_->OnCleanupProgressUpdate(percentage / 100);
298 }
299
300 // Log if percentage increments by at least 1.
301 if (last_percentage_ < static_cast<unsigned int>(percentage)) {
302 last_percentage_ = percentage;
303 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
304 }
305
306 // Do not continue to wait for merge. Instead, let ProcessUpdateState
307 // return Merging directly so that we can ScheduleWaitForMerge() in
308 // MessageLoop.
309 return false;
310}
311
312bool CleanupPreviousUpdateAction::BeforeCancel() {
313 if (DeltaPerformer::ResetUpdateProgress(
314 prefs_,
315 false /* quick */,
316 false /* skip dynamic partitions metadata*/)) {
317 return true;
318 }
319
320 // ResetUpdateProgress might not work on stub prefs. Do additional checks.
321 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
322
323 std::string val;
324 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
325 if (val.empty()) {
326 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
327 << " is empty, assuming successful cleanup";
328 return true;
329 }
330 LOG(WARNING)
331 << kPrefsDynamicPartitionMetadataUpdated << " is " << val
332 << ", not deleting snapshots even though UpdateState is Cancelled.";
333 cancel_failed_ = true;
334 return false;
335}
336
337void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
338 TEST_AND_RETURN(running_);
339 LOG(INFO) << "Attempting to initiate merge.";
340
341 if (snapshot_->InitiateMerge()) {
342 WaitForMergeOrSchedule();
343 return;
344 }
345
346 LOG(WARNING) << "InitiateMerge failed.";
347 auto state = snapshot_->GetUpdateState();
Yifan Hongd976cc52020-02-25 14:51:42 -0800348 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800349 if (state == UpdateState::Unverified) {
350 // We are stuck at unverified state. This can happen if the update has
351 // been applied, but it has not even been attempted yet (in libsnapshot,
352 // rollback indicator does not exist); for example, if update_engine
353 // restarts before the device reboots, then this state may be reached.
354 // Nothing should be done here.
355 LOG(WARNING) << "InitiateMerge leaves the device at "
356 << "UpdateState::Unverified. (Did update_engine "
357 << "restarted?)";
358 processor_->ActionComplete(this, ErrorCode::kSuccess);
359 return;
360 }
361
362 // State does seems to be advanced.
363 // It is possibly racy. For example, on a userdebug build, the user may
364 // manually initiate a merge with snapshotctl between last time
365 // update_engine checks UpdateState. Hence, just call
366 // WaitForMergeOrSchedule one more time.
367 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
368 << android::snapshot::UpdateState_Name(state)
369 << ", try to wait for merge again.";
370 WaitForMergeOrSchedule();
371 return;
372}
373
Yifan Hongd976cc52020-02-25 14:51:42 -0800374void CleanupPreviousUpdateAction::ReportMergeStats() {
375 auto result = merge_stats_->Finish();
376 if (result == nullptr) {
377 LOG(WARNING) << "Not reporting merge stats because "
378 "SnapshotMergeStats::Finish failed.";
379 return;
380 }
381
382#ifdef __ANDROID_RECOVERY__
383 LOG(INFO) << "Skip reporting merge stats in recovery.";
384#else
385 const auto& report = result->report();
386
387 if (report.state() == UpdateState::None ||
388 report.state() == UpdateState::Initiated ||
389 report.state() == UpdateState::Unverified) {
390 LOG(INFO) << "Not reporting merge stats because state is "
391 << android::snapshot::UpdateState_Name(report.state());
392 return;
393 }
394
395 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
396 result->merge_time());
397 LOG(INFO) << "Reporting merge stats: "
398 << android::snapshot::UpdateState_Name(report.state()) << " in "
399 << passed_ms.count() << "ms (resumed " << report.resume_count()
400 << " times)";
Yifan Hongbe59a002020-03-02 15:45:14 -0800401 android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
402 static_cast<int32_t>(report.state()),
403 static_cast<int64_t>(passed_ms.count()),
404 static_cast<int32_t>(report.resume_count()));
Yifan Hongd976cc52020-02-25 14:51:42 -0800405#endif
406}
407
Yifan Hongdad0af82020-02-19 17:19:49 -0800408} // namespace chromeos_update_engine