blob: dd9a1cad0db8e039ce6d1d39a93454a573ab6890 [file] [log] [blame]
Yifan Hongdad0af82020-02-19 17:19:49 -08001//
2// Copyright (C) 2020 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16#include "update_engine/cleanup_previous_update_action.h"
17
Yifan Hongd976cc52020-02-25 14:51:42 -080018#include <chrono> // NOLINT(build/c++11) -- for merge times
Yifan Hongdad0af82020-02-19 17:19:49 -080019#include <functional>
20#include <string>
Yifan Hong24031712020-03-19 19:25:38 -070021#include <type_traits>
Yifan Hongdad0af82020-02-19 17:19:49 -080022
23#include <android-base/properties.h>
24#include <base/bind.h>
25
Yifan Hongd976cc52020-02-25 14:51:42 -080026#ifndef __ANDROID_RECOVERY__
27#include <statslog.h>
28#endif
29
Yifan Hongdad0af82020-02-19 17:19:49 -080030#include "update_engine/common/utils.h"
31#include "update_engine/payload_consumer/delta_performer.h"
32
Howard Chen224aed92020-04-17 11:22:13 +080033using android::base::GetBoolProperty;
Yifan Hongf9cb4492020-04-15 13:00:20 -070034using android::snapshot::ISnapshotManager;
Yifan Hongd976cc52020-02-25 14:51:42 -080035using android::snapshot::SnapshotMergeStats;
Yifan Hongdad0af82020-02-19 17:19:49 -080036using android::snapshot::UpdateState;
37using brillo::MessageLoop;
38
39constexpr char kBootCompletedProp[] = "sys.boot_completed";
40// Interval to check sys.boot_completed.
41constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
42// Interval to check IBootControl::isSlotMarkedSuccessful
43constexpr auto kCheckSlotMarkedSuccessfulInterval =
44 base::TimeDelta::FromSeconds(2);
45// Interval to call SnapshotManager::ProcessUpdateState
46constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
47
Yifan Hong5cd63fa2020-03-16 12:31:16 -070048#ifdef __ANDROID_RECOVERY__
49static constexpr bool kIsRecovery = true;
50#else
51static constexpr bool kIsRecovery = false;
52#endif
53
Yifan Hongdad0af82020-02-19 17:19:49 -080054namespace chromeos_update_engine {
55
56CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
57 PrefsInterface* prefs,
58 BootControlInterface* boot_control,
Yifan Hongf9cb4492020-04-15 13:00:20 -070059 android::snapshot::ISnapshotManager* snapshot,
Yifan Hongdad0af82020-02-19 17:19:49 -080060 CleanupPreviousUpdateActionDelegateInterface* delegate)
61 : prefs_(prefs),
62 boot_control_(boot_control),
63 snapshot_(snapshot),
64 delegate_(delegate),
65 running_(false),
66 cancel_failed_(false),
Yifan Hongd976cc52020-02-25 14:51:42 -080067 last_percentage_(0),
Yifan Hongf9cb4492020-04-15 13:00:20 -070068 merge_stats_(nullptr) {}
Yifan Hongdad0af82020-02-19 17:19:49 -080069
70void CleanupPreviousUpdateAction::PerformAction() {
71 ResumeAction();
72}
73
74void CleanupPreviousUpdateAction::TerminateProcessing() {
75 SuspendAction();
76}
77
78void CleanupPreviousUpdateAction::ResumeAction() {
79 CHECK(prefs_);
80 CHECK(boot_control_);
81
82 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
83 running_ = true;
84 StartActionInternal();
85}
86
87void CleanupPreviousUpdateAction::SuspendAction() {
88 LOG(INFO) << "Stopping/suspending CleanupPreviousUpdateAction";
89 running_ = false;
90}
91
92void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
93 running_ = false;
Yifan Hongd976cc52020-02-25 14:51:42 -080094 ReportMergeStats();
Yifan Hong5cd63fa2020-03-16 12:31:16 -070095 metadata_device_ = nullptr;
Yifan Hongdad0af82020-02-19 17:19:49 -080096}
97
98std::string CleanupPreviousUpdateAction::Type() const {
99 return StaticType();
100}
101
102std::string CleanupPreviousUpdateAction::StaticType() {
103 return "CleanupPreviousUpdateAction";
104}
105
106void CleanupPreviousUpdateAction::StartActionInternal() {
107 // Do nothing on non-VAB device.
108 if (!boot_control_->GetDynamicPartitionControl()
109 ->GetVirtualAbFeatureFlag()
110 .IsEnabled()) {
111 processor_->ActionComplete(this, ErrorCode::kSuccess);
112 return;
113 }
Yifan Hongf9cb4492020-04-15 13:00:20 -0700114 // SnapshotManager must be available on VAB devices.
115 CHECK(snapshot_ != nullptr);
116 merge_stats_ = snapshot_->GetSnapshotMergeStatsInstance();
117 CHECK(merge_stats_ != nullptr);
Yifan Hongdad0af82020-02-19 17:19:49 -0800118 WaitBootCompletedOrSchedule();
119}
120
121void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
122 TEST_AND_RETURN(running_);
123 MessageLoop::current()->PostDelayedTask(
124 FROM_HERE,
125 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
126 base::Unretained(this)),
127 kCheckBootCompletedInterval);
128}
129
130void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
131 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700132 if (!kIsRecovery &&
133 !android::base::GetBoolProperty(kBootCompletedProp, false)) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800134 // repeat
135 ScheduleWaitBootCompleted();
136 return;
137 }
138
139 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
140 CheckSlotMarkedSuccessfulOrSchedule();
141}
142
143void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
144 TEST_AND_RETURN(running_);
145 MessageLoop::current()->PostDelayedTask(
146 FROM_HERE,
147 base::Bind(
148 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
149 base::Unretained(this)),
150 kCheckSlotMarkedSuccessfulInterval);
151}
152
153void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
154 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700155 if (!kIsRecovery &&
156 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800157 ScheduleWaitMarkBootSuccessful();
158 }
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700159
160 if (metadata_device_ == nullptr) {
161 metadata_device_ = snapshot_->EnsureMetadataMounted();
162 }
163
164 if (metadata_device_ == nullptr) {
165 LOG(ERROR) << "Failed to mount /metadata.";
Yifan Hong4d7c5eb2020-04-03 11:31:50 -0700166 // If metadata is erased but not formatted, it is possible to not mount
167 // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
168 processor_->ActionComplete(
169 this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700170 return;
171 }
172
Yifan Hong24031712020-03-19 19:25:38 -0700173 if (kIsRecovery) {
174 auto snapshots_created =
175 snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
176 switch (snapshots_created) {
177 case android::snapshot::CreateResult::CREATED: {
178 // If previous update has not finished merging, snapshots exists and are
179 // created here so that ProcessUpdateState can proceed.
180 LOG(INFO) << "Snapshot devices are created";
181 break;
182 }
183 case android::snapshot::CreateResult::NOT_CREATED: {
184 // If there is no previous update, no snapshot devices are created and
185 // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
186 // considered an error.
187 LOG(INFO) << "Snapshot devices are not created";
188 break;
189 }
190 case android::snapshot::CreateResult::ERROR:
191 default: {
192 LOG(ERROR)
193 << "Failed to create snapshot devices (CreateResult = "
194 << static_cast<
195 std::underlying_type_t<android::snapshot::CreateResult>>(
196 snapshots_created);
197 processor_->ActionComplete(this, ErrorCode::kError);
198 return;
199 }
200 }
201 }
202
Yifan Hongd976cc52020-02-25 14:51:42 -0800203 if (!merge_stats_->Start()) {
204 // Not an error because CleanupPreviousUpdateAction may be paused and
205 // resumed while kernel continues merging snapshots in the background.
206 LOG(WARNING) << "SnapshotMergeStats::Start failed.";
207 }
Yifan Hongdad0af82020-02-19 17:19:49 -0800208 LOG(INFO) << "Waiting for any previous merge request to complete. "
209 << "This can take up to several minutes.";
210 WaitForMergeOrSchedule();
211}
212
213void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
214 TEST_AND_RETURN(running_);
215 MessageLoop::current()->PostDelayedTask(
216 FROM_HERE,
217 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
218 base::Unretained(this)),
219 kWaitForMergeInterval);
220}
221
222void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
223 TEST_AND_RETURN(running_);
224 auto state = snapshot_->ProcessUpdateState(
225 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
226 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
Yifan Hongd976cc52020-02-25 14:51:42 -0800227 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800228
Yifan Hongdad0af82020-02-19 17:19:49 -0800229 switch (state) {
230 case UpdateState::None: {
231 LOG(INFO) << "Can't find any snapshot to merge.";
Yifan Hong16b594a2020-03-05 21:02:36 -0800232 ErrorCode error_code = ErrorCode::kSuccess;
233 if (!snapshot_->CancelUpdate()) {
234 error_code = ErrorCode::kError;
235 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
236 }
237 processor_->ActionComplete(this, error_code);
Yifan Hongdad0af82020-02-19 17:19:49 -0800238 return;
239 }
240
241 case UpdateState::Initiated: {
242 LOG(ERROR) << "Previous update has not been completed, not cleaning up";
243 processor_->ActionComplete(this, ErrorCode::kSuccess);
244 return;
245 }
246
247 case UpdateState::Unverified: {
248 InitiateMergeAndWait();
249 return;
250 }
251
252 case UpdateState::Merging: {
253 ScheduleWaitForMerge();
254 return;
255 }
256
257 case UpdateState::MergeNeedsReboot: {
258 LOG(ERROR) << "Need reboot to finish merging.";
259 processor_->ActionComplete(this, ErrorCode::kError);
260 return;
261 }
262
263 case UpdateState::MergeCompleted: {
264 LOG(INFO) << "Merge finished with state MergeCompleted.";
265 processor_->ActionComplete(this, ErrorCode::kSuccess);
266 return;
267 }
268
269 case UpdateState::MergeFailed: {
270 LOG(ERROR) << "Merge failed. Device may be corrupted.";
271 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
272 return;
273 }
274
275 case UpdateState::Cancelled: {
276 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
277 // not deleted to avoid inconsistency.
278 // Nothing can be done here; just try next time.
279 ErrorCode error_code =
280 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
281 processor_->ActionComplete(this, error_code);
282 return;
283 }
284
285 default: {
286 // Protobuf has some reserved enum values, so a default case is needed.
287 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
288 << static_cast<int32_t>(state);
289 }
290 }
291}
292
293bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
294 double percentage = 0.0;
295 snapshot_->GetUpdateState(&percentage);
296 if (delegate_) {
297 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
298 delegate_->OnCleanupProgressUpdate(percentage / 100);
299 }
300
301 // Log if percentage increments by at least 1.
302 if (last_percentage_ < static_cast<unsigned int>(percentage)) {
303 last_percentage_ = percentage;
304 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
305 }
306
307 // Do not continue to wait for merge. Instead, let ProcessUpdateState
308 // return Merging directly so that we can ScheduleWaitForMerge() in
309 // MessageLoop.
310 return false;
311}
312
313bool CleanupPreviousUpdateAction::BeforeCancel() {
314 if (DeltaPerformer::ResetUpdateProgress(
315 prefs_,
316 false /* quick */,
317 false /* skip dynamic partitions metadata*/)) {
318 return true;
319 }
320
321 // ResetUpdateProgress might not work on stub prefs. Do additional checks.
322 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
323
324 std::string val;
325 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
326 if (val.empty()) {
327 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
328 << " is empty, assuming successful cleanup";
329 return true;
330 }
331 LOG(WARNING)
332 << kPrefsDynamicPartitionMetadataUpdated << " is " << val
333 << ", not deleting snapshots even though UpdateState is Cancelled.";
334 cancel_failed_ = true;
335 return false;
336}
337
338void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
339 TEST_AND_RETURN(running_);
340 LOG(INFO) << "Attempting to initiate merge.";
Howard Chen224aed92020-04-17 11:22:13 +0800341 // suspend the VAB merge when running a DSU
342 if (GetBoolProperty("ro.gsid.image_running", false)) {
343 LOG(WARNING) << "Suspend the VAB merge when running a DSU.";
344 processor_->ActionComplete(this, ErrorCode::kError);
345 return;
346 }
Yifan Hongdad0af82020-02-19 17:19:49 -0800347
348 if (snapshot_->InitiateMerge()) {
349 WaitForMergeOrSchedule();
350 return;
351 }
352
353 LOG(WARNING) << "InitiateMerge failed.";
354 auto state = snapshot_->GetUpdateState();
Yifan Hongd976cc52020-02-25 14:51:42 -0800355 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800356 if (state == UpdateState::Unverified) {
357 // We are stuck at unverified state. This can happen if the update has
358 // been applied, but it has not even been attempted yet (in libsnapshot,
359 // rollback indicator does not exist); for example, if update_engine
360 // restarts before the device reboots, then this state may be reached.
361 // Nothing should be done here.
362 LOG(WARNING) << "InitiateMerge leaves the device at "
363 << "UpdateState::Unverified. (Did update_engine "
364 << "restarted?)";
365 processor_->ActionComplete(this, ErrorCode::kSuccess);
366 return;
367 }
368
369 // State does seems to be advanced.
370 // It is possibly racy. For example, on a userdebug build, the user may
371 // manually initiate a merge with snapshotctl between last time
372 // update_engine checks UpdateState. Hence, just call
373 // WaitForMergeOrSchedule one more time.
374 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
375 << android::snapshot::UpdateState_Name(state)
376 << ", try to wait for merge again.";
377 WaitForMergeOrSchedule();
378 return;
379}
380
Yifan Hongd976cc52020-02-25 14:51:42 -0800381void CleanupPreviousUpdateAction::ReportMergeStats() {
382 auto result = merge_stats_->Finish();
383 if (result == nullptr) {
384 LOG(WARNING) << "Not reporting merge stats because "
385 "SnapshotMergeStats::Finish failed.";
386 return;
387 }
388
389#ifdef __ANDROID_RECOVERY__
390 LOG(INFO) << "Skip reporting merge stats in recovery.";
391#else
392 const auto& report = result->report();
393
394 if (report.state() == UpdateState::None ||
395 report.state() == UpdateState::Initiated ||
396 report.state() == UpdateState::Unverified) {
397 LOG(INFO) << "Not reporting merge stats because state is "
398 << android::snapshot::UpdateState_Name(report.state());
399 return;
400 }
401
402 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
403 result->merge_time());
404 LOG(INFO) << "Reporting merge stats: "
405 << android::snapshot::UpdateState_Name(report.state()) << " in "
406 << passed_ms.count() << "ms (resumed " << report.resume_count()
407 << " times)";
Yifan Hongbe59a002020-03-02 15:45:14 -0800408 android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
409 static_cast<int32_t>(report.state()),
410 static_cast<int64_t>(passed_ms.count()),
411 static_cast<int32_t>(report.resume_count()));
Yifan Hongd976cc52020-02-25 14:51:42 -0800412#endif
413}
414
Yifan Hongdad0af82020-02-19 17:19:49 -0800415} // namespace chromeos_update_engine