blob: e43730d3c5ef1ecb4b0c67cfbbac2dee706944ba [file] [log] [blame]
Yifan Hong120c3162020-02-19 17:19:49 -08001//
2// Copyright (C) 2020 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16#include "update_engine/cleanup_previous_update_action.h"
17
Yifan Hong4ed0e692020-02-25 14:51:42 -080018#include <chrono> // NOLINT(build/c++11) -- for merge times
Yifan Hong120c3162020-02-19 17:19:49 -080019#include <functional>
20#include <string>
Yifan Hong58397102020-03-19 19:25:38 -070021#include <type_traits>
Yifan Hong120c3162020-02-19 17:19:49 -080022
23#include <android-base/properties.h>
24#include <base/bind.h>
25
Yifan Hong4ed0e692020-02-25 14:51:42 -080026#ifndef __ANDROID_RECOVERY__
27#include <statslog.h>
28#endif
29
Yifan Hong120c3162020-02-19 17:19:49 -080030#include "update_engine/common/utils.h"
31#include "update_engine/payload_consumer/delta_performer.h"
32
Howard Chen23279922020-04-17 11:22:13 +080033using android::base::GetBoolProperty;
Yifan Hong120c3162020-02-19 17:19:49 -080034using android::snapshot::SnapshotManager;
Yifan Hong4ed0e692020-02-25 14:51:42 -080035using android::snapshot::SnapshotMergeStats;
Yifan Hong120c3162020-02-19 17:19:49 -080036using android::snapshot::UpdateState;
37using brillo::MessageLoop;
38
39constexpr char kBootCompletedProp[] = "sys.boot_completed";
40// Interval to check sys.boot_completed.
41constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
42// Interval to check IBootControl::isSlotMarkedSuccessful
43constexpr auto kCheckSlotMarkedSuccessfulInterval =
44 base::TimeDelta::FromSeconds(2);
45// Interval to call SnapshotManager::ProcessUpdateState
46constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
47
Yifan Hong832b6172020-03-16 12:31:16 -070048#ifdef __ANDROID_RECOVERY__
49static constexpr bool kIsRecovery = true;
50#else
51static constexpr bool kIsRecovery = false;
52#endif
53
Yifan Hong120c3162020-02-19 17:19:49 -080054namespace chromeos_update_engine {
55
56CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
57 PrefsInterface* prefs,
58 BootControlInterface* boot_control,
59 android::snapshot::SnapshotManager* snapshot,
60 CleanupPreviousUpdateActionDelegateInterface* delegate)
61 : prefs_(prefs),
62 boot_control_(boot_control),
63 snapshot_(snapshot),
64 delegate_(delegate),
65 running_(false),
66 cancel_failed_(false),
Yifan Hong4ed0e692020-02-25 14:51:42 -080067 last_percentage_(0),
68 merge_stats_(SnapshotMergeStats::GetInstance(*snapshot)) {}
Yifan Hong120c3162020-02-19 17:19:49 -080069
70void CleanupPreviousUpdateAction::PerformAction() {
71 ResumeAction();
72}
73
74void CleanupPreviousUpdateAction::TerminateProcessing() {
75 SuspendAction();
76}
77
78void CleanupPreviousUpdateAction::ResumeAction() {
79 CHECK(prefs_);
80 CHECK(boot_control_);
81
82 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
83 running_ = true;
84 StartActionInternal();
85}
86
87void CleanupPreviousUpdateAction::SuspendAction() {
88 LOG(INFO) << "Stopping/suspending CleanupPreviousUpdateAction";
89 running_ = false;
90}
91
92void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
93 running_ = false;
Yifan Hong4ed0e692020-02-25 14:51:42 -080094 ReportMergeStats();
Yifan Hong832b6172020-03-16 12:31:16 -070095 metadata_device_ = nullptr;
Yifan Hong120c3162020-02-19 17:19:49 -080096}
97
98std::string CleanupPreviousUpdateAction::Type() const {
99 return StaticType();
100}
101
102std::string CleanupPreviousUpdateAction::StaticType() {
103 return "CleanupPreviousUpdateAction";
104}
105
106void CleanupPreviousUpdateAction::StartActionInternal() {
107 // Do nothing on non-VAB device.
108 if (!boot_control_->GetDynamicPartitionControl()
109 ->GetVirtualAbFeatureFlag()
110 .IsEnabled()) {
111 processor_->ActionComplete(this, ErrorCode::kSuccess);
112 return;
113 }
114 // SnapshotManager is only available on VAB devices.
115 CHECK(snapshot_);
116 WaitBootCompletedOrSchedule();
117}
118
119void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
120 TEST_AND_RETURN(running_);
121 MessageLoop::current()->PostDelayedTask(
122 FROM_HERE,
123 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
124 base::Unretained(this)),
125 kCheckBootCompletedInterval);
126}
127
128void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
129 TEST_AND_RETURN(running_);
Yifan Hong832b6172020-03-16 12:31:16 -0700130 if (!kIsRecovery &&
131 !android::base::GetBoolProperty(kBootCompletedProp, false)) {
Yifan Hong120c3162020-02-19 17:19:49 -0800132 // repeat
133 ScheduleWaitBootCompleted();
134 return;
135 }
136
137 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
138 CheckSlotMarkedSuccessfulOrSchedule();
139}
140
141void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
142 TEST_AND_RETURN(running_);
143 MessageLoop::current()->PostDelayedTask(
144 FROM_HERE,
145 base::Bind(
146 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
147 base::Unretained(this)),
148 kCheckSlotMarkedSuccessfulInterval);
149}
150
151void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
152 TEST_AND_RETURN(running_);
Yifan Hong832b6172020-03-16 12:31:16 -0700153 if (!kIsRecovery &&
154 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
Yifan Hong120c3162020-02-19 17:19:49 -0800155 ScheduleWaitMarkBootSuccessful();
156 }
Yifan Hong832b6172020-03-16 12:31:16 -0700157
158 if (metadata_device_ == nullptr) {
159 metadata_device_ = snapshot_->EnsureMetadataMounted();
160 }
161
162 if (metadata_device_ == nullptr) {
163 LOG(ERROR) << "Failed to mount /metadata.";
Yifan Hong700e6b02020-04-03 11:31:50 -0700164 // If metadata is erased but not formatted, it is possible to not mount
165 // it in recovery. It is safe to skip CleanupPreviousUpdateAction.
166 processor_->ActionComplete(
167 this, kIsRecovery ? ErrorCode::kSuccess : ErrorCode::kError);
Yifan Hong832b6172020-03-16 12:31:16 -0700168 return;
169 }
170
Yifan Hong58397102020-03-19 19:25:38 -0700171 if (kIsRecovery) {
172 auto snapshots_created =
173 snapshot_->RecoveryCreateSnapshotDevices(metadata_device_);
174 switch (snapshots_created) {
175 case android::snapshot::CreateResult::CREATED: {
176 // If previous update has not finished merging, snapshots exists and are
177 // created here so that ProcessUpdateState can proceed.
178 LOG(INFO) << "Snapshot devices are created";
179 break;
180 }
181 case android::snapshot::CreateResult::NOT_CREATED: {
182 // If there is no previous update, no snapshot devices are created and
183 // ProcessUpdateState will return immediately. Hence, NOT_CREATED is not
184 // considered an error.
185 LOG(INFO) << "Snapshot devices are not created";
186 break;
187 }
188 case android::snapshot::CreateResult::ERROR:
189 default: {
190 LOG(ERROR)
191 << "Failed to create snapshot devices (CreateResult = "
192 << static_cast<
193 std::underlying_type_t<android::snapshot::CreateResult>>(
194 snapshots_created);
195 processor_->ActionComplete(this, ErrorCode::kError);
196 return;
197 }
198 }
199 }
200
Yifan Hong4ed0e692020-02-25 14:51:42 -0800201 if (!merge_stats_->Start()) {
202 // Not an error because CleanupPreviousUpdateAction may be paused and
203 // resumed while kernel continues merging snapshots in the background.
204 LOG(WARNING) << "SnapshotMergeStats::Start failed.";
205 }
Yifan Hong120c3162020-02-19 17:19:49 -0800206 LOG(INFO) << "Waiting for any previous merge request to complete. "
207 << "This can take up to several minutes.";
208 WaitForMergeOrSchedule();
209}
210
211void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
212 TEST_AND_RETURN(running_);
213 MessageLoop::current()->PostDelayedTask(
214 FROM_HERE,
215 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
216 base::Unretained(this)),
217 kWaitForMergeInterval);
218}
219
220void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
221 TEST_AND_RETURN(running_);
222 auto state = snapshot_->ProcessUpdateState(
223 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
224 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
Yifan Hong4ed0e692020-02-25 14:51:42 -0800225 merge_stats_->set_state(state);
Yifan Hong120c3162020-02-19 17:19:49 -0800226
Yifan Hong120c3162020-02-19 17:19:49 -0800227 switch (state) {
228 case UpdateState::None: {
229 LOG(INFO) << "Can't find any snapshot to merge.";
Yifan Hong48fe4252020-03-05 21:02:36 -0800230 ErrorCode error_code = ErrorCode::kSuccess;
231 if (!snapshot_->CancelUpdate()) {
232 error_code = ErrorCode::kError;
233 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
234 }
235 processor_->ActionComplete(this, error_code);
Yifan Hong120c3162020-02-19 17:19:49 -0800236 return;
237 }
238
239 case UpdateState::Initiated: {
240 LOG(ERROR) << "Previous update has not been completed, not cleaning up";
241 processor_->ActionComplete(this, ErrorCode::kSuccess);
242 return;
243 }
244
245 case UpdateState::Unverified: {
246 InitiateMergeAndWait();
247 return;
248 }
249
250 case UpdateState::Merging: {
251 ScheduleWaitForMerge();
252 return;
253 }
254
255 case UpdateState::MergeNeedsReboot: {
256 LOG(ERROR) << "Need reboot to finish merging.";
257 processor_->ActionComplete(this, ErrorCode::kError);
258 return;
259 }
260
261 case UpdateState::MergeCompleted: {
262 LOG(INFO) << "Merge finished with state MergeCompleted.";
263 processor_->ActionComplete(this, ErrorCode::kSuccess);
264 return;
265 }
266
267 case UpdateState::MergeFailed: {
268 LOG(ERROR) << "Merge failed. Device may be corrupted.";
269 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
270 return;
271 }
272
273 case UpdateState::Cancelled: {
274 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
275 // not deleted to avoid inconsistency.
276 // Nothing can be done here; just try next time.
277 ErrorCode error_code =
278 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
279 processor_->ActionComplete(this, error_code);
280 return;
281 }
282
283 default: {
284 // Protobuf has some reserved enum values, so a default case is needed.
285 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
286 << static_cast<int32_t>(state);
287 }
288 }
289}
290
291bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
292 double percentage = 0.0;
293 snapshot_->GetUpdateState(&percentage);
294 if (delegate_) {
295 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
296 delegate_->OnCleanupProgressUpdate(percentage / 100);
297 }
298
299 // Log if percentage increments by at least 1.
300 if (last_percentage_ < static_cast<unsigned int>(percentage)) {
301 last_percentage_ = percentage;
302 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
303 }
304
305 // Do not continue to wait for merge. Instead, let ProcessUpdateState
306 // return Merging directly so that we can ScheduleWaitForMerge() in
307 // MessageLoop.
308 return false;
309}
310
311bool CleanupPreviousUpdateAction::BeforeCancel() {
312 if (DeltaPerformer::ResetUpdateProgress(
313 prefs_,
314 false /* quick */,
315 false /* skip dynamic partitions metadata*/)) {
316 return true;
317 }
318
319 // ResetUpdateProgress might not work on stub prefs. Do additional checks.
320 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
321
322 std::string val;
323 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
324 if (val.empty()) {
325 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
326 << " is empty, assuming successful cleanup";
327 return true;
328 }
329 LOG(WARNING)
330 << kPrefsDynamicPartitionMetadataUpdated << " is " << val
331 << ", not deleting snapshots even though UpdateState is Cancelled.";
332 cancel_failed_ = true;
333 return false;
334}
335
336void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
337 TEST_AND_RETURN(running_);
338 LOG(INFO) << "Attempting to initiate merge.";
Howard Chen23279922020-04-17 11:22:13 +0800339 // suspend the VAB merge when running a DSU
340 if (GetBoolProperty("ro.gsid.image_running", false)) {
341 LOG(WARNING) << "Suspend the VAB merge when running a DSU.";
342 processor_->ActionComplete(this, ErrorCode::kError);
343 return;
344 }
Yifan Hong120c3162020-02-19 17:19:49 -0800345
346 if (snapshot_->InitiateMerge()) {
347 WaitForMergeOrSchedule();
348 return;
349 }
350
351 LOG(WARNING) << "InitiateMerge failed.";
352 auto state = snapshot_->GetUpdateState();
Yifan Hong4ed0e692020-02-25 14:51:42 -0800353 merge_stats_->set_state(state);
Yifan Hong120c3162020-02-19 17:19:49 -0800354 if (state == UpdateState::Unverified) {
355 // We are stuck at unverified state. This can happen if the update has
356 // been applied, but it has not even been attempted yet (in libsnapshot,
357 // rollback indicator does not exist); for example, if update_engine
358 // restarts before the device reboots, then this state may be reached.
359 // Nothing should be done here.
360 LOG(WARNING) << "InitiateMerge leaves the device at "
361 << "UpdateState::Unverified. (Did update_engine "
362 << "restarted?)";
363 processor_->ActionComplete(this, ErrorCode::kSuccess);
364 return;
365 }
366
367 // State does seems to be advanced.
368 // It is possibly racy. For example, on a userdebug build, the user may
369 // manually initiate a merge with snapshotctl between last time
370 // update_engine checks UpdateState. Hence, just call
371 // WaitForMergeOrSchedule one more time.
372 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
373 << android::snapshot::UpdateState_Name(state)
374 << ", try to wait for merge again.";
375 WaitForMergeOrSchedule();
376 return;
377}
378
Yifan Hong4ed0e692020-02-25 14:51:42 -0800379void CleanupPreviousUpdateAction::ReportMergeStats() {
380 auto result = merge_stats_->Finish();
381 if (result == nullptr) {
382 LOG(WARNING) << "Not reporting merge stats because "
383 "SnapshotMergeStats::Finish failed.";
384 return;
385 }
386
387#ifdef __ANDROID_RECOVERY__
388 LOG(INFO) << "Skip reporting merge stats in recovery.";
389#else
390 const auto& report = result->report();
391
392 if (report.state() == UpdateState::None ||
393 report.state() == UpdateState::Initiated ||
394 report.state() == UpdateState::Unverified) {
395 LOG(INFO) << "Not reporting merge stats because state is "
396 << android::snapshot::UpdateState_Name(report.state());
397 return;
398 }
399
400 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
401 result->merge_time());
402 LOG(INFO) << "Reporting merge stats: "
403 << android::snapshot::UpdateState_Name(report.state()) << " in "
404 << passed_ms.count() << "ms (resumed " << report.resume_count()
405 << " times)";
Yifan Hong0c4ed292020-03-02 15:45:14 -0800406 android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
407 static_cast<int32_t>(report.state()),
408 static_cast<int64_t>(passed_ms.count()),
409 static_cast<int32_t>(report.resume_count()));
Yifan Hong4ed0e692020-02-25 14:51:42 -0800410#endif
411}
412
Yifan Hong120c3162020-02-19 17:19:49 -0800413} // namespace chromeos_update_engine