blob: 91373d45fe58b9f399b8fcd7d2ffe854c758780c [file] [log] [blame]
Yifan Hongdad0af82020-02-19 17:19:49 -08001//
2// Copyright (C) 2020 The Android Open Source Project
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16#include "update_engine/cleanup_previous_update_action.h"
17
Yifan Hongd976cc52020-02-25 14:51:42 -080018#include <chrono> // NOLINT(build/c++11) -- for merge times
Yifan Hongdad0af82020-02-19 17:19:49 -080019#include <functional>
20#include <string>
21
22#include <android-base/properties.h>
23#include <base/bind.h>
24
Yifan Hongd976cc52020-02-25 14:51:42 -080025#ifndef __ANDROID_RECOVERY__
26#include <statslog.h>
27#endif
28
Yifan Hongdad0af82020-02-19 17:19:49 -080029#include "update_engine/common/utils.h"
30#include "update_engine/payload_consumer/delta_performer.h"
31
32using android::snapshot::SnapshotManager;
Yifan Hongd976cc52020-02-25 14:51:42 -080033using android::snapshot::SnapshotMergeStats;
Yifan Hongdad0af82020-02-19 17:19:49 -080034using android::snapshot::UpdateState;
35using brillo::MessageLoop;
36
37constexpr char kBootCompletedProp[] = "sys.boot_completed";
38// Interval to check sys.boot_completed.
39constexpr auto kCheckBootCompletedInterval = base::TimeDelta::FromSeconds(2);
40// Interval to check IBootControl::isSlotMarkedSuccessful
41constexpr auto kCheckSlotMarkedSuccessfulInterval =
42 base::TimeDelta::FromSeconds(2);
43// Interval to call SnapshotManager::ProcessUpdateState
44constexpr auto kWaitForMergeInterval = base::TimeDelta::FromSeconds(2);
45
Yifan Hong5cd63fa2020-03-16 12:31:16 -070046#ifdef __ANDROID_RECOVERY__
47static constexpr bool kIsRecovery = true;
48#else
49static constexpr bool kIsRecovery = false;
50#endif
51
Yifan Hongdad0af82020-02-19 17:19:49 -080052namespace chromeos_update_engine {
53
54CleanupPreviousUpdateAction::CleanupPreviousUpdateAction(
55 PrefsInterface* prefs,
56 BootControlInterface* boot_control,
57 android::snapshot::SnapshotManager* snapshot,
58 CleanupPreviousUpdateActionDelegateInterface* delegate)
59 : prefs_(prefs),
60 boot_control_(boot_control),
61 snapshot_(snapshot),
62 delegate_(delegate),
63 running_(false),
64 cancel_failed_(false),
Yifan Hongd976cc52020-02-25 14:51:42 -080065 last_percentage_(0),
66 merge_stats_(SnapshotMergeStats::GetInstance(*snapshot)) {}
Yifan Hongdad0af82020-02-19 17:19:49 -080067
68void CleanupPreviousUpdateAction::PerformAction() {
69 ResumeAction();
70}
71
72void CleanupPreviousUpdateAction::TerminateProcessing() {
73 SuspendAction();
74}
75
76void CleanupPreviousUpdateAction::ResumeAction() {
77 CHECK(prefs_);
78 CHECK(boot_control_);
79
80 LOG(INFO) << "Starting/resuming CleanupPreviousUpdateAction";
81 running_ = true;
82 StartActionInternal();
83}
84
85void CleanupPreviousUpdateAction::SuspendAction() {
86 LOG(INFO) << "Stopping/suspending CleanupPreviousUpdateAction";
87 running_ = false;
88}
89
90void CleanupPreviousUpdateAction::ActionCompleted(ErrorCode error_code) {
91 running_ = false;
Yifan Hongd976cc52020-02-25 14:51:42 -080092 ReportMergeStats();
Yifan Hong5cd63fa2020-03-16 12:31:16 -070093 metadata_device_ = nullptr;
Yifan Hongdad0af82020-02-19 17:19:49 -080094}
95
96std::string CleanupPreviousUpdateAction::Type() const {
97 return StaticType();
98}
99
100std::string CleanupPreviousUpdateAction::StaticType() {
101 return "CleanupPreviousUpdateAction";
102}
103
104void CleanupPreviousUpdateAction::StartActionInternal() {
105 // Do nothing on non-VAB device.
106 if (!boot_control_->GetDynamicPartitionControl()
107 ->GetVirtualAbFeatureFlag()
108 .IsEnabled()) {
109 processor_->ActionComplete(this, ErrorCode::kSuccess);
110 return;
111 }
112 // SnapshotManager is only available on VAB devices.
113 CHECK(snapshot_);
114 WaitBootCompletedOrSchedule();
115}
116
117void CleanupPreviousUpdateAction::ScheduleWaitBootCompleted() {
118 TEST_AND_RETURN(running_);
119 MessageLoop::current()->PostDelayedTask(
120 FROM_HERE,
121 base::Bind(&CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule,
122 base::Unretained(this)),
123 kCheckBootCompletedInterval);
124}
125
126void CleanupPreviousUpdateAction::WaitBootCompletedOrSchedule() {
127 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700128 if (!kIsRecovery &&
129 !android::base::GetBoolProperty(kBootCompletedProp, false)) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800130 // repeat
131 ScheduleWaitBootCompleted();
132 return;
133 }
134
135 LOG(INFO) << "Boot completed, waiting on markBootSuccessful()";
136 CheckSlotMarkedSuccessfulOrSchedule();
137}
138
139void CleanupPreviousUpdateAction::ScheduleWaitMarkBootSuccessful() {
140 TEST_AND_RETURN(running_);
141 MessageLoop::current()->PostDelayedTask(
142 FROM_HERE,
143 base::Bind(
144 &CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule,
145 base::Unretained(this)),
146 kCheckSlotMarkedSuccessfulInterval);
147}
148
149void CleanupPreviousUpdateAction::CheckSlotMarkedSuccessfulOrSchedule() {
150 TEST_AND_RETURN(running_);
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700151 if (!kIsRecovery &&
152 !boot_control_->IsSlotMarkedSuccessful(boot_control_->GetCurrentSlot())) {
Yifan Hongdad0af82020-02-19 17:19:49 -0800153 ScheduleWaitMarkBootSuccessful();
154 }
Yifan Hong5cd63fa2020-03-16 12:31:16 -0700155
156 if (metadata_device_ == nullptr) {
157 metadata_device_ = snapshot_->EnsureMetadataMounted();
158 }
159
160 if (metadata_device_ == nullptr) {
161 LOG(ERROR) << "Failed to mount /metadata.";
162 processor_->ActionComplete(this, ErrorCode::kError);
163 return;
164 }
165
Yifan Hongd976cc52020-02-25 14:51:42 -0800166 if (!merge_stats_->Start()) {
167 // Not an error because CleanupPreviousUpdateAction may be paused and
168 // resumed while kernel continues merging snapshots in the background.
169 LOG(WARNING) << "SnapshotMergeStats::Start failed.";
170 }
Yifan Hongdad0af82020-02-19 17:19:49 -0800171 LOG(INFO) << "Waiting for any previous merge request to complete. "
172 << "This can take up to several minutes.";
173 WaitForMergeOrSchedule();
174}
175
176void CleanupPreviousUpdateAction::ScheduleWaitForMerge() {
177 TEST_AND_RETURN(running_);
178 MessageLoop::current()->PostDelayedTask(
179 FROM_HERE,
180 base::Bind(&CleanupPreviousUpdateAction::WaitForMergeOrSchedule,
181 base::Unretained(this)),
182 kWaitForMergeInterval);
183}
184
185void CleanupPreviousUpdateAction::WaitForMergeOrSchedule() {
186 TEST_AND_RETURN(running_);
187 auto state = snapshot_->ProcessUpdateState(
188 std::bind(&CleanupPreviousUpdateAction::OnMergePercentageUpdate, this),
189 std::bind(&CleanupPreviousUpdateAction::BeforeCancel, this));
Yifan Hongd976cc52020-02-25 14:51:42 -0800190 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800191
Yifan Hongdad0af82020-02-19 17:19:49 -0800192 switch (state) {
193 case UpdateState::None: {
194 LOG(INFO) << "Can't find any snapshot to merge.";
Yifan Hong16b594a2020-03-05 21:02:36 -0800195 ErrorCode error_code = ErrorCode::kSuccess;
196 if (!snapshot_->CancelUpdate()) {
197 error_code = ErrorCode::kError;
198 LOG(INFO) << "Failed to call SnapshotManager::CancelUpdate().";
199 }
200 processor_->ActionComplete(this, error_code);
Yifan Hongdad0af82020-02-19 17:19:49 -0800201 return;
202 }
203
204 case UpdateState::Initiated: {
205 LOG(ERROR) << "Previous update has not been completed, not cleaning up";
206 processor_->ActionComplete(this, ErrorCode::kSuccess);
207 return;
208 }
209
210 case UpdateState::Unverified: {
211 InitiateMergeAndWait();
212 return;
213 }
214
215 case UpdateState::Merging: {
216 ScheduleWaitForMerge();
217 return;
218 }
219
220 case UpdateState::MergeNeedsReboot: {
221 LOG(ERROR) << "Need reboot to finish merging.";
222 processor_->ActionComplete(this, ErrorCode::kError);
223 return;
224 }
225
226 case UpdateState::MergeCompleted: {
227 LOG(INFO) << "Merge finished with state MergeCompleted.";
228 processor_->ActionComplete(this, ErrorCode::kSuccess);
229 return;
230 }
231
232 case UpdateState::MergeFailed: {
233 LOG(ERROR) << "Merge failed. Device may be corrupted.";
234 processor_->ActionComplete(this, ErrorCode::kDeviceCorrupted);
235 return;
236 }
237
238 case UpdateState::Cancelled: {
239 // DeltaPerformer::ResetUpdateProgress failed, hence snapshots are
240 // not deleted to avoid inconsistency.
241 // Nothing can be done here; just try next time.
242 ErrorCode error_code =
243 cancel_failed_ ? ErrorCode::kError : ErrorCode::kSuccess;
244 processor_->ActionComplete(this, error_code);
245 return;
246 }
247
248 default: {
249 // Protobuf has some reserved enum values, so a default case is needed.
250 LOG(FATAL) << "SnapshotManager::ProcessUpdateState returns "
251 << static_cast<int32_t>(state);
252 }
253 }
254}
255
256bool CleanupPreviousUpdateAction::OnMergePercentageUpdate() {
257 double percentage = 0.0;
258 snapshot_->GetUpdateState(&percentage);
259 if (delegate_) {
260 // libsnapshot uses [0, 100] percentage but update_engine uses [0, 1].
261 delegate_->OnCleanupProgressUpdate(percentage / 100);
262 }
263
264 // Log if percentage increments by at least 1.
265 if (last_percentage_ < static_cast<unsigned int>(percentage)) {
266 last_percentage_ = percentage;
267 LOG(INFO) << "Waiting for merge to complete: " << last_percentage_ << "%.";
268 }
269
270 // Do not continue to wait for merge. Instead, let ProcessUpdateState
271 // return Merging directly so that we can ScheduleWaitForMerge() in
272 // MessageLoop.
273 return false;
274}
275
276bool CleanupPreviousUpdateAction::BeforeCancel() {
277 if (DeltaPerformer::ResetUpdateProgress(
278 prefs_,
279 false /* quick */,
280 false /* skip dynamic partitions metadata*/)) {
281 return true;
282 }
283
284 // ResetUpdateProgress might not work on stub prefs. Do additional checks.
285 LOG(WARNING) << "ProcessUpdateState returns Cancelled but cleanup failed.";
286
287 std::string val;
288 ignore_result(prefs_->GetString(kPrefsDynamicPartitionMetadataUpdated, &val));
289 if (val.empty()) {
290 LOG(INFO) << kPrefsDynamicPartitionMetadataUpdated
291 << " is empty, assuming successful cleanup";
292 return true;
293 }
294 LOG(WARNING)
295 << kPrefsDynamicPartitionMetadataUpdated << " is " << val
296 << ", not deleting snapshots even though UpdateState is Cancelled.";
297 cancel_failed_ = true;
298 return false;
299}
300
301void CleanupPreviousUpdateAction::InitiateMergeAndWait() {
302 TEST_AND_RETURN(running_);
303 LOG(INFO) << "Attempting to initiate merge.";
304
305 if (snapshot_->InitiateMerge()) {
306 WaitForMergeOrSchedule();
307 return;
308 }
309
310 LOG(WARNING) << "InitiateMerge failed.";
311 auto state = snapshot_->GetUpdateState();
Yifan Hongd976cc52020-02-25 14:51:42 -0800312 merge_stats_->set_state(state);
Yifan Hongdad0af82020-02-19 17:19:49 -0800313 if (state == UpdateState::Unverified) {
314 // We are stuck at unverified state. This can happen if the update has
315 // been applied, but it has not even been attempted yet (in libsnapshot,
316 // rollback indicator does not exist); for example, if update_engine
317 // restarts before the device reboots, then this state may be reached.
318 // Nothing should be done here.
319 LOG(WARNING) << "InitiateMerge leaves the device at "
320 << "UpdateState::Unverified. (Did update_engine "
321 << "restarted?)";
322 processor_->ActionComplete(this, ErrorCode::kSuccess);
323 return;
324 }
325
326 // State does seems to be advanced.
327 // It is possibly racy. For example, on a userdebug build, the user may
328 // manually initiate a merge with snapshotctl between last time
329 // update_engine checks UpdateState. Hence, just call
330 // WaitForMergeOrSchedule one more time.
331 LOG(WARNING) << "IniitateMerge failed but GetUpdateState returned "
332 << android::snapshot::UpdateState_Name(state)
333 << ", try to wait for merge again.";
334 WaitForMergeOrSchedule();
335 return;
336}
337
Yifan Hongd976cc52020-02-25 14:51:42 -0800338void CleanupPreviousUpdateAction::ReportMergeStats() {
339 auto result = merge_stats_->Finish();
340 if (result == nullptr) {
341 LOG(WARNING) << "Not reporting merge stats because "
342 "SnapshotMergeStats::Finish failed.";
343 return;
344 }
345
346#ifdef __ANDROID_RECOVERY__
347 LOG(INFO) << "Skip reporting merge stats in recovery.";
348#else
349 const auto& report = result->report();
350
351 if (report.state() == UpdateState::None ||
352 report.state() == UpdateState::Initiated ||
353 report.state() == UpdateState::Unverified) {
354 LOG(INFO) << "Not reporting merge stats because state is "
355 << android::snapshot::UpdateState_Name(report.state());
356 return;
357 }
358
359 auto passed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
360 result->merge_time());
361 LOG(INFO) << "Reporting merge stats: "
362 << android::snapshot::UpdateState_Name(report.state()) << " in "
363 << passed_ms.count() << "ms (resumed " << report.resume_count()
364 << " times)";
Yifan Hongbe59a002020-03-02 15:45:14 -0800365 android::util::stats_write(android::util::SNAPSHOT_MERGE_REPORTED,
366 static_cast<int32_t>(report.state()),
367 static_cast<int64_t>(passed_ms.count()),
368 static_cast<int32_t>(report.resume_count()));
Yifan Hongd976cc52020-02-25 14:51:42 -0800369#endif
370}
371
Yifan Hongdad0af82020-02-19 17:19:49 -0800372} // namespace chromeos_update_engine