DL lifecycle: handle slow DL binding.
Bug: 182214420
Test: atest PackageManagerShellCommandTest PackageManagerShellCommandIncrementalTest IncrementalServiceTest PackageManagerServiceTest ChecksumsTest
Change-Id: I5959e01177ab702de1f754f4ba433004925ce98b
diff --git a/services/incremental/IncrementalService.cpp b/services/incremental/IncrementalService.cpp
index 1fcc284..c38d0b3 100644
--- a/services/incremental/IncrementalService.cpp
+++ b/services/incremental/IncrementalService.cpp
@@ -74,6 +74,13 @@
// If DL was up and not crashing for 10mins, we consider it healthy and reset all delays.
static constexpr auto healthyDataLoaderUptime = 10min;
+
+ // For healthy DLs, we'll retry every ~5secs for ~10min
+ static constexpr auto bindRetryInterval = 5s;
+ static constexpr auto bindGracePeriod = 10min;
+
+ static constexpr auto bindingTimeout = 1min;
+
// 10s, 100s (~2min), 1000s (~15min), 10000s (~3hrs)
static constexpr auto minBindDelay = 10s;
static constexpr auto maxBindDelay = 10000s;
@@ -293,6 +300,7 @@
mTimedQueue(sm.getTimedQueue()),
mProgressUpdateJobQueue(sm.getProgressUpdateJobQueue()),
mFs(sm.getFs()),
+ mClock(sm.getClock()),
mIncrementalDir(rootDir) {
CHECK(mVold) << "Vold service is unavailable";
CHECK(mDataLoaderManager) << "DataLoaderManagerService is unavailable";
@@ -302,6 +310,7 @@
CHECK(mTimedQueue) << "TimedQueue is unavailable";
CHECK(mProgressUpdateJobQueue) << "mProgressUpdateJobQueue is unavailable";
CHECK(mFs) << "Fs is unavailable";
+ CHECK(mClock) << "Clock is unavailable";
mJobQueue.reserve(16);
mJobProcessor = std::thread([this]() {
@@ -2241,17 +2250,44 @@
<< status << " (current " << mCurrentStatus << ")";
}
-Milliseconds IncrementalService::DataLoaderStub::updateBindDelay() {
+std::optional<Milliseconds> IncrementalService::DataLoaderStub::needToBind() {
std::unique_lock lock(mMutex);
+
+ const auto now = mService.mClock->now();
+ const bool healthy = (mPreviousBindDelay == 0ms);
+
+ if (mCurrentStatus == IDataLoaderStatusListener::DATA_LOADER_BINDING &&
+ now - mCurrentStatusTs <= Constants::bindingTimeout) {
+ LOG(INFO) << "Binding still in progress. "
+ << (healthy ? "The DL is healthy/freshly bound, ok to retry for a few times."
+ : "Already unhealthy, don't do anything.");
+ // Binding still in progress.
+ if (!healthy) {
+ // Already unhealthy, don't do anything.
+ return {};
+ }
+ // The DL is healthy/freshly bound, ok to retry for a few times.
+ if (now - mPreviousBindTs <= Constants::bindGracePeriod) {
+ // Still within grace period.
+ if (now - mCurrentStatusTs >= Constants::bindRetryInterval) {
+ // Retry interval passed, retrying.
+ mCurrentStatusTs = now;
+ mPreviousBindDelay = 0ms;
+ return 0ms;
+ }
+ return {};
+ }
+ // fallthrough, mark as unhealthy, and retry with delay
+ }
+
const auto previousBindTs = mPreviousBindTs;
- const auto now = Clock::now();
mPreviousBindTs = now;
const auto nonCrashingInterval = std::max(castToMs(now - previousBindTs), 100ms);
if (previousBindTs.time_since_epoch() == Clock::duration::zero() ||
nonCrashingInterval > Constants::healthyDataLoaderUptime) {
mPreviousBindDelay = 0ms;
- return mPreviousBindDelay;
+ return 0ms;
}
constexpr auto minBindDelayMs = castToMs(Constants::minBindDelay);
@@ -2264,12 +2300,16 @@
const auto bindDelayJitterRangeMs = bindDelayMs / Constants::bindDelayJitterDivider;
const auto bindDelayJitterMs = rand() % (bindDelayJitterRangeMs * 2) - bindDelayJitterRangeMs;
mPreviousBindDelay = std::chrono::milliseconds(bindDelayMs + bindDelayJitterMs);
-
return mPreviousBindDelay;
}
bool IncrementalService::DataLoaderStub::bind() {
- const auto bindDelay = updateBindDelay();
+ const auto maybeBindDelay = needToBind();
+ if (!maybeBindDelay) {
+ LOG(DEBUG) << "Skipping bind to " << mParams.packageName << " because of pending bind.";
+ return true;
+ }
+ const auto bindDelay = *maybeBindDelay;
if (bindDelay > 1s) {
LOG(INFO) << "Delaying bind to " << mParams.packageName << " by "
<< bindDelay.count() / 1000 << "s";
@@ -2279,7 +2319,21 @@
auto status = mService.mDataLoaderManager->bindToDataLoader(id(), mParams, bindDelay.count(),
this, &result);
if (!status.isOk() || !result) {
- LOG(ERROR) << "Failed to bind a data loader for mount " << id();
+ const bool healthy = (bindDelay == 0ms);
+ LOG(ERROR) << "Failed to bind a data loader for mount " << id()
+ << (healthy ? ", retrying." : "");
+
+ // Internal error, retry for healthy/new DLs.
+ // Let needToBind migrate it to unhealthy after too many retries.
+ if (healthy) {
+ if (mService.addTimedJob(*mService.mTimedQueue, id(), Constants::bindRetryInterval,
+ [this]() { fsmStep(); })) {
+ // Mark as binding so that we know it's not the DL's fault.
+ setCurrentStatus(IDataLoaderStatusListener::DATA_LOADER_BINDING);
+ return true;
+ }
+ }
+
return false;
}
return true;
@@ -2339,7 +2393,14 @@
// Do nothing, this is a reset state.
break;
case IDataLoaderStatusListener::DATA_LOADER_DESTROYED: {
- return destroy();
+ switch (currentStatus) {
+ case IDataLoaderStatusListener::DATA_LOADER_BINDING:
+ setCurrentStatus(IDataLoaderStatusListener::DATA_LOADER_DESTROYED);
+ return true;
+ default:
+ return destroy();
+ }
+ break;
}
case IDataLoaderStatusListener::DATA_LOADER_STARTED: {
switch (currentStatus) {
@@ -2353,6 +2414,7 @@
switch (currentStatus) {
case IDataLoaderStatusListener::DATA_LOADER_DESTROYED:
case IDataLoaderStatusListener::DATA_LOADER_UNAVAILABLE:
+ case IDataLoaderStatusListener::DATA_LOADER_BINDING:
return bind();
case IDataLoaderStatusListener::DATA_LOADER_BOUND:
return create();
@@ -2372,7 +2434,8 @@
fromServiceSpecificError(-EINVAL, "onStatusChange came to invalid DataLoaderStub");
}
if (id() != mountId) {
- LOG(ERROR) << "Mount ID mismatch: expected " << id() << ", but got: " << mountId;
+ LOG(ERROR) << "onStatusChanged: mount ID mismatch: expected " << id()
+ << ", but got: " << mountId;
return binder::Status::fromServiceSpecificError(-EPERM, "Mount ID mismatch.");
}
if (newStatus == IDataLoaderStatusListener::DATA_LOADER_UNRECOVERABLE) {
@@ -2396,11 +2459,13 @@
}
oldStatus = mCurrentStatus;
- mCurrentStatus = newStatus;
targetStatus = mTargetStatus;
-
listener = mStatusListener;
+ // Change the status.
+ mCurrentStatus = newStatus;
+ mCurrentStatusTs = mService.mClock->now();
+
if (mCurrentStatus == IDataLoaderStatusListener::DATA_LOADER_UNAVAILABLE ||
mCurrentStatus == IDataLoaderStatusListener::DATA_LOADER_UNRECOVERABLE) {
// For unavailable, unbind from DataLoader to ensure proper re-commit.
@@ -2428,7 +2493,8 @@
"reportStreamHealth came to invalid DataLoaderStub");
}
if (id() != mountId) {
- LOG(ERROR) << "Mount ID mismatch: expected " << id() << ", but got: " << mountId;
+ LOG(ERROR) << "reportStreamHealth: mount ID mismatch: expected " << id()
+ << ", but got: " << mountId;
return binder::Status::fromServiceSpecificError(-EPERM, "Mount ID mismatch.");
}
{
@@ -2694,6 +2760,8 @@
void IncrementalService::DataLoaderStub::onDump(int fd) {
dprintf(fd, " dataLoader: {\n");
dprintf(fd, " currentStatus: %d\n", mCurrentStatus);
+ dprintf(fd, " currentStatusTs: %lldmcs\n",
+ (long long)(elapsedMcs(mCurrentStatusTs, Clock::now())));
dprintf(fd, " targetStatus: %d\n", mTargetStatus);
dprintf(fd, " targetStatusTs: %lldmcs\n",
(long long)(elapsedMcs(mTargetStatusTs, Clock::now())));
diff --git a/services/incremental/IncrementalService.h b/services/incremental/IncrementalService.h
index 14e5a77..4eb5138 100644
--- a/services/incremental/IncrementalService.h
+++ b/services/incremental/IncrementalService.h
@@ -267,7 +267,10 @@
BootClockTsUs getOldestTsFromLastPendingReads();
Milliseconds elapsedMsSinceKernelTs(TimePoint now, BootClockTsUs kernelTsUs);
- Milliseconds updateBindDelay();
+ // If the stub has to bind to the DL.
+ // Returns {} if bind operation is already in progress.
+ // Or bind delay in ms.
+ std::optional<Milliseconds> needToBind();
void registerForPendingReads();
void unregisterFromPendingReads();
@@ -283,6 +286,7 @@
std::condition_variable mStatusCondition;
int mCurrentStatus = content::pm::IDataLoaderStatusListener::DATA_LOADER_DESTROYED;
+ TimePoint mCurrentStatusTs = {};
int mTargetStatus = content::pm::IDataLoaderStatusListener::DATA_LOADER_DESTROYED;
TimePoint mTargetStatusTs = {};
@@ -443,6 +447,7 @@
const std::unique_ptr<TimedQueueWrapper> mTimedQueue;
const std::unique_ptr<TimedQueueWrapper> mProgressUpdateJobQueue;
const std::unique_ptr<FsWrapper> mFs;
+ const std::unique_ptr<ClockWrapper> mClock;
const std::string mIncrementalDir;
mutable std::mutex mLock;
diff --git a/services/incremental/ServiceWrappers.cpp b/services/incremental/ServiceWrappers.cpp
index d613289..80f409f 100644
--- a/services/incremental/ServiceWrappers.cpp
+++ b/services/incremental/ServiceWrappers.cpp
@@ -329,6 +329,14 @@
}
};
+class RealClockWrapper final : public ClockWrapper {
+public:
+ RealClockWrapper() = default;
+ ~RealClockWrapper() = default;
+
+ TimePoint now() const final { return Clock::now(); }
+};
+
RealServiceManager::RealServiceManager(sp<IServiceManager> serviceManager, JNIEnv* env)
: mServiceManager(std::move(serviceManager)), mJvm(RealJniWrapper::getJvm(env)) {}
@@ -388,6 +396,10 @@
return std::make_unique<RealFsWrapper>();
}
+std::unique_ptr<ClockWrapper> RealServiceManager::getClock() {
+ return std::make_unique<RealClockWrapper>();
+}
+
static JavaVM* getJavaVm(JNIEnv* env) {
CHECK(env);
JavaVM* jvm = nullptr;
diff --git a/services/incremental/ServiceWrappers.h b/services/incremental/ServiceWrappers.h
index 245bb31..d113f99 100644
--- a/services/incremental/ServiceWrappers.h
+++ b/services/incremental/ServiceWrappers.h
@@ -158,6 +158,12 @@
virtual void listFilesRecursive(std::string_view directoryPath, FileCallback onFile) const = 0;
};
+class ClockWrapper {
+public:
+ virtual ~ClockWrapper() = default;
+ virtual TimePoint now() const = 0;
+};
+
class ServiceManagerWrapper {
public:
virtual ~ServiceManagerWrapper() = default;
@@ -170,6 +176,7 @@
virtual std::unique_ptr<TimedQueueWrapper> getTimedQueue() = 0;
virtual std::unique_ptr<TimedQueueWrapper> getProgressUpdateJobQueue() = 0;
virtual std::unique_ptr<FsWrapper> getFs() = 0;
+ virtual std::unique_ptr<ClockWrapper> getClock() = 0;
};
// --- Real stuff ---
@@ -187,6 +194,7 @@
std::unique_ptr<TimedQueueWrapper> getTimedQueue() final;
std::unique_ptr<TimedQueueWrapper> getProgressUpdateJobQueue() final;
std::unique_ptr<FsWrapper> getFs() final;
+ std::unique_ptr<ClockWrapper> getClock() final;
private:
template <class INTERFACE>
diff --git a/services/incremental/test/IncrementalServiceTest.cpp b/services/incremental/test/IncrementalServiceTest.cpp
index 5236983..25b34b56 100644
--- a/services/incremental/test/IncrementalServiceTest.cpp
+++ b/services/incremental/test/IncrementalServiceTest.cpp
@@ -248,6 +248,27 @@
}
return binder::Status::ok();
}
+ binder::Status bindToDataLoaderNotOkWithNoDelay(int32_t mountId,
+ const DataLoaderParamsParcel& params,
+ int bindDelayMs,
+ const sp<IDataLoaderStatusListener>& listener,
+ bool* _aidl_return) {
+ CHECK(bindDelayMs == 0) << bindDelayMs;
+ *_aidl_return = false;
+ return binder::Status::ok();
+ }
+ binder::Status bindToDataLoaderBindingWithNoDelay(int32_t mountId,
+ const DataLoaderParamsParcel& params,
+ int bindDelayMs,
+ const sp<IDataLoaderStatusListener>& listener,
+ bool* _aidl_return) {
+ CHECK(bindDelayMs == 0) << bindDelayMs;
+ *_aidl_return = true;
+ if (listener) {
+ listener->onStatusChanged(mId, IDataLoaderStatusListener::DATA_LOADER_BINDING);
+ }
+ return binder::Status::ok();
+ }
binder::Status bindToDataLoaderOkWith10sDelay(int32_t mountId,
const DataLoaderParamsParcel& params,
int bindDelayMs,
@@ -557,6 +578,21 @@
}
};
+class MockClockWrapper : public ClockWrapper {
+public:
+ MOCK_CONST_METHOD0(now, TimePoint());
+
+ void start() { ON_CALL(*this, now()).WillByDefault(Invoke(this, &MockClockWrapper::getClock)); }
+ template <class Delta>
+ void advance(Delta delta) {
+ mClock += delta;
+ }
+
+ TimePoint getClock() const { return mClock; }
+
+ TimePoint mClock = Clock::now();
+};
+
class MockStorageHealthListener : public os::incremental::BnStorageHealthListener {
public:
MOCK_METHOD2(onHealthStatus, binder::Status(int32_t storageId, int32_t status));
@@ -594,7 +630,7 @@
std::unique_ptr<MockLooperWrapper> looper,
std::unique_ptr<MockTimedQueueWrapper> timedQueue,
std::unique_ptr<MockTimedQueueWrapper> progressUpdateJobQueue,
- std::unique_ptr<MockFsWrapper> fs)
+ std::unique_ptr<MockFsWrapper> fs, std::unique_ptr<MockClockWrapper> clock)
: mVold(std::move(vold)),
mDataLoaderManager(std::move(dataLoaderManager)),
mIncFs(std::move(incfs)),
@@ -603,7 +639,8 @@
mLooper(std::move(looper)),
mTimedQueue(std::move(timedQueue)),
mProgressUpdateJobQueue(std::move(progressUpdateJobQueue)),
- mFs(std::move(fs)) {}
+ mFs(std::move(fs)),
+ mClock(std::move(clock)) {}
std::unique_ptr<VoldServiceWrapper> getVoldService() final { return std::move(mVold); }
std::unique_ptr<DataLoaderManagerWrapper> getDataLoaderManager() final {
return std::move(mDataLoaderManager);
@@ -619,6 +656,7 @@
return std::move(mProgressUpdateJobQueue);
}
std::unique_ptr<FsWrapper> getFs() final { return std::move(mFs); }
+ std::unique_ptr<ClockWrapper> getClock() final { return std::move(mClock); }
private:
std::unique_ptr<MockVoldService> mVold;
@@ -630,6 +668,7 @@
std::unique_ptr<MockTimedQueueWrapper> mTimedQueue;
std::unique_ptr<MockTimedQueueWrapper> mProgressUpdateJobQueue;
std::unique_ptr<MockFsWrapper> mFs;
+ std::unique_ptr<MockClockWrapper> mClock;
};
// --- IncrementalServiceTest ---
@@ -657,6 +696,8 @@
mProgressUpdateJobQueue = progressUpdateJobQueue.get();
auto fs = std::make_unique<NiceMock<MockFsWrapper>>();
mFs = fs.get();
+ auto clock = std::make_unique<NiceMock<MockClockWrapper>>();
+ mClock = clock.get();
mIncrementalService = std::make_unique<
IncrementalService>(MockServiceManager(std::move(vold),
std::move(dataloaderManager),
@@ -664,12 +705,13 @@
std::move(jni), std::move(looper),
std::move(timedQueue),
std::move(progressUpdateJobQueue),
- std::move(fs)),
+ std::move(fs), std::move(clock)),
mRootDir.path);
mDataLoaderParcel.packageName = "com.test";
mDataLoaderParcel.arguments = "uri";
mDataLoaderManager->unbindFromDataLoaderSuccess();
mIncrementalService->onSystemReady();
+ mClock->start();
setupSuccess();
}
@@ -724,6 +766,7 @@
NiceMock<MockTimedQueueWrapper>* mTimedQueue = nullptr;
NiceMock<MockTimedQueueWrapper>* mProgressUpdateJobQueue = nullptr;
NiceMock<MockFsWrapper>* mFs = nullptr;
+ NiceMock<MockClockWrapper>* mClock = nullptr;
NiceMock<MockDataLoader>* mDataLoader = nullptr;
std::unique_ptr<IncrementalService> mIncrementalService;
TemporaryDir mRootDir;
@@ -853,6 +896,119 @@
mDataLoaderManager->setDataLoaderStatusDestroyed();
}
+TEST_F(IncrementalServiceTest, testDataLoaderOnRestart) {
+ mIncFs->waitForPendingReadsSuccess();
+ mIncFs->openMountSuccess();
+
+ constexpr auto bindRetryInterval = 5s;
+
+ EXPECT_CALL(*mDataLoaderManager, bindToDataLoader(_, _, _, _, _)).Times(10);
+ EXPECT_CALL(*mDataLoaderManager, unbindFromDataLoader(_)).Times(1);
+ EXPECT_CALL(*mDataLoader, create(_, _, _, _)).Times(6);
+ EXPECT_CALL(*mDataLoader, start(_)).Times(6);
+ EXPECT_CALL(*mDataLoader, destroy(_)).Times(1);
+ EXPECT_CALL(*mVold, unmountIncFs(_)).Times(2);
+ EXPECT_CALL(*mTimedQueue, addJob(_, _, _)).Times(2);
+ TemporaryDir tempDir;
+ int storageId =
+ mIncrementalService->createStorage(tempDir.path, mDataLoaderParcel,
+ IncrementalService::CreateOptions::CreateNew);
+ ASSERT_GE(storageId, 0);
+
+ // First binds to DataLoader fails... because it's restart.
+ ON_CALL(*mDataLoaderManager, bindToDataLoader(_, _, _, _, _))
+ .WillByDefault(Invoke(mDataLoaderManager,
+ &MockDataLoaderManager::bindToDataLoaderNotOkWithNoDelay));
+
+ // Request DL start.
+ mIncrementalService->startLoading(storageId, std::move(mDataLoaderParcel), {}, {}, {}, {});
+
+ // Retry callback present.
+ ASSERT_EQ(storageId, mTimedQueue->mId);
+ ASSERT_EQ(mTimedQueue->mAfter, bindRetryInterval);
+ auto retryCallback = mTimedQueue->mWhat;
+ mTimedQueue->clearJob(storageId);
+
+ // Expecting the same bindToDataLoaderNotOkWithNoDelay call.
+ mClock->advance(5s);
+
+ retryCallback();
+ // Retry callback present.
+ ASSERT_EQ(storageId, mTimedQueue->mId);
+ ASSERT_EQ(mTimedQueue->mAfter, bindRetryInterval);
+ retryCallback = mTimedQueue->mWhat;
+ mTimedQueue->clearJob(storageId);
+
+ // Returning "binding" so that we can retry.
+ ON_CALL(*mDataLoaderManager, bindToDataLoader(_, _, _, _, _))
+ .WillByDefault(Invoke(mDataLoaderManager,
+ &MockDataLoaderManager::bindToDataLoaderBindingWithNoDelay));
+
+ // Expecting bindToDataLoaderBindingWithNoDelay call.
+ mClock->advance(5s);
+
+ retryCallback();
+ // No retry callback.
+ ASSERT_EQ(mTimedQueue->mAfter, 0ms);
+ ASSERT_EQ(mTimedQueue->mWhat, nullptr);
+
+ // Should not change the bindToDataLoader call count
+ ASSERT_NE(nullptr, mLooper->mCallback);
+ ASSERT_NE(nullptr, mLooper->mCallbackData);
+ auto looperCb = mLooper->mCallback;
+ auto looperCbData = mLooper->mCallbackData;
+ looperCb(-1, -1, looperCbData);
+
+ // Expecting the same bindToDataLoaderBindingWithNoDelay call.
+ mClock->advance(5s);
+
+ // Use pending reads callback to trigger binding.
+ looperCb(-1, -1, looperCbData);
+
+ // No retry callback.
+ ASSERT_EQ(mTimedQueue->mAfter, 0ms);
+ ASSERT_EQ(mTimedQueue->mWhat, nullptr);
+
+ // Now we are out of 10m "retry" budget, let's finally bind.
+ ON_CALL(*mDataLoaderManager, bindToDataLoader(_, _, _, _, _))
+ .WillByDefault(Invoke(mDataLoaderManager, &MockDataLoaderManager::bindToDataLoaderOk));
+ mClock->advance(11min);
+
+ // Use pending reads callback to trigger binding.
+ looperCb(-1, -1, looperCbData);
+
+ // No retry callback.
+ ASSERT_EQ(mTimedQueue->mAfter, 0ms);
+ ASSERT_EQ(mTimedQueue->mWhat, nullptr);
+
+ // And test the rest of the backoff.
+ // Simulated crash/other connection breakage.
+ ON_CALL(*mDataLoaderManager, bindToDataLoader(_, _, _, _, _))
+ .WillByDefault(Invoke(mDataLoaderManager,
+ &MockDataLoaderManager::bindToDataLoaderOkWith10sDelay));
+ mDataLoaderManager->setDataLoaderStatusDestroyed();
+
+ ON_CALL(*mDataLoaderManager, bindToDataLoader(_, _, _, _, _))
+ .WillByDefault(Invoke(mDataLoaderManager,
+ &MockDataLoaderManager::bindToDataLoaderOkWith100sDelay));
+ mDataLoaderManager->setDataLoaderStatusDestroyed();
+
+ ON_CALL(*mDataLoaderManager, bindToDataLoader(_, _, _, _, _))
+ .WillByDefault(Invoke(mDataLoaderManager,
+ &MockDataLoaderManager::bindToDataLoaderOkWith1000sDelay));
+ mDataLoaderManager->setDataLoaderStatusDestroyed();
+
+ ON_CALL(*mDataLoaderManager, bindToDataLoader(_, _, _, _, _))
+ .WillByDefault(Invoke(mDataLoaderManager,
+ &MockDataLoaderManager::bindToDataLoaderOkWith10000sDelay));
+ mDataLoaderManager->setDataLoaderStatusDestroyed();
+
+ ON_CALL(*mDataLoaderManager, bindToDataLoader(_, _, _, _, _))
+ .WillByDefault(Invoke(mDataLoaderManager,
+ &MockDataLoaderManager::bindToDataLoaderOkWith10000sDelay));
+ mDataLoaderManager->setDataLoaderStatusDestroyed();
+}
+
TEST_F(IncrementalServiceTest, testStartDataLoaderCreate) {
mDataLoader->initializeCreateOkNoStatus();
EXPECT_CALL(*mDataLoaderManager, bindToDataLoader(_, _, _, _, _)).Times(1);