Skip setting the state for paused binder thread invocations in waitUntilStateThenRelock
Because of a potential deadlock that exists between reconfigureCamera
and createInputStreams/createStreams (b/241137777), idle state from
reconfigureCamera should not notify the places waiting for idle
as that state is immediately transitioned to active. This way
reconfigureCamera will be able to complete successfully.
This will allow us to fix the deadlock by allowing us to revert
ag/10211554, as it handles the serialization problem that the revert
introduces.
Test: Camera CTS continue to pass
Bug: 241137777
Bug: 269092476
Change-Id: I9d3f13be59cf3f5f4ce695b045cd1a33e5ea1233
diff --git a/services/camera/libcameraservice/device3/Camera3Device.cpp b/services/camera/libcameraservice/device3/Camera3Device.cpp
index 1127eaa..9de14a2 100644
--- a/services/camera/libcameraservice/device3/Camera3Device.cpp
+++ b/services/camera/libcameraservice/device3/Camera3Device.cpp
@@ -283,7 +283,8 @@
SET_ERR_L("Can't stop streaming");
// Continue to close device even in case of error
} else {
- res = waitUntilStateThenRelock(/*active*/ false, maxExpectedDuration);
+ res = waitUntilStateThenRelock(/*active*/ false, maxExpectedDuration,
+ /*requestThreadInvocation*/ false);
if (res != OK) {
SET_ERR_L("Timeout waiting for HAL to drain (% " PRIi64 " ns)",
maxExpectedDuration);
@@ -839,7 +840,7 @@
}
if (res == OK) {
- waitUntilStateThenRelock(/*active*/true, kActiveTimeout);
+ waitUntilStateThenRelock(/*active*/true, kActiveTimeout, /*requestThreadInvocation*/false);
if (res != OK) {
SET_ERR_L("Can't transition to active in %f seconds!",
kActiveTimeout/1e9);
@@ -964,7 +965,8 @@
break;
case STATUS_ACTIVE:
ALOGV("%s: Stopping activity to reconfigure streams", __FUNCTION__);
- res = internalPauseAndWaitLocked(maxExpectedDuration);
+ res = internalPauseAndWaitLocked(maxExpectedDuration,
+ /*requestThreadInvocation*/ false);
if (res != OK) {
SET_ERR_L("Can't pause captures to reconfigure streams!");
return res;
@@ -1081,7 +1083,8 @@
break;
case STATUS_ACTIVE:
ALOGV("%s: Stopping activity to reconfigure streams", __FUNCTION__);
- res = internalPauseAndWaitLocked(maxExpectedDuration);
+ res = internalPauseAndWaitLocked(maxExpectedDuration,
+ /*requestThreadInvocation*/ false);
if (res != OK) {
SET_ERR_L("Can't pause captures to reconfigure streams!");
return res;
@@ -1554,7 +1557,8 @@
}
ALOGV("%s: Camera %s: Waiting until idle (%" PRIi64 "ns)", __FUNCTION__, mId.string(),
maxExpectedDuration);
- status_t res = waitUntilStateThenRelock(/*active*/ false, maxExpectedDuration);
+ status_t res = waitUntilStateThenRelock(/*active*/ false, maxExpectedDuration,
+ /*requestThreadInvocation*/ false);
if (res != OK) {
mStatusTracker->dumpActiveComponents();
SET_ERR_L("Error waiting for HAL to drain: %s (%d)", strerror(-res),
@@ -1565,12 +1569,14 @@
void Camera3Device::internalUpdateStatusLocked(Status status) {
mStatus = status;
- mRecentStatusUpdates.add(mStatus);
+ mStatusIsInternal = mPauseStateNotify ? true : false;
+ mRecentStatusUpdates.add({mStatus, mStatusIsInternal});
mStatusChanged.broadcast();
}
// Pause to reconfigure
-status_t Camera3Device::internalPauseAndWaitLocked(nsecs_t maxExpectedDuration) {
+status_t Camera3Device::internalPauseAndWaitLocked(nsecs_t maxExpectedDuration,
+ bool requestThreadInvocation) {
if (mRequestThread.get() != nullptr) {
mRequestThread->setPaused(true);
} else {
@@ -1579,7 +1585,8 @@
ALOGV("%s: Camera %s: Internal wait until idle (% " PRIi64 " ns)", __FUNCTION__, mId.string(),
maxExpectedDuration);
- status_t res = waitUntilStateThenRelock(/*active*/ false, maxExpectedDuration);
+ status_t res = waitUntilStateThenRelock(/*active*/ false, maxExpectedDuration,
+ requestThreadInvocation);
if (res != OK) {
mStatusTracker->dumpActiveComponents();
SET_ERR_L("Can't idle device in %f seconds!",
@@ -1597,7 +1604,9 @@
ALOGV("%s: Camera %s: Internal wait until active (% " PRIi64 " ns)", __FUNCTION__, mId.string(),
kActiveTimeout);
- res = waitUntilStateThenRelock(/*active*/ true, kActiveTimeout);
+ // internalResumeLocked is always called from a binder thread.
+ res = waitUntilStateThenRelock(/*active*/ true, kActiveTimeout,
+ /*requestThreadInvocation*/ false);
if (res != OK) {
SET_ERR_L("Can't transition to active in %f seconds!",
kActiveTimeout/1e9);
@@ -1606,7 +1615,8 @@
return OK;
}
-status_t Camera3Device::waitUntilStateThenRelock(bool active, nsecs_t timeout) {
+status_t Camera3Device::waitUntilStateThenRelock(bool active, nsecs_t timeout,
+ bool requestThreadInvocation) {
status_t res = OK;
size_t startIndex = 0;
@@ -1635,7 +1645,8 @@
bool stateSeen = false;
nsecs_t startTime = systemTime();
do {
- if (active == (mStatus == STATUS_ACTIVE)) {
+ if (active == (mStatus == STATUS_ACTIVE) &&
+ (requestThreadInvocation || !mStatusIsInternal)) {
// Desired state is current
break;
}
@@ -1657,9 +1668,14 @@
"%s: Skipping status updates in Camera3Device, may result in deadlock.",
__FUNCTION__);
- // Encountered desired state since we began waiting
+ // Encountered desired state since we began waiting. Internal invocations coming from
+ // request threads (such as reconfigureCamera) should be woken up immediately, whereas
+ // invocations from binder threads (such as createInputStream) should only be woken up if
+ // they are not paused. This avoids intermediate pause signals from reconfigureCamera as it
+ // changes the status to active right after.
for (size_t i = startIndex; i < mRecentStatusUpdates.size(); i++) {
- if (active == (mRecentStatusUpdates[i] == STATUS_ACTIVE) ) {
+ if (active == (mRecentStatusUpdates[i].status == STATUS_ACTIVE) &&
+ (requestThreadInvocation || !mRecentStatusUpdates[i].isInternal)) {
stateSeen = true;
break;
}
@@ -2323,8 +2339,18 @@
if (mStatus == STATUS_ACTIVE) {
markClientActive = true;
mPauseStateNotify = true;
+ mStatusTracker->markComponentIdle(clientStatusId, Fence::NO_FENCE);
- rc = internalPauseAndWaitLocked(maxExpectedDuration);
+ // This is essentially the same as calling rc = internalPauseAndWaitLocked(..), except that
+ // we don't want to call setPaused(true) to avoid it interfering with setPaused() called
+ // from createInputStream/createStream.
+ rc = waitUntilStateThenRelock(/*active*/ false, maxExpectedDuration,
+ /*requestThreadInvocation*/ true);
+ if (rc != OK) {
+ mStatusTracker->dumpActiveComponents();
+ SET_ERR_L("Can't idle device in %f seconds!",
+ maxExpectedDuration/1e9);
+ }
}
if (rc == NO_ERROR) {
@@ -3565,13 +3591,8 @@
if (res == OK) {
sp<Camera3Device> parent = mParent.promote();
if (parent != nullptr) {
- sp<StatusTracker> statusTracker = mStatusTracker.promote();
- if (statusTracker != nullptr) {
- statusTracker->markComponentIdle(mStatusId, Fence::NO_FENCE);
- }
mReconfigured |= parent->reconfigureCamera(mLatestSessionParams, mStatusId);
}
- setPaused(false);
if (mNextRequests[0].captureRequest->mInputStream != nullptr) {
mNextRequests[0].captureRequest->mInputStream->restoreConfiguredState();
diff --git a/services/camera/libcameraservice/device3/Camera3Device.h b/services/camera/libcameraservice/device3/Camera3Device.h
index 977b57f..5cc5ab0 100644
--- a/services/camera/libcameraservice/device3/Camera3Device.h
+++ b/services/camera/libcameraservice/device3/Camera3Device.h
@@ -572,8 +572,15 @@
STATUS_ACTIVE
} mStatus;
+ struct StatusInfo {
+ Status status;
+ bool isInternal; // status triggered by internal reconfigureCamera.
+ };
+
+ bool mStatusIsInternal;
+
// Only clear mRecentStatusUpdates, mStatusWaiters from waitUntilStateThenRelock
- Vector<Status> mRecentStatusUpdates;
+ Vector<StatusInfo> mRecentStatusUpdates;
int mStatusWaiters;
Condition mStatusChanged;
@@ -718,7 +725,8 @@
* CameraDeviceBase interface we shouldn't need to.
* Must be called with mLock and mInterfaceLock both held.
*/
- status_t internalPauseAndWaitLocked(nsecs_t maxExpectedDuration);
+ status_t internalPauseAndWaitLocked(nsecs_t maxExpectedDuration,
+ bool requestThreadInvocation);
/**
* Resume work after internalPauseAndWaitLocked()
@@ -737,7 +745,8 @@
* During the wait mLock is released.
*
*/
- status_t waitUntilStateThenRelock(bool active, nsecs_t timeout);
+ status_t waitUntilStateThenRelock(bool active, nsecs_t timeout,
+ bool requestThreadInvocation);
/**
* Implementation of waitUntilDrained. On success, will transition to IDLE state.
diff --git a/services/camera/libcameraservice/device3/Camera3DeviceInjectionMethods.cpp b/services/camera/libcameraservice/device3/Camera3DeviceInjectionMethods.cpp
index 031c255..ab772d4 100644
--- a/services/camera/libcameraservice/device3/Camera3DeviceInjectionMethods.cpp
+++ b/services/camera/libcameraservice/device3/Camera3DeviceInjectionMethods.cpp
@@ -58,7 +58,8 @@
if (parent->mStatus == STATUS_ACTIVE) {
ALOGV("%s: Let the device be IDLE and the request thread is paused",
__FUNCTION__);
- res = parent->internalPauseAndWaitLocked(maxExpectedDuration);
+ res = parent->internalPauseAndWaitLocked(maxExpectedDuration,
+ /*requestThreadInvocation*/false);
if (res != OK) {
ALOGE("%s: Can't pause captures to inject camera!", __FUNCTION__);
return res;
@@ -117,7 +118,8 @@
if (parent->mStatus == STATUS_ACTIVE) {
ALOGV("%s: Let the device be IDLE and the request thread is paused",
__FUNCTION__);
- res = parent->internalPauseAndWaitLocked(maxExpectedDuration);
+ res = parent->internalPauseAndWaitLocked(maxExpectedDuration,
+ /*requestThreadInvocation*/false);
if (res != OK) {
ALOGE("%s: Can't pause captures to stop injection!", __FUNCTION__);
return res;