Add CameraService Watchdog
This CL adds a watchdog used to detect issues in cameraservice/Camera HAL.
It uses a timer that can be customized to monitor calls and kills the
process if a fixed amount of time elapses before the monitored call
returns.
The following calls are monitored in this CL: flush(), close().
Further calls can be monitored with the APIs in the watchdog.
Test: Manual; Tested by force triggering watchdog with custom timer
values and while(true) or sleep in monitored calls
Bug: 229145451
Merged-In: Ia7b4a3fd9395210800a391c62b4ea757efbb3b00
Change-Id: Ia7b4a3fd9395210800a391c62b4ea757efbb3b00
diff --git a/services/camera/libcameraservice/Android.bp b/services/camera/libcameraservice/Android.bp
index 1e2dccb..bf7d0c2 100644
--- a/services/camera/libcameraservice/Android.bp
+++ b/services/camera/libcameraservice/Android.bp
@@ -42,6 +42,7 @@
srcs: [
"CameraService.cpp",
+ "CameraServiceWatchdog.cpp",
"CameraFlashlight.cpp",
"common/Camera2ClientBase.cpp",
"common/CameraDeviceBase.cpp",
diff --git a/services/camera/libcameraservice/CameraServiceWatchdog.cpp b/services/camera/libcameraservice/CameraServiceWatchdog.cpp
new file mode 100644
index 0000000..fcd6ebe
--- /dev/null
+++ b/services/camera/libcameraservice/CameraServiceWatchdog.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "CameraServiceWatchdog"
+
+#include "CameraServiceWatchdog.h"
+
+namespace android {
+
+// Runs one watchdog cycle: waits while paused, sleeps one cycle, then ages every counter.
+bool CameraServiceWatchdog::threadLoop() {
+    {
+        // Block until start() or requestExit() clears mPause and signals the condition
+        AutoMutex lock(mWatchdogLock);
+        while (mPause) {
+            mWatchdogCondition.wait(mWatchdogLock);
+        }
+    }
+
+    // The lock is released while sleeping for one cycle
+    const auto cycleLength = std::chrono::milliseconds(mCycleLengthMs);
+    std::this_thread::sleep_for(cycleLength);
+
+    {
+        AutoMutex lock(mWatchdogLock);
+        for (auto& tidAndCycles : tidToCycleCounterMap) {
+            auto& elapsedCycles = tidAndCycles.second;
+            ++elapsedCycles;
+            // A call still registered after mMaxCycles cycles triggers the kill
+            if (elapsedCycles >= mMaxCycles) {
+                ALOGW("CameraServiceWatchdog triggering kill for pid: %d", getpid());
+                kill(getpid(), SIGKILL);
+            }
+        }
+    }
+
+    return true;
+}
+
+// Requests thread exit, drops all monitored calls and wakes threadLoop() if it is paused.
+void CameraServiceWatchdog::requestExit() {
+    Thread::requestExit();
+
+    AutoMutex lock(mWatchdogLock);
+    tidToCycleCounterMap.clear();
+
+    if (!mPause) {
+        return;
+    }
+    mPause = false;
+    mWatchdogCondition.signal();
+}
+
+// Ends monitoring for `tid`; pauses the watchdog once no monitored calls remain.
+void CameraServiceWatchdog::stop(uint32_t tid) {
+    AutoMutex lock(mWatchdogLock);
+    tidToCycleCounterMap.erase(tid);
+
+    if (!tidToCycleCounterMap.empty()) {
+        return;  // other calls are still being watched
+    }
+    mPause = true;
+}
+
+// Begins (or restarts) monitoring for `tid` and resumes the watchdog if it was paused.
+void CameraServiceWatchdog::start(uint32_t tid) {
+    AutoMutex lock(mWatchdogLock);
+    tidToCycleCounterMap[tid] = 0;  // fresh cycle count for this call
+
+    if (!mPause) {
+        return;
+    }
+    mPause = false;
+    mWatchdogCondition.signal();
+}
+
+} // namespace android
diff --git a/services/camera/libcameraservice/CameraServiceWatchdog.h b/services/camera/libcameraservice/CameraServiceWatchdog.h
new file mode 100644
index 0000000..f4955e2
--- /dev/null
+++ b/services/camera/libcameraservice/CameraServiceWatchdog.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The CameraService watchdog is used to help detect bad states in the
+ * Camera HAL. The threadloop uses cycle counters, assigned to each calling
+ * thread, to monitor the elapsed time and kills the process when the
+ * expected duration has been exceeded.
+ * Notes on multi-threaded behaviors:
+ * - The threadloop is blocked/paused when there are no calls being
+ * monitored.
+ * - The start and stop functions handle simultaneous call monitoring
+ * and single call monitoring differently. See function documentation for
+ * more details.
+ */
+
+#include <chrono>
+#include <thread>
+#include <time.h>
+#include <utils/Thread.h>
+#include <utils/Log.h>
+#include <unordered_map>
+
+// Wrap the call of interest in start/stop calls; each expands to one expression (no ';')
+#define WATCH(toMonitor) watchThread([&]() { return toMonitor;}, gettid())
+#define WATCH_CUSTOM_TIMER(toMonitor, cycles, cycleLength) \
+        watchThread([&]() { return toMonitor;}, gettid(), cycles, cycleLength)
+
+// Default timer: a monitored call is allowed kMaxCycles cycles of kCycleLengthMs each
+static constexpr size_t kMaxCycles = 100;
+static constexpr uint32_t kCycleLengthMs = 100;
+
+namespace android {
+
+class CameraServiceWatchdog : public Thread {
+public:
+    /** Watchdogs start paused; the default constructor uses kMaxCycles and kCycleLengthMs */
+    explicit CameraServiceWatchdog() : mPause(true), mMaxCycles(kMaxCycles),
+            mCycleLengthMs(kCycleLengthMs) {}
+
+    explicit CameraServiceWatchdog(size_t maxCycles, uint32_t cycleLengthMs) :
+            mPause(true), mMaxCycles(maxCycles), mCycleLengthMs(cycleLengthMs) {}
+
+    virtual ~CameraServiceWatchdog() {}
+
+    /** Requests thread exit, clears all monitored calls and wakes a paused threadloop */
+    void requestExit() override;
+
+    /**
+     * Used to wrap monitored calls in start and stop functions using custom timer values.
+     * Returns the value returned by func(), so func must return a (non-void) value.
+     */
+    template<typename T>
+    auto watchThread(T func, uint32_t tid, uint32_t cycles, uint32_t cycleLength) {
+        if (cycles == mMaxCycles && cycleLength == mCycleLengthMs) {
+            // Custom timer values match the class timer values, so use the current thread
+            return watchThread(func, tid);
+        }
+
+        // Create another instance of the watchdog to prevent disruption
+        // of timer for current monitored calls
+        sp<CameraServiceWatchdog> tempWatchdog =
+                new CameraServiceWatchdog(cycles, cycleLength);
+        tempWatchdog->run("CameraServiceWatchdog");
+        auto res = tempWatchdog->watchThread(func, tid);
+        tempWatchdog->requestExit();
+        tempWatchdog.clear();
+        return res;
+    }
+
+    /**
+     * Used to wrap monitored calls in start and stop functions using class timer values.
+     * The result is declared with the type func() returns and handed back unchanged.
+     */
+    template<typename T>
+    auto watchThread(T func, uint32_t tid) {
+        start(tid);
+        auto res = func();
+        stop(tid);
+        return res;
+    }
+
+private:
+    /**
+     * Start adds a cycle counter for the calling thread. When threadloop is blocked/paused,
+     * start() unblocks and starts the watchdog
+     */
+    void start(uint32_t tid);
+
+    /**
+     * Stop erases the cycle counter to end watchdog for the calling thread. If there are
+     * no calls left to be monitored afterwards, stop() also blocks/pauses threadloop
+     */
+    void stop(uint32_t tid);
+
+    /** One watchdog cycle: waits while paused, sleeps a cycle, then ages each counter */
+    bool threadLoop() override;
+
+    Mutex mWatchdogLock;            // Guards mPause and tidToCycleCounterMap
+    Condition mWatchdogCondition;   // Condition variable for stop/start
+    bool mPause;                    // True if thread is currently paused
+    uint32_t mMaxCycles;            // Max cycles
+    uint32_t mCycleLengthMs;        // Length of time elapsed per cycle
+
+    std::unordered_map<uint32_t, uint32_t> tidToCycleCounterMap; // Thread Id to cycle counter map
+};
+
+} // namespace android
diff --git a/services/camera/libcameraservice/device3/Camera3Device.cpp b/services/camera/libcameraservice/device3/Camera3Device.cpp
index 04e65d4..ddbc8ba 100644
--- a/services/camera/libcameraservice/device3/Camera3Device.cpp
+++ b/services/camera/libcameraservice/device3/Camera3Device.cpp
@@ -115,6 +115,10 @@
status_t Camera3Device::initializeCommonLocked() {
+ /** Start watchdog thread */
+ mCameraServiceWatchdog = new CameraServiceWatchdog();
+ mCameraServiceWatchdog->run("CameraServiceWatchdog");
+
/** Start up status tracker thread */
mStatusTracker = new StatusTracker(this);
status_t res = mStatusTracker->run(String8::format("C3Dev-%s-Status", mId.string()).string());
@@ -316,7 +320,7 @@
// Call close without internal mutex held, as the HAL close may need to
// wait on assorted callbacks,etc, to complete before it can return.
- interface->close();
+ mCameraServiceWatchdog->WATCH(interface->close());
flushInflightRequests();
@@ -339,6 +343,12 @@
}
}
ALOGI("%s: X", __FUNCTION__);
+
+ if (mCameraServiceWatchdog != NULL) {
+ mCameraServiceWatchdog->requestExit();
+ mCameraServiceWatchdog.clear();
+ }
+
return res;
}
@@ -1719,7 +1729,12 @@
mSessionStatsBuilder.stopCounter();
}
- return mRequestThread->flush();
+ // Calculate expected duration for flush with additional buffer time in ms for watchdog
+ uint64_t maxExpectedDuration = (getExpectedInFlightDuration() + kBaseGetBufferWait) / 1e6;
+ status_t res = mCameraServiceWatchdog->WATCH_CUSTOM_TIMER(mRequestThread->flush(),
+ maxExpectedDuration / kCycleLengthMs, kCycleLengthMs);
+
+ return res;
}
status_t Camera3Device::prepare(int streamId) {
diff --git a/services/camera/libcameraservice/device3/Camera3Device.h b/services/camera/libcameraservice/device3/Camera3Device.h
index 749b342..748d063 100644
--- a/services/camera/libcameraservice/device3/Camera3Device.h
+++ b/services/camera/libcameraservice/device3/Camera3Device.h
@@ -32,6 +32,7 @@
#include <camera/CaptureResult.h>
#include "android/hardware/camera/metadata/3.8/types.h"
+#include "CameraServiceWatchdog.h"
#include "common/CameraDeviceBase.h"
#include "device3/BufferUtils.h"
#include "device3/StatusTracker.h"
@@ -98,6 +99,9 @@
metadata_vendor_id_t getVendorTagId() const override { return mVendorTagId; }
+ // Watchdog thread
+ sp<CameraServiceWatchdog> mCameraServiceWatchdog;
+
// Transitions to idle state on success.
virtual status_t initialize(sp<CameraProviderManager> /*manager*/,
const String8& /*monitorTags*/) = 0;