Add CameraService Watchdog

This CL adds a watchdog used to detect issues in cameraservice and the
Camera HAL. It monitors selected calls with a configurable timer and
kills the process if a monitored call has not returned within the
allotted time.

The following cases are included in this CL: flush(), close().
Further cases can be added with the APIs in watchdog.

Test: Manual; Tested by force triggering watchdog with custom timer
values and while(true) or sleep in monitored calls
Bug: 229145451
Merged-In: Ia7b4a3fd9395210800a391c62b4ea757efbb3b00

Change-Id: Ia7b4a3fd9395210800a391c62b4ea757efbb3b00
diff --git a/services/camera/libcameraservice/Android.bp b/services/camera/libcameraservice/Android.bp
index 1e2dccb..bf7d0c2 100644
--- a/services/camera/libcameraservice/Android.bp
+++ b/services/camera/libcameraservice/Android.bp
@@ -42,6 +42,7 @@
 
     srcs: [
         "CameraService.cpp",
+        "CameraServiceWatchdog.cpp",
         "CameraFlashlight.cpp",
         "common/Camera2ClientBase.cpp",
         "common/CameraDeviceBase.cpp",
diff --git a/services/camera/libcameraservice/CameraServiceWatchdog.cpp b/services/camera/libcameraservice/CameraServiceWatchdog.cpp
new file mode 100644
index 0000000..fcd6ebe
--- /dev/null
+++ b/services/camera/libcameraservice/CameraServiceWatchdog.cpp
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "CameraServiceWatchdog"
+
+#include "CameraServiceWatchdog.h"
+
+namespace android {
+
+/**
+ * One watchdog cycle: waits while paused, sleeps one cycle length, then bumps every
+ * monitored thread's counter and SIGKILLs the process once a counter reaches mMaxCycles.
+ */
+bool CameraServiceWatchdog::threadLoop()
+{
+    {
+        // Park until start() or requestExit() clears mPause.
+        AutoMutex lock(mWatchdogLock);
+        while (mPause) {
+            mWatchdogCondition.wait(mWatchdogLock);
+        }
+    }
+
+    // The sleep runs outside the lock scope, so start()/stop() can proceed meanwhile.
+    std::this_thread::sleep_for(std::chrono::milliseconds(mCycleLengthMs));
+
+    AutoMutex lock(mWatchdogLock);
+    for (auto& entry : tidToCycleCounterMap) {
+        uint32_t& cycleCount = entry.second;
+        ++cycleCount;
+        if (cycleCount >= mMaxCycles) {
+            ALOGW("CameraServiceWatchdog triggering kill for pid: %d", getpid());
+            kill(getpid(), SIGKILL);
+        }
+    }
+
+    return true;
+}
+
+/** Requests thread exit, drops all cycle counters and wakes threadLoop() if it is parked. */
+void CameraServiceWatchdog::requestExit()
+{
+    Thread::requestExit();
+
+    AutoMutex lock(mWatchdogLock);
+    tidToCycleCounterMap.clear();
+    if (!mPause) {
+        return;  // threadLoop() is not waiting on the condition
+    }
+    mPause = false;
+    mWatchdogCondition.signal();
+}
+
+/** Drops the cycle counter for |tid|; parks threadLoop() once no call is left to monitor. */
+void CameraServiceWatchdog::stop(uint32_t tid)
+{
+    AutoMutex lock(mWatchdogLock);
+    tidToCycleCounterMap.erase(tid);
+    if (!tidToCycleCounterMap.empty()) {
+        return;  // other threads are still being monitored
+    }
+    mPause = true;
+}
+
+/** Begins monitoring |tid| with a fresh cycle counter and wakes threadLoop() if it is parked. */
+void CameraServiceWatchdog::start(uint32_t tid)
+{
+    AutoMutex lock(mWatchdogLock);
+    tidToCycleCounterMap[tid] = 0;
+    if (!mPause) {
+        return;  // watchdog loop is already running
+    }
+    mPause = false;
+    mWatchdogCondition.signal();
+}
+
+}   // namespace android
diff --git a/services/camera/libcameraservice/CameraServiceWatchdog.h b/services/camera/libcameraservice/CameraServiceWatchdog.h
new file mode 100644
index 0000000..f4955e2
--- /dev/null
+++ b/services/camera/libcameraservice/CameraServiceWatchdog.h
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * The CameraService watchdog is used to help detect bad states in the
+ * Camera HAL. The threadloop uses cycle counters, assigned to each calling
+ * thread, to monitor the elapsed time and kills the process when the
+ * expected duration has been exceeded.
+ * Notes on multi-threaded behaviors:
+ *   - The threadloop is blocked/paused when there are no calls being
+ *     monitored.
+ *   - The start and stop functions handle simultaneous call monitoring
+ *     and single call monitoring differently. See function documentation for
+ *     more details.
+ */
+
+#include <chrono>
+#include <thread>
+#include <time.h>
+#include <utils/Thread.h>
+#include <utils/Log.h>
+#include <unordered_map>
+
+// Wrap a call in watchThread() so it is monitored; invoke as watchdog->WATCH(call).
+#define WATCH(toMonitor) watchThread([&]() { return toMonitor;}, gettid())
+#define WATCH_CUSTOM_TIMER(toMonitor, cycles, cycleLength) \
+        watchThread([&]() { return toMonitor;}, gettid(), cycles, cycleLength)
+
+// Default watchdog budget: kMaxCycles cycles of kCycleLengthMs ms each (100 * 100 ms = 10 s)
+const static size_t   kMaxCycles     = 100;
+const static uint32_t kCycleLengthMs = 100;
+
+namespace android {
+
+class CameraServiceWatchdog : public Thread {
+
+public:
+    explicit CameraServiceWatchdog() : mPause(true), mMaxCycles(kMaxCycles),
+            mCycleLengthMs(kCycleLengthMs) {}
+
+    explicit CameraServiceWatchdog (size_t maxCycles, uint32_t cycleLengthMs) :
+            mPause(true), mMaxCycles(maxCycles), mCycleLengthMs(cycleLengthMs) {}
+
+    virtual ~CameraServiceWatchdog() {}
+
+    void requestExit() override;
+
+    /**
+     * Runs func() under watchdog monitoring for thread tid and returns its result,
+     * using a custom budget of `cycles` cycles of `cycleLength` ms each.
+     */
+    template<typename T>
+    auto watchThread(T func, uint32_t tid, uint32_t cycles, uint32_t cycleLength) {
+        if (cycles == mMaxCycles && cycleLength == mCycleLengthMs) {
+            // Custom values match this instance's timer values: reuse this watchdog.
+            return watchThread(func, tid);
+        }
+
+        // Use a temporary watchdog instance so the timer of calls already being
+        // monitored by this instance is not disrupted.
+        sp<CameraServiceWatchdog> tempWatchdog =
+                new CameraServiceWatchdog(cycles, cycleLength);
+        tempWatchdog->run("CameraServiceWatchdog");
+        // Result type is deduced from func, so non-integer results are preserved.
+        auto res = tempWatchdog->watchThread(func, tid);
+        tempWatchdog->requestExit();
+        tempWatchdog.clear();
+        return res;
+    }
+
+    /** Used to wrap monitored calls in start and stop functions using class timer values */
+    template<typename T>
+    auto watchThread(T func, uint32_t tid) {
+        start(tid);
+        // The result type is deduced from func itself, so any copyable return type
+        // (not just integers) is handed back unchanged.
+        auto res = func();
+        stop(tid);
+
+        return res;
+    }
+
+private:
+
+    /**
+     * Start adds a cycle counter for the calling thread. When threadloop is blocked/paused,
+     * start() unblocks and starts the watchdog
+     */
+    void start(uint32_t tid);
+
+    /**
+     * Stop erases the cycle counter of the calling thread. If there are no calls left
+     * to be monitored afterwards, stop() also blocks/pauses threadloop
+     */
+    void stop(uint32_t tid);
+
+    bool            threadLoop() override;
+
+    Mutex           mWatchdogLock;        // Guards mPause and tidToCycleCounterMap
+    Condition       mWatchdogCondition;   // Condition variable for stop/start
+    bool            mPause;               // True if thread is currently paused
+    uint32_t        mMaxCycles;           // Cycle count at which the process is killed
+    uint32_t        mCycleLengthMs;       // Length of time elapsed per cycle
+
+    std::unordered_map<uint32_t, uint32_t> tidToCycleCounterMap; // Thread Id to cycle counter map
+};
+
+}   // namespace android
diff --git a/services/camera/libcameraservice/device3/Camera3Device.cpp b/services/camera/libcameraservice/device3/Camera3Device.cpp
index 04e65d4..ddbc8ba 100644
--- a/services/camera/libcameraservice/device3/Camera3Device.cpp
+++ b/services/camera/libcameraservice/device3/Camera3Device.cpp
@@ -115,6 +115,10 @@
 
 status_t Camera3Device::initializeCommonLocked() {
 
+    /** Start watchdog thread */
+    mCameraServiceWatchdog = new CameraServiceWatchdog();
+    mCameraServiceWatchdog->run("CameraServiceWatchdog");
+
     /** Start up status tracker thread */
     mStatusTracker = new StatusTracker(this);
     status_t res = mStatusTracker->run(String8::format("C3Dev-%s-Status", mId.string()).string());
@@ -316,7 +320,7 @@
 
         // Call close without internal mutex held, as the HAL close may need to
         // wait on assorted callbacks,etc, to complete before it can return.
-        interface->close();
+        mCameraServiceWatchdog->WATCH(interface->close());
 
         flushInflightRequests();
 
@@ -339,6 +343,12 @@
         }
     }
     ALOGI("%s: X", __FUNCTION__);
+
+    if (mCameraServiceWatchdog != NULL) {
+        mCameraServiceWatchdog->requestExit();
+        mCameraServiceWatchdog.clear();
+    }
+
     return res;
 }
 
@@ -1719,7 +1729,12 @@
         mSessionStatsBuilder.stopCounter();
     }
 
-    return mRequestThread->flush();
+    // Calculate expected duration for flush with additional buffer time in ms for watchdog
+    uint64_t maxExpectedDuration = (getExpectedInFlightDuration() + kBaseGetBufferWait) / 1e6;
+    status_t res = mCameraServiceWatchdog->WATCH_CUSTOM_TIMER(mRequestThread->flush(),
+            maxExpectedDuration / kCycleLengthMs, kCycleLengthMs);
+
+    return res;
 }
 
 status_t Camera3Device::prepare(int streamId) {
diff --git a/services/camera/libcameraservice/device3/Camera3Device.h b/services/camera/libcameraservice/device3/Camera3Device.h
index 749b342..748d063 100644
--- a/services/camera/libcameraservice/device3/Camera3Device.h
+++ b/services/camera/libcameraservice/device3/Camera3Device.h
@@ -32,6 +32,7 @@
 #include <camera/CaptureResult.h>
 
 #include "android/hardware/camera/metadata/3.8/types.h"
+#include "CameraServiceWatchdog.h"
 #include "common/CameraDeviceBase.h"
 #include "device3/BufferUtils.h"
 #include "device3/StatusTracker.h"
@@ -98,6 +99,9 @@
 
     metadata_vendor_id_t getVendorTagId() const override { return mVendorTagId; }
 
+    // Watchdog thread
+    sp<CameraServiceWatchdog> mCameraServiceWatchdog;
+
     // Transitions to idle state on success.
     virtual status_t initialize(sp<CameraProviderManager> /*manager*/,
             const String8& /*monitorTags*/) = 0;