TimeCheck: check audio_mutex activity

Add mutex wait chain information and callstacks
to TimeCheck log.

Test: induced wait in PlaybackThread
Bug: 309479717
Change-Id: I4b93b31b23dc13af0d386bc19bb2fcc329b8d2f6
diff --git a/media/utils/TimeCheck.cpp b/media/utils/TimeCheck.cpp
index a5378e6..ec68de7 100644
--- a/media/utils/TimeCheck.cpp
+++ b/media/utils/TimeCheck.cpp
@@ -287,6 +287,11 @@
             .append(halPids).append("\n")
             .append(snapshotAnalysis.toString());
 
+    // In many cases, the initial timeout stack differs from the abort backtrace because
+    // (1) the time difference between initial timeout and the final abort signal
+    // and (2) signalling the HAL audio service may cause
+    // the thread to unblock and continue.
+
     // Note: LOG_ALWAYS_FATAL limits the size of the string - per log/log.h:
     // Log message text may be truncated to less than an
     // implementation-specific limit (1023 bytes).
diff --git a/media/utils/TimerThread.cpp b/media/utils/TimerThread.cpp
index 3966103..ad27af3 100644
--- a/media/utils/TimerThread.cpp
+++ b/media/utils/TimerThread.cpp
@@ -21,6 +21,7 @@
 #include <unistd.h>
 #include <vector>
 
+#include <audio_utils/mutex.h>
 #include <mediautils/MediaUtilsDelayed.h>
 #include <mediautils/TidWrapper.h>
 #include <mediautils/TimerThread.h>
@@ -59,14 +60,14 @@
     return true;
 }
 
-
-std::string TimerThread::SnapshotAnalysis::toString() const {
+std::string TimerThread::SnapshotAnalysis::toString(bool showTimeoutStack) const {
     // Note: These request queues are snapshot very close together but
     // not at "identical" times as we don't use a class-wide lock.
     std::string analysisSummary = std::string("\nanalysis [ ").append(description).append(" ]");
     std::string timeoutStack;
     std::string blockedStack;
-    if (timeoutTid != -1) {
+    std::string mutexWaitChainStack;
+    if (showTimeoutStack && timeoutTid != -1) {
         timeoutStack = std::string(suspectTid == timeoutTid ? "\ntimeout/blocked(" : "\ntimeout(")
                 .append(std::to_string(timeoutTid)).append(") callstack [\n")
                 .append(getCallStackStringForTid(timeoutTid)).append("]");
@@ -78,6 +79,29 @@
                 .append(getCallStackStringForTid(suspectTid)).append("]");
     }
 
+    if (!mutexWaitChain.empty()) {
+        mutexWaitChainStack.append("\nmutex wait chain [\n");
+        // the wait chain omits the initial timeout tid (which is good as we don't
+        // need to suppress it).
+        for (size_t i = 0; i < mutexWaitChain.size(); ++i) {
+            const auto& [tid, name] = mutexWaitChain[i];
+            mutexWaitChainStack.append("{ tid: ").append(std::to_string(tid))
+                    .append(" (holding ").append(name).append(")");
+            if (tid == timeoutTid) {
+                mutexWaitChainStack.append(" TIMEOUT_STACK }\n");
+            } else if (tid == suspectTid) {
+                mutexWaitChainStack.append(" BLOCKED_STACK }\n");
+            } else if (hasMutexCycle && i == mutexWaitChain.size() - 1) {
+                // for a cycle, the last pid in the chain is repeated.
+                mutexWaitChainStack.append(" CYCLE_STACK }\n");
+            } else {
+                mutexWaitChainStack.append(" callstack [\n")
+                        .append(getCallStackStringForTid(tid)).append("] }\n");
+            }
+        }
+        mutexWaitChainStack.append("]");
+    }
+
     return std::string("now ")
             .append(formatTime(std::chrono::system_clock::now()))
             .append("\nsecondChanceCount ")
@@ -91,7 +115,8 @@
             .append(requestsToString(retiredRequests))
             .append(" ]")
             .append(timeoutStack)
-            .append(blockedStack);
+            .append(blockedStack)
+            .append(mutexWaitChainStack);
 }
 
 // A HAL method is where the substring "Hidl" is in the class name.
@@ -121,13 +146,33 @@
     analysis.pendingRequests = getPendingRequests();
     analysis.secondChanceCount = mMonitorThread.getSecondChanceCount();
     // No call has timed out, so there is no analysis to be done.
-    if (analysis.timeoutRequests.empty())
+    if (analysis.timeoutRequests.empty()) {
         return analysis;
+    }
+
     // for now look at last timeout (in our case, the only timeout)
     const std::shared_ptr<const Request> timeout = analysis.timeoutRequests.back();
     analysis.timeoutTid = timeout->tid;
-    if (analysis.pendingRequests.empty())
-      return analysis;
+
+    std::string& description = analysis.description;
+
+    // audio mutex specific wait chain analysis
+    auto deadlockInfo = audio_utils::mutex::deadlock_detection(analysis.timeoutTid);
+    ALOGD("%s: deadlockInfo: %s", __func__, deadlockInfo.to_string().c_str());
+
+    if (!deadlockInfo.empty()) {
+        if (!description.empty()) description.append("\n");
+        description.append(deadlockInfo.to_string());
+    }
+
+    analysis.hasMutexCycle = deadlockInfo.has_cycle;
+    analysis.mutexWaitChain = std::move(deadlockInfo.chain);
+
+    // no pending requests, we don't attempt to use temporal correlation between a recent call.
+    if (analysis.pendingRequests.empty()) {
+        return analysis;
+    }
+
     // pending Requests that are problematic.
     std::vector<std::shared_ptr<const Request>> pendingExact;
     std::vector<std::shared_ptr<const Request>> pendingPossible;
@@ -151,15 +196,15 @@
         }
     }
 
-    std::string& description = analysis.description;
     if (!pendingExact.empty()) {
         const auto& request = pendingExact.front();
         const bool hal = isRequestFromHal(request);
 
         if (hal) {
-            description = std::string("Blocked directly due to HAL call: ")
+            if (!description.empty()) description.append("\n");
+            description.append("Blocked directly due to HAL call: ")
                 .append(request->toString());
-            analysis.suspectTid= request->tid;
+            analysis.suspectTid = request->tid;
         }
     }
     if (description.empty() && !pendingPossible.empty()) {
diff --git a/media/utils/include/mediautils/TimerThread.h b/media/utils/include/mediautils/TimerThread.h
index d84d682..320567b 100644
--- a/media/utils/include/mediautils/TimerThread.h
+++ b/media/utils/include/mediautils/TimerThread.h
@@ -269,9 +269,12 @@
         std::vector<std::shared_ptr<const Request>> timeoutRequests;
         // List of retired requests
         std::vector<std::shared_ptr<const Request>> retiredRequests;
+        // mutex deadlock / wait detection information.
+        bool hasMutexCycle = false;
+        std::vector<std::pair<pid_t, std::string>> mutexWaitChain;
         // Dumps the information contained above as well as additional call
         // stacks where applicable.
-        std::string toString() const;
+        std::string toString(bool showTimeoutStack = true) const;
     };
 
   private: