TimeCheck: check audio_mutex activity
Add mutex wait chain information and callstacks
to TimeCheck log.
Test: induced wait in PlaybackThread
Bug: 309479717
Change-Id: I4b93b31b23dc13af0d386bc19bb2fcc329b8d2f6
diff --git a/media/utils/TimeCheck.cpp b/media/utils/TimeCheck.cpp
index a5378e6..ec68de7 100644
--- a/media/utils/TimeCheck.cpp
+++ b/media/utils/TimeCheck.cpp
@@ -287,6 +287,11 @@
.append(halPids).append("\n")
.append(snapshotAnalysis.toString());
+ // In many cases, the initial timeout stack differs from the abort backtrace because
+ // (1) the time difference between initial timeout and the final abort signal
+ // and (2) signalling the HAL audio service may cause
+ // the thread to unblock and continue.
+
// Note: LOG_ALWAYS_FATAL limits the size of the string - per log/log.h:
// Log message text may be truncated to less than an
// implementation-specific limit (1023 bytes).
diff --git a/media/utils/TimerThread.cpp b/media/utils/TimerThread.cpp
index 3966103..ad27af3 100644
--- a/media/utils/TimerThread.cpp
+++ b/media/utils/TimerThread.cpp
@@ -21,6 +21,7 @@
#include <unistd.h>
#include <vector>
+#include <audio_utils/mutex.h>
#include <mediautils/MediaUtilsDelayed.h>
#include <mediautils/TidWrapper.h>
#include <mediautils/TimerThread.h>
@@ -59,14 +60,14 @@
return true;
}
-
-std::string TimerThread::SnapshotAnalysis::toString() const {
+std::string TimerThread::SnapshotAnalysis::toString(bool showTimeoutStack) const {
// Note: These request queues are snapshot very close together but
// not at "identical" times as we don't use a class-wide lock.
std::string analysisSummary = std::string("\nanalysis [ ").append(description).append(" ]");
std::string timeoutStack;
std::string blockedStack;
- if (timeoutTid != -1) {
+ std::string mutexWaitChainStack;
+ if (showTimeoutStack && timeoutTid != -1) {
timeoutStack = std::string(suspectTid == timeoutTid ? "\ntimeout/blocked(" : "\ntimeout(")
.append(std::to_string(timeoutTid)).append(") callstack [\n")
.append(getCallStackStringForTid(timeoutTid)).append("]");
@@ -78,6 +79,29 @@
.append(getCallStackStringForTid(suspectTid)).append("]");
}
+ if (!mutexWaitChain.empty()) {
+ mutexWaitChainStack.append("\nmutex wait chain [\n");
+ // the wait chain omits the initial timeout tid (which is good as we don't
+ // need to suppress it).
+ for (size_t i = 0; i < mutexWaitChain.size(); ++i) {
+ const auto& [tid, name] = mutexWaitChain[i];
+ mutexWaitChainStack.append("{ tid: ").append(std::to_string(tid))
+ .append(" (holding ").append(name).append(")");
+ if (tid == timeoutTid) {
+ mutexWaitChainStack.append(" TIMEOUT_STACK }\n");
+ } else if (tid == suspectTid) {
+ mutexWaitChainStack.append(" BLOCKED_STACK }\n");
+ } else if (hasMutexCycle && i == mutexWaitChain.size() - 1) {
+ // for a cycle, the last pid in the chain is repeated.
+ mutexWaitChainStack.append(" CYCLE_STACK }\n");
+ } else {
+ mutexWaitChainStack.append(" callstack [\n")
+ .append(getCallStackStringForTid(tid)).append("] }\n");
+ }
+ }
+ mutexWaitChainStack.append("]");
+ }
+
return std::string("now ")
.append(formatTime(std::chrono::system_clock::now()))
.append("\nsecondChanceCount ")
@@ -91,7 +115,8 @@
.append(requestsToString(retiredRequests))
.append(" ]")
.append(timeoutStack)
- .append(blockedStack);
+ .append(blockedStack)
+ .append(mutexWaitChainStack);
}
// A HAL method is where the substring "Hidl" is in the class name.
@@ -121,13 +146,33 @@
analysis.pendingRequests = getPendingRequests();
analysis.secondChanceCount = mMonitorThread.getSecondChanceCount();
// No call has timed out, so there is no analysis to be done.
- if (analysis.timeoutRequests.empty())
+ if (analysis.timeoutRequests.empty()) {
return analysis;
+ }
+
// for now look at last timeout (in our case, the only timeout)
const std::shared_ptr<const Request> timeout = analysis.timeoutRequests.back();
analysis.timeoutTid = timeout->tid;
- if (analysis.pendingRequests.empty())
- return analysis;
+
+ std::string& description = analysis.description;
+
+ // audio mutex specific wait chain analysis
+ auto deadlockInfo = audio_utils::mutex::deadlock_detection(analysis.timeoutTid);
+ ALOGD("%s: deadlockInfo: %s", __func__, deadlockInfo.to_string().c_str());
+
+ if (!deadlockInfo.empty()) {
+ if (!description.empty()) description.append("\n");
+ description.append(deadlockInfo.to_string());
+ }
+
+ analysis.hasMutexCycle = deadlockInfo.has_cycle;
+ analysis.mutexWaitChain = std::move(deadlockInfo.chain);
+
+ // no pending requests, we don't attempt to use temporal correlation between a recent call.
+ if (analysis.pendingRequests.empty()) {
+ return analysis;
+ }
+
// pending Requests that are problematic.
std::vector<std::shared_ptr<const Request>> pendingExact;
std::vector<std::shared_ptr<const Request>> pendingPossible;
@@ -151,15 +196,15 @@
}
}
- std::string& description = analysis.description;
if (!pendingExact.empty()) {
const auto& request = pendingExact.front();
const bool hal = isRequestFromHal(request);
if (hal) {
- description = std::string("Blocked directly due to HAL call: ")
+ if (!description.empty()) description.append("\n");
+ description.append("Blocked directly due to HAL call: ")
.append(request->toString());
- analysis.suspectTid= request->tid;
+ analysis.suspectTid = request->tid;
}
}
if (description.empty() && !pendingPossible.empty()) {
diff --git a/media/utils/include/mediautils/TimerThread.h b/media/utils/include/mediautils/TimerThread.h
index d84d682..320567b 100644
--- a/media/utils/include/mediautils/TimerThread.h
+++ b/media/utils/include/mediautils/TimerThread.h
@@ -269,9 +269,12 @@
std::vector<std::shared_ptr<const Request>> timeoutRequests;
// List of retired requests
std::vector<std::shared_ptr<const Request>> retiredRequests;
+ // mutex deadlock / wait detection information.
+ bool hasMutexCycle = false;
+ std::vector<std::pair<pid_t, std::string>> mutexWaitChain;
// Dumps the information contained above as well as additional call
// stacks where applicable.
- std::string toString() const;
+ std::string toString(bool showTimeoutStack = true) const;
};
private: