TimerThread: Add timeout analysis logic
Automatically dumps TimeCheck analysis if
there is an actionable blocked binder call.
Test: dedicated "crasher" test, check logcat
Test: adb shell dumpsys media.audio_flinger
Bug: 219958414
Change-Id: Ie21ef8940404fa7174938277bbf9c7aab2817ef7
diff --git a/media/utils/TimeCheck.cpp b/media/utils/TimeCheck.cpp
index d0dc9e1..3e35970 100644
--- a/media/utils/TimeCheck.cpp
+++ b/media/utils/TimeCheck.cpp
@@ -197,6 +197,7 @@
} else {
ALOGI("No HAL process pid available, skipping tombstones");
}
+
LOG_EVENT_STRING(LOGTAG_AUDIO_BINDER_TIMEOUT, tag.c_str());
LOG_ALWAYS_FATAL("TimeCheck timeout for %s scheduled %s on thread %d\n%s",
tag.c_str(), formatTime(startTime).c_str(), tid, summary.c_str());
diff --git a/media/utils/TimerThread.cpp b/media/utils/TimerThread.cpp
index 3bc4549..b82816f 100644
--- a/media/utils/TimerThread.cpp
+++ b/media/utils/TimerThread.cpp
@@ -53,17 +53,112 @@
}
std::string TimerThread::toString(size_t retiredCount) const {
+ // Note: These request queues are snapshot very close together but
+ // not at "identical" times as we don't use a class-wide lock.
+
+ std::vector<std::shared_ptr<const Request>> timeoutRequests;
+ std::vector<std::shared_ptr<const Request>> retiredRequests;
+ mTimeoutQueue.copyRequests(timeoutRequests);
+ mRetiredQueue.copyRequests(retiredRequests, retiredCount);
+ std::vector<std::shared_ptr<const Request>> pendingRequests =
+ getPendingRequests();
+
+ struct Analysis analysis = analyzeTimeout(timeoutRequests, pendingRequests);
+ std::string analysisSummary;
+ if (!analysis.summary.empty()) {
+ analysisSummary = std::string("\nanalysis [ ").append(analysis.summary).append(" ]");
+ }
return std::string("now ")
.append(formatTime(std::chrono::system_clock::now()))
- .append("\npending [ ")
- .append(pendingToString())
- .append(" ]\ntimeout [ ")
- .append(timeoutToString())
+ .append(analysisSummary)
+ .append("\ntimeout [ ")
+ .append(requestsToString(timeoutRequests))
+ .append(" ]\npending [ ")
+ .append(requestsToString(pendingRequests))
.append(" ]\nretired [ ")
- .append(retiredToString(retiredCount))
+ .append(requestsToString(retiredRequests))
.append(" ]");
}
+// A HAL method is where the substring "Hidl" is in the class name.
+// The tag should look like: ... Hidl ... :: ...
+// When the audio HAL is updated to AIDL perhaps we will use instead
+// a global directory of HAL classes.
+//
+// See MethodStatistics.cpp:
+// mediautils::getStatisticsClassesForModule(METHOD_STATISTICS_MODULE_NAME_AUDIO_HIDL)
+//
+/* static */
+bool TimerThread::isRequestFromHal(const std::shared_ptr<const Request>& request) {
+ const size_t hidlPos = request->tag.find("Hidl");
+ if (hidlPos == std::string::npos) return false;
+ // should be a separator afterwards Hidl which indicates the string was in the class.
+ const size_t separatorPos = request->tag.find("::", hidlPos);
+ return separatorPos != std::string::npos;
+}
+
+/* static */
+struct TimerThread::Analysis TimerThread::analyzeTimeout(
+ const std::vector<std::shared_ptr<const Request>>& timeoutRequests,
+ const std::vector<std::shared_ptr<const Request>>& pendingRequests) {
+
+ if (timeoutRequests.empty() || pendingRequests.empty()) return {}; // nothing to say.
+
+ // for now look at last timeout (in our case, the only timeout)
+ const std::shared_ptr<const Request> timeout = timeoutRequests.back();
+
+ // pending Requests that are problematic.
+ std::vector<std::shared_ptr<const Request>> pendingExact;
+ std::vector<std::shared_ptr<const Request>> pendingPossible;
+
+ // We look at pending requests that were scheduled no later than kDuration
+ // after the timeout request. This prevents false matches with calls
+ // that naturally block for a short period of time
+ // such as HAL write() and read().
+ //
+ auto constexpr kDuration = std::chrono::milliseconds(1000);
+ for (const auto& pending : pendingRequests) {
+ // If the pending tid is the same as timeout tid, problem identified.
+ if (pending->tid == timeout->tid) {
+ pendingExact.emplace_back(pending);
+ continue;
+ }
+
+ // if the pending tid is scheduled within time limit
+ if (pending->scheduled - timeout->scheduled < kDuration) {
+ pendingPossible.emplace_back(pending);
+ }
+ }
+
+ struct Analysis analysis{};
+
+ analysis.timeoutTid = timeout->tid;
+ std::string& summary = analysis.summary;
+ if (!pendingExact.empty()) {
+ const auto& request = pendingExact.front();
+ const bool hal = isRequestFromHal(request);
+
+ if (hal) {
+ summary = std::string("Blocked directly due to HAL call: ")
+ .append(request->toString());
+ }
+ }
+ if (summary.empty() && !pendingPossible.empty()) {
+ for (const auto& request : pendingPossible) {
+ const bool hal = isRequestFromHal(request);
+ if (hal) {
+ // The first blocked call is the most likely one.
+ // Recent calls might be temporarily blocked
+ // calls such as write() or read() depending on kDuration.
+ summary = std::string("Blocked possibly due to HAL call: ")
+ .append(request->toString());
+ analysis.HALBlockedTid = request->tid;
+ }
+ }
+ }
+ return analysis;
+}
+
std::vector<std::shared_ptr<const TimerThread::Request>> TimerThread::getPendingRequests() const {
constexpr size_t kEstimatedPendingRequests = 8; // approx 128 byte alloc.
std::vector<std::shared_ptr<const Request>> pendingRequests;
diff --git a/media/utils/include/mediautils/TimerThread.h b/media/utils/include/mediautils/TimerThread.h
index 2dfe499..ffee602 100644
--- a/media/utils/include/mediautils/TimerThread.h
+++ b/media/utils/include/mediautils/TimerThread.h
@@ -212,6 +212,36 @@
void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const;
};
+ // Analysis contains info deduced by analysisTimeout().
+ //
+ // Summary is the result string from checking timeoutRequests to see if
+ // any might be caused by blocked calls in pendingRequests.
+ //
+ // Summary string is empty if there is no automatic actionable info.
+ //
+ // timeoutTid is the tid selected from timeoutRequests (if any).
+ //
+ // HALBlockedTid is the tid that is blocked from pendingRequests believed
+ // to cause the timeout.
+ // HALBlockedTid may be INVALID_PID if no suspected tid is found,
+ // and if HALBlockedTid is valid, it will not be the same as timeoutTid.
+ //
+ static constexpr pid_t INVALID_PID = -1;
+ struct Analysis {
+ std::string summary;
+ pid_t timeoutTid = INVALID_PID;
+ pid_t HALBlockedTid = INVALID_PID;
+ };
+
+ // A HAL method is where the substring "Hidl" is in the class name.
+ // The tag should look like: ... Hidl ... :: ...
+ static bool isRequestFromHal(const std::shared_ptr<const Request>& request);
+
+ // Returns analysis from the requests.
+ static Analysis analyzeTimeout(
+ const std::vector<std::shared_ptr<const Request>>& timeoutRequests,
+ const std::vector<std::shared_ptr<const Request>>& pendingRequests);
+
std::vector<std::shared_ptr<const Request>> getPendingRequests() const;
// A no-timeout request is represented by a handles at the end of steady_clock time,