blob: efa53f8a481b6bbe718ff1d9612613bf8464eee4 [file] [log] [blame]
Eric Laurent3528c932018-02-23 17:17:22 -08001/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
Eric Laurent42896a02019-09-27 15:40:33 -070017#define LOG_TAG "TimeCheck"
Eric Laurent3528c932018-02-23 17:17:22 -080018
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080019#include <optional>
20
Andy Hung10ac7112022-03-28 08:00:40 -070021#include <android-base/logging.h>
Andy Hunga2a1ac32022-03-18 16:12:11 -070022#include <audio_utils/clock.h>
Marco Nelissencf90b492019-09-26 11:20:54 -070023#include <mediautils/EventLog.h>
Andy Hung35f96152022-07-15 15:18:59 -070024#include <mediautils/FixedString.h>
Andy Hung224f82f2022-03-22 00:00:49 -070025#include <mediautils/MethodStatistics.h>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080026#include <mediautils/TimeCheck.h>
Atneya Nairf5b68512022-05-23 20:02:49 -040027#include <mediautils/TidWrapper.h>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080028#include <utils/Log.h>
Atneya Nairf5b68512022-05-23 20:02:49 -040029
30#if defined(__BIONIC__)
Eric Laurent42896a02019-09-27 15:40:33 -070031#include "debuggerd/handler.h"
Atneya Nairf5b68512022-05-23 20:02:49 -040032#endif
33
34// This function appropriately signals a pid to dump a backtrace if we are
35// running on device.
36static inline void signalAudioHAL([[maybe_unused]] pid_t pid) {
37#if defined(__BIONIC__)
38 sigqueue(pid, DEBUGGER_SIGNAL, {.sival_int = 0});
39#endif
40}
Eric Laurent3528c932018-02-23 17:17:22 -080041
Andy Hung5c6d68a2022-03-09 21:54:59 -080042namespace android::mediautils {
Eric Laurent3528c932018-02-23 17:17:22 -080043
Andy Hunga2a1ac32022-03-18 16:12:11 -070044/**
45 * Returns the std::string "HH:MM:SS.MSc" from a system_clock time_point.
46 */
Ytai Ben-Tsvi34f26b12021-12-02 13:58:38 -080047std::string formatTime(std::chrono::system_clock::time_point t) {
Andy Hunga2a1ac32022-03-18 16:12:11 -070048 auto time_string = audio_utils_time_string_from_ns(
49 std::chrono::nanoseconds(t.time_since_epoch()).count());
50
51 // The time string is 19 characters (including null termination).
52 // Example: "03-27 16:47:06.187"
53 // MM DD HH MM SS MS
54 // We offset by 6 to get HH:MM:SS.MSc
55 //
56 return time_string.time + 6; // offset to remove month/day.
Ytai Ben-Tsvi34f26b12021-12-02 13:58:38 -080057}
58
Andy Hunga2a1ac32022-03-18 16:12:11 -070059/**
60 * Finds the end of the common time prefix.
61 *
62 * This is as an option to remove the common time prefix to avoid
63 * unnecessary duplicated strings.
64 *
65 * \param time1 a time string
66 * \param time2 a time string
67 * \return the position where the common time prefix ends. For abbreviated
68 * printing of time2, offset the character pointer by this position.
69 */
70static size_t commonTimePrefixPosition(std::string_view time1, std::string_view time2) {
71 const size_t endPos = std::min(time1.size(), time2.size());
72 size_t i;
73
74 // Find location of the first mismatch between strings
75 for (i = 0; ; ++i) {
76 if (i == endPos) {
77 return i; // strings match completely to the length of one of the strings.
78 }
79 if (time1[i] != time2[i]) {
80 break;
81 }
82 if (time1[i] == '\0') {
83 return i; // "printed" strings match completely. No need to check further.
84 }
85 }
86
87 // Go backwards until we find a delimeter or space.
88 for (; i > 0
89 && isdigit(time1[i]) // still a number
90 && time1[i - 1] != ' '
91 ; --i) {
92 }
93 return i;
94}
95
96/**
97 * Returns the unique suffix of time2 that isn't present in time1.
98 *
99 * If time2 is identical to time1, then an empty string_view is returned.
100 * This method is used to elide the common prefix when printing times.
101 */
102std::string_view timeSuffix(std::string_view time1, std::string_view time2) {
103 const size_t pos = commonTimePrefixPosition(time1, time2);
104 return time2.substr(pos);
105}
Ytai Ben-Tsvi34f26b12021-12-02 13:58:38 -0800106
Eric Laurent42896a02019-09-27 15:40:33 -0700107// Audio HAL server pids vector used to generate audio HAL processes tombstone
108// when audioserver watchdog triggers.
109// We use a lockless storage to avoid potential deadlocks in the context of watchdog
110// trigger.
111// Protection again simultaneous writes is not needed given one update takes place
112// during AudioFlinger construction and other comes necessarily later once the IAudioFlinger
113// interface is available.
114// The use of an atomic index just guaranties that current vector is fully initialized
115// when read.
116/* static */
117void TimeCheck::accessAudioHalPids(std::vector<pid_t>* pids, bool update) {
118 static constexpr int kNumAudioHalPidsVectors = 3;
119 static std::vector<pid_t> audioHalPids[kNumAudioHalPidsVectors];
Andy Hung5c6d68a2022-03-09 21:54:59 -0800120 static std::atomic<unsigned> curAudioHalPids = 0;
Eric Laurent42896a02019-09-27 15:40:33 -0700121
122 if (update) {
Eric Laurent1ad278b2021-03-05 18:09:01 +0100123 audioHalPids[(curAudioHalPids++ + 1) % kNumAudioHalPidsVectors] = *pids;
Eric Laurent42896a02019-09-27 15:40:33 -0700124 } else {
Eric Laurent1ad278b2021-03-05 18:09:01 +0100125 *pids = audioHalPids[curAudioHalPids % kNumAudioHalPidsVectors];
Eric Laurent42896a02019-09-27 15:40:33 -0700126 }
127}
128
129/* static */
130void TimeCheck::setAudioHalPids(const std::vector<pid_t>& pids) {
131 accessAudioHalPids(&(const_cast<std::vector<pid_t>&>(pids)), true);
132}
133
134/* static */
135std::vector<pid_t> TimeCheck::getAudioHalPids() {
136 std::vector<pid_t> pids;
137 accessAudioHalPids(&pids, false);
138 return pids;
139}
140
Eric Laurent3528c932018-02-23 17:17:22 -0800141/* static */
Andy Hung5c6d68a2022-03-09 21:54:59 -0800142TimerThread& TimeCheck::getTimeCheckThread() {
143 static TimerThread sTimeCheckThread{};
Eric Laurent3528c932018-02-23 17:17:22 -0800144 return sTimeCheckThread;
145}
146
Andy Hunga2a1ac32022-03-18 16:12:11 -0700147/* static */
148std::string TimeCheck::toString() {
149 // note pending and retired are individually locked for maximum concurrency,
150 // snapshot is not instantaneous at a single time.
151 return getTimeCheckThread().toString();
152}
153
Andy Hungf8ab0932022-06-13 19:49:43 -0700154TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, Duration requestedTimeoutDuration,
155 Duration secondChanceDuration, bool crashOnTimeout)
Andy Hung35f96152022-07-15 15:18:59 -0700156 : mTimeCheckHandler{ std::make_shared<TimeCheckHandler>(
Andy Hungf8ab0932022-06-13 19:49:43 -0700157 tag, std::move(onTimer), crashOnTimeout, requestedTimeoutDuration,
Atneya Nairf5b68512022-05-23 20:02:49 -0400158 secondChanceDuration, std::chrono::system_clock::now(), getThreadIdWrapper()) }
Andy Hungf8ab0932022-06-13 19:49:43 -0700159 , mTimerHandle(requestedTimeoutDuration.count() == 0
160 /* for TimeCheck we don't consider a non-zero secondChanceDuration here */
Andy Hunga2a1ac32022-03-18 16:12:11 -0700161 ? getTimeCheckThread().trackTask(mTimeCheckHandler->tag)
162 : getTimeCheckThread().scheduleTask(
163 mTimeCheckHandler->tag,
164 // Pass in all the arguments by value to this task for safety.
165 // The thread could call the callback before the constructor is finished.
166 // The destructor is not blocked on callback.
Andy Hung2aa15102022-06-13 19:49:43 -0700167 [ timeCheckHandler = mTimeCheckHandler ](TimerThread::Handle timerHandle) {
168 timeCheckHandler->onTimeout(timerHandle);
Andy Hunga2a1ac32022-03-18 16:12:11 -0700169 },
Andy Hungf8ab0932022-06-13 19:49:43 -0700170 requestedTimeoutDuration,
171 secondChanceDuration)) {}
Eric Laurent3528c932018-02-23 17:17:22 -0800172
173TimeCheck::~TimeCheck() {
Andy Hunga2a1ac32022-03-18 16:12:11 -0700174 if (mTimeCheckHandler) {
175 mTimeCheckHandler->onCancel(mTimerHandle);
176 }
Eric Laurent3528c932018-02-23 17:17:22 -0800177}
178
Andy Hung2aa15102022-06-13 19:49:43 -0700179/* static */
180std::string TimeCheck::analyzeTimeouts(
181 float requestedTimeoutMs, float elapsedSteadyMs, float elapsedSystemMs) {
182 // Track any OS clock issues with suspend.
183 // It is possible that the elapsedSystemMs is much greater than elapsedSteadyMs if
184 // a suspend occurs; however, we always expect the timeout ms should always be slightly
185 // less than the elapsed steady ms regardless of whether a suspend occurs or not.
186
187 std::string s("Timeout ms ");
188 s.append(std::to_string(requestedTimeoutMs))
189 .append(" elapsed steady ms ").append(std::to_string(elapsedSteadyMs))
190 .append(" elapsed system ms ").append(std::to_string(elapsedSystemMs));
191
192 // Is there something unusual?
193 static constexpr float TOLERANCE_CONTEXT_SWITCH_MS = 200.f;
194
195 if (requestedTimeoutMs > elapsedSteadyMs || requestedTimeoutMs > elapsedSystemMs) {
196 s.append("\nError: early expiration - "
197 "requestedTimeoutMs should be less than elapsed time");
198 }
199
200 if (elapsedSteadyMs > elapsedSystemMs + TOLERANCE_CONTEXT_SWITCH_MS) {
201 s.append("\nWarning: steady time should not advance faster than system time");
202 }
203
204 // This has been found in suspend stress testing.
205 if (elapsedSteadyMs > requestedTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) {
206 s.append("\nWarning: steady time significantly exceeds timeout "
207 "- possible thread stall or aborted suspend");
208 }
209
210 // This has been found in suspend stress testing.
211 if (elapsedSystemMs > requestedTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) {
212 s.append("\nInformation: system time significantly exceeds timeout "
213 "- possible suspend");
214 }
215 return s;
216}
217
218// To avoid any potential race conditions, the timer handle
219// (expiration = clock steady start + timeout) is passed into the callback.
Andy Hung5c6d68a2022-03-09 21:54:59 -0800220void TimeCheck::TimeCheckHandler::onCancel(TimerThread::Handle timerHandle) const
221{
222 if (TimeCheck::getTimeCheckThread().cancelTask(timerHandle) && onTimer) {
Andy Hung2aa15102022-06-13 19:49:43 -0700223 const std::chrono::steady_clock::time_point endSteadyTime =
224 std::chrono::steady_clock::now();
225 const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>(
226 endSteadyTime - timerHandle + timeoutDuration).count();
227 // send the elapsed steady time for statistics.
228 onTimer(false /* timeout */, elapsedSteadyMs);
Andy Hung5c6d68a2022-03-09 21:54:59 -0800229 }
230}
231
Andy Hung2aa15102022-06-13 19:49:43 -0700232// To avoid any potential race conditions, the timer handle
233// (expiration = clock steady start + timeout) is passed into the callback.
234void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) const
Andy Hung5c6d68a2022-03-09 21:54:59 -0800235{
Andy Hung2aa15102022-06-13 19:49:43 -0700236 const std::chrono::steady_clock::time_point endSteadyTime = std::chrono::steady_clock::now();
237 const std::chrono::system_clock::time_point endSystemTime = std::chrono::system_clock::now();
238 // timerHandle incorporates the timeout
239 const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>(
240 endSteadyTime - (timerHandle - timeoutDuration)).count();
241 const float elapsedSystemMs = std::chrono::duration_cast<FloatMs>(
242 endSystemTime - startSystemTime).count();
243 const float requestedTimeoutMs = std::chrono::duration_cast<FloatMs>(
244 timeoutDuration).count();
Andy Hungf8ab0932022-06-13 19:49:43 -0700245 const float secondChanceMs = std::chrono::duration_cast<FloatMs>(
246 secondChanceDuration).count();
Andy Hung2aa15102022-06-13 19:49:43 -0700247
Andy Hung5c6d68a2022-03-09 21:54:59 -0800248 if (onTimer) {
Andy Hung2aa15102022-06-13 19:49:43 -0700249 onTimer(true /* timeout */, elapsedSteadyMs);
Andy Hung5c6d68a2022-03-09 21:54:59 -0800250 }
251
252 if (!crashOnTimeout) return;
Ytai Ben-Tsvi34f26b12021-12-02 13:58:38 -0800253
Andy Hunga2a1ac32022-03-18 16:12:11 -0700254 // Generate the TimerThread summary string early before sending signals to the
255 // HAL processes which can affect thread behavior.
256 const std::string summary = getTimeCheckThread().toString(4 /* retiredCount */);
257
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800258 // Generate audio HAL processes tombstones and allow time to complete
259 // before forcing restart
Andy Hung5c6d68a2022-03-09 21:54:59 -0800260 std::vector<pid_t> pids = TimeCheck::getAudioHalPids();
Jaideep Sharmaeaf9d9c2022-05-13 15:15:27 +0530261 std::string halPids = "HAL pids [ ";
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800262 if (pids.size() != 0) {
263 for (const auto& pid : pids) {
264 ALOGI("requesting tombstone for pid: %d", pid);
Jaideep Sharmaeaf9d9c2022-05-13 15:15:27 +0530265 halPids.append(std::to_string(pid)).append(" ");
Atneya Nairf5b68512022-05-23 20:02:49 -0400266 signalAudioHAL(pid);
Eric Laurent3528c932018-02-23 17:17:22 -0800267 }
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800268 sleep(1);
269 } else {
270 ALOGI("No HAL process pid available, skipping tombstones");
Eric Laurent39b09b52018-06-29 12:24:40 -0700271 }
Jaideep Sharmaeaf9d9c2022-05-13 15:15:27 +0530272 halPids.append("]");
Andy Hungf45f34c2022-03-25 13:09:03 -0700273
Andy Hung5c6d68a2022-03-09 21:54:59 -0800274 LOG_EVENT_STRING(LOGTAG_AUDIO_BINDER_TIMEOUT, tag.c_str());
Andy Hung10ac7112022-03-28 08:00:40 -0700275
276 // Create abort message string - caution: this can be very large.
277 const std::string abortMessage = std::string("TimeCheck timeout for ")
278 .append(tag)
Andy Hung2aa15102022-06-13 19:49:43 -0700279 .append(" scheduled ").append(formatTime(startSystemTime))
Andy Hung10ac7112022-03-28 08:00:40 -0700280 .append(" on thread ").append(std::to_string(tid)).append("\n")
Andy Hungf8ab0932022-06-13 19:49:43 -0700281 .append(analyzeTimeouts(requestedTimeoutMs + secondChanceMs,
282 elapsedSteadyMs, elapsedSystemMs)).append("\n")
Jaideep Sharmaeaf9d9c2022-05-13 15:15:27 +0530283 .append(halPids).append("\n")
Andy Hung10ac7112022-03-28 08:00:40 -0700284 .append(summary);
285
286 // Note: LOG_ALWAYS_FATAL limits the size of the string - per log/log.h:
287 // Log message text may be truncated to less than an
288 // implementation-specific limit (1023 bytes).
289 //
290 // Here, we send the string through android-base/logging.h LOG()
291 // to avoid the size limitation. LOG(FATAL) does an abort whereas
292 // LOG(FATAL_WITHOUT_ABORT) does not abort.
293
294 LOG(FATAL) << abortMessage;
Eric Laurent3528c932018-02-23 17:17:22 -0800295}
296
Andy Hung224f82f2022-03-22 00:00:49 -0700297// Automatically create a TimeCheck class for a class and method.
298// This is used for Audio HIDL support.
299mediautils::TimeCheck makeTimeCheckStatsForClassMethod(
300 std::string_view className, std::string_view methodName) {
301 std::shared_ptr<MethodStatistics<std::string>> statistics =
302 mediautils::getStatisticsForClass(className);
303 if (!statistics) return {}; // empty TimeCheck.
304 return mediautils::TimeCheck(
Andy Hung35f96152022-07-15 15:18:59 -0700305 FixedString62(className).append("::").append(methodName),
306 [ safeMethodName = FixedString30(methodName),
Andy Hung224f82f2022-03-22 00:00:49 -0700307 stats = std::move(statistics) ]
308 (bool timeout, float elapsedMs) {
309 if (timeout) {
310 ; // ignored, there is no timeout value.
311 } else {
Andy Hung35f96152022-07-15 15:18:59 -0700312 stats->event(safeMethodName.asStringView(), elapsedMs);
Andy Hung224f82f2022-03-22 00:00:49 -0700313 }
Andy Hungf8ab0932022-06-13 19:49:43 -0700314 }, {} /* timeoutDuration */, {} /* secondChanceDuration */, false /* crashOnTimeout */);
Andy Hung224f82f2022-03-22 00:00:49 -0700315}
316
Andy Hung5c6d68a2022-03-09 21:54:59 -0800317} // namespace android::mediautils