Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2018 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
Atneya Nair | cce1420 | 2022-09-05 20:17:50 -0700 | [diff] [blame] | 17 | #include <csignal> |
| 18 | #include "mediautils/TimerThread.h" |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 19 | #define LOG_TAG "TimeCheck" |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 20 | |
Ytai Ben-Tsvi | 1ea62c9 | 2021-11-10 14:38:27 -0800 | [diff] [blame] | 21 | #include <optional> |
| 22 | |
Andy Hung | 10ac711 | 2022-03-28 08:00:40 -0700 | [diff] [blame] | 23 | #include <android-base/logging.h> |
Atneya Nair | cce1420 | 2022-09-05 20:17:50 -0700 | [diff] [blame] | 24 | #include <android-base/strings.h> |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 25 | #include <audio_utils/clock.h> |
Andy Hung | cb89910 | 2024-08-06 15:17:16 -0700 | [diff] [blame] | 26 | #include <cutils/properties.h> |
Marco Nelissen | cf90b49 | 2019-09-26 11:20:54 -0700 | [diff] [blame] | 27 | #include <mediautils/EventLog.h> |
Andy Hung | 35f9615 | 2022-07-15 15:18:59 -0700 | [diff] [blame] | 28 | #include <mediautils/FixedString.h> |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 29 | #include <mediautils/MethodStatistics.h> |
Ytai Ben-Tsvi | 1ea62c9 | 2021-11-10 14:38:27 -0800 | [diff] [blame] | 30 | #include <mediautils/TimeCheck.h> |
Atneya Nair | f5b6851 | 2022-05-23 20:02:49 -0400 | [diff] [blame] | 31 | #include <mediautils/TidWrapper.h> |
Ytai Ben-Tsvi | 1ea62c9 | 2021-11-10 14:38:27 -0800 | [diff] [blame] | 32 | #include <utils/Log.h> |
Atneya Nair | f5b6851 | 2022-05-23 20:02:49 -0400 | [diff] [blame] | 33 | |
Atneya Nair | cce1420 | 2022-09-05 20:17:50 -0700 | [diff] [blame] | 34 | #if defined(__ANDROID__) |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 35 | #include "debuggerd/handler.h" |
Atneya Nair | f5b6851 | 2022-05-23 20:02:49 -0400 | [diff] [blame] | 36 | #endif |
| 37 | |
Atneya Nair | cce1420 | 2022-09-05 20:17:50 -0700 | [diff] [blame] | 38 | |
| 39 | namespace android::mediautils { |
Andy Hung | cb89910 | 2024-08-06 15:17:16 -0700 | [diff] [blame] | 40 | |
| 41 | // Note: The sum of kDefaultTimeOutDurationMs and kDefaultSecondChanceDurationMs |
| 42 | // should be no less than 2 seconds, otherwise spurious timeouts |
| 43 | // may occur with system suspend. |
| 44 | static constexpr int kDefaultTimeoutDurationMs = 3000; |
| 45 | |
| 46 | // Due to suspend abort not incrementing the monotonic clock, |
| 47 | // we allow another second chance timeout after the first timeout expires. |
| 48 | // |
| 49 | // The total timeout is therefore kDefaultTimeoutDuration + kDefaultSecondChanceDuration, |
| 50 | // and the result is more stable when the monotonic clock increments during suspend. |
| 51 | // |
| 52 | static constexpr int kDefaultSecondChanceDurationMs = 2000; |
| 53 | |
| 54 | /* static */ |
| 55 | TimeCheck::Duration TimeCheck::getDefaultTimeoutDuration() { |
| 56 | static constinit std::atomic<int> defaultTimeoutDurationMs{}; |
| 57 | auto defaultMs = defaultTimeoutDurationMs.load(std::memory_order_relaxed); |
| 58 | if (defaultMs == 0) { |
| 59 | defaultMs = property_get_int32( |
| 60 | "audio.timecheck.timeout_duration_ms", kDefaultTimeoutDurationMs); |
| 61 | if (defaultMs < 1) defaultMs = kDefaultTimeoutDurationMs; |
| 62 | defaultTimeoutDurationMs.store(defaultMs, std::memory_order_relaxed); |
| 63 | } |
| 64 | return std::chrono::milliseconds(defaultMs); |
| 65 | } |
| 66 | |
| 67 | /* static */ |
| 68 | TimeCheck::Duration TimeCheck::getDefaultSecondChanceDuration() { |
| 69 | static constinit std::atomic<int> defaultSecondChanceDurationMs{}; |
| 70 | auto defaultMs = defaultSecondChanceDurationMs.load(std::memory_order_relaxed); |
| 71 | if (defaultMs == 0) { |
| 72 | defaultMs = property_get_int32( |
| 73 | "audio.timecheck.second_chance_duration_ms", kDefaultSecondChanceDurationMs); |
| 74 | if (defaultMs < 1) defaultMs = kDefaultSecondChanceDurationMs; |
| 75 | defaultSecondChanceDurationMs.store(defaultMs, std::memory_order_relaxed); |
| 76 | } |
| 77 | return std::chrono::milliseconds(defaultMs); |
| 78 | } |
| 79 | |
Atneya Nair | f5b6851 | 2022-05-23 20:02:49 -0400 | [diff] [blame] | 80 | // This function appropriately signals a pid to dump a backtrace if we are |
Atneya Nair | cce1420 | 2022-09-05 20:17:50 -0700 | [diff] [blame] | 81 | // running on device (and the HAL exists). If we are not running on an Android |
| 82 | // device, there is no HAL to signal (so we do nothing). |
Atneya Nair | f5b6851 | 2022-05-23 20:02:49 -0400 | [diff] [blame] | 83 | static inline void signalAudioHAL([[maybe_unused]] pid_t pid) { |
Atneya Nair | cce1420 | 2022-09-05 20:17:50 -0700 | [diff] [blame] | 84 | #if defined(__ANDROID__) |
Atneya Nair | f5b6851 | 2022-05-23 20:02:49 -0400 | [diff] [blame] | 85 | sigqueue(pid, DEBUGGER_SIGNAL, {.sival_int = 0}); |
| 86 | #endif |
| 87 | } |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 88 | |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 89 | /** |
| 90 | * Returns the std::string "HH:MM:SS.MSc" from a system_clock time_point. |
| 91 | */ |
Ytai Ben-Tsvi | 34f26b1 | 2021-12-02 13:58:38 -0800 | [diff] [blame] | 92 | std::string formatTime(std::chrono::system_clock::time_point t) { |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 93 | auto time_string = audio_utils_time_string_from_ns( |
| 94 | std::chrono::nanoseconds(t.time_since_epoch()).count()); |
| 95 | |
| 96 | // The time string is 19 characters (including null termination). |
| 97 | // Example: "03-27 16:47:06.187" |
| 98 | // MM DD HH MM SS MS |
| 99 | // We offset by 6 to get HH:MM:SS.MSc |
| 100 | // |
| 101 | return time_string.time + 6; // offset to remove month/day. |
Ytai Ben-Tsvi | 34f26b1 | 2021-12-02 13:58:38 -0800 | [diff] [blame] | 102 | } |
| 103 | |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 104 | /** |
| 105 | * Finds the end of the common time prefix. |
| 106 | * |
| 107 | * This is as an option to remove the common time prefix to avoid |
| 108 | * unnecessary duplicated strings. |
| 109 | * |
| 110 | * \param time1 a time string |
| 111 | * \param time2 a time string |
| 112 | * \return the position where the common time prefix ends. For abbreviated |
| 113 | * printing of time2, offset the character pointer by this position. |
| 114 | */ |
| 115 | static size_t commonTimePrefixPosition(std::string_view time1, std::string_view time2) { |
| 116 | const size_t endPos = std::min(time1.size(), time2.size()); |
| 117 | size_t i; |
| 118 | |
| 119 | // Find location of the first mismatch between strings |
| 120 | for (i = 0; ; ++i) { |
| 121 | if (i == endPos) { |
| 122 | return i; // strings match completely to the length of one of the strings. |
| 123 | } |
| 124 | if (time1[i] != time2[i]) { |
| 125 | break; |
| 126 | } |
| 127 | if (time1[i] == '\0') { |
| 128 | return i; // "printed" strings match completely. No need to check further. |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | // Go backwards until we find a delimeter or space. |
| 133 | for (; i > 0 |
| 134 | && isdigit(time1[i]) // still a number |
| 135 | && time1[i - 1] != ' ' |
| 136 | ; --i) { |
| 137 | } |
| 138 | return i; |
| 139 | } |
| 140 | |
| 141 | /** |
| 142 | * Returns the unique suffix of time2 that isn't present in time1. |
| 143 | * |
| 144 | * If time2 is identical to time1, then an empty string_view is returned. |
| 145 | * This method is used to elide the common prefix when printing times. |
| 146 | */ |
| 147 | std::string_view timeSuffix(std::string_view time1, std::string_view time2) { |
| 148 | const size_t pos = commonTimePrefixPosition(time1, time2); |
| 149 | return time2.substr(pos); |
| 150 | } |
Ytai Ben-Tsvi | 34f26b1 | 2021-12-02 13:58:38 -0800 | [diff] [blame] | 151 | |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 152 | // Audio HAL server pids vector used to generate audio HAL processes tombstone |
| 153 | // when audioserver watchdog triggers. |
| 154 | // We use a lockless storage to avoid potential deadlocks in the context of watchdog |
| 155 | // trigger. |
| 156 | // Protection again simultaneous writes is not needed given one update takes place |
| 157 | // during AudioFlinger construction and other comes necessarily later once the IAudioFlinger |
| 158 | // interface is available. |
| 159 | // The use of an atomic index just guaranties that current vector is fully initialized |
| 160 | // when read. |
| 161 | /* static */ |
| 162 | void TimeCheck::accessAudioHalPids(std::vector<pid_t>* pids, bool update) { |
| 163 | static constexpr int kNumAudioHalPidsVectors = 3; |
| 164 | static std::vector<pid_t> audioHalPids[kNumAudioHalPidsVectors]; |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 165 | static std::atomic<unsigned> curAudioHalPids = 0; |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 166 | |
| 167 | if (update) { |
Eric Laurent | 1ad278b | 2021-03-05 18:09:01 +0100 | [diff] [blame] | 168 | audioHalPids[(curAudioHalPids++ + 1) % kNumAudioHalPidsVectors] = *pids; |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 169 | } else { |
Eric Laurent | 1ad278b | 2021-03-05 18:09:01 +0100 | [diff] [blame] | 170 | *pids = audioHalPids[curAudioHalPids % kNumAudioHalPidsVectors]; |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 171 | } |
| 172 | } |
| 173 | |
| 174 | /* static */ |
| 175 | void TimeCheck::setAudioHalPids(const std::vector<pid_t>& pids) { |
| 176 | accessAudioHalPids(&(const_cast<std::vector<pid_t>&>(pids)), true); |
| 177 | } |
| 178 | |
| 179 | /* static */ |
| 180 | std::vector<pid_t> TimeCheck::getAudioHalPids() { |
| 181 | std::vector<pid_t> pids; |
| 182 | accessAudioHalPids(&pids, false); |
| 183 | return pids; |
| 184 | } |
| 185 | |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 186 | /* static */ |
Mikhail Naganov | 380cf5d | 2024-09-18 14:08:16 -0700 | [diff] [blame] | 187 | std::string TimeCheck::signalAudioHals() { |
| 188 | std::vector<pid_t> pids = getAudioHalPids(); |
| 189 | std::string halPids; |
| 190 | if (pids.size() != 0) { |
| 191 | for (const auto& pid : pids) { |
| 192 | ALOGI("requesting tombstone for pid: %d", pid); |
| 193 | halPids.append(std::to_string(pid)).append(" "); |
| 194 | signalAudioHAL(pid); |
| 195 | } |
| 196 | // Allow time to complete, usually the caller is forcing restart afterwards. |
| 197 | sleep(1); |
| 198 | } |
| 199 | return halPids; |
| 200 | } |
| 201 | |
| 202 | /* static */ |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 203 | TimerThread& TimeCheck::getTimeCheckThread() { |
| 204 | static TimerThread sTimeCheckThread{}; |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 205 | return sTimeCheckThread; |
| 206 | } |
| 207 | |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 208 | /* static */ |
| 209 | std::string TimeCheck::toString() { |
| 210 | // note pending and retired are individually locked for maximum concurrency, |
| 211 | // snapshot is not instantaneous at a single time. |
Atneya Nair | cce1420 | 2022-09-05 20:17:50 -0700 | [diff] [blame] | 212 | return getTimeCheckThread().getSnapshotAnalysis().toString(); |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 213 | } |
| 214 | |
Andy Hung | f8ab093 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 215 | TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, Duration requestedTimeoutDuration, |
| 216 | Duration secondChanceDuration, bool crashOnTimeout) |
Andy Hung | 35f9615 | 2022-07-15 15:18:59 -0700 | [diff] [blame] | 217 | : mTimeCheckHandler{ std::make_shared<TimeCheckHandler>( |
Andy Hung | f8ab093 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 218 | tag, std::move(onTimer), crashOnTimeout, requestedTimeoutDuration, |
Atneya Nair | f5b6851 | 2022-05-23 20:02:49 -0400 | [diff] [blame] | 219 | secondChanceDuration, std::chrono::system_clock::now(), getThreadIdWrapper()) } |
Andy Hung | f8ab093 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 220 | , mTimerHandle(requestedTimeoutDuration.count() == 0 |
| 221 | /* for TimeCheck we don't consider a non-zero secondChanceDuration here */ |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 222 | ? getTimeCheckThread().trackTask(mTimeCheckHandler->tag) |
| 223 | : getTimeCheckThread().scheduleTask( |
| 224 | mTimeCheckHandler->tag, |
| 225 | // Pass in all the arguments by value to this task for safety. |
| 226 | // The thread could call the callback before the constructor is finished. |
| 227 | // The destructor is not blocked on callback. |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 228 | [ timeCheckHandler = mTimeCheckHandler ](TimerThread::Handle timerHandle) { |
| 229 | timeCheckHandler->onTimeout(timerHandle); |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 230 | }, |
Andy Hung | f8ab093 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 231 | requestedTimeoutDuration, |
| 232 | secondChanceDuration)) {} |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 233 | |
| 234 | TimeCheck::~TimeCheck() { |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 235 | if (mTimeCheckHandler) { |
| 236 | mTimeCheckHandler->onCancel(mTimerHandle); |
| 237 | } |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 238 | } |
| 239 | |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 240 | /* static */ |
| 241 | std::string TimeCheck::analyzeTimeouts( |
Andy Hung | cb89910 | 2024-08-06 15:17:16 -0700 | [diff] [blame] | 242 | float requestedTimeoutMs, float secondChanceMs, |
| 243 | float elapsedSteadyMs, float elapsedSystemMs) { |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 244 | // Track any OS clock issues with suspend. |
| 245 | // It is possible that the elapsedSystemMs is much greater than elapsedSteadyMs if |
| 246 | // a suspend occurs; however, we always expect the timeout ms should always be slightly |
| 247 | // less than the elapsed steady ms regardless of whether a suspend occurs or not. |
| 248 | |
Andy Hung | cb89910 | 2024-08-06 15:17:16 -0700 | [diff] [blame] | 249 | const float totalTimeoutMs = requestedTimeoutMs + secondChanceMs; |
| 250 | std::string s = std::format( |
| 251 | "Timeout ms {:.2f} ({:.2f} + {:.2f})" |
| 252 | " elapsed steady ms {:.4f} elapsed system ms {:.4f}", |
| 253 | totalTimeoutMs, requestedTimeoutMs, secondChanceMs, elapsedSteadyMs, elapsedSystemMs); |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 254 | |
| 255 | // Is there something unusual? |
| 256 | static constexpr float TOLERANCE_CONTEXT_SWITCH_MS = 200.f; |
| 257 | |
Andy Hung | cb89910 | 2024-08-06 15:17:16 -0700 | [diff] [blame] | 258 | if (totalTimeoutMs > elapsedSteadyMs || totalTimeoutMs > elapsedSystemMs) { |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 259 | s.append("\nError: early expiration - " |
Andy Hung | cb89910 | 2024-08-06 15:17:16 -0700 | [diff] [blame] | 260 | "totalTimeoutMs should be less than elapsed time"); |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 261 | } |
| 262 | |
| 263 | if (elapsedSteadyMs > elapsedSystemMs + TOLERANCE_CONTEXT_SWITCH_MS) { |
| 264 | s.append("\nWarning: steady time should not advance faster than system time"); |
| 265 | } |
| 266 | |
| 267 | // This has been found in suspend stress testing. |
Andy Hung | cb89910 | 2024-08-06 15:17:16 -0700 | [diff] [blame] | 268 | if (elapsedSteadyMs > totalTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) { |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 269 | s.append("\nWarning: steady time significantly exceeds timeout " |
| 270 | "- possible thread stall or aborted suspend"); |
| 271 | } |
| 272 | |
| 273 | // This has been found in suspend stress testing. |
Andy Hung | cb89910 | 2024-08-06 15:17:16 -0700 | [diff] [blame] | 274 | if (elapsedSystemMs > totalTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) { |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 275 | s.append("\nInformation: system time significantly exceeds timeout " |
| 276 | "- possible suspend"); |
| 277 | } |
| 278 | return s; |
| 279 | } |
| 280 | |
| 281 | // To avoid any potential race conditions, the timer handle |
| 282 | // (expiration = clock steady start + timeout) is passed into the callback. |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 283 | void TimeCheck::TimeCheckHandler::onCancel(TimerThread::Handle timerHandle) const |
| 284 | { |
| 285 | if (TimeCheck::getTimeCheckThread().cancelTask(timerHandle) && onTimer) { |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 286 | const std::chrono::steady_clock::time_point endSteadyTime = |
| 287 | std::chrono::steady_clock::now(); |
| 288 | const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>( |
| 289 | endSteadyTime - timerHandle + timeoutDuration).count(); |
| 290 | // send the elapsed steady time for statistics. |
| 291 | onTimer(false /* timeout */, elapsedSteadyMs); |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 292 | } |
| 293 | } |
| 294 | |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 295 | // To avoid any potential race conditions, the timer handle |
| 296 | // (expiration = clock steady start + timeout) is passed into the callback. |
| 297 | void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) const |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 298 | { |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 299 | const std::chrono::steady_clock::time_point endSteadyTime = std::chrono::steady_clock::now(); |
| 300 | const std::chrono::system_clock::time_point endSystemTime = std::chrono::system_clock::now(); |
| 301 | // timerHandle incorporates the timeout |
| 302 | const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>( |
| 303 | endSteadyTime - (timerHandle - timeoutDuration)).count(); |
| 304 | const float elapsedSystemMs = std::chrono::duration_cast<FloatMs>( |
| 305 | endSystemTime - startSystemTime).count(); |
| 306 | const float requestedTimeoutMs = std::chrono::duration_cast<FloatMs>( |
| 307 | timeoutDuration).count(); |
Andy Hung | f8ab093 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 308 | const float secondChanceMs = std::chrono::duration_cast<FloatMs>( |
| 309 | secondChanceDuration).count(); |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 310 | |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 311 | if (onTimer) { |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 312 | onTimer(true /* timeout */, elapsedSteadyMs); |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 313 | } |
| 314 | |
| 315 | if (!crashOnTimeout) return; |
Ytai Ben-Tsvi | 34f26b1 | 2021-12-02 13:58:38 -0800 | [diff] [blame] | 316 | |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 317 | // Generate the TimerThread summary string early before sending signals to the |
| 318 | // HAL processes which can affect thread behavior. |
Atneya Nair | cce1420 | 2022-09-05 20:17:50 -0700 | [diff] [blame] | 319 | const auto snapshotAnalysis = getTimeCheckThread().getSnapshotAnalysis(4 /* retiredCount */); |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 320 | |
Mikhail Naganov | 380cf5d | 2024-09-18 14:08:16 -0700 | [diff] [blame] | 321 | // Generate audio HAL processes tombstones. |
| 322 | std::string halPids = signalAudioHals(); |
| 323 | if (!halPids.empty()) { |
| 324 | halPids = "HAL pids [ " + halPids + "]"; |
Ytai Ben-Tsvi | 1ea62c9 | 2021-11-10 14:38:27 -0800 | [diff] [blame] | 325 | } else { |
Mikhail Naganov | 380cf5d | 2024-09-18 14:08:16 -0700 | [diff] [blame] | 326 | halPids = "No HAL process pids available"; |
| 327 | ALOGI("%s", (halPids + ", skipping tombstones").c_str()); |
Eric Laurent | 39b09b5 | 2018-06-29 12:24:40 -0700 | [diff] [blame] | 328 | } |
Andy Hung | f45f34c | 2022-03-25 13:09:03 -0700 | [diff] [blame] | 329 | |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 330 | LOG_EVENT_STRING(LOGTAG_AUDIO_BINDER_TIMEOUT, tag.c_str()); |
Andy Hung | 10ac711 | 2022-03-28 08:00:40 -0700 | [diff] [blame] | 331 | |
| 332 | // Create abort message string - caution: this can be very large. |
| 333 | const std::string abortMessage = std::string("TimeCheck timeout for ") |
| 334 | .append(tag) |
Andy Hung | 2aa1510 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 335 | .append(" scheduled ").append(formatTime(startSystemTime)) |
Andy Hung | 10ac711 | 2022-03-28 08:00:40 -0700 | [diff] [blame] | 336 | .append(" on thread ").append(std::to_string(tid)).append("\n") |
Andy Hung | cb89910 | 2024-08-06 15:17:16 -0700 | [diff] [blame] | 337 | .append(analyzeTimeouts(requestedTimeoutMs, secondChanceMs, |
Andy Hung | f8ab093 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 338 | elapsedSteadyMs, elapsedSystemMs)).append("\n") |
Jaideep Sharma | eaf9d9c | 2022-05-13 15:15:27 +0530 | [diff] [blame] | 339 | .append(halPids).append("\n") |
Atneya Nair | cce1420 | 2022-09-05 20:17:50 -0700 | [diff] [blame] | 340 | .append(snapshotAnalysis.toString()); |
Andy Hung | 10ac711 | 2022-03-28 08:00:40 -0700 | [diff] [blame] | 341 | |
Andy Hung | 4c544c3 | 2023-11-03 15:56:24 -0700 | [diff] [blame] | 342 | // In many cases, the initial timeout stack differs from the abort backtrace because |
| 343 | // (1) the time difference between initial timeout and the final abort signal |
| 344 | // and (2) signalling the HAL audio service may cause |
| 345 | // the thread to unblock and continue. |
| 346 | |
Andy Hung | 10ac711 | 2022-03-28 08:00:40 -0700 | [diff] [blame] | 347 | // Note: LOG_ALWAYS_FATAL limits the size of the string - per log/log.h: |
| 348 | // Log message text may be truncated to less than an |
| 349 | // implementation-specific limit (1023 bytes). |
| 350 | // |
| 351 | // Here, we send the string through android-base/logging.h LOG() |
| 352 | // to avoid the size limitation. LOG(FATAL) does an abort whereas |
| 353 | // LOG(FATAL_WITHOUT_ABORT) does not abort. |
| 354 | |
Atneya Nair | cce1420 | 2022-09-05 20:17:50 -0700 | [diff] [blame] | 355 | static constexpr pid_t invalidPid = TimerThread::SnapshotAnalysis::INVALID_PID; |
| 356 | pid_t tidToAbort = invalidPid; |
| 357 | if (snapshotAnalysis.suspectTid != invalidPid) { |
| 358 | tidToAbort = snapshotAnalysis.suspectTid; |
| 359 | } else if (snapshotAnalysis.timeoutTid != invalidPid) { |
| 360 | tidToAbort = snapshotAnalysis.timeoutTid; |
| 361 | } |
| 362 | |
| 363 | LOG(FATAL_WITHOUT_ABORT) << abortMessage; |
| 364 | const auto ret = abortTid(tidToAbort); |
| 365 | if (ret < 0) { |
| 366 | LOG(FATAL) << "TimeCheck thread signal failed, aborting process. " |
| 367 | "errno: " << errno << base::ErrnoNumberAsString(errno); |
| 368 | } |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 369 | } |
| 370 | |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 371 | // Automatically create a TimeCheck class for a class and method. |
Mikhail Naganov | 31d4665 | 2023-01-10 18:29:25 +0000 | [diff] [blame] | 372 | // This is used for Audio HAL support. |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 373 | mediautils::TimeCheck makeTimeCheckStatsForClassMethod( |
| 374 | std::string_view className, std::string_view methodName) { |
| 375 | std::shared_ptr<MethodStatistics<std::string>> statistics = |
| 376 | mediautils::getStatisticsForClass(className); |
| 377 | if (!statistics) return {}; // empty TimeCheck. |
| 378 | return mediautils::TimeCheck( |
Andy Hung | 35f9615 | 2022-07-15 15:18:59 -0700 | [diff] [blame] | 379 | FixedString62(className).append("::").append(methodName), |
| 380 | [ safeMethodName = FixedString30(methodName), |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 381 | stats = std::move(statistics) ] |
| 382 | (bool timeout, float elapsedMs) { |
| 383 | if (timeout) { |
| 384 | ; // ignored, there is no timeout value. |
| 385 | } else { |
Andy Hung | 35f9615 | 2022-07-15 15:18:59 -0700 | [diff] [blame] | 386 | stats->event(safeMethodName.asStringView(), elapsedMs); |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 387 | } |
Andy Hung | f8ab093 | 2022-06-13 19:49:43 -0700 | [diff] [blame] | 388 | }, {} /* timeoutDuration */, {} /* secondChanceDuration */, false /* crashOnTimeout */); |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 389 | } |
| 390 | |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 391 | } // namespace android::mediautils |