Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2018 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 17 | #define LOG_TAG "TimeCheck" |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 18 | |
Ytai Ben-Tsvi | 1ea62c9 | 2021-11-10 14:38:27 -0800 | [diff] [blame] | 19 | #include <optional> |
| 20 | |
Andy Hung | 10ac711 | 2022-03-28 08:00:40 -0700 | [diff] [blame] | 21 | #include <android-base/logging.h> |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 22 | #include <audio_utils/clock.h> |
Marco Nelissen | cf90b49 | 2019-09-26 11:20:54 -0700 | [diff] [blame] | 23 | #include <mediautils/EventLog.h> |
Andy Hung | c8c2dde | 2022-07-15 15:18:59 -0700 | [diff] [blame] | 24 | #include <mediautils/FixedString.h> |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 25 | #include <mediautils/MethodStatistics.h> |
Ytai Ben-Tsvi | 1ea62c9 | 2021-11-10 14:38:27 -0800 | [diff] [blame] | 26 | #include <mediautils/TimeCheck.h> |
| 27 | #include <utils/Log.h> |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 28 | #include "debuggerd/handler.h" |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 29 | |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 30 | namespace android::mediautils { |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 31 | |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 32 | /** |
| 33 | * Returns the std::string "HH:MM:SS.MSc" from a system_clock time_point. |
| 34 | */ |
Ytai Ben-Tsvi | 34f26b1 | 2021-12-02 13:58:38 -0800 | [diff] [blame] | 35 | std::string formatTime(std::chrono::system_clock::time_point t) { |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 36 | auto time_string = audio_utils_time_string_from_ns( |
| 37 | std::chrono::nanoseconds(t.time_since_epoch()).count()); |
| 38 | |
| 39 | // The time string is 19 characters (including null termination). |
| 40 | // Example: "03-27 16:47:06.187" |
| 41 | // MM DD HH MM SS MS |
| 42 | // We offset by 6 to get HH:MM:SS.MSc |
| 43 | // |
| 44 | return time_string.time + 6; // offset to remove month/day. |
Ytai Ben-Tsvi | 34f26b1 | 2021-12-02 13:58:38 -0800 | [diff] [blame] | 45 | } |
| 46 | |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 47 | /** |
| 48 | * Finds the end of the common time prefix. |
| 49 | * |
| 50 | * This is as an option to remove the common time prefix to avoid |
| 51 | * unnecessary duplicated strings. |
| 52 | * |
| 53 | * \param time1 a time string |
| 54 | * \param time2 a time string |
| 55 | * \return the position where the common time prefix ends. For abbreviated |
| 56 | * printing of time2, offset the character pointer by this position. |
| 57 | */ |
| 58 | static size_t commonTimePrefixPosition(std::string_view time1, std::string_view time2) { |
| 59 | const size_t endPos = std::min(time1.size(), time2.size()); |
| 60 | size_t i; |
| 61 | |
| 62 | // Find location of the first mismatch between strings |
| 63 | for (i = 0; ; ++i) { |
| 64 | if (i == endPos) { |
| 65 | return i; // strings match completely to the length of one of the strings. |
| 66 | } |
| 67 | if (time1[i] != time2[i]) { |
| 68 | break; |
| 69 | } |
| 70 | if (time1[i] == '\0') { |
| 71 | return i; // "printed" strings match completely. No need to check further. |
| 72 | } |
| 73 | } |
| 74 | |
| 75 | // Go backwards until we find a delimeter or space. |
| 76 | for (; i > 0 |
| 77 | && isdigit(time1[i]) // still a number |
| 78 | && time1[i - 1] != ' ' |
| 79 | ; --i) { |
| 80 | } |
| 81 | return i; |
| 82 | } |
| 83 | |
| 84 | /** |
| 85 | * Returns the unique suffix of time2 that isn't present in time1. |
| 86 | * |
| 87 | * If time2 is identical to time1, then an empty string_view is returned. |
| 88 | * This method is used to elide the common prefix when printing times. |
| 89 | */ |
| 90 | std::string_view timeSuffix(std::string_view time1, std::string_view time2) { |
| 91 | const size_t pos = commonTimePrefixPosition(time1, time2); |
| 92 | return time2.substr(pos); |
| 93 | } |
Ytai Ben-Tsvi | 34f26b1 | 2021-12-02 13:58:38 -0800 | [diff] [blame] | 94 | |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 95 | // Audio HAL server pids vector used to generate audio HAL processes tombstone |
| 96 | // when audioserver watchdog triggers. |
| 97 | // We use a lockless storage to avoid potential deadlocks in the context of watchdog |
| 98 | // trigger. |
| 99 | // Protection again simultaneous writes is not needed given one update takes place |
| 100 | // during AudioFlinger construction and other comes necessarily later once the IAudioFlinger |
| 101 | // interface is available. |
| 102 | // The use of an atomic index just guaranties that current vector is fully initialized |
| 103 | // when read. |
| 104 | /* static */ |
| 105 | void TimeCheck::accessAudioHalPids(std::vector<pid_t>* pids, bool update) { |
| 106 | static constexpr int kNumAudioHalPidsVectors = 3; |
| 107 | static std::vector<pid_t> audioHalPids[kNumAudioHalPidsVectors]; |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 108 | static std::atomic<unsigned> curAudioHalPids = 0; |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 109 | |
| 110 | if (update) { |
Eric Laurent | 1ad278b | 2021-03-05 18:09:01 +0100 | [diff] [blame] | 111 | audioHalPids[(curAudioHalPids++ + 1) % kNumAudioHalPidsVectors] = *pids; |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 112 | } else { |
Eric Laurent | 1ad278b | 2021-03-05 18:09:01 +0100 | [diff] [blame] | 113 | *pids = audioHalPids[curAudioHalPids % kNumAudioHalPidsVectors]; |
Eric Laurent | 42896a0 | 2019-09-27 15:40:33 -0700 | [diff] [blame] | 114 | } |
| 115 | } |
| 116 | |
| 117 | /* static */ |
| 118 | void TimeCheck::setAudioHalPids(const std::vector<pid_t>& pids) { |
| 119 | accessAudioHalPids(&(const_cast<std::vector<pid_t>&>(pids)), true); |
| 120 | } |
| 121 | |
| 122 | /* static */ |
| 123 | std::vector<pid_t> TimeCheck::getAudioHalPids() { |
| 124 | std::vector<pid_t> pids; |
| 125 | accessAudioHalPids(&pids, false); |
| 126 | return pids; |
| 127 | } |
| 128 | |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 129 | /* static */ |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 130 | TimerThread& TimeCheck::getTimeCheckThread() { |
| 131 | static TimerThread sTimeCheckThread{}; |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 132 | return sTimeCheckThread; |
| 133 | } |
| 134 | |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 135 | /* static */ |
| 136 | std::string TimeCheck::toString() { |
| 137 | // note pending and retired are individually locked for maximum concurrency, |
| 138 | // snapshot is not instantaneous at a single time. |
| 139 | return getTimeCheckThread().toString(); |
| 140 | } |
| 141 | |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 142 | TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, uint32_t requestedTimeoutMs, |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 143 | bool crashOnTimeout) |
Andy Hung | c8c2dde | 2022-07-15 15:18:59 -0700 | [diff] [blame] | 144 | : mTimeCheckHandler{ std::make_shared<TimeCheckHandler>( |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 145 | tag, std::move(onTimer), crashOnTimeout, std::chrono::milliseconds(requestedTimeoutMs), |
Andy Hung | c8c2dde | 2022-07-15 15:18:59 -0700 | [diff] [blame] | 146 | std::chrono::system_clock::now(), gettid()) } |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 147 | , mTimerHandle(requestedTimeoutMs == 0 |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 148 | ? getTimeCheckThread().trackTask(mTimeCheckHandler->tag) |
| 149 | : getTimeCheckThread().scheduleTask( |
| 150 | mTimeCheckHandler->tag, |
| 151 | // Pass in all the arguments by value to this task for safety. |
| 152 | // The thread could call the callback before the constructor is finished. |
| 153 | // The destructor is not blocked on callback. |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 154 | [ timeCheckHandler = mTimeCheckHandler ](TimerThread::Handle timerHandle) { |
| 155 | timeCheckHandler->onTimeout(timerHandle); |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 156 | }, |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 157 | std::chrono::milliseconds(requestedTimeoutMs))) {} |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 158 | |
| 159 | TimeCheck::~TimeCheck() { |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 160 | if (mTimeCheckHandler) { |
| 161 | mTimeCheckHandler->onCancel(mTimerHandle); |
| 162 | } |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 163 | } |
| 164 | |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 165 | /* static */ |
| 166 | std::string TimeCheck::analyzeTimeouts( |
| 167 | float requestedTimeoutMs, float elapsedSteadyMs, float elapsedSystemMs) { |
| 168 | // Track any OS clock issues with suspend. |
| 169 | // It is possible that the elapsedSystemMs is much greater than elapsedSteadyMs if |
| 170 | // a suspend occurs; however, we always expect the timeout ms should always be slightly |
| 171 | // less than the elapsed steady ms regardless of whether a suspend occurs or not. |
| 172 | |
| 173 | std::string s("Timeout ms "); |
| 174 | s.append(std::to_string(requestedTimeoutMs)) |
| 175 | .append(" elapsed steady ms ").append(std::to_string(elapsedSteadyMs)) |
| 176 | .append(" elapsed system ms ").append(std::to_string(elapsedSystemMs)); |
| 177 | |
| 178 | // Is there something unusual? |
| 179 | static constexpr float TOLERANCE_CONTEXT_SWITCH_MS = 200.f; |
| 180 | |
| 181 | if (requestedTimeoutMs > elapsedSteadyMs || requestedTimeoutMs > elapsedSystemMs) { |
| 182 | s.append("\nError: early expiration - " |
| 183 | "requestedTimeoutMs should be less than elapsed time"); |
| 184 | } |
| 185 | |
| 186 | if (elapsedSteadyMs > elapsedSystemMs + TOLERANCE_CONTEXT_SWITCH_MS) { |
| 187 | s.append("\nWarning: steady time should not advance faster than system time"); |
| 188 | } |
| 189 | |
| 190 | // This has been found in suspend stress testing. |
| 191 | if (elapsedSteadyMs > requestedTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) { |
| 192 | s.append("\nWarning: steady time significantly exceeds timeout " |
| 193 | "- possible thread stall or aborted suspend"); |
| 194 | } |
| 195 | |
| 196 | // This has been found in suspend stress testing. |
| 197 | if (elapsedSystemMs > requestedTimeoutMs + TOLERANCE_CONTEXT_SWITCH_MS) { |
| 198 | s.append("\nInformation: system time significantly exceeds timeout " |
| 199 | "- possible suspend"); |
| 200 | } |
| 201 | return s; |
| 202 | } |
| 203 | |
| 204 | // To avoid any potential race conditions, the timer handle |
| 205 | // (expiration = clock steady start + timeout) is passed into the callback. |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 206 | void TimeCheck::TimeCheckHandler::onCancel(TimerThread::Handle timerHandle) const |
| 207 | { |
| 208 | if (TimeCheck::getTimeCheckThread().cancelTask(timerHandle) && onTimer) { |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 209 | const std::chrono::steady_clock::time_point endSteadyTime = |
| 210 | std::chrono::steady_clock::now(); |
| 211 | const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>( |
| 212 | endSteadyTime - timerHandle + timeoutDuration).count(); |
| 213 | // send the elapsed steady time for statistics. |
| 214 | onTimer(false /* timeout */, elapsedSteadyMs); |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 215 | } |
| 216 | } |
| 217 | |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 218 | // To avoid any potential race conditions, the timer handle |
| 219 | // (expiration = clock steady start + timeout) is passed into the callback. |
| 220 | void TimeCheck::TimeCheckHandler::onTimeout(TimerThread::Handle timerHandle) const |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 221 | { |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 222 | const std::chrono::steady_clock::time_point endSteadyTime = std::chrono::steady_clock::now(); |
| 223 | const std::chrono::system_clock::time_point endSystemTime = std::chrono::system_clock::now(); |
| 224 | // timerHandle incorporates the timeout |
| 225 | const float elapsedSteadyMs = std::chrono::duration_cast<FloatMs>( |
| 226 | endSteadyTime - (timerHandle - timeoutDuration)).count(); |
| 227 | const float elapsedSystemMs = std::chrono::duration_cast<FloatMs>( |
| 228 | endSystemTime - startSystemTime).count(); |
| 229 | const float requestedTimeoutMs = std::chrono::duration_cast<FloatMs>( |
| 230 | timeoutDuration).count(); |
| 231 | |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 232 | if (onTimer) { |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 233 | onTimer(true /* timeout */, elapsedSteadyMs); |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 234 | } |
| 235 | |
| 236 | if (!crashOnTimeout) return; |
Ytai Ben-Tsvi | 34f26b1 | 2021-12-02 13:58:38 -0800 | [diff] [blame] | 237 | |
Andy Hung | a2a1ac3 | 2022-03-18 16:12:11 -0700 | [diff] [blame] | 238 | // Generate the TimerThread summary string early before sending signals to the |
| 239 | // HAL processes which can affect thread behavior. |
| 240 | const std::string summary = getTimeCheckThread().toString(4 /* retiredCount */); |
| 241 | |
Ytai Ben-Tsvi | 1ea62c9 | 2021-11-10 14:38:27 -0800 | [diff] [blame] | 242 | // Generate audio HAL processes tombstones and allow time to complete |
| 243 | // before forcing restart |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 244 | std::vector<pid_t> pids = TimeCheck::getAudioHalPids(); |
Jaideep Sharma | eaf9d9c | 2022-05-13 15:15:27 +0530 | [diff] [blame] | 245 | std::string halPids = "HAL pids [ "; |
Ytai Ben-Tsvi | 1ea62c9 | 2021-11-10 14:38:27 -0800 | [diff] [blame] | 246 | if (pids.size() != 0) { |
| 247 | for (const auto& pid : pids) { |
| 248 | ALOGI("requesting tombstone for pid: %d", pid); |
Jaideep Sharma | eaf9d9c | 2022-05-13 15:15:27 +0530 | [diff] [blame] | 249 | halPids.append(std::to_string(pid)).append(" "); |
Ytai Ben-Tsvi | 1ea62c9 | 2021-11-10 14:38:27 -0800 | [diff] [blame] | 250 | sigqueue(pid, DEBUGGER_SIGNAL, {.sival_int = 0}); |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 251 | } |
Ytai Ben-Tsvi | 1ea62c9 | 2021-11-10 14:38:27 -0800 | [diff] [blame] | 252 | sleep(1); |
| 253 | } else { |
| 254 | ALOGI("No HAL process pid available, skipping tombstones"); |
Eric Laurent | 39b09b5 | 2018-06-29 12:24:40 -0700 | [diff] [blame] | 255 | } |
Jaideep Sharma | eaf9d9c | 2022-05-13 15:15:27 +0530 | [diff] [blame] | 256 | halPids.append("]"); |
Andy Hung | f45f34c | 2022-03-25 13:09:03 -0700 | [diff] [blame] | 257 | |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 258 | LOG_EVENT_STRING(LOGTAG_AUDIO_BINDER_TIMEOUT, tag.c_str()); |
Andy Hung | 10ac711 | 2022-03-28 08:00:40 -0700 | [diff] [blame] | 259 | |
| 260 | // Create abort message string - caution: this can be very large. |
| 261 | const std::string abortMessage = std::string("TimeCheck timeout for ") |
| 262 | .append(tag) |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 263 | .append(" scheduled ").append(formatTime(startSystemTime)) |
Andy Hung | 10ac711 | 2022-03-28 08:00:40 -0700 | [diff] [blame] | 264 | .append(" on thread ").append(std::to_string(tid)).append("\n") |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 265 | .append(analyzeTimeouts( |
| 266 | requestedTimeoutMs, elapsedSteadyMs, elapsedSystemMs)).append("\n") |
Jaideep Sharma | eaf9d9c | 2022-05-13 15:15:27 +0530 | [diff] [blame] | 267 | .append(halPids).append("\n") |
Andy Hung | 10ac711 | 2022-03-28 08:00:40 -0700 | [diff] [blame] | 268 | .append(summary); |
| 269 | |
| 270 | // Note: LOG_ALWAYS_FATAL limits the size of the string - per log/log.h: |
| 271 | // Log message text may be truncated to less than an |
| 272 | // implementation-specific limit (1023 bytes). |
| 273 | // |
| 274 | // Here, we send the string through android-base/logging.h LOG() |
| 275 | // to avoid the size limitation. LOG(FATAL) does an abort whereas |
| 276 | // LOG(FATAL_WITHOUT_ABORT) does not abort. |
| 277 | |
| 278 | LOG(FATAL) << abortMessage; |
Eric Laurent | 3528c93 | 2018-02-23 17:17:22 -0800 | [diff] [blame] | 279 | } |
| 280 | |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 281 | // Automatically create a TimeCheck class for a class and method. |
| 282 | // This is used for Audio HIDL support. |
| 283 | mediautils::TimeCheck makeTimeCheckStatsForClassMethod( |
| 284 | std::string_view className, std::string_view methodName) { |
| 285 | std::shared_ptr<MethodStatistics<std::string>> statistics = |
| 286 | mediautils::getStatisticsForClass(className); |
| 287 | if (!statistics) return {}; // empty TimeCheck. |
| 288 | return mediautils::TimeCheck( |
Andy Hung | c8c2dde | 2022-07-15 15:18:59 -0700 | [diff] [blame] | 289 | FixedString62(className).append("::").append(methodName), |
| 290 | [ safeMethodName = FixedString30(methodName), |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 291 | stats = std::move(statistics) ] |
| 292 | (bool timeout, float elapsedMs) { |
| 293 | if (timeout) { |
| 294 | ; // ignored, there is no timeout value. |
| 295 | } else { |
Andy Hung | c8c2dde | 2022-07-15 15:18:59 -0700 | [diff] [blame] | 296 | stats->event(safeMethodName.asStringView(), elapsedMs); |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 297 | } |
Andy Hung | df1ed5c | 2022-06-13 19:49:43 -0700 | [diff] [blame^] | 298 | }, 0 /* requestedTimeoutMs */); |
Andy Hung | 224f82f | 2022-03-22 00:00:49 -0700 | [diff] [blame] | 299 | } |
| 300 | |
Andy Hung | 5c6d68a | 2022-03-09 21:54:59 -0800 | [diff] [blame] | 301 | } // namespace android::mediautils |