blob: b760ee2d8d6b0b3278627de4b63cd2cc9164c89e [file] [log] [blame]
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -08001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "TimerThread"
18
19#include <optional>
Andy Hunga2a1ac32022-03-18 16:12:11 -070020#include <sstream>
21#include <unistd.h>
22#include <vector>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080023
Andy Hungd4265822022-04-01 18:54:32 -070024#include <mediautils/MediaUtilsDelayed.h>
Atneya Nairf5b68512022-05-23 20:02:49 -040025#include <mediautils/TidWrapper.h>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080026#include <mediautils/TimerThread.h>
Andy Hungf8ab0932022-06-13 19:49:43 -070027#include <utils/Log.h>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080028#include <utils/ThreadDefs.h>
29
Andy Hung2aa15102022-06-13 19:49:43 -070030using namespace std::chrono_literals;
31
Andy Hunga2a1ac32022-03-18 16:12:11 -070032namespace android::mediautils {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080033
Andy Hunga2a1ac32022-03-18 16:12:11 -070034extern std::string formatTime(std::chrono::system_clock::time_point t);
35extern std::string_view timeSuffix(std::string_view time1, std::string_view time2);
36
37TimerThread::Handle TimerThread::scheduleTask(
Andy Hungf8ab0932022-06-13 19:49:43 -070038 std::string_view tag, TimerCallback&& func,
39 Duration timeoutDuration, Duration secondChanceDuration) {
Andy Hunga2a1ac32022-03-18 16:12:11 -070040 const auto now = std::chrono::system_clock::now();
Andy Hung2aa15102022-06-13 19:49:43 -070041 auto request = std::make_shared<const Request>(now, now +
42 std::chrono::duration_cast<std::chrono::system_clock::duration>(timeoutDuration),
Atneya Nairf5b68512022-05-23 20:02:49 -040043 secondChanceDuration, getThreadIdWrapper(), tag);
Andy Hung2aa15102022-06-13 19:49:43 -070044 return mMonitorThread.add(std::move(request), std::move(func), timeoutDuration);
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080045}
46
Andy Hung35f96152022-07-15 15:18:59 -070047TimerThread::Handle TimerThread::trackTask(std::string_view tag) {
Andy Hunga2a1ac32022-03-18 16:12:11 -070048 const auto now = std::chrono::system_clock::now();
Andy Hungf8ab0932022-06-13 19:49:43 -070049 auto request = std::make_shared<const Request>(now, now,
Atneya Nairf5b68512022-05-23 20:02:49 -040050 Duration{} /* secondChanceDuration */, getThreadIdWrapper(), tag);
Andy Hunga2a1ac32022-03-18 16:12:11 -070051 return mNoTimeoutMap.add(std::move(request));
52}
53
54bool TimerThread::cancelTask(Handle handle) {
Andy Hung2aa15102022-06-13 19:49:43 -070055 std::shared_ptr<const Request> request = isNoTimeoutHandle(handle) ?
Andy Hunga2a1ac32022-03-18 16:12:11 -070056 mNoTimeoutMap.remove(handle) : mMonitorThread.remove(handle);
57 if (!request) return false;
58 mRetiredQueue.add(std::move(request));
59 return true;
60}
61
Atneya Naircce14202022-09-05 20:17:50 -070062
63std::string TimerThread::SnapshotAnalysis::toString() const {
Andy Hungf45f34c2022-03-25 13:09:03 -070064 // Note: These request queues are snapshot very close together but
65 // not at "identical" times as we don't use a class-wide lock.
Atneya Naircce14202022-09-05 20:17:50 -070066 std::string analysisSummary = std::string("\nanalysis [ ").append(description).append(" ]");
Andy Hung10ac7112022-03-28 08:00:40 -070067 std::string timeoutStack;
Andy Hung10ac7112022-03-28 08:00:40 -070068 std::string blockedStack;
Atneya Naircce14202022-09-05 20:17:50 -070069 if (timeoutTid != -1) {
70 timeoutStack = std::string(suspectTid == timeoutTid ? "\ntimeout/blocked(" : "\ntimeout(")
71 .append(std::to_string(timeoutTid)).append(") callstack [\n")
72 .append(getCallStackStringForTid(timeoutTid)).append("]");
73 }
74
75 if (suspectTid != -1 && suspectTid != timeoutTid) {
Andy Hung10ac7112022-03-28 08:00:40 -070076 blockedStack = std::string("\nblocked(")
Atneya Naircce14202022-09-05 20:17:50 -070077 .append(std::to_string(suspectTid)).append(") callstack [\n")
78 .append(getCallStackStringForTid(suspectTid)).append("]");
Andy Hung10ac7112022-03-28 08:00:40 -070079 }
80
Andy Hunga2a1ac32022-03-18 16:12:11 -070081 return std::string("now ")
82 .append(formatTime(std::chrono::system_clock::now()))
Andy Hungf8ab0932022-06-13 19:49:43 -070083 .append("\nsecondChanceCount ")
Atneya Naircce14202022-09-05 20:17:50 -070084 .append(std::to_string(secondChanceCount))
Andy Hungf45f34c2022-03-25 13:09:03 -070085 .append(analysisSummary)
86 .append("\ntimeout [ ")
87 .append(requestsToString(timeoutRequests))
88 .append(" ]\npending [ ")
89 .append(requestsToString(pendingRequests))
Andy Hunga2a1ac32022-03-18 16:12:11 -070090 .append(" ]\nretired [ ")
Andy Hungf45f34c2022-03-25 13:09:03 -070091 .append(requestsToString(retiredRequests))
Andy Hung10ac7112022-03-28 08:00:40 -070092 .append(" ]")
93 .append(timeoutStack)
94 .append(blockedStack);
Andy Hunga2a1ac32022-03-18 16:12:11 -070095}
96
Andy Hungf45f34c2022-03-25 13:09:03 -070097// A HAL method is where the substring "Hidl" is in the class name.
98// The tag should look like: ... Hidl ... :: ...
99// When the audio HAL is updated to AIDL perhaps we will use instead
100// a global directory of HAL classes.
101//
102// See MethodStatistics.cpp:
103// mediautils::getStatisticsClassesForModule(METHOD_STATISTICS_MODULE_NAME_AUDIO_HIDL)
104//
105/* static */
106bool TimerThread::isRequestFromHal(const std::shared_ptr<const Request>& request) {
Andy Hung35f96152022-07-15 15:18:59 -0700107 const size_t hidlPos = request->tag.asStringView().find("Hidl");
Andy Hungf45f34c2022-03-25 13:09:03 -0700108 if (hidlPos == std::string::npos) return false;
109 // should be a separator afterwards Hidl which indicates the string was in the class.
Andy Hung35f96152022-07-15 15:18:59 -0700110 const size_t separatorPos = request->tag.asStringView().find("::", hidlPos);
Andy Hungf45f34c2022-03-25 13:09:03 -0700111 return separatorPos != std::string::npos;
112}
113
Atneya Naircce14202022-09-05 20:17:50 -0700114struct TimerThread::SnapshotAnalysis TimerThread::getSnapshotAnalysis(size_t retiredCount) const {
115 struct SnapshotAnalysis analysis{};
116 // The following snapshot of the TimerThread state will be utilized for
117 // analysis. Note, there is no lock around these calls, so there could be
118 // a state update between them.
119 mTimeoutQueue.copyRequests(analysis.timeoutRequests);
120 mRetiredQueue.copyRequests(analysis.retiredRequests, retiredCount);
121 analysis.pendingRequests = getPendingRequests();
122 analysis.secondChanceCount = mMonitorThread.getSecondChanceCount();
123 // No call has timed out, so there is no analysis to be done.
124 if (analysis.timeoutRequests.empty())
125 return analysis;
Andy Hungf45f34c2022-03-25 13:09:03 -0700126 // for now look at last timeout (in our case, the only timeout)
Atneya Naircce14202022-09-05 20:17:50 -0700127 const std::shared_ptr<const Request> timeout = analysis.timeoutRequests.back();
128 analysis.timeoutTid = timeout->tid;
129 if (analysis.pendingRequests.empty())
130 return analysis;
Andy Hungf45f34c2022-03-25 13:09:03 -0700131 // pending Requests that are problematic.
132 std::vector<std::shared_ptr<const Request>> pendingExact;
133 std::vector<std::shared_ptr<const Request>> pendingPossible;
134
Andy Hung2aa15102022-06-13 19:49:43 -0700135 // We look at pending requests that were scheduled no later than kPendingDuration
Andy Hungf45f34c2022-03-25 13:09:03 -0700136 // after the timeout request. This prevents false matches with calls
137 // that naturally block for a short period of time
138 // such as HAL write() and read().
139 //
Andy Hung2aa15102022-06-13 19:49:43 -0700140 constexpr Duration kPendingDuration = 1000ms;
Atneya Naircce14202022-09-05 20:17:50 -0700141 for (const auto& pending : analysis.pendingRequests) {
Andy Hungf45f34c2022-03-25 13:09:03 -0700142 // If the pending tid is the same as timeout tid, problem identified.
143 if (pending->tid == timeout->tid) {
144 pendingExact.emplace_back(pending);
145 continue;
146 }
147
148 // if the pending tid is scheduled within time limit
Andy Hung2aa15102022-06-13 19:49:43 -0700149 if (pending->scheduled - timeout->scheduled < kPendingDuration) {
Andy Hungf45f34c2022-03-25 13:09:03 -0700150 pendingPossible.emplace_back(pending);
151 }
152 }
153
Atneya Naircce14202022-09-05 20:17:50 -0700154 std::string& description = analysis.description;
Andy Hungf45f34c2022-03-25 13:09:03 -0700155 if (!pendingExact.empty()) {
156 const auto& request = pendingExact.front();
157 const bool hal = isRequestFromHal(request);
158
159 if (hal) {
Atneya Naircce14202022-09-05 20:17:50 -0700160 description = std::string("Blocked directly due to HAL call: ")
Andy Hungf45f34c2022-03-25 13:09:03 -0700161 .append(request->toString());
Atneya Naircce14202022-09-05 20:17:50 -0700162 analysis.suspectTid= request->tid;
Andy Hungf45f34c2022-03-25 13:09:03 -0700163 }
164 }
Atneya Naircce14202022-09-05 20:17:50 -0700165 if (description.empty() && !pendingPossible.empty()) {
Andy Hungf45f34c2022-03-25 13:09:03 -0700166 for (const auto& request : pendingPossible) {
167 const bool hal = isRequestFromHal(request);
168 if (hal) {
169 // The first blocked call is the most likely one.
170 // Recent calls might be temporarily blocked
171 // calls such as write() or read() depending on kDuration.
Atneya Naircce14202022-09-05 20:17:50 -0700172 description = std::string("Blocked possibly due to HAL call: ")
Andy Hungf45f34c2022-03-25 13:09:03 -0700173 .append(request->toString());
Atneya Naircce14202022-09-05 20:17:50 -0700174 analysis.suspectTid= request->tid;
Andy Hungf45f34c2022-03-25 13:09:03 -0700175 }
176 }
177 }
178 return analysis;
179}
180
Andy Hunga2a1ac32022-03-18 16:12:11 -0700181std::vector<std::shared_ptr<const TimerThread::Request>> TimerThread::getPendingRequests() const {
182 constexpr size_t kEstimatedPendingRequests = 8; // approx 128 byte alloc.
183 std::vector<std::shared_ptr<const Request>> pendingRequests;
184 pendingRequests.reserve(kEstimatedPendingRequests); // preallocate vector out of lock.
185
186 // following are internally locked calls, which add to our local pendingRequests.
187 mMonitorThread.copyRequests(pendingRequests);
188 mNoTimeoutMap.copyRequests(pendingRequests);
189
190 // Sort in order of scheduled time.
191 std::sort(pendingRequests.begin(), pendingRequests.end(),
192 [](const std::shared_ptr<const Request>& r1,
193 const std::shared_ptr<const Request>& r2) {
194 return r1->scheduled < r2->scheduled;
195 });
196 return pendingRequests;
197}
198
199std::string TimerThread::pendingToString() const {
200 return requestsToString(getPendingRequests());
201}
202
203std::string TimerThread::retiredToString(size_t n) const {
204 std::vector<std::shared_ptr<const Request>> retiredRequests;
205 mRetiredQueue.copyRequests(retiredRequests, n);
206
207 // Dump to string
208 return requestsToString(retiredRequests);
209}
210
211std::string TimerThread::timeoutToString(size_t n) const {
212 std::vector<std::shared_ptr<const Request>> timeoutRequests;
213 mTimeoutQueue.copyRequests(timeoutRequests, n);
214
215 // Dump to string
216 return requestsToString(timeoutRequests);
217}
218
219std::string TimerThread::Request::toString() const {
220 const auto scheduledString = formatTime(scheduled);
221 const auto deadlineString = formatTime(deadline);
222 return std::string(tag)
223 .append(" scheduled ").append(scheduledString)
224 .append(" deadline ").append(timeSuffix(scheduledString, deadlineString))
225 .append(" tid ").append(std::to_string(tid));
226}
227
228void TimerThread::RequestQueue::add(std::shared_ptr<const Request> request) {
229 std::lock_guard lg(mRQMutex);
230 mRequestQueue.emplace_back(std::chrono::system_clock::now(), std::move(request));
231 if (mRequestQueue.size() > mRequestQueueMax) {
232 mRequestQueue.pop_front();
233 }
234}
235
236void TimerThread::RequestQueue::copyRequests(
237 std::vector<std::shared_ptr<const Request>>& requests, size_t n) const {
238 std::lock_guard lg(mRQMutex);
239 const size_t size = mRequestQueue.size();
240 size_t i = n >= size ? 0 : size - n;
241 for (; i < size; ++i) {
242 const auto &[time, request] = mRequestQueue[i];
243 requests.emplace_back(request);
244 }
245}
246
Andy Hunga2a1ac32022-03-18 16:12:11 -0700247TimerThread::Handle TimerThread::NoTimeoutMap::add(std::shared_ptr<const Request> request) {
248 std::lock_guard lg(mNTMutex);
249 // A unique handle is obtained by mNoTimeoutRequests.fetch_add(1),
250 // This need not be under a lock, but we do so anyhow.
Andy Hung2aa15102022-06-13 19:49:43 -0700251 const Handle handle = getUniqueHandle_l();
Andy Hunga2a1ac32022-03-18 16:12:11 -0700252 mMap[handle] = request;
253 return handle;
254}
255
256std::shared_ptr<const TimerThread::Request> TimerThread::NoTimeoutMap::remove(Handle handle) {
257 std::lock_guard lg(mNTMutex);
258 auto it = mMap.find(handle);
259 if (it == mMap.end()) return {};
260 auto request = it->second;
261 mMap.erase(it);
262 return request;
263}
264
265void TimerThread::NoTimeoutMap::copyRequests(
266 std::vector<std::shared_ptr<const Request>>& requests) const {
267 std::lock_guard lg(mNTMutex);
268 for (const auto &[handle, request] : mMap) {
269 requests.emplace_back(request);
270 }
271}
272
Andy Hunga2a1ac32022-03-18 16:12:11 -0700273TimerThread::MonitorThread::MonitorThread(RequestQueue& timeoutQueue)
274 : mTimeoutQueue(timeoutQueue)
275 , mThread([this] { threadFunc(); }) {
276 pthread_setname_np(mThread.native_handle(), "TimerThread");
277 pthread_setschedprio(mThread.native_handle(), PRIORITY_URGENT_AUDIO);
278}
279
280TimerThread::MonitorThread::~MonitorThread() {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800281 {
282 std::lock_guard _l(mMutex);
283 mShouldExit = true;
284 mCond.notify_all();
285 }
286 mThread.join();
287}
288
Andy Hunga2a1ac32022-03-18 16:12:11 -0700289void TimerThread::MonitorThread::threadFunc() {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800290 std::unique_lock _l(mMutex);
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800291 while (!mShouldExit) {
Andy Hungf8ab0932022-06-13 19:49:43 -0700292 Handle nextDeadline = INVALID_HANDLE;
293 Handle now = INVALID_HANDLE;
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800294 if (!mMonitorRequests.empty()) {
Andy Hungf8ab0932022-06-13 19:49:43 -0700295 nextDeadline = mMonitorRequests.begin()->first;
296 now = std::chrono::steady_clock::now();
297 if (nextDeadline < now) {
298 auto node = mMonitorRequests.extract(mMonitorRequests.begin());
Andy Hunga2a1ac32022-03-18 16:12:11 -0700299 // Deadline has expired, handle the request.
Andy Hungf8ab0932022-06-13 19:49:43 -0700300 auto secondChanceDuration = node.mapped().first->secondChanceDuration;
301 if (secondChanceDuration.count() != 0) {
302 // We now apply the second chance duration to find the clock
303 // monotonic second deadline. The unique key is then the
304 // pair<second_deadline, first_deadline>.
305 //
306 // The second chance prevents a false timeout should there be
307 // any clock monotonic advancement during suspend.
308 auto newHandle = now + secondChanceDuration;
309 ALOGD("%s: TimeCheck second chance applied for %s",
310 __func__, node.mapped().first->tag.c_str()); // should be rare event.
311 mSecondChanceRequests.emplace_hint(mSecondChanceRequests.end(),
312 std::make_pair(newHandle, nextDeadline),
313 std::move(node.mapped()));
314 // increment second chance counter.
315 mSecondChanceCount.fetch_add(1 /* arg */, std::memory_order_relaxed);
316 } else {
317 {
318 _l.unlock();
319 // We add Request to retired queue early so that it can be dumped out.
320 mTimeoutQueue.add(std::move(node.mapped().first));
321 node.mapped().second(nextDeadline);
322 // Caution: we don't hold lock when we call TimerCallback,
323 // but this is the timeout case! We will crash soon,
324 // maybe before returning.
325 // anything left over is released here outside lock.
326 }
327 // reacquire the lock - if something was added, we loop immediately to check.
328 _l.lock();
329 }
330 // always process expiring monitor requests first.
331 continue;
332 }
333 }
334 // now process any second chance requests.
335 if (!mSecondChanceRequests.empty()) {
336 Handle secondDeadline = mSecondChanceRequests.begin()->first.first;
337 if (now == INVALID_HANDLE) now = std::chrono::steady_clock::now();
338 if (secondDeadline < now) {
339 auto node = mSecondChanceRequests.extract(mSecondChanceRequests.begin());
Andy Hunga2a1ac32022-03-18 16:12:11 -0700340 {
Andy Hunga2a1ac32022-03-18 16:12:11 -0700341 _l.unlock();
342 // We add Request to retired queue early so that it can be dumped out.
343 mTimeoutQueue.add(std::move(node.mapped().first));
Andy Hungf8ab0932022-06-13 19:49:43 -0700344 const Handle originalHandle = node.key().second;
345 node.mapped().second(originalHandle);
Andy Hung2aa15102022-06-13 19:49:43 -0700346 // Caution: we don't hold lock when we call TimerCallback.
347 // This is benign issue - we permit concurrent operations
348 // while in the callback to the MonitorQueue.
349 //
350 // Anything left over is released here outside lock.
Andy Hunga2a1ac32022-03-18 16:12:11 -0700351 }
352 // reacquire the lock - if something was added, we loop immediately to check.
353 _l.lock();
354 continue;
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800355 }
Andy Hungf8ab0932022-06-13 19:49:43 -0700356 // update the deadline.
357 if (nextDeadline == INVALID_HANDLE) {
358 nextDeadline = secondDeadline;
359 } else {
360 nextDeadline = std::min(nextDeadline, secondDeadline);
361 }
362 }
363 if (nextDeadline != INVALID_HANDLE) {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800364 mCond.wait_until(_l, nextDeadline);
365 } else {
366 mCond.wait(_l);
367 }
368 }
369}
370
Andy Hunga2a1ac32022-03-18 16:12:11 -0700371TimerThread::Handle TimerThread::MonitorThread::add(
Andy Hung2aa15102022-06-13 19:49:43 -0700372 std::shared_ptr<const Request> request, TimerCallback&& func, Duration timeout) {
Andy Hunga2a1ac32022-03-18 16:12:11 -0700373 std::lock_guard _l(mMutex);
374 const Handle handle = getUniqueHandle_l(timeout);
Andy Hungf8ab0932022-06-13 19:49:43 -0700375 mMonitorRequests.emplace_hint(mMonitorRequests.end(),
376 handle, std::make_pair(std::move(request), std::move(func)));
Andy Hunga2a1ac32022-03-18 16:12:11 -0700377 mCond.notify_all();
378 return handle;
379}
380
381std::shared_ptr<const TimerThread::Request> TimerThread::MonitorThread::remove(Handle handle) {
Andy Hungf8ab0932022-06-13 19:49:43 -0700382 std::pair<std::shared_ptr<const Request>, TimerCallback> data;
Andy Hunga2a1ac32022-03-18 16:12:11 -0700383 std::unique_lock ul(mMutex);
Andy Hungf8ab0932022-06-13 19:49:43 -0700384 if (const auto it = mMonitorRequests.find(handle);
385 it != mMonitorRequests.end()) {
386 data = std::move(it->second);
387 mMonitorRequests.erase(it);
388 ul.unlock(); // manually release lock here so func (data.second)
389 // is released outside of lock.
390 return data.first; // request
Andy Hunga2a1ac32022-03-18 16:12:11 -0700391 }
Andy Hungf8ab0932022-06-13 19:49:43 -0700392
393 // this check is O(N), but since the second chance requests are ordered
394 // in terms of earliest expiration time, we would expect better than average results.
395 for (auto it = mSecondChanceRequests.begin(); it != mSecondChanceRequests.end(); ++it) {
396 if (it->first.second == handle) {
397 data = std::move(it->second);
398 mSecondChanceRequests.erase(it);
399 ul.unlock(); // manually release lock here so func (data.second)
400 // is released outside of lock.
401 return data.first; // request
402 }
403 }
404 return {};
Andy Hunga2a1ac32022-03-18 16:12:11 -0700405}
406
407void TimerThread::MonitorThread::copyRequests(
408 std::vector<std::shared_ptr<const Request>>& requests) const {
409 std::lock_guard lg(mMutex);
410 for (const auto &[deadline, monitorpair] : mMonitorRequests) {
411 requests.emplace_back(monitorpair.first);
412 }
Andy Hungf8ab0932022-06-13 19:49:43 -0700413 // we combine the second map with the first map - this is
414 // everything that is pending on the monitor thread.
415 // The second map will be older than the first map so this
416 // is in order.
417 for (const auto &[deadline, monitorpair] : mSecondChanceRequests) {
418 requests.emplace_back(monitorpair.first);
419 }
Andy Hunga2a1ac32022-03-18 16:12:11 -0700420}
421
422} // namespace android::mediautils