blob: 51bd5eb0f7741029cb92d6f1eb1fc0694bc0d24e [file] [log] [blame]
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -08001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "TimerThread"
18
19#include <optional>
Andy Hunga2a1ac32022-03-18 16:12:11 -070020#include <sstream>
21#include <unistd.h>
22#include <vector>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080023
Andy Hungd4265822022-04-01 18:54:32 -070024#include <mediautils/MediaUtilsDelayed.h>
Atneya Nairf5b68512022-05-23 20:02:49 -040025#include <mediautils/TidWrapper.h>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080026#include <mediautils/TimerThread.h>
Andy Hungf8ab0932022-06-13 19:49:43 -070027#include <utils/Log.h>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080028#include <utils/ThreadDefs.h>
29
Andy Hung2aa15102022-06-13 19:49:43 -070030using namespace std::chrono_literals;
31
Andy Hunga2a1ac32022-03-18 16:12:11 -070032namespace android::mediautils {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080033
Andy Hunga2a1ac32022-03-18 16:12:11 -070034extern std::string formatTime(std::chrono::system_clock::time_point t);
35extern std::string_view timeSuffix(std::string_view time1, std::string_view time2);
36
37TimerThread::Handle TimerThread::scheduleTask(
Andy Hungf8ab0932022-06-13 19:49:43 -070038 std::string_view tag, TimerCallback&& func,
39 Duration timeoutDuration, Duration secondChanceDuration) {
Andy Hunga2a1ac32022-03-18 16:12:11 -070040 const auto now = std::chrono::system_clock::now();
Andy Hung2aa15102022-06-13 19:49:43 -070041 auto request = std::make_shared<const Request>(now, now +
42 std::chrono::duration_cast<std::chrono::system_clock::duration>(timeoutDuration),
Atneya Nairf5b68512022-05-23 20:02:49 -040043 secondChanceDuration, getThreadIdWrapper(), tag);
Andy Hung2aa15102022-06-13 19:49:43 -070044 return mMonitorThread.add(std::move(request), std::move(func), timeoutDuration);
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080045}
46
Andy Hung35f96152022-07-15 15:18:59 -070047TimerThread::Handle TimerThread::trackTask(std::string_view tag) {
Andy Hunga2a1ac32022-03-18 16:12:11 -070048 const auto now = std::chrono::system_clock::now();
Andy Hungf8ab0932022-06-13 19:49:43 -070049 auto request = std::make_shared<const Request>(now, now,
Atneya Nairf5b68512022-05-23 20:02:49 -040050 Duration{} /* secondChanceDuration */, getThreadIdWrapper(), tag);
Andy Hunga2a1ac32022-03-18 16:12:11 -070051 return mNoTimeoutMap.add(std::move(request));
52}
53
54bool TimerThread::cancelTask(Handle handle) {
Andy Hung2aa15102022-06-13 19:49:43 -070055 std::shared_ptr<const Request> request = isNoTimeoutHandle(handle) ?
Andy Hunga2a1ac32022-03-18 16:12:11 -070056 mNoTimeoutMap.remove(handle) : mMonitorThread.remove(handle);
57 if (!request) return false;
58 mRetiredQueue.add(std::move(request));
59 return true;
60}
61
62std::string TimerThread::toString(size_t retiredCount) const {
Andy Hungf45f34c2022-03-25 13:09:03 -070063 // Note: These request queues are snapshot very close together but
64 // not at "identical" times as we don't use a class-wide lock.
65
66 std::vector<std::shared_ptr<const Request>> timeoutRequests;
67 std::vector<std::shared_ptr<const Request>> retiredRequests;
68 mTimeoutQueue.copyRequests(timeoutRequests);
69 mRetiredQueue.copyRequests(retiredRequests, retiredCount);
70 std::vector<std::shared_ptr<const Request>> pendingRequests =
71 getPendingRequests();
72
73 struct Analysis analysis = analyzeTimeout(timeoutRequests, pendingRequests);
74 std::string analysisSummary;
75 if (!analysis.summary.empty()) {
76 analysisSummary = std::string("\nanalysis [ ").append(analysis.summary).append(" ]");
77 }
Andy Hung10ac7112022-03-28 08:00:40 -070078 std::string timeoutStack;
79 if (analysis.timeoutTid != -1) {
80 timeoutStack = std::string("\ntimeout(")
81 .append(std::to_string(analysis.timeoutTid)).append(") callstack [\n")
Andy Hungd4265822022-04-01 18:54:32 -070082 .append(getCallStackStringForTid(analysis.timeoutTid)).append("]");
Andy Hung10ac7112022-03-28 08:00:40 -070083 }
84 std::string blockedStack;
85 if (analysis.HALBlockedTid != -1) {
86 blockedStack = std::string("\nblocked(")
87 .append(std::to_string(analysis.HALBlockedTid)).append(") callstack [\n")
Andy Hungd4265822022-04-01 18:54:32 -070088 .append(getCallStackStringForTid(analysis.HALBlockedTid)).append("]");
Andy Hung10ac7112022-03-28 08:00:40 -070089 }
90
Andy Hunga2a1ac32022-03-18 16:12:11 -070091 return std::string("now ")
92 .append(formatTime(std::chrono::system_clock::now()))
Andy Hungf8ab0932022-06-13 19:49:43 -070093 .append("\nsecondChanceCount ")
94 .append(std::to_string(mMonitorThread.getSecondChanceCount()))
Andy Hungf45f34c2022-03-25 13:09:03 -070095 .append(analysisSummary)
96 .append("\ntimeout [ ")
97 .append(requestsToString(timeoutRequests))
98 .append(" ]\npending [ ")
99 .append(requestsToString(pendingRequests))
Andy Hunga2a1ac32022-03-18 16:12:11 -0700100 .append(" ]\nretired [ ")
Andy Hungf45f34c2022-03-25 13:09:03 -0700101 .append(requestsToString(retiredRequests))
Andy Hung10ac7112022-03-28 08:00:40 -0700102 .append(" ]")
103 .append(timeoutStack)
104 .append(blockedStack);
Andy Hunga2a1ac32022-03-18 16:12:11 -0700105}
106
Andy Hungf45f34c2022-03-25 13:09:03 -0700107// A HAL method is where the substring "Hidl" is in the class name.
108// The tag should look like: ... Hidl ... :: ...
109// When the audio HAL is updated to AIDL perhaps we will use instead
110// a global directory of HAL classes.
111//
112// See MethodStatistics.cpp:
113// mediautils::getStatisticsClassesForModule(METHOD_STATISTICS_MODULE_NAME_AUDIO_HIDL)
114//
115/* static */
116bool TimerThread::isRequestFromHal(const std::shared_ptr<const Request>& request) {
Andy Hung35f96152022-07-15 15:18:59 -0700117 const size_t hidlPos = request->tag.asStringView().find("Hidl");
Andy Hungf45f34c2022-03-25 13:09:03 -0700118 if (hidlPos == std::string::npos) return false;
119 // should be a separator afterwards Hidl which indicates the string was in the class.
Andy Hung35f96152022-07-15 15:18:59 -0700120 const size_t separatorPos = request->tag.asStringView().find("::", hidlPos);
Andy Hungf45f34c2022-03-25 13:09:03 -0700121 return separatorPos != std::string::npos;
122}
123
124/* static */
125struct TimerThread::Analysis TimerThread::analyzeTimeout(
126 const std::vector<std::shared_ptr<const Request>>& timeoutRequests,
127 const std::vector<std::shared_ptr<const Request>>& pendingRequests) {
128
129 if (timeoutRequests.empty() || pendingRequests.empty()) return {}; // nothing to say.
130
131 // for now look at last timeout (in our case, the only timeout)
132 const std::shared_ptr<const Request> timeout = timeoutRequests.back();
133
134 // pending Requests that are problematic.
135 std::vector<std::shared_ptr<const Request>> pendingExact;
136 std::vector<std::shared_ptr<const Request>> pendingPossible;
137
Andy Hung2aa15102022-06-13 19:49:43 -0700138 // We look at pending requests that were scheduled no later than kPendingDuration
Andy Hungf45f34c2022-03-25 13:09:03 -0700139 // after the timeout request. This prevents false matches with calls
140 // that naturally block for a short period of time
141 // such as HAL write() and read().
142 //
Andy Hung2aa15102022-06-13 19:49:43 -0700143 constexpr Duration kPendingDuration = 1000ms;
Andy Hungf45f34c2022-03-25 13:09:03 -0700144 for (const auto& pending : pendingRequests) {
145 // If the pending tid is the same as timeout tid, problem identified.
146 if (pending->tid == timeout->tid) {
147 pendingExact.emplace_back(pending);
148 continue;
149 }
150
151 // if the pending tid is scheduled within time limit
Andy Hung2aa15102022-06-13 19:49:43 -0700152 if (pending->scheduled - timeout->scheduled < kPendingDuration) {
Andy Hungf45f34c2022-03-25 13:09:03 -0700153 pendingPossible.emplace_back(pending);
154 }
155 }
156
157 struct Analysis analysis{};
158
159 analysis.timeoutTid = timeout->tid;
160 std::string& summary = analysis.summary;
161 if (!pendingExact.empty()) {
162 const auto& request = pendingExact.front();
163 const bool hal = isRequestFromHal(request);
164
165 if (hal) {
166 summary = std::string("Blocked directly due to HAL call: ")
167 .append(request->toString());
168 }
169 }
170 if (summary.empty() && !pendingPossible.empty()) {
171 for (const auto& request : pendingPossible) {
172 const bool hal = isRequestFromHal(request);
173 if (hal) {
174 // The first blocked call is the most likely one.
175 // Recent calls might be temporarily blocked
176 // calls such as write() or read() depending on kDuration.
177 summary = std::string("Blocked possibly due to HAL call: ")
178 .append(request->toString());
179 analysis.HALBlockedTid = request->tid;
180 }
181 }
182 }
183 return analysis;
184}
185
Andy Hunga2a1ac32022-03-18 16:12:11 -0700186std::vector<std::shared_ptr<const TimerThread::Request>> TimerThread::getPendingRequests() const {
187 constexpr size_t kEstimatedPendingRequests = 8; // approx 128 byte alloc.
188 std::vector<std::shared_ptr<const Request>> pendingRequests;
189 pendingRequests.reserve(kEstimatedPendingRequests); // preallocate vector out of lock.
190
191 // following are internally locked calls, which add to our local pendingRequests.
192 mMonitorThread.copyRequests(pendingRequests);
193 mNoTimeoutMap.copyRequests(pendingRequests);
194
195 // Sort in order of scheduled time.
196 std::sort(pendingRequests.begin(), pendingRequests.end(),
197 [](const std::shared_ptr<const Request>& r1,
198 const std::shared_ptr<const Request>& r2) {
199 return r1->scheduled < r2->scheduled;
200 });
201 return pendingRequests;
202}
203
204std::string TimerThread::pendingToString() const {
205 return requestsToString(getPendingRequests());
206}
207
208std::string TimerThread::retiredToString(size_t n) const {
209 std::vector<std::shared_ptr<const Request>> retiredRequests;
210 mRetiredQueue.copyRequests(retiredRequests, n);
211
212 // Dump to string
213 return requestsToString(retiredRequests);
214}
215
216std::string TimerThread::timeoutToString(size_t n) const {
217 std::vector<std::shared_ptr<const Request>> timeoutRequests;
218 mTimeoutQueue.copyRequests(timeoutRequests, n);
219
220 // Dump to string
221 return requestsToString(timeoutRequests);
222}
223
224std::string TimerThread::Request::toString() const {
225 const auto scheduledString = formatTime(scheduled);
226 const auto deadlineString = formatTime(deadline);
227 return std::string(tag)
228 .append(" scheduled ").append(scheduledString)
229 .append(" deadline ").append(timeSuffix(scheduledString, deadlineString))
230 .append(" tid ").append(std::to_string(tid));
231}
232
233void TimerThread::RequestQueue::add(std::shared_ptr<const Request> request) {
234 std::lock_guard lg(mRQMutex);
235 mRequestQueue.emplace_back(std::chrono::system_clock::now(), std::move(request));
236 if (mRequestQueue.size() > mRequestQueueMax) {
237 mRequestQueue.pop_front();
238 }
239}
240
241void TimerThread::RequestQueue::copyRequests(
242 std::vector<std::shared_ptr<const Request>>& requests, size_t n) const {
243 std::lock_guard lg(mRQMutex);
244 const size_t size = mRequestQueue.size();
245 size_t i = n >= size ? 0 : size - n;
246 for (; i < size; ++i) {
247 const auto &[time, request] = mRequestQueue[i];
248 requests.emplace_back(request);
249 }
250}
251
Andy Hunga2a1ac32022-03-18 16:12:11 -0700252TimerThread::Handle TimerThread::NoTimeoutMap::add(std::shared_ptr<const Request> request) {
253 std::lock_guard lg(mNTMutex);
254 // A unique handle is obtained by mNoTimeoutRequests.fetch_add(1),
255 // This need not be under a lock, but we do so anyhow.
Andy Hung2aa15102022-06-13 19:49:43 -0700256 const Handle handle = getUniqueHandle_l();
Andy Hunga2a1ac32022-03-18 16:12:11 -0700257 mMap[handle] = request;
258 return handle;
259}
260
261std::shared_ptr<const TimerThread::Request> TimerThread::NoTimeoutMap::remove(Handle handle) {
262 std::lock_guard lg(mNTMutex);
263 auto it = mMap.find(handle);
264 if (it == mMap.end()) return {};
265 auto request = it->second;
266 mMap.erase(it);
267 return request;
268}
269
270void TimerThread::NoTimeoutMap::copyRequests(
271 std::vector<std::shared_ptr<const Request>>& requests) const {
272 std::lock_guard lg(mNTMutex);
273 for (const auto &[handle, request] : mMap) {
274 requests.emplace_back(request);
275 }
276}
277
Andy Hunga2a1ac32022-03-18 16:12:11 -0700278TimerThread::MonitorThread::MonitorThread(RequestQueue& timeoutQueue)
279 : mTimeoutQueue(timeoutQueue)
280 , mThread([this] { threadFunc(); }) {
281 pthread_setname_np(mThread.native_handle(), "TimerThread");
282 pthread_setschedprio(mThread.native_handle(), PRIORITY_URGENT_AUDIO);
283}
284
285TimerThread::MonitorThread::~MonitorThread() {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800286 {
287 std::lock_guard _l(mMutex);
288 mShouldExit = true;
289 mCond.notify_all();
290 }
291 mThread.join();
292}
293
Andy Hunga2a1ac32022-03-18 16:12:11 -0700294void TimerThread::MonitorThread::threadFunc() {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800295 std::unique_lock _l(mMutex);
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800296 while (!mShouldExit) {
Andy Hungf8ab0932022-06-13 19:49:43 -0700297 Handle nextDeadline = INVALID_HANDLE;
298 Handle now = INVALID_HANDLE;
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800299 if (!mMonitorRequests.empty()) {
Andy Hungf8ab0932022-06-13 19:49:43 -0700300 nextDeadline = mMonitorRequests.begin()->first;
301 now = std::chrono::steady_clock::now();
302 if (nextDeadline < now) {
303 auto node = mMonitorRequests.extract(mMonitorRequests.begin());
Andy Hunga2a1ac32022-03-18 16:12:11 -0700304 // Deadline has expired, handle the request.
Andy Hungf8ab0932022-06-13 19:49:43 -0700305 auto secondChanceDuration = node.mapped().first->secondChanceDuration;
306 if (secondChanceDuration.count() != 0) {
307 // We now apply the second chance duration to find the clock
308 // monotonic second deadline. The unique key is then the
309 // pair<second_deadline, first_deadline>.
310 //
311 // The second chance prevents a false timeout should there be
312 // any clock monotonic advancement during suspend.
313 auto newHandle = now + secondChanceDuration;
314 ALOGD("%s: TimeCheck second chance applied for %s",
315 __func__, node.mapped().first->tag.c_str()); // should be rare event.
316 mSecondChanceRequests.emplace_hint(mSecondChanceRequests.end(),
317 std::make_pair(newHandle, nextDeadline),
318 std::move(node.mapped()));
319 // increment second chance counter.
320 mSecondChanceCount.fetch_add(1 /* arg */, std::memory_order_relaxed);
321 } else {
322 {
323 _l.unlock();
324 // We add Request to retired queue early so that it can be dumped out.
325 mTimeoutQueue.add(std::move(node.mapped().first));
326 node.mapped().second(nextDeadline);
327 // Caution: we don't hold lock when we call TimerCallback,
328 // but this is the timeout case! We will crash soon,
329 // maybe before returning.
330 // anything left over is released here outside lock.
331 }
332 // reacquire the lock - if something was added, we loop immediately to check.
333 _l.lock();
334 }
335 // always process expiring monitor requests first.
336 continue;
337 }
338 }
339 // now process any second chance requests.
340 if (!mSecondChanceRequests.empty()) {
341 Handle secondDeadline = mSecondChanceRequests.begin()->first.first;
342 if (now == INVALID_HANDLE) now = std::chrono::steady_clock::now();
343 if (secondDeadline < now) {
344 auto node = mSecondChanceRequests.extract(mSecondChanceRequests.begin());
Andy Hunga2a1ac32022-03-18 16:12:11 -0700345 {
Andy Hunga2a1ac32022-03-18 16:12:11 -0700346 _l.unlock();
347 // We add Request to retired queue early so that it can be dumped out.
348 mTimeoutQueue.add(std::move(node.mapped().first));
Andy Hungf8ab0932022-06-13 19:49:43 -0700349 const Handle originalHandle = node.key().second;
350 node.mapped().second(originalHandle);
Andy Hung2aa15102022-06-13 19:49:43 -0700351 // Caution: we don't hold lock when we call TimerCallback.
352 // This is benign issue - we permit concurrent operations
353 // while in the callback to the MonitorQueue.
354 //
355 // Anything left over is released here outside lock.
Andy Hunga2a1ac32022-03-18 16:12:11 -0700356 }
357 // reacquire the lock - if something was added, we loop immediately to check.
358 _l.lock();
359 continue;
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800360 }
Andy Hungf8ab0932022-06-13 19:49:43 -0700361 // update the deadline.
362 if (nextDeadline == INVALID_HANDLE) {
363 nextDeadline = secondDeadline;
364 } else {
365 nextDeadline = std::min(nextDeadline, secondDeadline);
366 }
367 }
368 if (nextDeadline != INVALID_HANDLE) {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800369 mCond.wait_until(_l, nextDeadline);
370 } else {
371 mCond.wait(_l);
372 }
373 }
374}
375
Andy Hunga2a1ac32022-03-18 16:12:11 -0700376TimerThread::Handle TimerThread::MonitorThread::add(
Andy Hung2aa15102022-06-13 19:49:43 -0700377 std::shared_ptr<const Request> request, TimerCallback&& func, Duration timeout) {
Andy Hunga2a1ac32022-03-18 16:12:11 -0700378 std::lock_guard _l(mMutex);
379 const Handle handle = getUniqueHandle_l(timeout);
Andy Hungf8ab0932022-06-13 19:49:43 -0700380 mMonitorRequests.emplace_hint(mMonitorRequests.end(),
381 handle, std::make_pair(std::move(request), std::move(func)));
Andy Hunga2a1ac32022-03-18 16:12:11 -0700382 mCond.notify_all();
383 return handle;
384}
385
386std::shared_ptr<const TimerThread::Request> TimerThread::MonitorThread::remove(Handle handle) {
Andy Hungf8ab0932022-06-13 19:49:43 -0700387 std::pair<std::shared_ptr<const Request>, TimerCallback> data;
Andy Hunga2a1ac32022-03-18 16:12:11 -0700388 std::unique_lock ul(mMutex);
Andy Hungf8ab0932022-06-13 19:49:43 -0700389 if (const auto it = mMonitorRequests.find(handle);
390 it != mMonitorRequests.end()) {
391 data = std::move(it->second);
392 mMonitorRequests.erase(it);
393 ul.unlock(); // manually release lock here so func (data.second)
394 // is released outside of lock.
395 return data.first; // request
Andy Hunga2a1ac32022-03-18 16:12:11 -0700396 }
Andy Hungf8ab0932022-06-13 19:49:43 -0700397
398 // this check is O(N), but since the second chance requests are ordered
399 // in terms of earliest expiration time, we would expect better than average results.
400 for (auto it = mSecondChanceRequests.begin(); it != mSecondChanceRequests.end(); ++it) {
401 if (it->first.second == handle) {
402 data = std::move(it->second);
403 mSecondChanceRequests.erase(it);
404 ul.unlock(); // manually release lock here so func (data.second)
405 // is released outside of lock.
406 return data.first; // request
407 }
408 }
409 return {};
Andy Hunga2a1ac32022-03-18 16:12:11 -0700410}
411
412void TimerThread::MonitorThread::copyRequests(
413 std::vector<std::shared_ptr<const Request>>& requests) const {
414 std::lock_guard lg(mMutex);
415 for (const auto &[deadline, monitorpair] : mMonitorRequests) {
416 requests.emplace_back(monitorpair.first);
417 }
Andy Hungf8ab0932022-06-13 19:49:43 -0700418 // we combine the second map with the first map - this is
419 // everything that is pending on the monitor thread.
420 // The second map will be older than the first map so this
421 // is in order.
422 for (const auto &[deadline, monitorpair] : mSecondChanceRequests) {
423 requests.emplace_back(monitorpair.first);
424 }
Andy Hunga2a1ac32022-03-18 16:12:11 -0700425}
426
427} // namespace android::mediautils