blob: 5e58a3d681169fac5953a5640fb4568c1de53e29 [file] [log] [blame]
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -08001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "TimerThread"
18
19#include <optional>
Andy Hunga2a1ac32022-03-18 16:12:11 -070020#include <sstream>
21#include <unistd.h>
22#include <vector>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080023
Andy Hungd4265822022-04-01 18:54:32 -070024#include <mediautils/MediaUtilsDelayed.h>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080025#include <mediautils/TimerThread.h>
Andy Hung741b3dd2022-06-13 19:49:43 -070026#include <utils/Log.h>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080027#include <utils/ThreadDefs.h>
28
Andy Hungdf1ed5c2022-06-13 19:49:43 -070029using namespace std::chrono_literals;
30
Andy Hunga2a1ac32022-03-18 16:12:11 -070031namespace android::mediautils {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080032
Andy Hunga2a1ac32022-03-18 16:12:11 -070033extern std::string formatTime(std::chrono::system_clock::time_point t);
34extern std::string_view timeSuffix(std::string_view time1, std::string_view time2);
35
36TimerThread::Handle TimerThread::scheduleTask(
Andy Hung741b3dd2022-06-13 19:49:43 -070037 std::string_view tag, TimerCallback&& func,
38 Duration timeoutDuration, Duration secondChanceDuration) {
Andy Hunga2a1ac32022-03-18 16:12:11 -070039 const auto now = std::chrono::system_clock::now();
Andy Hungdf1ed5c2022-06-13 19:49:43 -070040 auto request = std::make_shared<const Request>(now, now +
41 std::chrono::duration_cast<std::chrono::system_clock::duration>(timeoutDuration),
Andy Hung741b3dd2022-06-13 19:49:43 -070042 secondChanceDuration, gettid(), tag);
Andy Hungdf1ed5c2022-06-13 19:49:43 -070043 return mMonitorThread.add(std::move(request), std::move(func), timeoutDuration);
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080044}
45
Andy Hungc8c2dde2022-07-15 15:18:59 -070046TimerThread::Handle TimerThread::trackTask(std::string_view tag) {
Andy Hunga2a1ac32022-03-18 16:12:11 -070047 const auto now = std::chrono::system_clock::now();
Andy Hung741b3dd2022-06-13 19:49:43 -070048 auto request = std::make_shared<const Request>(now, now,
49 Duration{} /* secondChanceDuration */, gettid(), tag);
Andy Hunga2a1ac32022-03-18 16:12:11 -070050 return mNoTimeoutMap.add(std::move(request));
51}
52
53bool TimerThread::cancelTask(Handle handle) {
Andy Hungdf1ed5c2022-06-13 19:49:43 -070054 std::shared_ptr<const Request> request = isNoTimeoutHandle(handle) ?
Andy Hunga2a1ac32022-03-18 16:12:11 -070055 mNoTimeoutMap.remove(handle) : mMonitorThread.remove(handle);
56 if (!request) return false;
57 mRetiredQueue.add(std::move(request));
58 return true;
59}
60
61std::string TimerThread::toString(size_t retiredCount) const {
Andy Hungf45f34c2022-03-25 13:09:03 -070062 // Note: These request queues are snapshot very close together but
63 // not at "identical" times as we don't use a class-wide lock.
64
65 std::vector<std::shared_ptr<const Request>> timeoutRequests;
66 std::vector<std::shared_ptr<const Request>> retiredRequests;
67 mTimeoutQueue.copyRequests(timeoutRequests);
68 mRetiredQueue.copyRequests(retiredRequests, retiredCount);
69 std::vector<std::shared_ptr<const Request>> pendingRequests =
70 getPendingRequests();
71
72 struct Analysis analysis = analyzeTimeout(timeoutRequests, pendingRequests);
73 std::string analysisSummary;
74 if (!analysis.summary.empty()) {
75 analysisSummary = std::string("\nanalysis [ ").append(analysis.summary).append(" ]");
76 }
Andy Hung10ac7112022-03-28 08:00:40 -070077 std::string timeoutStack;
78 if (analysis.timeoutTid != -1) {
79 timeoutStack = std::string("\ntimeout(")
80 .append(std::to_string(analysis.timeoutTid)).append(") callstack [\n")
Andy Hungd4265822022-04-01 18:54:32 -070081 .append(getCallStackStringForTid(analysis.timeoutTid)).append("]");
Andy Hung10ac7112022-03-28 08:00:40 -070082 }
83 std::string blockedStack;
84 if (analysis.HALBlockedTid != -1) {
85 blockedStack = std::string("\nblocked(")
86 .append(std::to_string(analysis.HALBlockedTid)).append(") callstack [\n")
Andy Hungd4265822022-04-01 18:54:32 -070087 .append(getCallStackStringForTid(analysis.HALBlockedTid)).append("]");
Andy Hung10ac7112022-03-28 08:00:40 -070088 }
89
Andy Hunga2a1ac32022-03-18 16:12:11 -070090 return std::string("now ")
91 .append(formatTime(std::chrono::system_clock::now()))
Andy Hung741b3dd2022-06-13 19:49:43 -070092 .append("\nsecondChanceCount ")
93 .append(std::to_string(mMonitorThread.getSecondChanceCount()))
Andy Hungf45f34c2022-03-25 13:09:03 -070094 .append(analysisSummary)
95 .append("\ntimeout [ ")
96 .append(requestsToString(timeoutRequests))
97 .append(" ]\npending [ ")
98 .append(requestsToString(pendingRequests))
Andy Hunga2a1ac32022-03-18 16:12:11 -070099 .append(" ]\nretired [ ")
Andy Hungf45f34c2022-03-25 13:09:03 -0700100 .append(requestsToString(retiredRequests))
Andy Hung10ac7112022-03-28 08:00:40 -0700101 .append(" ]")
102 .append(timeoutStack)
103 .append(blockedStack);
Andy Hunga2a1ac32022-03-18 16:12:11 -0700104}
105
Andy Hungf45f34c2022-03-25 13:09:03 -0700106// A HAL method is where the substring "Hidl" is in the class name.
107// The tag should look like: ... Hidl ... :: ...
108// When the audio HAL is updated to AIDL perhaps we will use instead
109// a global directory of HAL classes.
110//
111// See MethodStatistics.cpp:
112// mediautils::getStatisticsClassesForModule(METHOD_STATISTICS_MODULE_NAME_AUDIO_HIDL)
113//
114/* static */
115bool TimerThread::isRequestFromHal(const std::shared_ptr<const Request>& request) {
Andy Hungc8c2dde2022-07-15 15:18:59 -0700116 const size_t hidlPos = request->tag.asStringView().find("Hidl");
Andy Hungf45f34c2022-03-25 13:09:03 -0700117 if (hidlPos == std::string::npos) return false;
118 // should be a separator afterwards Hidl which indicates the string was in the class.
Andy Hungc8c2dde2022-07-15 15:18:59 -0700119 const size_t separatorPos = request->tag.asStringView().find("::", hidlPos);
Andy Hungf45f34c2022-03-25 13:09:03 -0700120 return separatorPos != std::string::npos;
121}
122
123/* static */
124struct TimerThread::Analysis TimerThread::analyzeTimeout(
125 const std::vector<std::shared_ptr<const Request>>& timeoutRequests,
126 const std::vector<std::shared_ptr<const Request>>& pendingRequests) {
127
128 if (timeoutRequests.empty() || pendingRequests.empty()) return {}; // nothing to say.
129
130 // for now look at last timeout (in our case, the only timeout)
131 const std::shared_ptr<const Request> timeout = timeoutRequests.back();
132
133 // pending Requests that are problematic.
134 std::vector<std::shared_ptr<const Request>> pendingExact;
135 std::vector<std::shared_ptr<const Request>> pendingPossible;
136
Andy Hungdf1ed5c2022-06-13 19:49:43 -0700137 // We look at pending requests that were scheduled no later than kPendingDuration
Andy Hungf45f34c2022-03-25 13:09:03 -0700138 // after the timeout request. This prevents false matches with calls
139 // that naturally block for a short period of time
140 // such as HAL write() and read().
141 //
Andy Hungdf1ed5c2022-06-13 19:49:43 -0700142 constexpr Duration kPendingDuration = 1000ms;
Andy Hungf45f34c2022-03-25 13:09:03 -0700143 for (const auto& pending : pendingRequests) {
144 // If the pending tid is the same as timeout tid, problem identified.
145 if (pending->tid == timeout->tid) {
146 pendingExact.emplace_back(pending);
147 continue;
148 }
149
150 // if the pending tid is scheduled within time limit
Andy Hungdf1ed5c2022-06-13 19:49:43 -0700151 if (pending->scheduled - timeout->scheduled < kPendingDuration) {
Andy Hungf45f34c2022-03-25 13:09:03 -0700152 pendingPossible.emplace_back(pending);
153 }
154 }
155
156 struct Analysis analysis{};
157
158 analysis.timeoutTid = timeout->tid;
159 std::string& summary = analysis.summary;
160 if (!pendingExact.empty()) {
161 const auto& request = pendingExact.front();
162 const bool hal = isRequestFromHal(request);
163
164 if (hal) {
165 summary = std::string("Blocked directly due to HAL call: ")
166 .append(request->toString());
167 }
168 }
169 if (summary.empty() && !pendingPossible.empty()) {
170 for (const auto& request : pendingPossible) {
171 const bool hal = isRequestFromHal(request);
172 if (hal) {
173 // The first blocked call is the most likely one.
174 // Recent calls might be temporarily blocked
175 // calls such as write() or read() depending on kDuration.
176 summary = std::string("Blocked possibly due to HAL call: ")
177 .append(request->toString());
178 analysis.HALBlockedTid = request->tid;
179 }
180 }
181 }
182 return analysis;
183}
184
Andy Hunga2a1ac32022-03-18 16:12:11 -0700185std::vector<std::shared_ptr<const TimerThread::Request>> TimerThread::getPendingRequests() const {
186 constexpr size_t kEstimatedPendingRequests = 8; // approx 128 byte alloc.
187 std::vector<std::shared_ptr<const Request>> pendingRequests;
188 pendingRequests.reserve(kEstimatedPendingRequests); // preallocate vector out of lock.
189
190 // following are internally locked calls, which add to our local pendingRequests.
191 mMonitorThread.copyRequests(pendingRequests);
192 mNoTimeoutMap.copyRequests(pendingRequests);
193
194 // Sort in order of scheduled time.
195 std::sort(pendingRequests.begin(), pendingRequests.end(),
196 [](const std::shared_ptr<const Request>& r1,
197 const std::shared_ptr<const Request>& r2) {
198 return r1->scheduled < r2->scheduled;
199 });
200 return pendingRequests;
201}
202
203std::string TimerThread::pendingToString() const {
204 return requestsToString(getPendingRequests());
205}
206
207std::string TimerThread::retiredToString(size_t n) const {
208 std::vector<std::shared_ptr<const Request>> retiredRequests;
209 mRetiredQueue.copyRequests(retiredRequests, n);
210
211 // Dump to string
212 return requestsToString(retiredRequests);
213}
214
215std::string TimerThread::timeoutToString(size_t n) const {
216 std::vector<std::shared_ptr<const Request>> timeoutRequests;
217 mTimeoutQueue.copyRequests(timeoutRequests, n);
218
219 // Dump to string
220 return requestsToString(timeoutRequests);
221}
222
223std::string TimerThread::Request::toString() const {
224 const auto scheduledString = formatTime(scheduled);
225 const auto deadlineString = formatTime(deadline);
226 return std::string(tag)
227 .append(" scheduled ").append(scheduledString)
228 .append(" deadline ").append(timeSuffix(scheduledString, deadlineString))
229 .append(" tid ").append(std::to_string(tid));
230}
231
232void TimerThread::RequestQueue::add(std::shared_ptr<const Request> request) {
233 std::lock_guard lg(mRQMutex);
234 mRequestQueue.emplace_back(std::chrono::system_clock::now(), std::move(request));
235 if (mRequestQueue.size() > mRequestQueueMax) {
236 mRequestQueue.pop_front();
237 }
238}
239
240void TimerThread::RequestQueue::copyRequests(
241 std::vector<std::shared_ptr<const Request>>& requests, size_t n) const {
242 std::lock_guard lg(mRQMutex);
243 const size_t size = mRequestQueue.size();
244 size_t i = n >= size ? 0 : size - n;
245 for (; i < size; ++i) {
246 const auto &[time, request] = mRequestQueue[i];
247 requests.emplace_back(request);
248 }
249}
250
Andy Hunga2a1ac32022-03-18 16:12:11 -0700251TimerThread::Handle TimerThread::NoTimeoutMap::add(std::shared_ptr<const Request> request) {
252 std::lock_guard lg(mNTMutex);
253 // A unique handle is obtained by mNoTimeoutRequests.fetch_add(1),
254 // This need not be under a lock, but we do so anyhow.
Andy Hungdf1ed5c2022-06-13 19:49:43 -0700255 const Handle handle = getUniqueHandle_l();
Andy Hunga2a1ac32022-03-18 16:12:11 -0700256 mMap[handle] = request;
257 return handle;
258}
259
260std::shared_ptr<const TimerThread::Request> TimerThread::NoTimeoutMap::remove(Handle handle) {
261 std::lock_guard lg(mNTMutex);
262 auto it = mMap.find(handle);
263 if (it == mMap.end()) return {};
264 auto request = it->second;
265 mMap.erase(it);
266 return request;
267}
268
269void TimerThread::NoTimeoutMap::copyRequests(
270 std::vector<std::shared_ptr<const Request>>& requests) const {
271 std::lock_guard lg(mNTMutex);
272 for (const auto &[handle, request] : mMap) {
273 requests.emplace_back(request);
274 }
275}
276
Andy Hunga2a1ac32022-03-18 16:12:11 -0700277TimerThread::MonitorThread::MonitorThread(RequestQueue& timeoutQueue)
278 : mTimeoutQueue(timeoutQueue)
279 , mThread([this] { threadFunc(); }) {
280 pthread_setname_np(mThread.native_handle(), "TimerThread");
281 pthread_setschedprio(mThread.native_handle(), PRIORITY_URGENT_AUDIO);
282}
283
284TimerThread::MonitorThread::~MonitorThread() {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800285 {
286 std::lock_guard _l(mMutex);
287 mShouldExit = true;
288 mCond.notify_all();
289 }
290 mThread.join();
291}
292
Andy Hunga2a1ac32022-03-18 16:12:11 -0700293void TimerThread::MonitorThread::threadFunc() {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800294 std::unique_lock _l(mMutex);
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800295 while (!mShouldExit) {
Andy Hung741b3dd2022-06-13 19:49:43 -0700296 Handle nextDeadline = INVALID_HANDLE;
297 Handle now = INVALID_HANDLE;
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800298 if (!mMonitorRequests.empty()) {
Andy Hung741b3dd2022-06-13 19:49:43 -0700299 nextDeadline = mMonitorRequests.begin()->first;
300 now = std::chrono::steady_clock::now();
301 if (nextDeadline < now) {
302 auto node = mMonitorRequests.extract(mMonitorRequests.begin());
Andy Hunga2a1ac32022-03-18 16:12:11 -0700303 // Deadline has expired, handle the request.
Andy Hung741b3dd2022-06-13 19:49:43 -0700304 auto secondChanceDuration = node.mapped().first->secondChanceDuration;
305 if (secondChanceDuration.count() != 0) {
306 // We now apply the second chance duration to find the clock
307 // monotonic second deadline. The unique key is then the
308 // pair<second_deadline, first_deadline>.
309 //
310 // The second chance prevents a false timeout should there be
311 // any clock monotonic advancement during suspend.
312 auto newHandle = now + secondChanceDuration;
313 ALOGD("%s: TimeCheck second chance applied for %s",
314 __func__, node.mapped().first->tag.c_str()); // should be rare event.
315 mSecondChanceRequests.emplace_hint(mSecondChanceRequests.end(),
316 std::make_pair(newHandle, nextDeadline),
317 std::move(node.mapped()));
318 // increment second chance counter.
319 mSecondChanceCount.fetch_add(1 /* arg */, std::memory_order_relaxed);
320 } else {
321 {
322 _l.unlock();
323 // We add Request to retired queue early so that it can be dumped out.
324 mTimeoutQueue.add(std::move(node.mapped().first));
325 node.mapped().second(nextDeadline);
326 // Caution: we don't hold lock when we call TimerCallback,
327 // but this is the timeout case! We will crash soon,
328 // maybe before returning.
329 // anything left over is released here outside lock.
330 }
331 // reacquire the lock - if something was added, we loop immediately to check.
332 _l.lock();
333 }
334 // always process expiring monitor requests first.
335 continue;
336 }
337 }
338 // now process any second chance requests.
339 if (!mSecondChanceRequests.empty()) {
340 Handle secondDeadline = mSecondChanceRequests.begin()->first.first;
341 if (now == INVALID_HANDLE) now = std::chrono::steady_clock::now();
342 if (secondDeadline < now) {
343 auto node = mSecondChanceRequests.extract(mSecondChanceRequests.begin());
Andy Hunga2a1ac32022-03-18 16:12:11 -0700344 {
Andy Hunga2a1ac32022-03-18 16:12:11 -0700345 _l.unlock();
346 // We add Request to retired queue early so that it can be dumped out.
347 mTimeoutQueue.add(std::move(node.mapped().first));
Andy Hung741b3dd2022-06-13 19:49:43 -0700348 const Handle originalHandle = node.key().second;
349 node.mapped().second(originalHandle);
Andy Hungdf1ed5c2022-06-13 19:49:43 -0700350 // Caution: we don't hold lock when we call TimerCallback.
351 // This is benign issue - we permit concurrent operations
352 // while in the callback to the MonitorQueue.
353 //
354 // Anything left over is released here outside lock.
Andy Hunga2a1ac32022-03-18 16:12:11 -0700355 }
356 // reacquire the lock - if something was added, we loop immediately to check.
357 _l.lock();
358 continue;
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800359 }
Andy Hung741b3dd2022-06-13 19:49:43 -0700360 // update the deadline.
361 if (nextDeadline == INVALID_HANDLE) {
362 nextDeadline = secondDeadline;
363 } else {
364 nextDeadline = std::min(nextDeadline, secondDeadline);
365 }
366 }
367 if (nextDeadline != INVALID_HANDLE) {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800368 mCond.wait_until(_l, nextDeadline);
369 } else {
370 mCond.wait(_l);
371 }
372 }
373}
374
Andy Hunga2a1ac32022-03-18 16:12:11 -0700375TimerThread::Handle TimerThread::MonitorThread::add(
Andy Hungdf1ed5c2022-06-13 19:49:43 -0700376 std::shared_ptr<const Request> request, TimerCallback&& func, Duration timeout) {
Andy Hunga2a1ac32022-03-18 16:12:11 -0700377 std::lock_guard _l(mMutex);
378 const Handle handle = getUniqueHandle_l(timeout);
Andy Hung741b3dd2022-06-13 19:49:43 -0700379 mMonitorRequests.emplace_hint(mMonitorRequests.end(),
380 handle, std::make_pair(std::move(request), std::move(func)));
Andy Hunga2a1ac32022-03-18 16:12:11 -0700381 mCond.notify_all();
382 return handle;
383}
384
385std::shared_ptr<const TimerThread::Request> TimerThread::MonitorThread::remove(Handle handle) {
Andy Hung741b3dd2022-06-13 19:49:43 -0700386 std::pair<std::shared_ptr<const Request>, TimerCallback> data;
Andy Hunga2a1ac32022-03-18 16:12:11 -0700387 std::unique_lock ul(mMutex);
Andy Hung741b3dd2022-06-13 19:49:43 -0700388 if (const auto it = mMonitorRequests.find(handle);
389 it != mMonitorRequests.end()) {
390 data = std::move(it->second);
391 mMonitorRequests.erase(it);
392 ul.unlock(); // manually release lock here so func (data.second)
393 // is released outside of lock.
394 return data.first; // request
Andy Hunga2a1ac32022-03-18 16:12:11 -0700395 }
Andy Hung741b3dd2022-06-13 19:49:43 -0700396
397 // this check is O(N), but since the second chance requests are ordered
398 // in terms of earliest expiration time, we would expect better than average results.
399 for (auto it = mSecondChanceRequests.begin(); it != mSecondChanceRequests.end(); ++it) {
400 if (it->first.second == handle) {
401 data = std::move(it->second);
402 mSecondChanceRequests.erase(it);
403 ul.unlock(); // manually release lock here so func (data.second)
404 // is released outside of lock.
405 return data.first; // request
406 }
407 }
408 return {};
Andy Hunga2a1ac32022-03-18 16:12:11 -0700409}
410
411void TimerThread::MonitorThread::copyRequests(
412 std::vector<std::shared_ptr<const Request>>& requests) const {
413 std::lock_guard lg(mMutex);
414 for (const auto &[deadline, monitorpair] : mMonitorRequests) {
415 requests.emplace_back(monitorpair.first);
416 }
Andy Hung741b3dd2022-06-13 19:49:43 -0700417 // we combine the second map with the first map - this is
418 // everything that is pending on the monitor thread.
419 // The second map will be older than the first map so this
420 // is in order.
421 for (const auto &[deadline, monitorpair] : mSecondChanceRequests) {
422 requests.emplace_back(monitorpair.first);
423 }
Andy Hunga2a1ac32022-03-18 16:12:11 -0700424}
425
426} // namespace android::mediautils