blob: d5be17752871241fc92c92e5dc1f803e79ab9d70 [file] [log] [blame]
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -08001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
Andy Hunga2a1ac32022-03-18 16:12:11 -070019#include <atomic>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080020#include <condition_variable>
Andy Hunga2a1ac32022-03-18 16:12:11 -070021#include <deque>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080022#include <functional>
23#include <map>
Atneya Naircce14202022-09-05 20:17:50 -070024#include <memory>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080025#include <mutex>
Andy Hunga2a1ac32022-03-18 16:12:11 -070026#include <string>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080027#include <thread>
Atneya Naircce14202022-09-05 20:17:50 -070028#include <vector>
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080029
30#include <android-base/thread_annotations.h>
31
Andy Hung35f96152022-07-15 15:18:59 -070032#include <mediautils/FixedString.h>
33
Andy Hunga2a1ac32022-03-18 16:12:11 -070034namespace android::mediautils {
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080035
36/**
37 * A thread for deferred execution of tasks, with cancellation.
38 */
39class TimerThread {
40 public:
Andy Hung2aa15102022-06-13 19:49:43 -070041 // A Handle is a time_point that serves as a unique key to access a queued
42 // request to the TimerThread.
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080043 using Handle = std::chrono::steady_clock::time_point;
44
Andy Hung2aa15102022-06-13 19:49:43 -070045 // Duration is based on steady_clock (typically nanoseconds)
46 // vs the system_clock duration (typically microseconds).
47 using Duration = std::chrono::steady_clock::duration;
48
Andy Hunga2a1ac32022-03-18 16:12:11 -070049 static inline constexpr Handle INVALID_HANDLE =
50 std::chrono::steady_clock::time_point::min();
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -080051
Andy Hung2aa15102022-06-13 19:49:43 -070052 // Handle implementation details:
53 // A Handle represents the timer expiration time based on std::chrono::steady_clock
54 // (clock monotonic). This Handle is computed as now() + timeout.
55 //
56 // The lsb of the Handle time_point is adjusted to indicate whether there is
57 // a timeout action (1) or not (0).
58 //
59
60 template <size_t COUNT>
61 static constexpr bool is_power_of_2_v = COUNT > 0 && (COUNT & (COUNT - 1)) == 0;
62
63 template <size_t COUNT>
64 static constexpr size_t mask_from_count_v = COUNT - 1;
65
66 static constexpr size_t HANDLE_TYPES = 2;
67 // HANDLE_TYPES must be a power of 2.
68 static_assert(is_power_of_2_v<HANDLE_TYPES>);
69
70 // The handle types
71 enum class HANDLE_TYPE : size_t {
72 NO_TIMEOUT = 0,
73 TIMEOUT = 1,
74 };
75
76 static constexpr size_t HANDLE_TYPE_MASK = mask_from_count_v<HANDLE_TYPES>;
77
78 template <typename T>
79 static constexpr auto enum_as_value(T x) {
80 return static_cast<std::underlying_type_t<T>>(x);
81 }
82
83 static inline bool isNoTimeoutHandle(Handle handle) {
84 return (handle.time_since_epoch().count() & HANDLE_TYPE_MASK) ==
85 enum_as_value(HANDLE_TYPE::NO_TIMEOUT);
86 }
87
88 static inline bool isTimeoutHandle(Handle handle) {
89 return (handle.time_since_epoch().count() & HANDLE_TYPE_MASK) ==
90 enum_as_value(HANDLE_TYPE::TIMEOUT);
91 }
92
93 // Returns a unique Handle that doesn't exist in the container.
94 template <size_t MAX_TYPED_HANDLES, size_t HANDLE_TYPE_AS_VALUE, typename C, typename T>
95 static Handle getUniqueHandleForHandleType_l(C container, T timeout) {
96 static_assert(MAX_TYPED_HANDLES > 0 && HANDLE_TYPE_AS_VALUE < MAX_TYPED_HANDLES
97 && is_power_of_2_v<MAX_TYPED_HANDLES>,
98 " handles must be power of two");
99
100 // Our initial handle is the deadline as computed from steady_clock.
101 auto deadline = std::chrono::steady_clock::now() + timeout;
102
103 // We adjust the lsbs by the minimum increment to have the correct
104 // HANDLE_TYPE in the least significant bits.
105 auto remainder = deadline.time_since_epoch().count() & HANDLE_TYPE_MASK;
106 size_t offset = HANDLE_TYPE_AS_VALUE > remainder ? HANDLE_TYPE_AS_VALUE - remainder :
107 MAX_TYPED_HANDLES + HANDLE_TYPE_AS_VALUE - remainder;
108 deadline += std::chrono::steady_clock::duration(offset);
109
110 // To avoid key collisions, advance the handle by MAX_TYPED_HANDLES (the modulus factor)
111 // until the key is unique.
112 while (container.find(deadline) != container.end()) {
113 deadline += std::chrono::steady_clock::duration(MAX_TYPED_HANDLES);
114 }
115 return deadline;
116 }
117
118 // TimerCallback invoked on timeout or cancel.
119 using TimerCallback = std::function<void(Handle)>;
120
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800121 /**
Andy Hunga2a1ac32022-03-18 16:12:11 -0700122 * Schedules a task to be executed in the future (`timeout` duration from now).
123 *
124 * \param tag string associated with the task. This need not be unique,
125 * as the Handle returned is used for cancelling.
126 * \param func callback function that is invoked at the timeout.
Andy Hung2aa15102022-06-13 19:49:43 -0700127 * \param timeoutDuration timeout duration which is converted to milliseconds with at
Andy Hunga2a1ac32022-03-18 16:12:11 -0700128 * least 45 integer bits.
129 * A timeout of 0 (or negative) means the timer never expires
130 * so func() is never called. These tasks are stored internally
131 * and reported in the toString() until manually cancelled.
132 * \returns a handle that can be used for cancellation.
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800133 */
Andy Hunga2a1ac32022-03-18 16:12:11 -0700134 Handle scheduleTask(
Andy Hungf8ab0932022-06-13 19:49:43 -0700135 std::string_view tag, TimerCallback&& func,
136 Duration timeoutDuration, Duration secondChanceDuration);
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800137
138 /**
Andy Hunga2a1ac32022-03-18 16:12:11 -0700139 * Tracks a task that shows up on toString() until cancelled.
140 *
141 * \param tag string associated with the task.
142 * \returns a handle that can be used for cancellation.
143 */
Andy Hung35f96152022-07-15 15:18:59 -0700144 Handle trackTask(std::string_view tag);
Andy Hunga2a1ac32022-03-18 16:12:11 -0700145
146 /**
147 * Cancels a task previously scheduled with scheduleTask()
148 * or trackTask().
149 *
150 * \returns true if cancelled. If the task has already executed
151 * or if the handle doesn't exist, this is a no-op
152 * and returns false.
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800153 */
Andy Hung5c6d68a2022-03-09 21:54:59 -0800154 bool cancelTask(Handle handle);
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800155
Atneya Naircce14202022-09-05 20:17:50 -0700156 struct SnapshotAnalysis;
157 /**
158 * Take a snapshot of the current state of the TimerThread and determine the
159 * potential cause of a deadlock.
160 * \param retiredCount The number of successfully retired calls to capture
161 * (may be many).
162 * \return See below for a description of a SnapShotAnalysis object
163 */
164 SnapshotAnalysis getSnapshotAnalysis(size_t retiredCount = SIZE_MAX) const;
Andy Hunga2a1ac32022-03-18 16:12:11 -0700165
166 /**
167 * Returns a string representation of the TimerThread queue.
168 *
169 * The queue is dumped in order of scheduling (not deadline).
170 */
171 std::string pendingToString() const;
172
173 /**
174 * Returns a string representation of the last retired tasks.
175 *
176 * These tasks from trackTask() or scheduleTask() are
177 * cancelled.
178 *
179 * These are ordered when the task was retired.
180 *
181 * \param n is maximum number of tasks to dump.
182 */
183 std::string retiredToString(size_t n = SIZE_MAX) const;
184
185
186 /**
187 * Returns a string representation of the last timeout tasks.
188 *
189 * These tasks from scheduleTask() which have timed-out.
190 *
191 * These are ordered when the task had timed-out.
192 *
193 * \param n is maximum number of tasks to dump.
194 */
195 std::string timeoutToString(size_t n = SIZE_MAX) const;
196
197 /**
198 * Dumps a container with SmartPointer<Request> to a string.
199 *
200 * "{ Request1 } { Request2} ...{ RequestN }"
201 */
202 template <typename T>
203 static std::string requestsToString(const T& containerRequests) {
204 std::string s;
205 // append seems to be faster than stringstream.
206 // https://stackoverflow.com/questions/18892281/most-optimized-way-of-concatenation-in-strings
207 for (const auto& request : containerRequests) {
208 s.append("{ ").append(request->toString()).append(" } ");
209 }
210 // If not empty, there's an extra space at the end, so we trim it off.
211 if (!s.empty()) s.pop_back();
212 return s;
213 }
214
Andy Hunga2a1ac32022-03-18 16:12:11 -0700215 // To minimize movement of data, we pass around shared_ptrs to Requests.
216 // These are allocated and deallocated outside of the lock.
Andy Hungf8ab0932022-06-13 19:49:43 -0700217 // TODO(b/243839867) consider options to merge Request with the
218 // TimeCheck::TimeCheckHandler struct.
Andy Hunga2a1ac32022-03-18 16:12:11 -0700219 struct Request {
Andy Hung2aa15102022-06-13 19:49:43 -0700220 Request(std::chrono::system_clock::time_point _scheduled,
221 std::chrono::system_clock::time_point _deadline,
Andy Hungf8ab0932022-06-13 19:49:43 -0700222 Duration _secondChanceDuration,
Andy Hung35f96152022-07-15 15:18:59 -0700223 pid_t _tid,
224 std::string_view _tag)
225 : scheduled(_scheduled)
226 , deadline(_deadline)
Andy Hungf8ab0932022-06-13 19:49:43 -0700227 , secondChanceDuration(_secondChanceDuration)
Andy Hung35f96152022-07-15 15:18:59 -0700228 , tid(_tid)
229 , tag(_tag)
230 {}
231
Andy Hunga2a1ac32022-03-18 16:12:11 -0700232 const std::chrono::system_clock::time_point scheduled;
Andy Hungf8ab0932022-06-13 19:49:43 -0700233 const std::chrono::system_clock::time_point deadline; // deadline := scheduled
234 // + timeoutDuration
235 // + secondChanceDuration
Andy Hunga2a1ac32022-03-18 16:12:11 -0700236 // if deadline == scheduled, no
237 // timeout, task not executed.
Andy Hungf8ab0932022-06-13 19:49:43 -0700238 Duration secondChanceDuration;
Andy Hunga2a1ac32022-03-18 16:12:11 -0700239 const pid_t tid;
Andy Hung35f96152022-07-15 15:18:59 -0700240 const FixedString62 tag;
Andy Hunga2a1ac32022-03-18 16:12:11 -0700241 std::string toString() const;
242 };
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800243
Atneya Naircce14202022-09-05 20:17:50 -0700244
245 // SnapshotAnalysis contains info deduced by analysisTimeout().
246
247 struct SnapshotAnalysis {
248 // If we were unable to determine any applicable thread ids,
249 // we leave their value as INVALID_PID.
250 // Note, we use the linux thread id (not pthread), so its type is pid_t.
251 static constexpr pid_t INVALID_PID = -1;
252 // Description of likely issue and/or blocked method.
253 // Empty if no actionable info.
254 std::string description;
255 // Tid of the (latest) monitored thread which has timed out.
256 // This is the thread which the suspect is deduced with respect to.
257 // Most often, this is the thread which an abort is being triggered
258 // from.
259 pid_t timeoutTid = INVALID_PID;
260 // Tid of the (HAL) thread which has likely halted progress, selected
261 // from pendingRequests. May be the same as timeoutTid, if the timed-out
262 // thread directly called into the HAL.
263 pid_t suspectTid = INVALID_PID;
264 // Number of second chances given by the timer thread
265 size_t secondChanceCount;
266 // List of pending requests
267 std::vector<std::shared_ptr<const Request>> pendingRequests;
268 // List of timed-out requests
269 std::vector<std::shared_ptr<const Request>> timeoutRequests;
270 // List of retired requests
271 std::vector<std::shared_ptr<const Request>> retiredRequests;
272 // Dumps the information contained above as well as additional call
273 // stacks where applicable.
274 std::string toString() const;
275 };
276
277 private:
Andy Hunga2a1ac32022-03-18 16:12:11 -0700278 // Deque of requests, in order of add().
279 // This class is thread-safe.
280 class RequestQueue {
281 public:
282 explicit RequestQueue(size_t maxSize)
283 : mRequestQueueMax(maxSize) {}
284
285 void add(std::shared_ptr<const Request>);
286
287 // return up to the last "n" requests retired.
288 void copyRequests(std::vector<std::shared_ptr<const Request>>& requests,
289 size_t n = SIZE_MAX) const;
290
291 private:
292 const size_t mRequestQueueMax;
293 mutable std::mutex mRQMutex;
294 std::deque<std::pair<std::chrono::system_clock::time_point,
295 std::shared_ptr<const Request>>>
296 mRequestQueue GUARDED_BY(mRQMutex);
297 };
298
Andy Hung2aa15102022-06-13 19:49:43 -0700299 // A storage map of tasks without timeouts. There is no TimerCallback
Andy Hunga2a1ac32022-03-18 16:12:11 -0700300 // required, it just tracks the tasks with the tag, scheduled time and the tid.
301 // These tasks show up on a pendingToString() until manually cancelled.
302 class NoTimeoutMap {
Andy Hunga2a1ac32022-03-18 16:12:11 -0700303 mutable std::mutex mNTMutex;
304 std::map<Handle, std::shared_ptr<const Request>> mMap GUARDED_BY(mNTMutex);
Andy Hung2aa15102022-06-13 19:49:43 -0700305 Handle getUniqueHandle_l() REQUIRES(mNTMutex) {
306 return getUniqueHandleForHandleType_l<
307 HANDLE_TYPES, enum_as_value(HANDLE_TYPE::NO_TIMEOUT)>(
308 mMap, Duration{} /* timeout */);
309 }
Andy Hunga2a1ac32022-03-18 16:12:11 -0700310
311 public:
312 bool isValidHandle(Handle handle) const; // lock free
313 Handle add(std::shared_ptr<const Request> request);
314 std::shared_ptr<const Request> remove(Handle handle);
315 void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const;
316 };
317
318 // Monitor thread.
319 // This thread manages shared pointers to Requests and a function to
320 // call on timeout.
321 // This class is thread-safe.
322 class MonitorThread {
Andy Hungf8ab0932022-06-13 19:49:43 -0700323 std::atomic<size_t> mSecondChanceCount{};
Andy Hunga2a1ac32022-03-18 16:12:11 -0700324 mutable std::mutex mMutex;
Andy Hung2aa15102022-06-13 19:49:43 -0700325 mutable std::condition_variable mCond GUARDED_BY(mMutex);
Andy Hunga2a1ac32022-03-18 16:12:11 -0700326
327 // Ordered map of requests based on time of deadline.
328 //
Andy Hung2aa15102022-06-13 19:49:43 -0700329 std::map<Handle, std::pair<std::shared_ptr<const Request>, TimerCallback>>
Andy Hunga2a1ac32022-03-18 16:12:11 -0700330 mMonitorRequests GUARDED_BY(mMutex);
331
Andy Hungf8ab0932022-06-13 19:49:43 -0700332 // Due to monotonic/steady clock inaccuracies during suspend,
333 // we allow an additional second chance waiting time to prevent
334 // false removal.
335
336 // This mSecondChanceRequests queue is almost always empty.
337 // Using a pair with the original handle allows lookup and keeps
338 // the Key unique.
339 std::map<std::pair<Handle /* new */, Handle /* original */>,
340 std::pair<std::shared_ptr<const Request>, TimerCallback>>
341 mSecondChanceRequests GUARDED_BY(mMutex);
342
Andy Hunga2a1ac32022-03-18 16:12:11 -0700343 RequestQueue& mTimeoutQueue; // locked internally, added to when request times out.
344
345 // Worker thread variables
346 bool mShouldExit GUARDED_BY(mMutex) = false;
347
348 // To avoid race with initialization,
349 // mThread should be initialized last as the thread is launched immediately.
350 std::thread mThread;
351
352 void threadFunc();
Andy Hung2aa15102022-06-13 19:49:43 -0700353 Handle getUniqueHandle_l(Duration timeout) REQUIRES(mMutex) {
354 return getUniqueHandleForHandleType_l<
355 HANDLE_TYPES, enum_as_value(HANDLE_TYPE::TIMEOUT)>(
356 mMonitorRequests, timeout);
357 }
Andy Hunga2a1ac32022-03-18 16:12:11 -0700358
359 public:
360 MonitorThread(RequestQueue &timeoutQueue);
361 ~MonitorThread();
362
Andy Hung2aa15102022-06-13 19:49:43 -0700363 Handle add(std::shared_ptr<const Request> request, TimerCallback&& func,
364 Duration timeout);
Andy Hunga2a1ac32022-03-18 16:12:11 -0700365 std::shared_ptr<const Request> remove(Handle handle);
366 void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const;
Andy Hungf8ab0932022-06-13 19:49:43 -0700367 size_t getSecondChanceCount() const {
368 return mSecondChanceCount.load(std::memory_order_relaxed);
369 }
Andy Hunga2a1ac32022-03-18 16:12:11 -0700370 };
371
Andy Hungf45f34c2022-03-25 13:09:03 -0700372
373 // A HAL method is where the substring "Hidl" is in the class name.
374 // The tag should look like: ... Hidl ... :: ...
375 static bool isRequestFromHal(const std::shared_ptr<const Request>& request);
376
Andy Hunga2a1ac32022-03-18 16:12:11 -0700377 std::vector<std::shared_ptr<const Request>> getPendingRequests() const;
378
Andy Hunga2a1ac32022-03-18 16:12:11 -0700379 static constexpr size_t kRetiredQueueMax = 16;
380 RequestQueue mRetiredQueue{kRetiredQueueMax}; // locked internally
381
382 static constexpr size_t kTimeoutQueueMax = 16;
383 RequestQueue mTimeoutQueue{kTimeoutQueueMax}; // locked internally
384
385 NoTimeoutMap mNoTimeoutMap; // locked internally
386
387 MonitorThread mMonitorThread{mTimeoutQueue}; // This should be initialized last because
388 // the thread is launched immediately.
389 // Locked internally.
Ytai Ben-Tsvi1ea62c92021-11-10 14:38:27 -0800390};
391
Andy Hunga2a1ac32022-03-18 16:12:11 -0700392} // namespace android::mediautils