TimeCheck: Add second chance queue
Split the timeout into two waits.
This reduces the chance of false timeout when the monotonic clock
advances without an active CPU during an aborted suspend.
Removed default arguments for TimeCheck constructor
to avoid accidental argument confusion.
Test: overnight stress test
Test: atest libmediautils_test
Test: atest timecheck_tests
Bug: 227594853
Change-Id: If6507a053e5bf15ddd3a3f8f53bdc0d3643e5924
diff --git a/media/utils/include/mediautils/TimeCheck.h b/media/utils/include/mediautils/TimeCheck.h
index 8bee8d1..bdb5337 100644
--- a/media/utils/include/mediautils/TimeCheck.h
+++ b/media/utils/include/mediautils/TimeCheck.h
@@ -16,6 +16,7 @@
#pragma once
+#include <chrono>
#include <vector>
#include <mediautils/TimerThread.h>
@@ -44,7 +45,16 @@
// The default timeout is chosen to be less than system server watchdog timeout
// Note: kDefaultTimeOutMs should be no less than 2 seconds, otherwise spurious timeouts
// may occur with system suspend.
- static constexpr uint32_t kDefaultTimeOutMs = 5000;
+ static constexpr TimeCheck::Duration kDefaultTimeoutDuration = std::chrono::milliseconds(3000);
+
+ // Due to suspend abort not incrementing the monotonic clock,
+ // we allow another second chance timeout after the first timeout expires.
+ //
+ // The total timeout is therefore kDefaultTimeoutDuration + kDefaultSecondChanceDuration,
+ // and the result is more stable when the monotonic clock increments during suspend.
+ //
+ static constexpr TimeCheck::Duration kDefaultSecondChanceDuration =
+ std::chrono::milliseconds(2000);
/**
* TimeCheck is a RAII object which will notify a callback
@@ -64,14 +74,18 @@
* to block for callback completion if it is already in progress
* (for maximum concurrency and reduced deadlock potential), so use proper
* lifetime analysis (e.g. shared or weak pointers).
- * \param requestedTimeoutMs timeout in milliseconds.
+ * \param requestedTimeoutDuration timeout in milliseconds.
* A zero timeout means no timeout is set -
* the callback is called only when
* the TimeCheck object is destroyed or leaves scope.
+ * \param secondChanceDuration additional milliseconds to wait if the first timeout expires.
+ * This is used to prevent false timeouts if the steady (monotonic)
+ * clock advances on aborted suspend.
* \param crashOnTimeout true if the object issues an abort on timeout.
*/
- explicit TimeCheck(std::string_view tag, OnTimerFunc&& onTimer = {},
- uint32_t requestedTimeoutMs = kDefaultTimeOutMs, bool crashOnTimeout = true);
+ explicit TimeCheck(std::string_view tag, OnTimerFunc&& onTimer,
+ Duration requestedTimeoutDuration, Duration secondChanceDuration,
+ bool crashOnTimeout);
TimeCheck() = default;
// Remove copy constructors as there should only be one call to the destructor.
@@ -91,13 +105,14 @@
public:
template <typename S, typename F>
TimeCheckHandler(S&& _tag, F&& _onTimer, bool _crashOnTimeout,
- Duration _timeoutDuration,
+ Duration _timeoutDuration, Duration _secondChanceDuration,
std::chrono::system_clock::time_point _startSystemTime,
pid_t _tid)
: tag(std::forward<S>(_tag))
, onTimer(std::forward<F>(_onTimer))
, crashOnTimeout(_crashOnTimeout)
, timeoutDuration(_timeoutDuration)
+ , secondChanceDuration(_secondChanceDuration)
, startSystemTime(_startSystemTime)
, tid(_tid)
{}
@@ -105,6 +120,7 @@
const OnTimerFunc onTimer;
const bool crashOnTimeout;
const Duration timeoutDuration;
+ const Duration secondChanceDuration;
const std::chrono::system_clock::time_point startSystemTime;
const pid_t tid;
diff --git a/media/utils/include/mediautils/TimerThread.h b/media/utils/include/mediautils/TimerThread.h
index ff3ef4b..c76fa7d 100644
--- a/media/utils/include/mediautils/TimerThread.h
+++ b/media/utils/include/mediautils/TimerThread.h
@@ -130,7 +130,8 @@
* \returns a handle that can be used for cancellation.
*/
Handle scheduleTask(
- std::string_view tag, TimerCallback&& func, Duration timeoutDuration);
+ std::string_view tag, TimerCallback&& func,
+ Duration timeoutDuration, Duration secondChanceDuration);
/**
* Tracks a task that shows up on toString() until cancelled.
@@ -204,24 +205,30 @@
private:
// To minimize movement of data, we pass around shared_ptrs to Requests.
// These are allocated and deallocated outside of the lock.
+ // TODO(b/243839867) consider options to merge Request with the
+ // TimeCheck::TimeCheckHandler struct.
struct Request {
Request(std::chrono::system_clock::time_point _scheduled,
std::chrono::system_clock::time_point _deadline,
+ Duration _secondChanceDuration,
pid_t _tid,
std::string_view _tag)
: scheduled(_scheduled)
, deadline(_deadline)
+ , secondChanceDuration(_secondChanceDuration)
, tid(_tid)
, tag(_tag)
{}
const std::chrono::system_clock::time_point scheduled;
- const std::chrono::system_clock::time_point deadline; // deadline := scheduled + timeout
+ const std::chrono::system_clock::time_point deadline; // deadline := scheduled
+ // + timeoutDuration
+ // + secondChanceDuration
// if deadline == scheduled, no
// timeout, task not executed.
+ Duration secondChanceDuration;
const pid_t tid;
const FixedString62 tag;
-
std::string toString() const;
};
@@ -270,6 +277,7 @@
// call on timeout.
// This class is thread-safe.
class MonitorThread {
+ std::atomic<size_t> mSecondChanceCount{};
mutable std::mutex mMutex;
mutable std::condition_variable mCond GUARDED_BY(mMutex);
@@ -278,6 +286,17 @@
std::map<Handle, std::pair<std::shared_ptr<const Request>, TimerCallback>>
mMonitorRequests GUARDED_BY(mMutex);
+ // Due to monotonic/steady clock inaccuracies during suspend,
+ // we allow an additional second chance waiting time to prevent
+ // false removal.
+
+ // This mSecondChanceRequests queue is almost always empty.
+ // Using a pair with the original handle allows lookup and keeps
+ // the Key unique.
+ std::map<std::pair<Handle /* new */, Handle /* original */>,
+ std::pair<std::shared_ptr<const Request>, TimerCallback>>
+ mSecondChanceRequests GUARDED_BY(mMutex);
+
RequestQueue& mTimeoutQueue; // locked internally, added to when request times out.
// Worker thread variables
@@ -302,6 +321,9 @@
Duration timeout);
std::shared_ptr<const Request> remove(Handle handle);
void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const;
+ size_t getSecondChanceCount() const {
+ return mSecondChanceCount.load(std::memory_order_relaxed);
+ }
};
// Analysis contains info deduced by analysisTimeout().