TimeCheck: Add second chance queue

Split the timeout into two waits.

This reduces the chance of a false timeout when the monotonic clock
advances without an active CPU during an aborted suspend.

Removed default arguments for TimeCheck constructor
to avoid accidental argument confusion.

Test: overnight stress test
Test: atest libmediautils_test
Test: atest timecheck_tests
Bug: 227594853
Merged-In: If6507a053e5bf15ddd3a3f8f53bdc0d3643e5924
Change-Id: If6507a053e5bf15ddd3a3f8f53bdc0d3643e5924
diff --git a/media/utils/TimeCheck.cpp b/media/utils/TimeCheck.cpp
index 0a7790f..6823f4f 100644
--- a/media/utils/TimeCheck.cpp
+++ b/media/utils/TimeCheck.cpp
@@ -139,12 +139,13 @@
     return getTimeCheckThread().toString();
 }
 
-TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, uint32_t requestedTimeoutMs,
-        bool crashOnTimeout)
+TimeCheck::TimeCheck(std::string_view tag, OnTimerFunc&& onTimer, Duration requestedTimeoutDuration,
+        Duration secondChanceDuration, bool crashOnTimeout)
     : mTimeCheckHandler{ std::make_shared<TimeCheckHandler>(
-            tag, std::move(onTimer), crashOnTimeout, std::chrono::milliseconds(requestedTimeoutMs),
-            std::chrono::system_clock::now(), gettid()) }
-    , mTimerHandle(requestedTimeoutMs == 0
+            tag, std::move(onTimer), crashOnTimeout, requestedTimeoutDuration,
+            secondChanceDuration, std::chrono::system_clock::now(), gettid()) }
+    , mTimerHandle(requestedTimeoutDuration.count() == 0
+              /* for TimeCheck we don't consider a non-zero secondChanceDuration here */
               ? getTimeCheckThread().trackTask(mTimeCheckHandler->tag)
               : getTimeCheckThread().scheduleTask(
                       mTimeCheckHandler->tag,
@@ -154,7 +155,8 @@
                       [ timeCheckHandler = mTimeCheckHandler ](TimerThread::Handle timerHandle) {
                           timeCheckHandler->onTimeout(timerHandle);
                       },
-                      std::chrono::milliseconds(requestedTimeoutMs))) {}
+                      requestedTimeoutDuration,
+                      secondChanceDuration)) {}
 
 TimeCheck::~TimeCheck() {
     if (mTimeCheckHandler) {
@@ -228,6 +230,8 @@
             endSystemTime - startSystemTime).count();
     const float requestedTimeoutMs = std::chrono::duration_cast<FloatMs>(
             timeoutDuration).count();
+    const float secondChanceMs = std::chrono::duration_cast<FloatMs>(
+            secondChanceDuration).count();
 
     if (onTimer) {
         onTimer(true /* timeout */, elapsedSteadyMs);
@@ -262,8 +266,8 @@
             .append(tag)
             .append(" scheduled ").append(formatTime(startSystemTime))
             .append(" on thread ").append(std::to_string(tid)).append("\n")
-            .append(analyzeTimeouts(
-                    requestedTimeoutMs, elapsedSteadyMs, elapsedSystemMs)).append("\n")
+            .append(analyzeTimeouts(requestedTimeoutMs + secondChanceMs,
+                    elapsedSteadyMs, elapsedSystemMs)).append("\n")
             .append(halPids).append("\n")
             .append(summary);
 
@@ -295,7 +299,7 @@
                     } else {
                         stats->event(safeMethodName.asStringView(), elapsedMs);
                     }
-            }, 0 /* requestedTimeoutMs */);
+            }, {} /* timeoutDuration */, {} /* secondChanceDuration */, false /* crashOnTimeout */);
 }
 
 }  // namespace android::mediautils
diff --git a/media/utils/TimerThread-test.cpp b/media/utils/TimerThread-test.cpp
index deb743a..9452c07 100644
--- a/media/utils/TimerThread-test.cpp
+++ b/media/utils/TimerThread-test.cpp
@@ -33,12 +33,27 @@
     return std::count(s.begin(), s.end(), c);
 }
 
-TEST(TimerThread, Basic) {
+
+// Split msec between the timeout (frac, plus 1 ms) and the second chance time (1 - frac).
+// This tests expiration times weighted between timeout and the second chance time.
+#define DISTRIBUTE_TIMEOUT_SECONDCHANCE_MS_FRAC(msec, frac) \
+    std::chrono::milliseconds(int((msec) * (frac)) + 1), \
+    std::chrono::milliseconds(int((msec) * (1.f - (frac))))
+
+// The TimerThreadTest is parameterized on a fraction between 0.f and 1.f which
+// is how the total timeout time is split between the first timeout and the second chance time.
+//
+class TimerThreadTest : public ::testing::TestWithParam<float> {
+protected:
+
+static void testBasic() {
+    const auto frac = GetParam();
+
     std::atomic<bool> taskRan = false;
     TimerThread thread;
     TimerThread::Handle handle =
             thread.scheduleTask("Basic", [&taskRan](TimerThread::Handle handle __unused) {
-                    taskRan = true; }, 100ms);
+                    taskRan = true; }, DISTRIBUTE_TIMEOUT_SECONDCHANCE_MS_FRAC(100, frac));
     ASSERT_TRUE(TimerThread::isTimeoutHandle(handle));
     std::this_thread::sleep_for(100ms - kJitter);
     ASSERT_FALSE(taskRan);
@@ -47,12 +62,14 @@
     ASSERT_EQ(1, countChars(thread.retiredToString(), REQUEST_START));
 }
 
-TEST(TimerThread, Cancel) {
+static void testCancel() {
+    const auto frac = GetParam();
+
     std::atomic<bool> taskRan = false;
     TimerThread thread;
     TimerThread::Handle handle =
             thread.scheduleTask("Cancel", [&taskRan](TimerThread::Handle handle __unused) {
-                    taskRan = true; }, 100ms);
+                    taskRan = true; }, DISTRIBUTE_TIMEOUT_SECONDCHANCE_MS_FRAC(100, frac));
     ASSERT_TRUE(TimerThread::isTimeoutHandle(handle));
     std::this_thread::sleep_for(100ms - kJitter);
     ASSERT_FALSE(taskRan);
@@ -62,13 +79,16 @@
     ASSERT_EQ(1, countChars(thread.retiredToString(), REQUEST_START));
 }
 
-TEST(TimerThread, CancelAfterRun) {
+static void testCancelAfterRun() {
+    const auto frac = GetParam();
+
     std::atomic<bool> taskRan = false;
     TimerThread thread;
     TimerThread::Handle handle =
             thread.scheduleTask("CancelAfterRun",
                     [&taskRan](TimerThread::Handle handle __unused) {
-                            taskRan = true; }, 100ms);
+                            taskRan = true; },
+                            DISTRIBUTE_TIMEOUT_SECONDCHANCE_MS_FRAC(100, frac));
     ASSERT_TRUE(TimerThread::isTimeoutHandle(handle));
     std::this_thread::sleep_for(100ms + kJitter);
     ASSERT_TRUE(taskRan);
@@ -76,83 +96,70 @@
     ASSERT_EQ(1, countChars(thread.retiredToString(), REQUEST_START));
 }
 
-TEST(TimerThread, MultipleTasks) {
+static void testMultipleTasks() {
+    const auto frac = GetParam();
+
     std::array<std::atomic<bool>, 6> taskRan{};
     TimerThread thread;
 
     auto startTime = std::chrono::steady_clock::now();
 
     thread.scheduleTask("0", [&taskRan](TimerThread::Handle handle __unused) {
-            taskRan[0] = true; }, 300ms);
+            taskRan[0] = true; }, DISTRIBUTE_TIMEOUT_SECONDCHANCE_MS_FRAC(300, frac));
     thread.scheduleTask("1", [&taskRan](TimerThread::Handle handle __unused) {
-            taskRan[1] = true; }, 100ms);
+            taskRan[1] = true; }, DISTRIBUTE_TIMEOUT_SECONDCHANCE_MS_FRAC(100, frac));
     thread.scheduleTask("2", [&taskRan](TimerThread::Handle handle __unused) {
-            taskRan[2] = true; }, 200ms);
+            taskRan[2] = true; }, DISTRIBUTE_TIMEOUT_SECONDCHANCE_MS_FRAC(200, frac));
     thread.scheduleTask("3", [&taskRan](TimerThread::Handle handle __unused) {
-            taskRan[3] = true; }, 400ms);
+            taskRan[3] = true; }, DISTRIBUTE_TIMEOUT_SECONDCHANCE_MS_FRAC(400, frac));
     auto handle4 = thread.scheduleTask("4", [&taskRan](TimerThread::Handle handle __unused) {
-            taskRan[4] = true; }, 200ms);
+            taskRan[4] = true; }, DISTRIBUTE_TIMEOUT_SECONDCHANCE_MS_FRAC(200, frac));
     thread.scheduleTask("5", [&taskRan](TimerThread::Handle handle __unused) {
-            taskRan[5] = true; }, 200ms);
+            taskRan[5] = true; }, DISTRIBUTE_TIMEOUT_SECONDCHANCE_MS_FRAC(200, frac));
 
     // 6 tasks pending
     ASSERT_EQ(6, countChars(thread.pendingToString(), REQUEST_START));
     // 0 tasks completed
     ASSERT_EQ(0, countChars(thread.retiredToString(), REQUEST_START));
 
+    // None of the tasks are expected to have finished at the start.
+    std::array<std::atomic<bool>, 6> expected{};
+
     // Task 1 should trigger around 100ms.
     std::this_thread::sleep_until(startTime + 100ms - kJitter);
-    ASSERT_FALSE(taskRan[0]);
-    ASSERT_FALSE(taskRan[1]);
-    ASSERT_FALSE(taskRan[2]);
-    ASSERT_FALSE(taskRan[3]);
-    ASSERT_FALSE(taskRan[4]);
-    ASSERT_FALSE(taskRan[5]);
+
+    ASSERT_EQ(expected, taskRan);
+
 
     std::this_thread::sleep_until(startTime + 100ms + kJitter);
-    ASSERT_FALSE(taskRan[0]);
-    ASSERT_TRUE(taskRan[1]);
-    ASSERT_FALSE(taskRan[2]);
-    ASSERT_FALSE(taskRan[3]);
-    ASSERT_FALSE(taskRan[4]);
-    ASSERT_FALSE(taskRan[5]);
+
+    expected[1] = true;
+    ASSERT_EQ(expected, taskRan);
 
     // Cancel task 4 before it gets a chance to run.
     thread.cancelTask(handle4);
 
     // Tasks 2 and 5 should trigger around 200ms.
     std::this_thread::sleep_until(startTime + 200ms - kJitter);
-    ASSERT_FALSE(taskRan[0]);
-    ASSERT_TRUE(taskRan[1]);
-    ASSERT_FALSE(taskRan[2]);
-    ASSERT_FALSE(taskRan[3]);
-    ASSERT_FALSE(taskRan[4]);
-    ASSERT_FALSE(taskRan[5]);
+
+    ASSERT_EQ(expected, taskRan);
+
 
     std::this_thread::sleep_until(startTime + 200ms + kJitter);
-    ASSERT_FALSE(taskRan[0]);
-    ASSERT_TRUE(taskRan[1]);
-    ASSERT_TRUE(taskRan[2]);
-    ASSERT_FALSE(taskRan[3]);
-    ASSERT_FALSE(taskRan[4]);
-    ASSERT_TRUE(taskRan[5]);
+
+    expected[2] = true;
+    expected[5] = true;
+    ASSERT_EQ(expected, taskRan);
 
     // Task 0 should trigger around 300ms.
     std::this_thread::sleep_until(startTime + 300ms - kJitter);
-    ASSERT_FALSE(taskRan[0]);
-    ASSERT_TRUE(taskRan[1]);
-    ASSERT_TRUE(taskRan[2]);
-    ASSERT_FALSE(taskRan[3]);
-    ASSERT_FALSE(taskRan[4]);
-    ASSERT_TRUE(taskRan[5]);
+
+    ASSERT_EQ(expected, taskRan);
 
     std::this_thread::sleep_until(startTime + 300ms + kJitter);
-    ASSERT_TRUE(taskRan[0]);
-    ASSERT_TRUE(taskRan[1]);
-    ASSERT_TRUE(taskRan[2]);
-    ASSERT_FALSE(taskRan[3]);
-    ASSERT_FALSE(taskRan[4]);
-    ASSERT_TRUE(taskRan[5]);
+
+    expected[0] = true;
+    ASSERT_EQ(expected, taskRan);
 
     // 1 task pending
     ASSERT_EQ(1, countChars(thread.pendingToString(), REQUEST_START));
@@ -161,23 +168,16 @@
 
     // Task 3 should trigger around 400ms.
     std::this_thread::sleep_until(startTime + 400ms - kJitter);
-    ASSERT_TRUE(taskRan[0]);
-    ASSERT_TRUE(taskRan[1]);
-    ASSERT_TRUE(taskRan[2]);
-    ASSERT_FALSE(taskRan[3]);
-    ASSERT_FALSE(taskRan[4]);
-    ASSERT_TRUE(taskRan[5]);
+
+    ASSERT_EQ(expected, taskRan);
 
     // 4 tasks ran and 1 cancelled
     ASSERT_EQ(4 + 1, countChars(thread.retiredToString(), REQUEST_START));
 
     std::this_thread::sleep_until(startTime + 400ms + kJitter);
-    ASSERT_TRUE(taskRan[0]);
-    ASSERT_TRUE(taskRan[1]);
-    ASSERT_TRUE(taskRan[2]);
-    ASSERT_TRUE(taskRan[3]);
-    ASSERT_FALSE(taskRan[4]);
-    ASSERT_TRUE(taskRan[5]);
+
+    expected[3] = true;
+    ASSERT_EQ(expected, taskRan);
 
     // 0 tasks pending
     ASSERT_EQ(0, countChars(thread.pendingToString(), REQUEST_START));
@@ -185,6 +185,30 @@
     ASSERT_EQ(5 + 1, countChars(thread.retiredToString(), REQUEST_START));
 }
 
+}; // class TimerThreadTest
+
+TEST_P(TimerThreadTest, Basic) {
+    testBasic();
+}
+
+TEST_P(TimerThreadTest, Cancel) {
+    testCancel();
+}
+
+TEST_P(TimerThreadTest, CancelAfterRun) {
+    testCancelAfterRun();
+}
+
+TEST_P(TimerThreadTest, MultipleTasks) {
+    testMultipleTasks();
+}
+
+INSTANTIATE_TEST_CASE_P(
+        TimerThread,
+        TimerThreadTest,
+        ::testing::Values(0.f, 0.5f, 1.f)
+        );
+
 TEST(TimerThread, TrackedTasks) {
     TimerThread thread;
 
diff --git a/media/utils/TimerThread.cpp b/media/utils/TimerThread.cpp
index 4999de8..5e58a3d 100644
--- a/media/utils/TimerThread.cpp
+++ b/media/utils/TimerThread.cpp
@@ -23,6 +23,7 @@
 
 #include <mediautils/MediaUtilsDelayed.h>
 #include <mediautils/TimerThread.h>
+#include <utils/Log.h>
 #include <utils/ThreadDefs.h>
 
 using namespace std::chrono_literals;
@@ -33,17 +34,19 @@
 extern std::string_view timeSuffix(std::string_view time1, std::string_view time2);
 
 TimerThread::Handle TimerThread::scheduleTask(
-        std::string_view tag, TimerCallback&& func, Duration timeoutDuration) {
+        std::string_view tag, TimerCallback&& func,
+        Duration timeoutDuration, Duration secondChanceDuration) {
     const auto now = std::chrono::system_clock::now();
     auto request = std::make_shared<const Request>(now, now +
             std::chrono::duration_cast<std::chrono::system_clock::duration>(timeoutDuration),
-            gettid(), tag);
+            secondChanceDuration, gettid(), tag);
     return mMonitorThread.add(std::move(request), std::move(func), timeoutDuration);
 }
 
 TimerThread::Handle TimerThread::trackTask(std::string_view tag) {
     const auto now = std::chrono::system_clock::now();
-    auto request = std::make_shared<const Request>(now, now, gettid(), tag);
+    auto request = std::make_shared<const Request>(now, now,
+            Duration{} /* secondChanceDuration */, gettid(), tag);
     return mNoTimeoutMap.add(std::move(request));
 }
 
@@ -86,6 +89,8 @@
 
     return std::string("now ")
             .append(formatTime(std::chrono::system_clock::now()))
+            .append("\nsecondChanceCount ")
+            .append(std::to_string(mMonitorThread.getSecondChanceCount()))
             .append(analysisSummary)
             .append("\ntimeout [ ")
             .append(requestsToString(timeoutRequests))
@@ -288,16 +293,60 @@
 void TimerThread::MonitorThread::threadFunc() {
     std::unique_lock _l(mMutex);
     while (!mShouldExit) {
+        Handle nextDeadline = INVALID_HANDLE;
+        Handle now = INVALID_HANDLE;
         if (!mMonitorRequests.empty()) {
-            Handle nextDeadline = mMonitorRequests.begin()->first;
-            if (nextDeadline < std::chrono::steady_clock::now()) {
+            nextDeadline = mMonitorRequests.begin()->first;
+            now = std::chrono::steady_clock::now();
+            if (nextDeadline < now) {
+                auto node = mMonitorRequests.extract(mMonitorRequests.begin());
                 // Deadline has expired, handle the request.
+                auto secondChanceDuration = node.mapped().first->secondChanceDuration;
+                if (secondChanceDuration.count() != 0) {
+                    // We now apply the second chance duration to find the clock
+                    // monotonic second deadline.  The unique key is then the
+                    // pair<second_deadline, first_deadline>.
+                    //
+                    // The second chance prevents a false timeout should there be
+                    // any clock monotonic advancement during suspend.
+                    auto newHandle = now + secondChanceDuration;
+                    ALOGD("%s: TimeCheck second chance applied for %s",
+                            __func__, node.mapped().first->tag.c_str()); // should be rare event.
+                    mSecondChanceRequests.emplace_hint(mSecondChanceRequests.end(),
+                            std::make_pair(newHandle, nextDeadline),
+                            std::move(node.mapped()));
+                    // increment second chance counter.
+                    mSecondChanceCount.fetch_add(1 /* arg */, std::memory_order_relaxed);
+                } else {
+                    {
+                        _l.unlock();
+                        // We add Request to retired queue early so that it can be dumped out.
+                        mTimeoutQueue.add(std::move(node.mapped().first));
+                        node.mapped().second(nextDeadline);
+                        // Caution: we don't hold lock when we call TimerCallback,
+                        // but this is the timeout case!  We will crash soon,
+                        // maybe before returning.
+                        // anything left over is released here outside lock.
+                    }
+                    // reacquire the lock - if something was added, we loop immediately to check.
+                    _l.lock();
+                }
+                // always process expiring monitor requests first.
+                continue;
+            }
+        }
+        // now process any second chance requests.
+        if (!mSecondChanceRequests.empty()) {
+            Handle secondDeadline = mSecondChanceRequests.begin()->first.first;
+            if (now == INVALID_HANDLE) now = std::chrono::steady_clock::now();
+            if (secondDeadline < now) {
+                auto node = mSecondChanceRequests.extract(mSecondChanceRequests.begin());
                 {
-                    auto node = mMonitorRequests.extract(mMonitorRequests.begin());
                     _l.unlock();
                     // We add Request to retired queue early so that it can be dumped out.
                     mTimeoutQueue.add(std::move(node.mapped().first));
-                    node.mapped().second(nextDeadline);
+                    const Handle originalHandle = node.key().second;
+                    node.mapped().second(originalHandle);
                     // Caution: we don't hold lock when we call TimerCallback.
                     // This is benign issue - we permit concurrent operations
                     // while in the callback to the MonitorQueue.
@@ -308,6 +357,14 @@
                 _l.lock();
                 continue;
             }
+            // update the deadline.
+            if (nextDeadline == INVALID_HANDLE) {
+                nextDeadline = secondDeadline;
+            } else {
+                nextDeadline = std::min(nextDeadline, secondDeadline);
+            }
+        }
+        if (nextDeadline != INVALID_HANDLE) {
             mCond.wait_until(_l, nextDeadline);
         } else {
             mCond.wait(_l);
@@ -319,22 +376,36 @@
         std::shared_ptr<const Request> request, TimerCallback&& func, Duration timeout) {
     std::lock_guard _l(mMutex);
     const Handle handle = getUniqueHandle_l(timeout);
-    mMonitorRequests.emplace(handle, std::make_pair(std::move(request), std::move(func)));
+    mMonitorRequests.emplace_hint(mMonitorRequests.end(),
+            handle, std::make_pair(std::move(request), std::move(func)));
     mCond.notify_all();
     return handle;
 }
 
 std::shared_ptr<const TimerThread::Request> TimerThread::MonitorThread::remove(Handle handle) {
+    std::pair<std::shared_ptr<const Request>, TimerCallback> data;
     std::unique_lock ul(mMutex);
-    const auto it = mMonitorRequests.find(handle);
-    if (it == mMonitorRequests.end()) {
-        return {};
+    if (const auto it = mMonitorRequests.find(handle);
+        it != mMonitorRequests.end()) {
+        data = std::move(it->second);
+        mMonitorRequests.erase(it);
+        ul.unlock();  // manually release lock here so func (data.second)
+                      // is released outside of lock.
+        return data.first;  // request
     }
-    std::shared_ptr<const TimerThread::Request> request = std::move(it->second.first);
-    TimerCallback func = std::move(it->second.second);
-    mMonitorRequests.erase(it);
-    ul.unlock();  // manually release lock here so func is released outside of lock.
-    return request;
+
+    // this check is O(N), but since the second chance requests are ordered
+    // in terms of earliest expiration time, we would expect better than average results.
+    for (auto it = mSecondChanceRequests.begin(); it != mSecondChanceRequests.end(); ++it) {
+        if (it->first.second == handle) {
+            data = std::move(it->second);
+            mSecondChanceRequests.erase(it);
+            ul.unlock();  // manually release lock here so func (data.second)
+                          // is released outside of lock.
+            return data.first; // request
+        }
+    }
+    return {};
 }
 
 void TimerThread::MonitorThread::copyRequests(
@@ -343,6 +414,13 @@
     for (const auto &[deadline, monitorpair] : mMonitorRequests) {
         requests.emplace_back(monitorpair.first);
     }
+    // we combine the second map with the first map - this is
+    // everything that is pending on the monitor thread.
+    // The second map will be older than the first map so this
+    // is in order.
+    for (const auto &[deadline, monitorpair] : mSecondChanceRequests) {
+        requests.emplace_back(monitorpair.first);
+    }
 }
 
 }  // namespace android::mediautils
diff --git a/media/utils/fuzzers/TimeCheckFuzz.cpp b/media/utils/fuzzers/TimeCheckFuzz.cpp
index 7966469..65b2885 100644
--- a/media/utils/fuzzers/TimeCheckFuzz.cpp
+++ b/media/utils/fuzzers/TimeCheckFuzz.cpp
@@ -48,7 +48,9 @@
     std::string name = data_provider.ConsumeRandomLengthString(kMaxStringLen);
 
     // 3. The constructor, which is fuzzed here:
-    android::mediautils::TimeCheck timeCheck(name.c_str(), {} /* onTimer */, timeoutMs);
+    android::mediautils::TimeCheck timeCheck(name.c_str(), {} /* onTimer */,
+            std::chrono::milliseconds(timeoutMs),
+            {} /* secondChanceDuration */, true /* crashOnTimeout */);
     // We will leave some buffer to avoid sleeping too long
     uint8_t sleep_amount_ms = data_provider.ConsumeIntegralInRange<uint8_t>(0, timeoutMs / 2);
 
diff --git a/media/utils/include/mediautils/TimeCheck.h b/media/utils/include/mediautils/TimeCheck.h
index 8bee8d1..bdb5337 100644
--- a/media/utils/include/mediautils/TimeCheck.h
+++ b/media/utils/include/mediautils/TimeCheck.h
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <chrono>
 #include <vector>
 
 #include <mediautils/TimerThread.h>
@@ -44,7 +45,16 @@
     // The default timeout is chosen to be less than system server watchdog timeout
     // Note: kDefaultTimeOutMs should be no less than 2 seconds, otherwise spurious timeouts
     // may occur with system suspend.
-    static constexpr uint32_t kDefaultTimeOutMs = 5000;
+    static constexpr TimeCheck::Duration kDefaultTimeoutDuration = std::chrono::milliseconds(3000);
+
+    // Because the monotonic clock may advance during an aborted suspend,
+    // we allow another second chance timeout after the first timeout expires.
+    //
+    // The total timeout is therefore kDefaultTimeoutDuration + kDefaultSecondChanceDuration,
+    // and the result is more stable when the monotonic clock increments during suspend.
+    //
+    static constexpr TimeCheck::Duration kDefaultSecondChanceDuration =
+            std::chrono::milliseconds(2000);
 
     /**
      * TimeCheck is a RAII object which will notify a callback
@@ -64,14 +74,18 @@
      *                  to block for callback completion if it is already in progress
      *                  (for maximum concurrency and reduced deadlock potential), so use proper
      *                  lifetime analysis (e.g. shared or weak pointers).
-     * \param requestedTimeoutMs timeout in milliseconds.
+     * \param requestedTimeoutDuration timeout as a Duration (e.g. std::chrono::milliseconds).
      *                  A zero timeout means no timeout is set -
      *                  the callback is called only when
      *                  the TimeCheck object is destroyed or leaves scope.
+     * \param secondChanceDuration additional Duration to wait if the first timeout expires.
+     *                  This is used to prevent false timeouts if the steady (monotonic)
+     *                  clock advances on aborted suspend.
      * \param crashOnTimeout true if the object issues an abort on timeout.
      */
-    explicit TimeCheck(std::string_view tag, OnTimerFunc&& onTimer = {},
-            uint32_t requestedTimeoutMs = kDefaultTimeOutMs, bool crashOnTimeout = true);
+    explicit TimeCheck(std::string_view tag, OnTimerFunc&& onTimer,
+            Duration requestedTimeoutDuration, Duration secondChanceDuration,
+            bool crashOnTimeout);
 
     TimeCheck() = default;
     // Remove copy constructors as there should only be one call to the destructor.
@@ -91,13 +105,14 @@
     public:
         template <typename S, typename F>
         TimeCheckHandler(S&& _tag, F&& _onTimer, bool _crashOnTimeout,
-            Duration _timeoutDuration,
+            Duration _timeoutDuration, Duration _secondChanceDuration,
             std::chrono::system_clock::time_point _startSystemTime,
             pid_t _tid)
             : tag(std::forward<S>(_tag))
             , onTimer(std::forward<F>(_onTimer))
             , crashOnTimeout(_crashOnTimeout)
             , timeoutDuration(_timeoutDuration)
+            , secondChanceDuration(_secondChanceDuration)
             , startSystemTime(_startSystemTime)
             , tid(_tid)
             {}
@@ -105,6 +120,7 @@
         const OnTimerFunc onTimer;
         const bool crashOnTimeout;
         const Duration timeoutDuration;
+        const Duration secondChanceDuration;
         const std::chrono::system_clock::time_point startSystemTime;
         const pid_t tid;
 
diff --git a/media/utils/include/mediautils/TimerThread.h b/media/utils/include/mediautils/TimerThread.h
index ff3ef4b..c76fa7d 100644
--- a/media/utils/include/mediautils/TimerThread.h
+++ b/media/utils/include/mediautils/TimerThread.h
@@ -130,7 +130,8 @@
      * \returns       a handle that can be used for cancellation.
      */
     Handle scheduleTask(
-            std::string_view tag, TimerCallback&& func, Duration timeoutDuration);
+            std::string_view tag, TimerCallback&& func,
+            Duration timeoutDuration, Duration secondChanceDuration);
 
     /**
      * Tracks a task that shows up on toString() until cancelled.
@@ -204,24 +205,30 @@
   private:
     // To minimize movement of data, we pass around shared_ptrs to Requests.
     // These are allocated and deallocated outside of the lock.
+    // TODO(b/243839867) consider options to merge Request with the
+    // TimeCheck::TimeCheckHandler struct.
     struct Request {
         Request(std::chrono::system_clock::time_point _scheduled,
                 std::chrono::system_clock::time_point _deadline,
+                Duration _secondChanceDuration,
                 pid_t _tid,
                 std::string_view _tag)
             : scheduled(_scheduled)
             , deadline(_deadline)
+            , secondChanceDuration(_secondChanceDuration)
             , tid(_tid)
             , tag(_tag)
             {}
 
         const std::chrono::system_clock::time_point scheduled;
-        const std::chrono::system_clock::time_point deadline; // deadline := scheduled + timeout
+        const std::chrono::system_clock::time_point deadline; // deadline := scheduled
+                                                              // + timeoutDuration
+                                                              // (excludes secondChanceDuration)
                                                               // if deadline == scheduled, no
                                                               // timeout, task not executed.
+        Duration secondChanceDuration;
         const pid_t tid;
         const FixedString62 tag;
-
         std::string toString() const;
     };
 
@@ -270,6 +277,7 @@
     // call on timeout.
     // This class is thread-safe.
     class MonitorThread {
+        std::atomic<size_t> mSecondChanceCount{};
         mutable std::mutex mMutex;
         mutable std::condition_variable mCond GUARDED_BY(mMutex);
 
@@ -278,6 +286,17 @@
         std::map<Handle, std::pair<std::shared_ptr<const Request>, TimerCallback>>
                 mMonitorRequests GUARDED_BY(mMutex);
 
+        // Due to monotonic/steady clock inaccuracies during suspend,
+        // we allow an additional second chance waiting time to prevent
+        // false removal.
+
+        // This mSecondChanceRequests queue is almost always empty.
+        // Using a pair with the original handle allows lookup and keeps
+        // the Key unique.
+        std::map<std::pair<Handle /* new */, Handle /* original */>,
+                std::pair<std::shared_ptr<const Request>, TimerCallback>>
+                        mSecondChanceRequests GUARDED_BY(mMutex);
+
         RequestQueue& mTimeoutQueue; // locked internally, added to when request times out.
 
         // Worker thread variables
@@ -302,6 +321,9 @@
                 Duration timeout);
         std::shared_ptr<const Request> remove(Handle handle);
         void copyRequests(std::vector<std::shared_ptr<const Request>>& requests) const;
+        size_t getSecondChanceCount() const {
+            return mSecondChanceCount.load(std::memory_order_relaxed);
+        }
     };
 
     // Analysis contains info deduced by analysisTimeout().
diff --git a/media/utils/tests/timecheck_tests.cpp b/media/utils/tests/timecheck_tests.cpp
index 6ebf44d..8236174 100644
--- a/media/utils/tests/timecheck_tests.cpp
+++ b/media/utils/tests/timecheck_tests.cpp
@@ -39,7 +39,7 @@
             timeoutRegistered = timeout;
             elapsedMsRegistered = elapsedMs;
             event = true;
-        }, 1000 /* msec */, false /* crash */);
+        }, 1000ms /* timeoutDuration */, {} /* secondChanceDuration */, false /* crash */);
     }
     ASSERT_TRUE(event);
     ASSERT_FALSE(timeoutRegistered);
@@ -58,7 +58,7 @@
             timeoutRegistered = timeout;
             elapsedMsRegistered = elapsedMs;
             event = true; // store-release, must be last.
-        }, 1 /* msec */, false /* crash */);
+        }, 1ms /* timeoutDuration */, {} /* secondChanceDuration */, false /* crash */);
         std::this_thread::sleep_for(100ms);
     }
     ASSERT_TRUE(event); // load-acquire, must be first.
@@ -69,4 +69,4 @@
 // Note: We do not test TimeCheck crash because TimeCheck is multithreaded and the
 // EXPECT_EXIT() signal catching is imperfect due to the gtest fork.
 
-} // namespace
\ No newline at end of file
+} // namespace