SF: Fix many flakes in IdleTimerTest

There were a few flakes that were possible due to the way the idle
thread could end up being scheduled. In particular the following
tests have historically been flaky:

IdleTimerTest.idleTimerIdlesTest
IdleTimerTest.noCallbacksAfterStopAndResetTest
IdleTimerTest.noCallbacksAfterStopTest
IdleTimerTest.resetTest
IdleTimerTest.startStopTest
IdleTimerTest.timeoutCallbackExecutionTest

One thing that helps is to boost the priority of the test process as a
whole so that the IdleTimer thread is scheduled when it wants to be run.

But even then there are no guarantees that requesting a callback after
"3ms" actually generates that callback that quickly.

The adjustments to the test include:

1) Removing calls to sleep_for, in favor of doing follow-up operations
immediately after (start() then stop() for example), or relying on the
ability of the AsyncCallRecorder to wait for callbacks.
2) For startStopTest, using a larger interval, and sanity checking with
a clock source that an unexpected event really is unexpected.
3) For resetTest also using a long interval to observe the behavior if
the reset happens shortly before a callback for the previous interval
would be made.

There was unfortunately one change necessary to the implementation, not
just the test. It turned out that the condition.wait_for could return
after the interval had expired without returning
std::cv_status::timeout, and therefore not trigger a callback even
though it should have. This lead to idleTimerIdlesTest being flakey even
with the other changes.

There was also second issue discovered in Scheduler, where it did not
shut down the thread properly in its destructor, which could allow the
callback the Scheduler sets to be invoked while the Scheduler instance
is being destroyed (leading to lock attempt on a destroyed mutex).

Test: libsurfaceflinger_unittest --gtest_repeat=1000  # ALL pass x1000
Test: atest libsurfaceflinger_unittest # All, not just IdleTimerTest
Bug: 122319747
Change-Id: I716451524c32cc6a299523c47c11cfefd6ab4460
diff --git a/services/surfaceflinger/Scheduler/IdleTimer.cpp b/services/surfaceflinger/Scheduler/IdleTimer.cpp
index 5a76dbc..e975e65 100644
--- a/services/surfaceflinger/Scheduler/IdleTimer.cpp
+++ b/services/surfaceflinger/Scheduler/IdleTimer.cpp
@@ -54,18 +54,24 @@
         if (mState == TimerState::IDLE) {
             mCondition.wait(mMutex);
         } else if (mState == TimerState::RESET) {
+            auto triggerTime = std::chrono::steady_clock::now() + mInterval;
             mState = TimerState::WAITING;
-            if (mCondition.wait_for(mMutex, mInterval) == std::cv_status::timeout) {
-                if (mTimeoutCallback) {
-                    mTimeoutCallback();
+            while (mState == TimerState::WAITING) {
+                constexpr auto zero = std::chrono::steady_clock::duration::zero();
+                auto waitTime = triggerTime - std::chrono::steady_clock::now();
+                if (waitTime > zero) mCondition.wait_for(mMutex, waitTime);
+                if (mState == TimerState::WAITING &&
+                    (triggerTime - std::chrono::steady_clock::now()) <= zero) {
+                    if (mTimeoutCallback) {
+                        mTimeoutCallback();
+                    }
+
+                    mState = TimerState::IDLE;
                 }
             }
-            if (mState == TimerState::WAITING) {
-                mState = TimerState::IDLE;
-            }
         }
     }
-}
+} // namespace scheduler
 
 void IdleTimer::reset() {
     {
diff --git a/services/surfaceflinger/Scheduler/Scheduler.cpp b/services/surfaceflinger/Scheduler/Scheduler.cpp
index 00820f1..6473afd 100644
--- a/services/surfaceflinger/Scheduler/Scheduler.cpp
+++ b/services/surfaceflinger/Scheduler/Scheduler.cpp
@@ -81,7 +81,10 @@
     }
 }
 
-Scheduler::~Scheduler() = default;
+Scheduler::~Scheduler() {
+    // Ensure the IdleTimer thread is joined before we start destroying state.
+    mIdleTimer.reset();
+}
 
 sp<Scheduler::ConnectionHandle> Scheduler::createConnection(
         const char* connectionName, int64_t phaseOffsetNs, ResyncCallback resyncCallback,
diff --git a/services/surfaceflinger/tests/unittests/IdleTimerTest.cpp b/services/surfaceflinger/tests/unittests/IdleTimerTest.cpp
index 9fe9a18..dc63260 100644
--- a/services/surfaceflinger/tests/unittests/IdleTimerTest.cpp
+++ b/services/surfaceflinger/tests/unittests/IdleTimerTest.cpp
@@ -34,91 +34,117 @@
     IdleTimerTest() = default;
     ~IdleTimerTest() override = default;
 
+    // This timeout should be used when a 3ms callback is expected.
+    // While the tests typically request a callback after 3ms, the scheduler
+    // does not always cooperate, at it can take significantly longer (observed
+    // 30ms).
+    static constexpr auto waitTimeForExpected3msCallback = 100ms;
+
+    // This timeout should be used when an 3ms callback is not expected.
+    // Note that there can be false-negatives if the callback happens later.
+    static constexpr auto waitTimeForUnexpected3msCallback = 6ms;
+
     AsyncCallRecorder<void (*)()> mExpiredTimerCallback;
 
     std::unique_ptr<IdleTimer> mIdleTimer;
+
+    void clearPendingCallbacks() {
+        while (mExpiredTimerCallback.waitForCall(0us).has_value()) {
+        }
+    }
 };
 
 namespace {
 TEST_F(IdleTimerTest, createAndDestroyTest) {
-    mIdleTimer = std::make_unique<scheduler::IdleTimer>(30ms, [] {});
+    mIdleTimer = std::make_unique<scheduler::IdleTimer>(3ms, [] {});
 }
 
 TEST_F(IdleTimerTest, startStopTest) {
     mIdleTimer = std::make_unique<scheduler::IdleTimer>(30ms, mExpiredTimerCallback.getInvocable());
+    auto startTime = std::chrono::steady_clock::now();
     mIdleTimer->start();
-    std::this_thread::sleep_for(std::chrono::milliseconds(10));
-    // The timer expires after 30 ms, so the call to the callback should not happen.
-    EXPECT_FALSE(mExpiredTimerCallback.waitForCall().has_value());
+    // The idle timer fires after 30ms, so there should be no callback within
+    // 25ms (waiting for a ballback for the full 30ms would be problematic).
+    bool callbackCalled = mExpiredTimerCallback.waitForCall(25ms).has_value();
+    // Under ideal conditions there should be no event. But occasionally
+    // it is possible that the wait just prior takes more than 30ms, and
+    // a callback is observed. We check the elapsed time since before the IdleTimer
+    // thread was started as a sanity check to not have a flakey test.
+    EXPECT_FALSE(callbackCalled && std::chrono::steady_clock::now() - startTime < 30ms);
     mIdleTimer->stop();
 }
 
 TEST_F(IdleTimerTest, resetTest) {
-    mIdleTimer = std::make_unique<scheduler::IdleTimer>(20ms, mExpiredTimerCallback.getInvocable());
+    mIdleTimer = std::make_unique<scheduler::IdleTimer>(30ms, mExpiredTimerCallback.getInvocable());
     mIdleTimer->start();
-    std::this_thread::sleep_for(std::chrono::milliseconds(10));
-    // The timer expires after 30 ms, so the call to the callback should not happen.
-    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(1us).has_value());
+    // Observe any event that happens in about 25ms. We don't care if one was
+    // observed or not.
+    mExpiredTimerCallback.waitForCall(25ms).has_value();
     mIdleTimer->reset();
-    // The timer was reset, so the call to the callback should not happen.
-    std::this_thread::sleep_for(std::chrono::milliseconds(15));
-    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(1us).has_value());
+    // There may have been a race with the reset. Clear any callbacks we
+    // received right afterwards.
+    clearPendingCallbacks();
+    // A single callback should be generated after 30ms
+    EXPECT_TRUE(
+            mExpiredTimerCallback.waitForCall(waitTimeForExpected3msCallback + 30ms).has_value());
+    // After one event, it should be idle, and not generate another.
+    EXPECT_FALSE(
+            mExpiredTimerCallback.waitForCall(waitTimeForUnexpected3msCallback * 10).has_value());
     mIdleTimer->stop();
+    // Final quick check that no more callback were observed.
+    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(0ms).has_value());
 }
 
 TEST_F(IdleTimerTest, startNotCalledTest) {
     mIdleTimer = std::make_unique<scheduler::IdleTimer>(3ms, mExpiredTimerCallback.getInvocable());
-    std::this_thread::sleep_for(6ms);
     // The start hasn't happened, so the callback does not happen.
-    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(1us).has_value());
+    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(waitTimeForUnexpected3msCallback).has_value());
     mIdleTimer->stop();
+    // Final quick check that no more callback were observed.
+    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(0ms).has_value());
 }
 
 TEST_F(IdleTimerTest, idleTimerIdlesTest) {
     mIdleTimer = std::make_unique<scheduler::IdleTimer>(3ms, mExpiredTimerCallback.getInvocable());
     mIdleTimer->start();
-    std::this_thread::sleep_for(6ms);
-    // The timer expires after 3 ms, so the call to the callback happens.
-    EXPECT_TRUE(mExpiredTimerCallback.waitForCall(1us).has_value());
-    std::this_thread::sleep_for(6ms);
-    // Timer can be idle.
-    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(1us).has_value());
-    // Timer can be reset.
+    // A callback should be generated after 3ms
+    EXPECT_TRUE(mExpiredTimerCallback.waitForCall(waitTimeForExpected3msCallback).has_value());
+    // After one event, it should be idle, and not generate another.
+    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(waitTimeForUnexpected3msCallback).has_value());
+    // Once reset, it should generate another
     mIdleTimer->reset();
-    std::this_thread::sleep_for(6ms);
-    // Timer fires again.
-    EXPECT_TRUE(mExpiredTimerCallback.waitForCall(1us).has_value());
+    EXPECT_TRUE(mExpiredTimerCallback.waitForCall(waitTimeForExpected3msCallback).has_value());
     mIdleTimer->stop();
+    // Final quick check that no more callback were observed.
+    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(0ms).has_value());
 }
 
 TEST_F(IdleTimerTest, timeoutCallbackExecutionTest) {
     mIdleTimer = std::make_unique<scheduler::IdleTimer>(3ms, mExpiredTimerCallback.getInvocable());
 
     mIdleTimer->start();
-    std::this_thread::sleep_for(6ms);
-    // The timer expires after 3 ms, so the call to the callback should happen.
-    EXPECT_TRUE(mExpiredTimerCallback.waitForCall(1us).has_value());
+    EXPECT_TRUE(mExpiredTimerCallback.waitForCall(waitTimeForExpected3msCallback).has_value());
     mIdleTimer->stop();
 }
 
 TEST_F(IdleTimerTest, noCallbacksAfterStopAndResetTest) {
     mIdleTimer = std::make_unique<scheduler::IdleTimer>(3ms, mExpiredTimerCallback.getInvocable());
     mIdleTimer->start();
-    std::this_thread::sleep_for(6ms);
-    EXPECT_TRUE(mExpiredTimerCallback.waitForCall().has_value());
+    EXPECT_TRUE(mExpiredTimerCallback.waitForCall(waitTimeForExpected3msCallback).has_value());
     mIdleTimer->stop();
+    clearPendingCallbacks();
     mIdleTimer->reset();
-    std::this_thread::sleep_for(6ms);
-    EXPECT_FALSE(mExpiredTimerCallback.waitForCall().has_value());
+    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(waitTimeForUnexpected3msCallback).has_value());
 }
 
 TEST_F(IdleTimerTest, noCallbacksAfterStopTest) {
     mIdleTimer = std::make_unique<scheduler::IdleTimer>(3ms, mExpiredTimerCallback.getInvocable());
     mIdleTimer->start();
-    std::this_thread::sleep_for(1ms);
     mIdleTimer->stop();
-    std::this_thread::sleep_for(3ms);
-    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(1us).has_value());
+    clearPendingCallbacks();
+    mIdleTimer->reset();
+    // No more idle events should be observed
+    EXPECT_FALSE(mExpiredTimerCallback.waitForCall(waitTimeForUnexpected3msCallback).has_value());
 }
 
 } // namespace
diff --git a/services/surfaceflinger/tests/unittests/libsurfaceflinger_unittest_main.cpp b/services/surfaceflinger/tests/unittests/libsurfaceflinger_unittest_main.cpp
index bc1f00d..ed628b8 100644
--- a/services/surfaceflinger/tests/unittests/libsurfaceflinger_unittest_main.cpp
+++ b/services/surfaceflinger/tests/unittests/libsurfaceflinger_unittest_main.cpp
@@ -16,6 +16,11 @@
 
 #include <gtest/gtest.h>
 
+#include <sched.h>
+
+#include <processgroup/sched_policy.h>
+#include <system/graphics.h>
+
 #include "libsurfaceflinger_unittest_main.h"
 
 // ------------------------------------------------------------------------
@@ -39,6 +44,12 @@
 int main(int argc, char **argv) {
     ::testing::InitGoogleTest(&argc, argv);
 
+    // The SurfaceFlinger implementation assumes that threads resume
+    // execution as quickly as possible once they become unblocked.
+    // (These same calls are made in main_surfaceflinger.cpp)
+    setpriority(PRIO_PROCESS, 0, HAL_PRIORITY_URGENT_DISPLAY);
+    set_sched_policy(0, SP_FOREGROUND);
+
     for (int i = 1; i < argc; i++) {
         if (strcmp(argv[i], "--no-slow") == 0) {
             g_noSlowTests = true;