SurfaceFlinger: LocklessQueue for transaction queue

Test: presubmit
Test: go/wm-smoke
Bug: 238781169

Change-Id: If23f4eae1c4bb652abbe8109f728bde20f7f66e5
diff --git a/services/surfaceflinger/LocklessQueue.h b/services/surfaceflinger/LocklessQueue.h
new file mode 100644
index 0000000..6b63360
--- /dev/null
+++ b/services/surfaceflinger/LocklessQueue.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+#include <atomic>
+#include <optional>
+
+template <typename T>
+// Single consumer multi producer stack. We can understand the two operations independently to see
+// why they are without race condition.
+//
+// push is responsible for maintaining a linked list stored in mPush, and called from multiple
+// threads without lock. We can see that if two threads never observe the same value from
+// mPush.load, it just functions as a normal linked list. In the case where two threads observe the
+// same value, one of them has to execute the compare_exchange first. The one that doesn't execute
+// the compare exchange first, will receive false from compare_exchange. previousHead is updated (by
+// compare_exchange) to the most recent value of mPush, and we try again. It's relatively clear to
+// see that the process can repeat with an arbitrary number of threads.
+//
+// Pop is much simpler. If mPop is empty (as it begins) it atomically exchanges
+// the entire push list with null. This is safe, since the only other reader (push)
+// of mPush will retry if it changes in between it's read and atomic compare. We
+// then store the list and pop one element.
+//
+// If we already had something in the pop list we just pop directly.
+class LocklessQueue {
+public:
+    class Entry {
+    public:
+        T mValue;
+        std::atomic<Entry*> mNext;
+        Entry(T value) : mValue(value) {}
+    };
+    std::atomic<Entry*> mPush = nullptr;
+    std::atomic<Entry*> mPop = nullptr;
+    bool isEmpty() { return (mPush.load() == nullptr) && (mPop.load() == nullptr); }
+
+    void push(T value) {
+        Entry* entry = new Entry(value);
+        Entry* previousHead = mPush.load(/*std::memory_order_relaxed*/);
+        do {
+            entry->mNext = previousHead;
+        } while (!mPush.compare_exchange_weak(previousHead, entry)); /*std::memory_order_release*/
+    }
+    std::optional<T> pop() {
+        Entry* popped = mPop.load(/*std::memory_order_acquire*/);
+        if (popped) {
+            // Single consumer so this is fine
+            mPop.store(popped->mNext /* , std::memory_order_release */);
+            auto value = popped->mValue;
+            delete popped;
+            return std::move(value);
+        } else {
+            Entry* grabbedList = mPush.exchange(nullptr /* , std::memory_order_acquire */);
+            if (!grabbedList) return std::nullopt;
+            // Reverse the list
+            while (grabbedList->mNext) {
+                Entry* next = grabbedList->mNext;
+                grabbedList->mNext = popped;
+                popped = grabbedList;
+                grabbedList = next;
+            }
+            mPop.store(popped /* , std::memory_order_release */);
+            auto value = grabbedList->mValue;
+            delete grabbedList;
+            return std::move(value);
+        }
+    }
+};
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index 9ef8e7b..ce54f50 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -3753,6 +3753,8 @@
 
             transactions.emplace_back(std::move(transaction));
             transactionQueue.pop();
+            mPendingTransactionCount--;
+            ATRACE_INT("TransactionQueue", mPendingTransactionCount.load());
         }
 
         if (transactionQueue.empty()) {
@@ -3776,8 +3778,6 @@
     {
         Mutex::Autolock _l(mStateLock);
         {
-            Mutex::Autolock _l(mQueueLock);
-
             int lastTransactionsPendingBarrier = 0;
             int transactionsPendingBarrier = 0;
             // First collect transactions from the pending transaction queues.
@@ -3790,8 +3790,12 @@
             // Second, collect transactions from the transaction queue.
             // Here as well we are not allowing unsignaled buffers for the same
             // reason as above.
-            while (!mTransactionQueue.empty()) {
-                auto& transaction = mTransactionQueue.front();
+            while (!mLocklessTransactionQueue.isEmpty()) {
+                auto maybeTransaction = mLocklessTransactionQueue.pop();
+                if (!maybeTransaction.has_value()) {
+                    break;
+                }
+                auto transaction = maybeTransaction.value();
                 const bool pendingTransactions =
                         mPendingTransactionQueues.find(transaction.applyToken) !=
                         mPendingTransactionQueues.end();
@@ -3829,9 +3833,9 @@
                         }
                     });
                     transactions.emplace_back(std::move(transaction));
+                    mPendingTransactionCount--;
+                    ATRACE_INT("TransactionQueue", mPendingTransactionCount.load());
                 }
-                mTransactionQueue.pop_front();
-                ATRACE_INT("TransactionQueue", mTransactionQueue.size());
             }
 
             // Transactions with a buffer pending on a barrier may be on a different applyToken
@@ -3892,8 +3896,7 @@
 }
 
 bool SurfaceFlinger::transactionFlushNeeded() {
-    Mutex::Autolock _l(mQueueLock);
-    return !mPendingTransactionQueues.empty() || !mTransactionQueue.empty();
+    return !mPendingTransactionQueues.empty() || !mLocklessTransactionQueue.isEmpty();
 }
 
 bool SurfaceFlinger::frameIsEarly(nsecs_t expectedPresentTime, int64_t vsyncId) const {
@@ -4060,17 +4063,15 @@
 
 void SurfaceFlinger::queueTransaction(TransactionState& state) {
     state.queueTime = systemTime();
-
-    Mutex::Autolock lock(mQueueLock);
-
     // Generate a CountDownLatch pending state if this is a synchronous transaction.
     if (state.flags & eSynchronous) {
         state.transactionCommittedSignal =
                 std::make_shared<CountDownLatch>(CountDownLatch::eSyncTransaction);
     }
 
-    mTransactionQueue.emplace_back(state);
-    ATRACE_INT("TransactionQueue", mTransactionQueue.size());
+    mLocklessTransactionQueue.push(state);
+    mPendingTransactionCount++;
+    ATRACE_INT("TransactionQueue", mPendingTransactionCount.load());
 
     const auto schedule = [](uint32_t flags) {
         if (flags & eEarlyWakeupEnd) return TransactionSchedule::EarlyEnd;
diff --git a/services/surfaceflinger/SurfaceFlinger.h b/services/surfaceflinger/SurfaceFlinger.h
index 7d23c3c..1e4fae7 100644
--- a/services/surfaceflinger/SurfaceFlinger.h
+++ b/services/surfaceflinger/SurfaceFlinger.h
@@ -65,6 +65,7 @@
 #include "FlagManager.h"
 #include "FrameTracker.h"
 #include "LayerVector.h"
+#include "LocklessQueue.h"
 #include "Scheduler/RefreshRateConfigs.h"
 #include "Scheduler/RefreshRateStats.h"
 #include "Scheduler/Scheduler.h"
@@ -762,13 +763,13 @@
             std::vector<TransactionState>& transactions,
             std::unordered_map<sp<IBinder>, uint64_t, SpHash<IBinder>>& bufferLayersReadyToPresent,
             std::unordered_set<sp<IBinder>, SpHash<IBinder>>& applyTokensWithUnsignaledTransactions,
-            bool tryApplyUnsignaled) REQUIRES(mStateLock, mQueueLock);
+            bool tryApplyUnsignaled) REQUIRES(mStateLock);
 
     int flushUnsignaledPendingTransactionQueues(
             std::vector<TransactionState>& transactions,
             std::unordered_map<sp<IBinder>, uint64_t, SpHash<IBinder>>& bufferLayersReadyToPresent,
             std::unordered_set<sp<IBinder>, SpHash<IBinder>>& applyTokensWithUnsignaledTransactions)
-            REQUIRES(mStateLock, mQueueLock);
+            REQUIRES(mStateLock);
 
     uint32_t setClientStateLocked(const FrameTimelineInfo&, ComposerState&,
                                   int64_t desiredPresentTime, bool isAutoTimestamp,
@@ -1098,7 +1099,7 @@
     status_t CheckTransactCodeCredentials(uint32_t code);
 
     // Add transaction to the Transaction Queue
-    void queueTransaction(TransactionState& state) EXCLUDES(mQueueLock);
+    void queueTransaction(TransactionState& state);
     void waitForSynchronousTransaction(const CountDownLatch& transactionCommittedSignal);
     void signalSynchronousTransactions(const uint32_t flag);
 
@@ -1262,11 +1263,11 @@
     uint32_t mTexturePoolSize = 0;
     std::vector<uint32_t> mTexturePool;
 
-    mutable Mutex mQueueLock;
     Condition mTransactionQueueCV;
     std::unordered_map<sp<IBinder>, std::queue<TransactionState>, IListenerHash>
-            mPendingTransactionQueues GUARDED_BY(mQueueLock);
-    std::deque<TransactionState> mTransactionQueue GUARDED_BY(mQueueLock);
+            mPendingTransactionQueues;
+    LocklessQueue<TransactionState> mLocklessTransactionQueue;
+    std::atomic<size_t> mPendingTransactionCount = 0;
     /*
      * Feature prototyping
      */
diff --git a/services/surfaceflinger/fuzzer/surfaceflinger_fuzzers_utils.h b/services/surfaceflinger/fuzzer/surfaceflinger_fuzzers_utils.h
index 3338da0..de04592 100644
--- a/services/surfaceflinger/fuzzer/surfaceflinger_fuzzers_utils.h
+++ b/services/surfaceflinger/fuzzer/surfaceflinger_fuzzers_utils.h
@@ -727,7 +727,7 @@
         return mFlinger->setPowerModeInternal(display, mode);
     }
 
-    auto &getTransactionQueue() { return mFlinger->mTransactionQueue; }
+    auto &getTransactionQueue() { return mFlinger->mLocklessTransactionQueue; }
     auto &getPendingTransactionQueue() { return mFlinger->mPendingTransactionQueues; }
 
     auto setTransactionState(
diff --git a/services/surfaceflinger/tests/unittests/TestableSurfaceFlinger.h b/services/surfaceflinger/tests/unittests/TestableSurfaceFlinger.h
index 60285f9..eae5f82 100644
--- a/services/surfaceflinger/tests/unittests/TestableSurfaceFlinger.h
+++ b/services/surfaceflinger/tests/unittests/TestableSurfaceFlinger.h
@@ -421,7 +421,7 @@
         return mFlinger->SurfaceFlinger::getDisplayNativePrimaries(displayToken, primaries);
     }
 
-    auto& getTransactionQueue() { return mFlinger->mTransactionQueue; }
+    auto& getTransactionQueue() { return mFlinger->mLocklessTransactionQueue; }
     auto& getPendingTransactionQueue() { return mFlinger->mPendingTransactionQueues; }
     auto& getTransactionCommittedSignals() { return mFlinger->mTransactionCommittedSignals; }
 
diff --git a/services/surfaceflinger/tests/unittests/TransactionApplicationTest.cpp b/services/surfaceflinger/tests/unittests/TransactionApplicationTest.cpp
index 84f1170..b2da34e 100644
--- a/services/surfaceflinger/tests/unittests/TransactionApplicationTest.cpp
+++ b/services/surfaceflinger/tests/unittests/TransactionApplicationTest.cpp
@@ -117,7 +117,7 @@
     }
 
     void NotPlacedOnTransactionQueue(uint32_t flags) {
-        ASSERT_EQ(0u, mFlinger.getTransactionQueue().size());
+        ASSERT_TRUE(mFlinger.getTransactionQueue().isEmpty());
         EXPECT_CALL(*mFlinger.scheduler(), scheduleFrame()).Times(1);
         TransactionInfo transaction;
         setupSingle(transaction, flags,
@@ -140,12 +140,12 @@
             EXPECT_LE(returnedTime, applicationTime + mFlinger.getAnimationTransactionTimeout());
         }
         // Each transaction should have been placed on the transaction queue
-        auto transactionQueue = mFlinger.getTransactionQueue();
-        EXPECT_EQ(1u, transactionQueue.size());
+        auto& transactionQueue = mFlinger.getTransactionQueue();
+        EXPECT_FALSE(transactionQueue.isEmpty());
     }
 
     void PlaceOnTransactionQueue(uint32_t flags) {
-        ASSERT_EQ(0u, mFlinger.getTransactionQueue().size());
+        ASSERT_TRUE(mFlinger.getTransactionQueue().isEmpty());
         EXPECT_CALL(*mFlinger.scheduler(), scheduleFrame()).Times(1);
 
         // first check will see desired present time has not passed,
@@ -171,12 +171,12 @@
                       applicationSentTime + mFlinger.getAnimationTransactionTimeout());
         }
         // This transaction should have been placed on the transaction queue
-        auto transactionQueue = mFlinger.getTransactionQueue();
-        EXPECT_EQ(1u, transactionQueue.size());
+        auto& transactionQueue = mFlinger.getTransactionQueue();
+        EXPECT_FALSE(transactionQueue.isEmpty());
     }
 
     void BlockedByPriorTransaction(uint32_t flags) {
-        ASSERT_EQ(0u, mFlinger.getTransactionQueue().size());
+        ASSERT_TRUE(mFlinger.getTransactionQueue().isEmpty());
         nsecs_t time = systemTime();
         EXPECT_CALL(*mFlinger.scheduler(), scheduleFrame()).Times(2);
 
@@ -239,8 +239,8 @@
     int mTransactionNumber = 0;
 };
 
-TEST_F(TransactionApplicationTest, Flush_RemovesFromQueue) {
-    ASSERT_EQ(0u, mFlinger.getTransactionQueue().size());
+TEST_F(TransactionApplicationTest, AddToPendingQueue) {
+    ASSERT_TRUE(mFlinger.getTransactionQueue().isEmpty());
     EXPECT_CALL(*mFlinger.scheduler(), scheduleFrame()).Times(1);
 
     TransactionInfo transactionA; // transaction to go on pending queue
@@ -253,10 +253,27 @@
                                  mHasListenerCallbacks, mCallbacks, transactionA.id);
 
     auto& transactionQueue = mFlinger.getTransactionQueue();
-    ASSERT_EQ(1u, transactionQueue.size());
+    ASSERT_FALSE(transactionQueue.isEmpty());
 
-    auto& transactionState = transactionQueue.front();
+    auto transactionState = transactionQueue.pop().value();
     checkEqual(transactionA, transactionState);
+}
+
+TEST_F(TransactionApplicationTest, Flush_RemovesFromQueue) {
+    ASSERT_TRUE(mFlinger.getTransactionQueue().isEmpty());
+    EXPECT_CALL(*mFlinger.scheduler(), scheduleFrame()).Times(1);
+
+    TransactionInfo transactionA; // transaction to go on pending queue
+    setupSingle(transactionA, /*flags*/ 0, /*desiredPresentTime*/ s2ns(1), false,
+                FrameTimelineInfo{});
+    mFlinger.setTransactionState(transactionA.frameTimelineInfo, transactionA.states,
+                                 transactionA.displays, transactionA.flags, transactionA.applyToken,
+                                 transactionA.inputWindowCommands, transactionA.desiredPresentTime,
+                                 transactionA.isAutoTimestamp, transactionA.uncacheBuffer,
+                                 mHasListenerCallbacks, mCallbacks, transactionA.id);
+
+    auto& transactionQueue = mFlinger.getTransactionQueue();
+    ASSERT_FALSE(transactionQueue.isEmpty());
 
     // because flushing uses the cached expected present time, we send an empty
     // transaction here (sending a null applyToken to fake it as from a
@@ -272,7 +289,7 @@
     // passed
     mFlinger.flushTransactionQueues();
 
-    EXPECT_EQ(0u, transactionQueue.size());
+    EXPECT_TRUE(mFlinger.getTransactionQueue().isEmpty());
 }
 
 TEST_F(TransactionApplicationTest, NotPlacedOnTransactionQueue_Synchronous) {
@@ -306,7 +323,9 @@
     void TearDown() override {
         // Clear all transaction queues to release all transactions we sent
         // in the tests. Otherwise, gmock complains about memory leaks.
-        mFlinger.getTransactionQueue().clear();
+        while (!mFlinger.getTransactionQueue().isEmpty()) {
+            mFlinger.getTransactionQueue().pop();
+        }
         mFlinger.getPendingTransactionQueue().clear();
         mFlinger.getTransactionCommittedSignals().clear();
         mFlinger.commitTransactionsLocked(eTransactionMask);
@@ -358,7 +377,7 @@
     void setTransactionStates(const std::vector<TransactionInfo>& transactions,
                               size_t expectedTransactionsApplied,
                               size_t expectedTransactionsPending) {
-        EXPECT_EQ(0u, mFlinger.getTransactionQueue().size());
+        EXPECT_TRUE(mFlinger.getTransactionQueue().isEmpty());
         EXPECT_EQ(0u, mFlinger.getPendingTransactionQueue().size());
 
         for (const auto& transaction : transactions) {
@@ -370,7 +389,7 @@
                                          mHasListenerCallbacks, mCallbacks, transaction.id);
         }
         mFlinger.flushTransactionQueues();
-        EXPECT_EQ(0u, mFlinger.getTransactionQueue().size());
+        EXPECT_TRUE(mFlinger.getTransactionQueue().isEmpty());
         EXPECT_EQ(expectedTransactionsPending, mFlinger.getPendingTransactionQueue().size());
         EXPECT_EQ(expectedTransactionsApplied, mFlinger.getTransactionCommittedSignals().size());
     }