Add getStalledTransactionInfo method to SurfaceComposer

Bug: 287577707
Test: presubmits
Change-Id: I9c464ee302e7bafe8d45021063368fcd984e27ec
Merged-In: I9c464ee302e7bafe8d45021063368fcd984e27ec
diff --git a/libs/gui/Android.bp b/libs/gui/Android.bp
index 3c8df2b..b9a8fb6 100644
--- a/libs/gui/Android.bp
+++ b/libs/gui/Android.bp
@@ -62,6 +62,7 @@
     name: "guiconstants_aidl",
     srcs: [
         "android/gui/DropInputMode.aidl",
+        "android/gui/StalledTransactionInfo.aidl",
         "android/**/TouchOcclusionMode.aidl",
     ],
 }
diff --git a/libs/gui/SurfaceComposerClient.cpp b/libs/gui/SurfaceComposerClient.cpp
index 48a97b2..c905196 100644
--- a/libs/gui/SurfaceComposerClient.cpp
+++ b/libs/gui/SurfaceComposerClient.cpp
@@ -1302,6 +1302,13 @@
     return status.isOk() ? display : nullptr;
 }
 
+std::optional<gui::StalledTransactionInfo> SurfaceComposerClient::getStalledTransactionInfo(
+        pid_t pid) {
+    std::optional<gui::StalledTransactionInfo> result; // empty unless the call below fills it
+    ComposerServiceAIDL::getComposerService()->getStalledTransactionInfo(pid, &result);
+    return result; // NOTE(review): the binder::Status above is dropped -- confirm intended
+}
+
 void SurfaceComposerClient::Transaction::setAnimationTransaction() {
     mAnimation = true;
 }
diff --git a/libs/gui/aidl/android/gui/ISurfaceComposer.aidl b/libs/gui/aidl/android/gui/ISurfaceComposer.aidl
index 539a1c1..7e652ac 100644
--- a/libs/gui/aidl/android/gui/ISurfaceComposer.aidl
+++ b/libs/gui/aidl/android/gui/ISurfaceComposer.aidl
@@ -46,6 +46,7 @@
 import android.gui.OverlayProperties;
 import android.gui.PullAtomData;
 import android.gui.ARect;
+import android.gui.StalledTransactionInfo;
 import android.gui.StaticDisplayInfo;
 import android.gui.WindowInfosListenerInfo;
 
@@ -507,4 +508,10 @@
     void removeWindowInfosListener(IWindowInfosListener windowInfosListener);
 
     OverlayProperties getOverlaySupport();
+
+    /**
+     * Returns a StalledTransactionInfo if a transaction from the passed pid has not been applied
+     * in SurfaceFlinger due to an unsignaled fence. Otherwise, null is returned.
+     */
+    @nullable StalledTransactionInfo getStalledTransactionInfo(int pid);
 }
diff --git a/libs/gui/android/gui/StalledTransactionInfo.aidl b/libs/gui/android/gui/StalledTransactionInfo.aidl
new file mode 100644
index 0000000..e6aa9bd
--- /dev/null
+++ b/libs/gui/android/gui/StalledTransactionInfo.aidl
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.gui;
+
+/** @hide */
+parcelable StalledTransactionInfo {
+    String layerName; // debug name of the layer whose transaction is stalled
+    long bufferId; // id of the buffer data carried by the stalled transaction
+    long frameNumber; // frame number of that buffer data
+}
\ No newline at end of file
diff --git a/libs/gui/fuzzer/libgui_fuzzer_utils.h b/libs/gui/fuzzer/libgui_fuzzer_utils.h
index 4c7d056..c70197c 100644
--- a/libs/gui/fuzzer/libgui_fuzzer_utils.h
+++ b/libs/gui/fuzzer/libgui_fuzzer_utils.h
@@ -158,6 +158,8 @@
     MOCK_METHOD(binder::Status, removeWindowInfosListener, (const sp<gui::IWindowInfosListener>&),
                 (override));
     MOCK_METHOD(binder::Status, getOverlaySupport, (gui::OverlayProperties*), (override));
+    MOCK_METHOD(binder::Status, getStalledTransactionInfo,
+                (int32_t, std::optional<gui::StalledTransactionInfo>*), (override));
 };
 
 class FakeBnSurfaceComposerClient : public gui::BnSurfaceComposerClient {
diff --git a/libs/gui/include/gui/SurfaceComposerClient.h b/libs/gui/include/gui/SurfaceComposerClient.h
index 3cf57b1..dbcbd3b 100644
--- a/libs/gui/include/gui/SurfaceComposerClient.h
+++ b/libs/gui/include/gui/SurfaceComposerClient.h
@@ -371,6 +371,10 @@
     //! Get token for a physical display given its stable ID
     static sp<IBinder> getPhysicalDisplayToken(PhysicalDisplayId displayId);
 
+    // Returns StalledTransactionInfo if a transaction from the provided pid has not been applied
+    // due to an unsignaled fence.
+    static std::optional<gui::StalledTransactionInfo> getStalledTransactionInfo(pid_t pid);
+
     struct SCHash {
         std::size_t operator()(const sp<SurfaceControl>& sc) const {
             return std::hash<SurfaceControl *>{}(sc.get());
diff --git a/libs/gui/tests/Surface_test.cpp b/libs/gui/tests/Surface_test.cpp
index 567604d..d7910d2 100644
--- a/libs/gui/tests/Surface_test.cpp
+++ b/libs/gui/tests/Surface_test.cpp
@@ -1016,6 +1016,11 @@
         return binder::Status::ok();
     }
 
+    binder::Status getStalledTransactionInfo(
+            int32_t /*pid*/, std::optional<gui::StalledTransactionInfo>* /*result*/) override {
+        return binder::Status::ok();
+    }
+
 protected:
     IBinder* onAsBinder() override { return nullptr; }
 
diff --git a/services/surfaceflinger/FrontEnd/TransactionHandler.cpp b/services/surfaceflinger/FrontEnd/TransactionHandler.cpp
index fa8eb3c..6e78e93 100644
--- a/services/surfaceflinger/FrontEnd/TransactionHandler.cpp
+++ b/services/surfaceflinger/FrontEnd/TransactionHandler.cpp
@@ -186,21 +186,36 @@
 }
 
 void TransactionHandler::onTransactionQueueStalled(uint64_t transactionId,
-                                                   sp<ITransactionCompletedListener>& listener,
-                                                   const std::string& reason) {
-    if (std::find(mStalledTransactions.begin(), mStalledTransactions.end(), transactionId) !=
-        mStalledTransactions.end()) {
-        return;
-    }
-
-    mStalledTransactions.push_back(transactionId);
-    listener->onTransactionQueueStalled(String8(reason.c_str()));
+                                                   StalledTransactionInfo stalledTransactionInfo) {
+    std::lock_guard lock{mStalledMutex}; // a repeat report for a known id changes nothing
+    mStalledTransactions.try_emplace(transactionId, std::move(stalledTransactionInfo));
 }
 
-void TransactionHandler::removeFromStalledTransactions(uint64_t id) {
-    auto it = std::find(mStalledTransactions.begin(), mStalledTransactions.end(), id);
-    if (it != mStalledTransactions.end()) {
-        mStalledTransactions.erase(it);
+void TransactionHandler::removeFromStalledTransactions(uint64_t transactionId) {
+    std::lock_guard lock{mStalledMutex};
+    mStalledTransactions.erase(transactionId); // no-op if the id was never reported as stalled
+}
+
+// Returns a copy of the first stalled entry recorded for `pid`, or nullopt if there is none.
+std::optional<TransactionHandler::StalledTransactionInfo>
+TransactionHandler::getStalledTransactionInfo(pid_t pid) {
+    std::lock_guard lock{mStalledMutex};
+    // Bind by const reference so only the matching entry is copied, not every entry visited.
+    for (const auto& [_, stalledTransactionInfo] : mStalledTransactions) {
+        if (pid == stalledTransactionInfo.pid) return stalledTransactionInfo;
+    }
+    return std::nullopt;
+}
+
+void TransactionHandler::onLayerDestroyed(uint32_t layerId) {
+    std::lock_guard lock{mStalledMutex}; // purge entries tied to the destroyed layer
+    for (auto entry = mStalledTransactions.begin(); entry != mStalledTransactions.end();) {
+        if (entry->second.layerId != layerId) {
+            ++entry;
+            continue;
+        }
+        entry = mStalledTransactions.erase(entry); // erase() yields the following iterator
     }
 }
+
 } // namespace android::surfaceflinger::frontend
diff --git a/services/surfaceflinger/FrontEnd/TransactionHandler.h b/services/surfaceflinger/FrontEnd/TransactionHandler.h
index 865835f..ff54dc5 100644
--- a/services/surfaceflinger/FrontEnd/TransactionHandler.h
+++ b/services/surfaceflinger/FrontEnd/TransactionHandler.h
@@ -18,6 +18,7 @@
 
 #include <semaphore.h>
 #include <cstdint>
+#include <optional>
 #include <vector>
 
 #include <LocklessQueue.h>
@@ -61,9 +62,18 @@
     std::vector<TransactionState> flushTransactions();
     void addTransactionReadyFilter(TransactionFilter&&);
     void queueTransaction(TransactionState&&);
-    void onTransactionQueueStalled(uint64_t transactionId, sp<ITransactionCompletedListener>&,
-                                   const std::string& reason);
+
+    struct StalledTransactionInfo {
+        pid_t pid; // owner pid of the stalled layer; matched by getStalledTransactionInfo()
+        uint32_t layerId; // matched by onLayerDestroyed() to drop entries for that layer
+        std::string layerName; // debug name of the stalled layer
+        uint64_t bufferId; // id of the buffer data in the stalled transaction
+        uint64_t frameNumber; // frame number of that buffer data
+    };
+    void onTransactionQueueStalled(uint64_t transactionId, StalledTransactionInfo);
     void removeFromStalledTransactions(uint64_t transactionId);
+    std::optional<StalledTransactionInfo> getStalledTransactionInfo(pid_t pid);
+    void onLayerDestroyed(uint32_t layerId);
 
 private:
     // For unit tests
@@ -79,7 +89,10 @@
     LocklessQueue<TransactionState> mLocklessTransactionQueue;
     std::atomic<size_t> mPendingTransactionCount = 0;
     ftl::SmallVector<TransactionFilter, 2> mTransactionReadyFilters;
-    std::vector<uint64_t> mStalledTransactions;
+
+    std::mutex mStalledMutex;
+    std::unordered_map<uint64_t /* transactionId */, StalledTransactionInfo> mStalledTransactions
+            GUARDED_BY(mStalledMutex);
 };
 } // namespace surfaceflinger::frontend
 } // namespace android
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index 9f8af90..d41318e 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -4355,9 +4355,13 @@
                     (flushState.queueProcessTime - transaction.postTime) >
                             std::chrono::nanoseconds(4s).count()) {
                     mTransactionHandler
-                            .onTransactionQueueStalled(transaction.id, listener,
-                                                       "Buffer processing hung up due to stuck "
-                                                       "fence. Indicates GPU hang");
+                            .onTransactionQueueStalled(transaction.id,
+                                                       {.pid = layer->getOwnerPid(),
+                                                        .layerId = static_cast<uint32_t>(
+                                                                layer->getSequence()),
+                                                        .layerName = layer->getDebugName(),
+                                                        .bufferId = s.bufferData->getId(),
+                                                        .frameNumber = s.bufferData->frameNumber});
                 }
                 ATRACE_FORMAT("fence unsignaled %s", layer->getDebugName());
                 return TraverseBuffersReturnValues::STOP_TRAVERSAL;
@@ -5381,6 +5385,8 @@
         mDestroyedHandles.emplace_back(layerId);
     }
 
+    mTransactionHandler.onLayerDestroyed(layerId);
+
     Mutex::Autolock lock(mStateLock);
     markLayerPendingRemovalLocked(layer);
     layer->onHandleDestroyed();
@@ -7981,6 +7987,12 @@
     return NO_ERROR;
 }
 
+status_t SurfaceFlinger::getStalledTransactionInfo(
+        int pid, std::optional<TransactionHandler::StalledTransactionInfo>& result) {
+    result = mTransactionHandler.getStalledTransactionInfo(pid); // nullopt: no stall for pid
+    return NO_ERROR; // never fails; "no stall" is reported through an empty result
+}
+
 std::shared_ptr<renderengine::ExternalTexture> SurfaceFlinger::getExternalTextureFromBufferData(
         BufferData& bufferData, const char* layerName, uint64_t transactionId) {
     if (bufferData.buffer &&
@@ -9085,6 +9097,28 @@
     return binderStatusFromStatusT(status);
 }
 
+binder::Status SurfaceComposerAIDL::getStalledTransactionInfo(
+        int pid, std::optional<gui::StalledTransactionInfo>* outInfo) {
+    const int callingPid = IPCThreadState::self()->getCallingPid();
+    const int callingUid = IPCThreadState::self()->getCallingUid();
+    if (!checkPermission(sAccessSurfaceFlinger, callingPid, callingUid)) {
+        return binderStatusFromStatusT(PERMISSION_DENIED); // caller lacks sAccessSurfaceFlinger
+    }
+
+    std::optional<TransactionHandler::StalledTransactionInfo> stalledTransactionInfo;
+    status_t status = mFlinger->getStalledTransactionInfo(pid, stalledTransactionInfo);
+    if (stalledTransactionInfo) {
+        gui::StalledTransactionInfo result; // binder-facing copy; pid/layerId stay internal
+        result.layerName = String16{stalledTransactionInfo->layerName.c_str()};
+        result.bufferId = stalledTransactionInfo->bufferId;
+        result.frameNumber = stalledTransactionInfo->frameNumber;
+        outInfo->emplace(std::move(result));
+    } else {
+        outInfo->reset(); // empty optional; the AIDL method is declared @nullable
+    }
+    return binderStatusFromStatusT(status);
+}
+
 status_t SurfaceComposerAIDL::checkAccessPermission(bool usePermissionCache) {
     if (!mFlinger->callingThreadHasUnscopedSurfaceFlingerAccess(usePermissionCache)) {
         IPCThreadState* ipc = IPCThreadState::self();
diff --git a/services/surfaceflinger/SurfaceFlinger.h b/services/surfaceflinger/SurfaceFlinger.h
index f1759a5..34f4d34 100644
--- a/services/surfaceflinger/SurfaceFlinger.h
+++ b/services/surfaceflinger/SurfaceFlinger.h
@@ -618,6 +618,9 @@
     status_t removeWindowInfosListener(
             const sp<gui::IWindowInfosListener>& windowInfosListener) const;
 
+    status_t getStalledTransactionInfo(
+            int pid, std::optional<TransactionHandler::StalledTransactionInfo>& result);
+
     // Implements IBinder::DeathRecipient.
     void binderDied(const wp<IBinder>& who) override;
 
@@ -1542,6 +1545,8 @@
                                           gui::WindowInfosListenerInfo* outInfo) override;
     binder::Status removeWindowInfosListener(
             const sp<gui::IWindowInfosListener>& windowInfosListener) override;
+    binder::Status getStalledTransactionInfo(
+            int pid, std::optional<gui::StalledTransactionInfo>* outInfo) override;
 
 private:
     static const constexpr bool kUsePermissionCache = true;