Reorder release fence attachment for non-threaded RE

Changes when release fences are attached to layers for non-threaded
RenderEngine to ensure that fences are added and fired in the same
hop to the main thread. This removes the dependency on legacy screenshot
code and prevents a deadlock where screenshot requests are blocked on
the main thread while the main thread is blocked on the previous
screenshot request finishing.

Bug: b/351894825
Test: atest SurfaceFlinger_test
Flag: EXEMPT refactor and for flag removal
Change-Id: If9d4134aba1106484f94231c5104a57a605147b8
diff --git a/services/surfaceflinger/RegionSamplingThread.cpp b/services/surfaceflinger/RegionSamplingThread.cpp
index 011fd9e..08f831c 100644
--- a/services/surfaceflinger/RegionSamplingThread.cpp
+++ b/services/surfaceflinger/RegionSamplingThread.cpp
@@ -354,14 +354,13 @@
                               RenderArea::Options::CAPTURE_SECURE_LAYERS);
 
     FenceResult fenceResult;
-    if (FlagManager::getInstance().single_hop_screenshot() &&
-        mFlinger.mRenderEngine->isThreaded()) {
-        std::vector<sp<LayerFE>> layerFEs;
-        auto displayState = mFlinger.getSnapshotsFromMainThread(renderAreaBuilder,
-                                                                getLayerSnapshotsFn, layerFEs);
+    if (FlagManager::getInstance().single_hop_screenshot()) {
+        std::vector<std::pair<Layer*, sp<LayerFE>>> layers;
+        auto displayState =
+                mFlinger.getSnapshotsFromMainThread(renderAreaBuilder, getLayerSnapshotsFn, layers);
         fenceResult = mFlinger.captureScreenshot(renderAreaBuilder, buffer, kRegionSampling,
                                                  kGrayscale, kIsProtected, kAttachGainmap, nullptr,
-                                                 displayState, layerFEs)
+                                                 displayState, layers)
                               .get();
     } else {
         fenceResult = mFlinger.captureScreenshotLegacy(renderAreaBuilder, getLayerSnapshotsFn,
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index d35a76a..34798e1 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -7158,9 +7158,10 @@
 // typically a layer with DRM contents, or have the GRALLOC_USAGE_PROTECTED set on the buffer.
 // A protected layer has no implication on whether it's secure, which is explicitly set by
 // application to avoid being screenshot or drawn via unsecure display.
-bool SurfaceFlinger::layersHasProtectedLayer(const std::vector<sp<LayerFE>>& layers) const {
+bool SurfaceFlinger::layersHasProtectedLayer(
+        const std::vector<std::pair<Layer*, sp<LayerFE>>>& layers) const {
     bool protectedLayerFound = false;
-    for (auto& layerFE : layers) {
+    for (auto& [_, layerFE] : layers) {
         protectedLayerFound |=
                 (layerFE->mSnapshot->isVisible && layerFE->mSnapshot->hasProtectedContent);
         if (protectedLayerFound) {
@@ -7176,15 +7177,21 @@
 // risk of deadlocks.
 std::optional<SurfaceFlinger::OutputCompositionState> SurfaceFlinger::getSnapshotsFromMainThread(
         RenderAreaBuilderVariant& renderAreaBuilder, GetLayerSnapshotsFunction getLayerSnapshotsFn,
-        std::vector<sp<LayerFE>>& layerFEs) {
+        std::vector<std::pair<Layer*, sp<LayerFE>>>& layers) {
     return mScheduler
-            ->schedule([=, this, &renderAreaBuilder, &layerFEs]() REQUIRES(kMainThreadContext) {
+            ->schedule([=, this, &renderAreaBuilder, &layers]() REQUIRES(kMainThreadContext) {
                 SFTRACE_NAME("getSnapshotsFromMainThread");
-                auto layers = getLayerSnapshotsFn();
-                for (auto& [layer, layerFE] : layers) {
-                    attachReleaseFenceFutureToLayer(layer, layerFE.get(), ui::INVALID_LAYER_STACK);
+                layers = getLayerSnapshotsFn();
+                // Non-threaded RenderEngine eventually returns to the main thread a 2nd time
+                // to complete the screenshot. Release fences should only be added during the 2nd
+                // hop to main thread in order to avoid potential deadlocks from waiting for the
+                // the future fence to fire.
+                if (mRenderEngine->isThreaded()) {
+                    for (auto& [layer, layerFE] : layers) {
+                        attachReleaseFenceFutureToLayer(layer, layerFE.get(),
+                                                        ui::INVALID_LAYER_STACK);
+                    }
                 }
-                layerFEs = extractLayerFEs(layers);
                 return getDisplayStateFromRenderAreaBuilder(renderAreaBuilder);
             })
             .get();
@@ -7205,15 +7212,15 @@
         return;
     }
 
-    if (FlagManager::getInstance().single_hop_screenshot() && mRenderEngine->isThreaded()) {
-        std::vector<sp<LayerFE>> layerFEs;
+    if (FlagManager::getInstance().single_hop_screenshot()) {
+        std::vector<std::pair<Layer*, sp<LayerFE>>> layers;
         auto displayState =
-                getSnapshotsFromMainThread(renderAreaBuilder, getLayerSnapshotsFn, layerFEs);
+                getSnapshotsFromMainThread(renderAreaBuilder, getLayerSnapshotsFn, layers);
 
         const bool supportsProtected = getRenderEngine().supportsProtectedContent();
         bool hasProtectedLayer = false;
         if (allowProtected && supportsProtected) {
-            hasProtectedLayer = layersHasProtectedLayer(layerFEs);
+            hasProtectedLayer = layersHasProtectedLayer(layers);
         }
         const bool isProtected = hasProtectedLayer && allowProtected && supportsProtected;
         const uint32_t usage = GRALLOC_USAGE_HW_COMPOSER | GRALLOC_USAGE_HW_RENDER |
@@ -7240,7 +7247,7 @@
                                                              WRITEABLE);
         auto futureFence = captureScreenshot(renderAreaBuilder, texture, false /* regionSampling */,
                                              grayscale, isProtected, attachGainmap, captureListener,
-                                             displayState, layerFEs);
+                                             displayState, layers);
         futureFence.get();
 
     } else {
@@ -7248,7 +7255,7 @@
         bool hasProtectedLayer = false;
         if (allowProtected && supportsProtected) {
             auto layers = mScheduler->schedule([=]() { return getLayerSnapshotsFn(); }).get();
-            hasProtectedLayer = layersHasProtectedLayer(extractLayerFEs(layers));
+            hasProtectedLayer = layersHasProtectedLayer(layers);
         }
         const bool isProtected = hasProtectedLayer && allowProtected && supportsProtected;
         const uint32_t usage = GRALLOC_USAGE_HW_COMPOSER | GRALLOC_USAGE_HW_RENDER |
@@ -7316,22 +7323,13 @@
     return std::nullopt;
 }
 
-std::vector<sp<LayerFE>> SurfaceFlinger::extractLayerFEs(
-        const std::vector<std::pair<Layer*, sp<LayerFE>>>& layers) const {
-    std::vector<sp<LayerFE>> layerFEs;
-    layerFEs.reserve(layers.size());
-    for (const auto& [_, layerFE] : layers) {
-        layerFEs.push_back(layerFE);
-    }
-    return layerFEs;
-}
-
 ftl::SharedFuture<FenceResult> SurfaceFlinger::captureScreenshot(
         const RenderAreaBuilderVariant& renderAreaBuilder,
         const std::shared_ptr<renderengine::ExternalTexture>& buffer, bool regionSampling,
         bool grayscale, bool isProtected, bool attachGainmap,
         const sp<IScreenCaptureListener>& captureListener,
-        std::optional<OutputCompositionState>& displayState, std::vector<sp<LayerFE>>& layerFEs) {
+        std::optional<OutputCompositionState>& displayState,
+        std::vector<std::pair<Layer*, sp<LayerFE>>>& layers) {
     SFTRACE_CALL();
 
     ScreenCaptureResults captureResults;
@@ -7350,11 +7348,9 @@
     float displayBrightnessNits = displayState.value().displayBrightnessNits;
     float sdrWhitePointNits = displayState.value().sdrWhitePointNits;
 
-    // Empty vector needed to pass into renderScreenImpl for legacy path
-    std::vector<std::pair<Layer*, sp<android::LayerFE>>> layers;
     ftl::SharedFuture<FenceResult> renderFuture =
             renderScreenImpl(renderArea.get(), buffer, regionSampling, grayscale, isProtected,
-                             attachGainmap, captureResults, displayState, layers, layerFEs);
+                             attachGainmap, captureResults, displayState, layers);
 
     if (captureResults.capturedHdrLayers && attachGainmap &&
         FlagManager::getInstance().true_hdr_screenshots()) {
@@ -7390,7 +7386,7 @@
             ftl::SharedFuture<FenceResult> hdrRenderFuture =
                     renderScreenImpl(renderArea.get(), hdrTexture, regionSampling, grayscale,
                                      isProtected, attachGainmap, unusedResults, displayState,
-                                     layers, layerFEs);
+                                     layers);
 
             renderFuture =
                     ftl::Future(std::move(renderFuture))
@@ -7446,9 +7442,6 @@
     auto takeScreenshotFn = [=, this, renderAreaBuilder = std::move(renderAreaBuilder)]() REQUIRES(
                                     kMainThreadContext) mutable -> ftl::SharedFuture<FenceResult> {
         auto layers = getLayerSnapshotsFn();
-        for (auto& [layer, layerFE] : layers) {
-            attachReleaseFenceFutureToLayer(layer, layerFE.get(), ui::INVALID_LAYER_STACK);
-        }
         auto displayState = getDisplayStateFromRenderAreaBuilder(renderAreaBuilder);
 
         ScreenCaptureResults captureResults;
@@ -7465,10 +7458,9 @@
             return ftl::yield<FenceResult>(base::unexpected(NO_ERROR)).share();
         }
 
-        auto layerFEs = extractLayerFEs(layers);
         ftl::SharedFuture<FenceResult> renderFuture =
                 renderScreenImpl(renderArea.get(), buffer, regionSampling, grayscale, isProtected,
-                                 attachGainmap, captureResults, displayState, layers, layerFEs);
+                                 attachGainmap, captureResults, displayState, layers);
 
         if (captureListener) {
             // Defer blocking on renderFuture back to the Binder thread.
@@ -7501,10 +7493,10 @@
         const RenderArea* renderArea, const std::shared_ptr<renderengine::ExternalTexture>& buffer,
         bool regionSampling, bool grayscale, bool isProtected, bool attachGainmap,
         ScreenCaptureResults& captureResults, std::optional<OutputCompositionState>& displayState,
-        std::vector<std::pair<Layer*, sp<LayerFE>>>& layers, std::vector<sp<LayerFE>>& layerFEs) {
+        std::vector<std::pair<Layer*, sp<LayerFE>>>& layers) {
     SFTRACE_CALL();
 
-    for (auto& layerFE : layerFEs) {
+    for (auto& [_, layerFE] : layers) {
         frontend::LayerSnapshot* snapshot = layerFE->mSnapshot.get();
         captureResults.capturedSecureLayers |= (snapshot->isVisible && snapshot->isSecure);
         captureResults.capturedHdrLayers |= isHdrLayer(*snapshot);
@@ -7563,29 +7555,32 @@
     captureResults.buffer = capturedBuffer->getBuffer();
 
     ui::LayerStack layerStack{ui::DEFAULT_LAYER_STACK};
-    if (!layerFEs.empty()) {
-        const sp<LayerFE>& layerFE = layerFEs.back();
+    if (!layers.empty()) {
+        const sp<LayerFE>& layerFE = layers.back().second;
         layerStack = layerFE->getCompositionState()->outputFilter.layerStack;
     }
 
-    auto copyLayerFEs = [&layerFEs]() {
-        std::vector<sp<compositionengine::LayerFE>> ceLayerFEs;
-        ceLayerFEs.reserve(layerFEs.size());
-        for (const auto& layerFE : layerFEs) {
-            ceLayerFEs.push_back(layerFE);
-        }
-        return ceLayerFEs;
-    };
-
     auto present = [this, buffer = capturedBuffer, dataspace = captureResults.capturedDataspace,
                     sdrWhitePointNits, displayBrightnessNits, grayscale, isProtected,
-                    layerFEs = copyLayerFEs(), layerStack, regionSampling,
+                    layers = std::move(layers), layerStack, regionSampling,
                     renderArea = std::move(renderArea), renderIntent,
                     enableLocalTonemapping]() -> FenceResult {
         std::unique_ptr<compositionengine::CompositionEngine> compositionEngine =
                 mFactory.createCompositionEngine();
         compositionEngine->setRenderEngine(mRenderEngine.get());
 
+        std::vector<sp<compositionengine::LayerFE>> layerFEs;
+        layerFEs.reserve(layers.size());
+        for (auto& [layer, layerFE] : layers) {
+            // Release fences were not yet added for non-threaded render engine. To avoid
+            // deadlocks between main thread and binder threads waiting for the future fence
+            // result, fences should be added to layers in the same hop onto the main thread.
+            if (!mRenderEngine->isThreaded()) {
+                attachReleaseFenceFutureToLayer(layer, layerFE.get(), ui::INVALID_LAYER_STACK);
+            }
+            layerFEs.push_back(layerFE);
+        }
+
         compositionengine::Output::ColorProfile colorProfile{.dataspace = dataspace,
                                                              .renderIntent = renderIntent};
 
@@ -7644,8 +7639,10 @@
     // TODO(b/196334700) Once we use RenderEngineThreaded everywhere we can always defer the call
     // to CompositionEngine::present.
     ftl::SharedFuture<FenceResult> presentFuture;
-    if (FlagManager::getInstance().single_hop_screenshot() && mRenderEngine->isThreaded()) {
-        presentFuture = ftl::yield(present()).share();
+    if (FlagManager::getInstance().single_hop_screenshot()) {
+        presentFuture = mRenderEngine->isThreaded()
+                ? ftl::yield(present()).share()
+                : mScheduler->schedule(std::move(present)).share();
     } else {
         presentFuture = mRenderEngine->isThreaded() ? ftl::defer(std::move(present)).share()
                                                     : ftl::yield(present()).share();
diff --git a/services/surfaceflinger/SurfaceFlinger.h b/services/surfaceflinger/SurfaceFlinger.h
index 31218ed..114a09b 100644
--- a/services/surfaceflinger/SurfaceFlinger.h
+++ b/services/surfaceflinger/SurfaceFlinger.h
@@ -851,13 +851,14 @@
     void attachReleaseFenceFutureToLayer(Layer* layer, LayerFE* layerFE, ui::LayerStack layerStack);
 
     // Checks if a protected layer exists in a list of layers.
-    bool layersHasProtectedLayer(const std::vector<sp<LayerFE>>& layers) const;
+    bool layersHasProtectedLayer(const std::vector<std::pair<Layer*, sp<LayerFE>>>& layers) const;
 
     using OutputCompositionState = compositionengine::impl::OutputCompositionState;
 
     std::optional<OutputCompositionState> getSnapshotsFromMainThread(
             RenderAreaBuilderVariant& renderAreaBuilder,
-            GetLayerSnapshotsFunction getLayerSnapshotsFn, std::vector<sp<LayerFE>>& layerFEs);
+            GetLayerSnapshotsFunction getLayerSnapshotsFn,
+            std::vector<std::pair<Layer*, sp<LayerFE>>>& layers);
 
     void captureScreenCommon(RenderAreaBuilderVariant, GetLayerSnapshotsFunction,
                              ui::Size bufferSize, ui::PixelFormat, bool allowProtected,
@@ -866,19 +867,13 @@
     std::optional<OutputCompositionState> getDisplayStateFromRenderAreaBuilder(
             RenderAreaBuilderVariant& renderAreaBuilder) REQUIRES(kMainThreadContext);
 
-    // Legacy layer raw pointer is not safe to access outside the main thread.
-    // Creates a new vector consisting only of LayerFEs, which can be safely
-    // accessed outside the main thread.
-    std::vector<sp<LayerFE>> extractLayerFEs(
-            const std::vector<std::pair<Layer*, sp<LayerFE>>>& layers) const;
-
     ftl::SharedFuture<FenceResult> captureScreenshot(
             const RenderAreaBuilderVariant& renderAreaBuilder,
             const std::shared_ptr<renderengine::ExternalTexture>& buffer, bool regionSampling,
             bool grayscale, bool isProtected, bool attachGainmap,
             const sp<IScreenCaptureListener>& captureListener,
             std::optional<OutputCompositionState>& displayState,
-            std::vector<sp<LayerFE>>& layerFEs);
+            std::vector<std::pair<Layer*, sp<LayerFE>>>& layers);
 
     ftl::SharedFuture<FenceResult> captureScreenshotLegacy(
             RenderAreaBuilderVariant, GetLayerSnapshotsFunction,
@@ -890,8 +885,7 @@
             const RenderArea*, const std::shared_ptr<renderengine::ExternalTexture>&,
             bool regionSampling, bool grayscale, bool isProtected, bool attachGainmap,
             ScreenCaptureResults&, std::optional<OutputCompositionState>& displayState,
-            std::vector<std::pair<Layer*, sp<LayerFE>>>& layers,
-            std::vector<sp<LayerFE>>& layerFEs);
+            std::vector<std::pair<Layer*, sp<LayerFE>>>& layers);
 
     void readPersistentProperties();
 
diff --git a/services/surfaceflinger/tests/unittests/TestableSurfaceFlinger.h b/services/surfaceflinger/tests/unittests/TestableSurfaceFlinger.h
index 4dec5f6..81b929b 100644
--- a/services/surfaceflinger/tests/unittests/TestableSurfaceFlinger.h
+++ b/services/surfaceflinger/tests/unittests/TestableSurfaceFlinger.h
@@ -472,12 +472,11 @@
         ScreenCaptureResults captureResults;
         auto displayState = std::optional{display->getCompositionDisplay()->getState()};
         auto layers = getLayerSnapshotsFn();
-        auto layerFEs = mFlinger->extractLayerFEs(layers);
 
         return mFlinger->renderScreenImpl(renderArea.get(), buffer, regionSampling,
                                           false /* grayscale */, false /* isProtected */,
                                           false /* attachGainmap */, captureResults, displayState,
-                                          layers, layerFEs);
+                                          layers);
     }
 
     auto getLayerSnapshotsForScreenshotsFn(ui::LayerStack layerStack, uint32_t uid) {