Offload present to a separate thread. Make Output::present() return an ftl::Future. When offloading its HWC call, present will return a future that can be waited upon. In CompositionEngine::present, store a vector of futures and wait upon the results before returning. This allows HWC's present to run in parallel with other work; in particular, present on other displays. Waiting here ensures that post-composition work does not start until present has completed. Future work may defer this even later. Reuse HwcAsyncWorker to run present on a separate thread. Add a new variable for determining whether to run validate in parallel, since the presence of the HwcAsyncWorker could just mean that we are offloading present. Read the new DisplayCapability to determine whether a display can be offloaded to a worker thread. Only run displays in parallel if they all have the DisplayCapability. Non-HWC-enabled displays without the capability do not prevent other displays from being offloaded. They also run last, since they can run in parallel with HWC-enabled displays. (The ordering is now set by SurfaceFlinger, which places physical displays at the start of the list of outputs.) When telling a display to offload its present call, make it only last for a single frame. This simplifies the code while ensuring we do not leave it enabled unnecessarily. Leave a single present call on the main thread. This saves a thread-hop, while still allowing it to run in parallel with other HWC work. Only attempt to offload present if the appropriate trunk stable flag (or debug sysprop, "debug.sf.multithreaded_present") is set. Bug: 241285491 Bug: 259132483 Test: manual: perfetto trace Test: libcompositionengine_test Change-Id: Ib9d074671e32c95875ef7e0791dd95d6e595e47a

commit: 2f60d735f5e2e77643efa8b7129aeb81f93c9008 [log] [tgz]
author: Leon Scroggins III <scroggo@google.com> Mon Sep 12 14:42:38 2022 -0400
committer: Leon Scroggins III <scroggo@google.com> Tue Nov 21 15:10:58 2023 -0500
tree: 2e14e54fafa59e5e092505a95265f094939b594a
parent: b345a2c1a20252df3c7ad4aa969a144d97362e88 [diff]
diff --git a/services/surfaceflinger/CompositionEngine/src/CompositionEngine.cpp b/services/surfaceflinger/CompositionEngine/src/CompositionEngine.cpp
index 15fadbc..9041964 100644
--- a/services/surfaceflinger/CompositionEngine/src/CompositionEngine.cpp
+++ b/services/surfaceflinger/CompositionEngine/src/CompositionEngine.cpp

@@ -20,6 +20,7 @@
 #include <compositionengine/OutputLayer.h>
 #include <compositionengine/impl/CompositionEngine.h>
 #include <compositionengine/impl/Display.h>
+#include <ui/DisplayMap.h>
 
 #include <renderengine/RenderEngine.h>
 #include <utils/Trace.h>
@@ -88,6 +89,33 @@
     return mRefreshStartTime;
 }
 
+namespace {
+int numDisplaysWithOffloadPresentSupport(const CompositionRefreshArgs& args) {
+    if (!FlagManager::getInstance().multithreaded_present() || args.outputs.size() < 2) {
+        return 0;
+    }
+
+    int numEligibleDisplays = 0;
+    // Only run present in multiple threads if all HWC-enabled displays
+    // being refreshed support it.
+    if (!std::all_of(args.outputs.begin(), args.outputs.end(),
+                     [&numEligibleDisplays](const auto& output) {
+                         if (!ftl::Optional(output->getDisplayId())
+                                      .and_then(HalDisplayId::tryCast)) {
+                             // Not HWC-enabled, so it is always
+                             // client-composited.
+                             return true;
+                         }
+                         const bool support = output->supportsOffloadPresent();
+                         numEligibleDisplays += static_cast<int>(support);
+                         return support;
+                     })) {
+        return 0;
+    }
+    return numEligibleDisplays;
+}
+} // namespace
+
 void CompositionEngine::present(CompositionRefreshArgs& args) {
     ATRACE_CALL();
     ALOGV(__FUNCTION__);
@@ -105,8 +133,36 @@
         }
     }
 
+    // Offloading the HWC call for `present` allows us to simultaneously call it
+    // on multiple displays. This is desirable because these calls block and can
+    // be slow.
+    if (const int numEligibleDisplays = numDisplaysWithOffloadPresentSupport(args);
+        numEligibleDisplays > 1) {
+        // Leave the last eligible display on the main thread, which will
+        // allow it to run concurrently without an extra thread hop.
+        int numToOffload = numEligibleDisplays - 1;
+        for (auto& output : args.outputs) {
+            if (output->supportsOffloadPresent()) {
+                output->offloadPresentNextFrame();
+                if (--numToOffload == 0) {
+                    break;
+                }
+            }
+        }
+    }
+
+    ui::DisplayVector<ftl::Future<std::monostate>> presentFutures;
     for (const auto& output : args.outputs) {
-        output->present(args);
+        presentFutures.push_back(output->present(args));
+    }
+
+    {
+        ATRACE_NAME("Waiting on HWC");
+        for (auto& future : presentFutures) {
+            // TODO(b/185536303): Call ftl::Future::wait() once it exists, since
+            // we do not need the return value of get().
+            future.get();
+        }
     }
 }
 

diff --git a/services/surfaceflinger/CompositionEngine/src/Display.cpp b/services/surfaceflinger/CompositionEngine/src/Display.cpp
index 469fb38..0475881 100644
--- a/services/surfaceflinger/CompositionEngine/src/Display.cpp
+++ b/services/surfaceflinger/CompositionEngine/src/Display.cpp

@@ -430,4 +430,13 @@
     impl::Output::finishFrame(std::move(result));
 }
 
+bool Display::supportsOffloadPresent() const {
+    if (const auto halDisplayId = HalDisplayId::tryCast(mId)) {
+        const auto& hwc = getCompositionEngine().getHwComposer();
+        return hwc.hasDisplayCapability(*halDisplayId, DisplayCapability::MULTI_THREADED_PRESENT);
+    }
+
+    return false;
+}
+
 } // namespace android::compositionengine::impl

diff --git a/services/surfaceflinger/CompositionEngine/src/Output.cpp b/services/surfaceflinger/CompositionEngine/src/Output.cpp
index 2ae80de..e4d7578 100644
--- a/services/surfaceflinger/CompositionEngine/src/Output.cpp
+++ b/services/surfaceflinger/CompositionEngine/src/Output.cpp

@@ -427,7 +427,8 @@
     uncacheBuffers(refreshArgs.bufferIdsToUncache);
 }
 
-void Output::present(const compositionengine::CompositionRefreshArgs& refreshArgs) {
+ftl::Future<std::monostate> Output::present(
+        const compositionengine::CompositionRefreshArgs& refreshArgs) {
     ATRACE_FORMAT("%s for %s", __func__, mNamePlusId.c_str());
     ALOGV(__FUNCTION__);
 
@@ -448,8 +449,26 @@
 
     devOptRepaintFlash(refreshArgs);
     finishFrame(std::move(result));
-    presentFrameAndReleaseLayers();
+    ftl::Future<std::monostate> future;
+    if (mOffloadPresent) {
+        future = presentFrameAndReleaseLayersAsync();
+
+        // Only offload for this frame. The next frame will determine whether it
+        // needs to be offloaded. Leave the HwcAsyncWorker in place. For one thing,
+        // it is currently presenting. Further, it may be needed next frame, and
+        // we don't want to churn.
+        mOffloadPresent = false;
+    } else {
+        presentFrameAndReleaseLayers();
+        future = ftl::yield<std::monostate>({});
+    }
     renderCachedSets(refreshArgs);
+    return future;
+}
+
+void Output::offloadPresentNextFrame() {
+    mOffloadPresent = true;
+    updateHwcAsyncWorker();
 }
 
 void Output::uncacheBuffers(std::vector<uint64_t> const& bufferIdsToUncache) {
@@ -1084,6 +1103,14 @@
     finishPrepareFrame();
 }
 
+ftl::Future<std::monostate> Output::presentFrameAndReleaseLayersAsync() {
+    return ftl::Future<bool>(std::move(mHwComposerAsyncWorker->send([&]() {
+               presentFrameAndReleaseLayers();
+               return true;
+           })))
+            .then([](bool) { return std::monostate{}; });
+}
+
 std::future<bool> Output::chooseCompositionStrategyAsync(
         std::optional<android::HWComposer::DeviceRequestedChanges>* changes) {
     return mHwComposerAsyncWorker->send(
@@ -1600,8 +1627,15 @@
 }
 
 void Output::setPredictCompositionStrategy(bool predict) {
-    if (predict) {
-        mHwComposerAsyncWorker = std::make_unique<HwcAsyncWorker>();
+    mPredictCompositionStrategy = predict;
+    updateHwcAsyncWorker();
+}
+
+void Output::updateHwcAsyncWorker() {
+    if (mPredictCompositionStrategy || mOffloadPresent) {
+        if (!mHwComposerAsyncWorker) {
+            mHwComposerAsyncWorker = std::make_unique<HwcAsyncWorker>();
+        }
     } else {
         mHwComposerAsyncWorker.reset(nullptr);
     }
@@ -1616,7 +1650,7 @@
     uint64_t outputLayerHash = getState().outputLayerHash;
     editState().lastOutputLayerHash = outputLayerHash;
 
-    if (!getState().isEnabled || !mHwComposerAsyncWorker) {
+    if (!getState().isEnabled || !mPredictCompositionStrategy) {
         ALOGV("canPredictCompositionStrategy disabled");
         return false;
     }
commit	2f60d735f5e2e77643efa8b7129aeb81f93c9008	[log] [tgz]
author	Leon Scroggins III <scroggo@google.com>	Mon Sep 12 14:42:38 2022 -0400
committer	Leon Scroggins III <scroggo@google.com>	Tue Nov 21 15:10:58 2023 -0500
tree	2e14e54fafa59e5e092505a95265f094939b594a
parent	b345a2c1a20252df3c7ad4aa969a144d97362e88 [diff]