FTL: Yield futures without overhead

ftl::yield, which lifts T to std::future<T>, incurs the cost of
allocating, ref counting, and locking the latter's shared state.

Consolidate the existing std::future extensions into ftl::Future,
and optimize ftl::yield by including static storage for T within.

Bug: 232436803
Test: simpleperf (-31% cycles in postFramebuffer)
Change-Id: I9a7ca7de17e7af10515de97d2f6a0dfa24e35d7a
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index d8a2696..d333ff8 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -1693,7 +1693,7 @@
     }
 
     const char* const whence = __func__;
-    return ftl::chain(mScheduler->schedule([=]() FTL_FAKE_GUARD(mStateLock) {
+    return ftl::Future(mScheduler->schedule([=]() FTL_FAKE_GUARD(mStateLock) {
                if (const auto display = getDisplayDeviceLocked(displayToken)) {
                    const bool supportsDisplayBrightnessCommand =
                            getHwComposer().getComposer()->isSupported(
@@ -1731,7 +1731,7 @@
                    return ftl::yield<status_t>(NAME_NOT_FOUND);
                }
            }))
-            .then([](std::future<status_t> task) { return task; })
+            .then([](ftl::Future<status_t> task) { return task; })
             .get();
 }
 
@@ -6577,7 +6577,7 @@
     return fenceStatus(future.get());
 }
 
-std::shared_future<FenceResult> SurfaceFlinger::captureScreenCommon(
+ftl::SharedFuture<FenceResult> SurfaceFlinger::captureScreenCommon(
         RenderAreaFuture renderAreaFuture, TraverseLayersFunction traverseLayers,
         ui::Size bufferSize, ui::PixelFormat reqPixelFormat, bool allowProtected, bool grayscale,
         const sp<IScreenCaptureListener>& captureListener) {
@@ -6629,7 +6629,7 @@
                                false /* regionSampling */, grayscale, captureListener);
 }
 
-std::shared_future<FenceResult> SurfaceFlinger::captureScreenCommon(
+ftl::SharedFuture<FenceResult> SurfaceFlinger::captureScreenCommon(
         RenderAreaFuture renderAreaFuture, TraverseLayersFunction traverseLayers,
         const std::shared_ptr<renderengine::ExternalTexture>& buffer, bool regionSampling,
         bool grayscale, const sp<IScreenCaptureListener>& captureListener) {
@@ -6638,7 +6638,7 @@
     bool canCaptureBlackoutContent = hasCaptureBlackoutContentPermission();
 
     auto future = mScheduler->schedule([=, renderAreaFuture = std::move(renderAreaFuture)]() mutable
-                                       -> std::shared_future<FenceResult> {
+                                       -> ftl::SharedFuture<FenceResult> {
         ScreenCaptureResults captureResults;
         std::unique_ptr<RenderArea> renderArea = renderAreaFuture.get();
         if (!renderArea) {
@@ -6648,7 +6648,7 @@
             return ftl::yield<FenceResult>(base::unexpected(NO_ERROR)).share();
         }
 
-        std::shared_future<FenceResult> renderFuture;
+        ftl::SharedFuture<FenceResult> renderFuture;
         renderArea->render([&] {
             renderFuture =
                     renderScreenImpl(*renderArea, traverseLayers, buffer, canCaptureBlackoutContent,
@@ -6675,15 +6675,14 @@
     }
 
     // Flatten nested futures.
-    std::future<FenceResult> chain =
-            ftl::chain(std::move(future)).then([](std::shared_future<FenceResult> future) {
-                return future.get();
-            });
+    auto chain = ftl::Future(std::move(future)).then([](ftl::SharedFuture<FenceResult> future) {
+        return future;
+    });
 
     return chain.share();
 }
 
-std::shared_future<FenceResult> SurfaceFlinger::renderScreenImpl(
+ftl::SharedFuture<FenceResult> SurfaceFlinger::renderScreenImpl(
         const RenderArea& renderArea, TraverseLayersFunction traverseLayers,
         const std::shared_ptr<renderengine::ExternalTexture>& buffer,
         bool canCaptureBlackoutContent, bool regionSampling, bool grayscale,
@@ -6824,10 +6823,10 @@
     getRenderEngine().useProtectedContext(useProtected);
 
     constexpr bool kUseFramebufferCache = false;
-    std::future<FenceResult> chain =
-            ftl::chain(getRenderEngine().drawLayers(clientCompositionDisplay,
-                                                    clientRenderEngineLayers, buffer,
-                                                    kUseFramebufferCache, std::move(bufferFence)))
+    auto chain =
+            ftl::Future(getRenderEngine().drawLayers(clientCompositionDisplay,
+                                                     clientRenderEngineLayers, buffer,
+                                                     kUseFramebufferCache, std::move(bufferFence)))
                     .then(&toFenceResult);
 
     const auto future = chain.share();