FTL: Yield futures without overhead

ftl::yield, which lifts T to std::future<T>, incurs the cost of
allocating, ref counting, and locking the latter's shared state.

Consolidate the existing std::future extensions into ftl::Future,
and optimize ftl::yield by including static storage for T within.

Bug: 232436803
Test: simpleperf (-31% cycles in postFramebuffer)
Change-Id: I9a7ca7de17e7af10515de97d2f6a0dfa24e35d7a
diff --git a/include/ftl/details/future.h b/include/ftl/details/future.h
new file mode 100644
index 0000000..df1323e
--- /dev/null
+++ b/include/ftl/details/future.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2022 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+namespace android::ftl {
+
+template <typename, template <typename> class>
+class Future;
+
+namespace details {
+
+template <typename T>
+struct future_result {
+  using type = T;
+};
+
+template <typename T>
+struct future_result<std::future<T>> {
+  using type = T;
+};
+
+template <typename T>
+struct future_result<std::shared_future<T>> {
+  using type = T;
+};
+
+template <typename T, template <typename> class FutureImpl>
+struct future_result<Future<T, FutureImpl>> {
+  using type = T;
+};
+
+template <typename T>
+using future_result_t = typename future_result<T>::type;
+
+struct ValueTag {};
+
+template <typename, typename T, template <typename> class>
+class BaseFuture;
+
+template <typename Self, typename T>
+class BaseFuture<Self, T, std::future> {
+  using Impl = std::future<T>;
+
+ public:
+  Future<T, std::shared_future> share() {
+    if (T* value = std::get_if<T>(&self())) {
+      return {ValueTag{}, std::move(*value)};
+    }
+
+    return std::get<Impl>(self()).share();
+  }
+
+ protected:
+  T get() {
+    if (T* value = std::get_if<T>(&self())) {
+      return std::move(*value);
+    }
+
+    return std::get<Impl>(self()).get();
+  }
+
+ private:
+  auto& self() { return static_cast<Self&>(*this).future_; }
+};
+
+template <typename Self, typename T>
+class BaseFuture<Self, T, std::shared_future> {
+  using Impl = std::shared_future<T>;
+
+ protected:
+  const T& get() const {
+    if (const T* value = std::get_if<T>(&self())) {
+      return *value;
+    }
+
+    return std::get<Impl>(self()).get();
+  }
+
+ private:
+  const auto& self() const { return static_cast<const Self&>(*this).future_; }
+};
+
+}  // namespace details
+}  // namespace android::ftl
diff --git a/include/ftl/future.h b/include/ftl/future.h
index dd6358f..c78f9b7 100644
--- a/include/ftl/future.h
+++ b/include/ftl/future.h
@@ -19,18 +19,87 @@
 #include <future>
 #include <type_traits>
 #include <utility>
+#include <variant>
+
+#include <ftl/details/future.h>
 
 namespace android::ftl {
 
-// Creates a future that defers a function call until its result is queried.
+// Thin wrapper around FutureImpl<T> (concretely std::future<T> or std::shared_future<T>) with
+// extensions for pure values (created via ftl::yield) and continuations.
 //
-//   auto future = ftl::defer([](int x) { return x + 1; }, 99);
-//   assert(future.get() == 100);
+// See also SharedFuture<T> shorthand below.
 //
-template <typename F, typename... Args>
-inline auto defer(F&& f, Args&&... args) {
-  return std::async(std::launch::deferred, std::forward<F>(f), std::forward<Args>(args)...);
-}
+template <typename T, template <typename> class FutureImpl = std::future>
+class Future final : public details::BaseFuture<Future<T, FutureImpl>, T, FutureImpl> {
+  using Base = details::BaseFuture<Future, T, FutureImpl>;
+
+  friend Base;                                            // For BaseFuture<...>::self.
+  friend details::BaseFuture<Future<T>, T, std::future>;  // For BaseFuture<...>::share.
+
+ public:
+  // Constructs an invalid future.
+  Future() : future_(std::in_place_type<FutureImpl<T>>) {}
+
+  // Constructs a future from its standard counterpart, implicitly.
+  Future(FutureImpl<T>&& f) : future_(std::move(f)) {}
+
+  bool valid() const {
+    return std::holds_alternative<T>(future_) || std::get<FutureImpl<T>>(future_).valid();
+  }
+
+  // Forwarding functions. Base::share is only defined when FutureImpl is std::future, whereas the
+  // following are defined for either FutureImpl:
+  using Base::get;
+
+  // Attaches a continuation to the future. The continuation is a function that maps T to either R
+  // or ftl::Future<R>. In the former case, the chain wraps the result in a future as if by
+  // ftl::yield.
+  //
+  //   auto future = ftl::yield(123);
+  //   ftl::Future<char> futures[] = {ftl::yield('a'), ftl::yield('b')};
+  //
+  //   auto chain =
+  //       ftl::Future(std::move(future))
+  //           .then([](int x) { return static_cast<std::size_t>(x % 2); })
+  //           .then([&futures](std::size_t i) { return std::move(futures[i]); });
+  //
+  //   assert(chain.get() == 'b');
+  //
+  template <typename F, typename R = std::invoke_result_t<F, T>>
+  auto then(F&& op) && -> Future<details::future_result_t<R>> {
+    return defer(
+        [](auto&& f, F&& op) {
+          R r = op(f.get());
+          if constexpr (std::is_same_v<R, details::future_result_t<R>>) {
+            return r;
+          } else {
+            return r.get();
+          }
+        },
+        std::move(*this), std::forward<F>(op));
+  }
+
+ private:
+  template <typename V>
+  friend Future<V> yield(V&&);
+
+  template <typename V, typename... Args>
+  friend Future<V> yield(Args&&...);
+
+  template <typename... Args>
+  Future(details::ValueTag, Args&&... args)
+      : future_(std::in_place_type<T>, std::forward<Args>(args)...) {}
+
+  std::variant<T, FutureImpl<T>> future_;
+};
+
+template <typename T>
+using SharedFuture = Future<T, std::shared_future>;
+
+// Deduction guide for implicit conversion.
+template <typename T, template <typename> class FutureImpl>
+Future(FutureImpl<T>&&) -> Future<T, FutureImpl>;
 
 // Creates a future that wraps a value.
 //
@@ -41,69 +110,24 @@
 //   auto future = ftl::yield(std::move(ptr));
 //   assert(*future.get() == '!');
 //
-template <typename T>
-inline std::future<T> yield(T&& v) {
-  return defer([](T&& v) { return std::forward<T>(v); }, std::forward<T>(v));
+template <typename V>
+inline Future<V> yield(V&& value) {
+  return {details::ValueTag{}, std::move(value)};
 }
 
-namespace details {
+template <typename V, typename... Args>
+inline Future<V> yield(Args&&... args) {
+  return {details::ValueTag{}, std::forward<Args>(args)...};
+}
 
-template <typename T>
-struct future_result {
-  using type = T;
-};
-
-template <typename T>
-struct future_result<std::future<T>> {
-  using type = T;
-};
-
-template <typename T>
-using future_result_t = typename future_result<T>::type;
-
-// Attaches a continuation to a future. The continuation is a function that maps T to either R or
-// std::future<R>. In the former case, the chain wraps the result in a future as if by ftl::yield.
+// Creates a future that defers a function call until its result is queried.
 //
-//   auto future = ftl::yield(123);
-//   std::future<char> futures[] = {ftl::yield('a'), ftl::yield('b')};
+//   auto future = ftl::defer([](int x) { return x + 1; }, 99);
+//   assert(future.get() == 100);
 //
-//   std::future<char> chain =
-//       ftl::chain(std::move(future))
-//           .then([](int x) { return static_cast<std::size_t>(x % 2); })
-//           .then([&futures](std::size_t i) { return std::move(futures[i]); });
-//
-//   assert(chain.get() == 'b');
-//
-template <typename T>
-struct Chain {
-  // Implicit conversion.
-  Chain(std::future<T>&& f) : future(std::move(f)) {}
-  operator std::future<T>&&() && { return std::move(future); }
-
-  T get() && { return future.get(); }
-
-  template <typename F, typename R = std::invoke_result_t<F, T>>
-  auto then(F&& op) && -> Chain<future_result_t<R>> {
-    return defer(
-        [](auto&& f, F&& op) {
-          R r = op(f.get());
-          if constexpr (std::is_same_v<R, future_result_t<R>>) {
-            return r;
-          } else {
-            return r.get();
-          }
-        },
-        std::move(future), std::forward<F>(op));
-  }
-
-  std::future<T> future;
-};
-
-}  // namespace details
-
-template <typename T>
-inline auto chain(std::future<T>&& f) -> details::Chain<T> {
-  return std::move(f);
+template <typename F, typename... Args>
+inline auto defer(F&& f, Args&&... args) {
+  return Future(std::async(std::launch::deferred, std::forward<F>(f), std::forward<Args>(args)...));
 }
 
 }  // namespace android::ftl
diff --git a/libs/ftl/future_test.cpp b/libs/ftl/future_test.cpp
index 9b3e936..5a245b6 100644
--- a/libs/ftl/future_test.cpp
+++ b/libs/ftl/future_test.cpp
@@ -42,9 +42,9 @@
   }
   {
     auto future = ftl::yield(123);
-    std::future<char> futures[] = {ftl::yield('a'), ftl::yield('b')};
+    ftl::Future<char> futures[] = {ftl::yield('a'), ftl::yield('b')};
 
-    std::future<char> chain = ftl::chain(std::move(future))
+    ftl::Future<char> chain = ftl::Future(std::move(future))
                                   .then([](int x) { return static_cast<size_t>(x % 2); })
                                   .then([&futures](size_t i) { return std::move(futures[i]); });
 
@@ -71,7 +71,7 @@
     return ByteVector{str.begin(), str.end()};
   });
 
-  std::packaged_task<std::future<ByteVector>(ByteVector)> decrement_bytes(
+  std::packaged_task<ftl::Future<ByteVector>(ByteVector)> decrement_bytes(
       [](ByteVector bytes) { return ftl::defer(decrement, std::move(bytes)); });
 
   auto fetch = fetch_string.get_future();
@@ -81,7 +81,7 @@
 
   EXPECT_EQ(
       "hello, world",
-      ftl::chain(std::move(fetch))
+      ftl::Future(std::move(fetch))
           .then([](const char* str) { return std::string(str); })
           .then([&](std::string str) {
             auto append = append_string.get_future();
@@ -93,7 +93,7 @@
             decrement_thread = std::thread(std::move(decrement_bytes), std::move(bytes));
             return decrement;
           })
-          .then([](std::future<ByteVector> bytes) { return bytes; })
+          .then([](ftl::Future<ByteVector> bytes) { return bytes; })
           .then([](const ByteVector& bytes) { return std::string(bytes.begin(), bytes.end()); })
           .get());
 
diff --git a/services/surfaceflinger/BufferQueueLayer.cpp b/services/surfaceflinger/BufferQueueLayer.cpp
index 6a1a38b..00b7085 100644
--- a/services/surfaceflinger/BufferQueueLayer.cpp
+++ b/services/surfaceflinger/BufferQueueLayer.cpp
@@ -48,7 +48,7 @@
 // Interface implementation for Layer
 // -----------------------------------------------------------------------
 
-void BufferQueueLayer::onLayerDisplayed(std::shared_future<FenceResult> futureFenceResult) {
+void BufferQueueLayer::onLayerDisplayed(ftl::SharedFuture<FenceResult> futureFenceResult) {
     const sp<Fence> releaseFence = futureFenceResult.get().value_or(Fence::NO_FENCE);
     mConsumer->setReleaseFence(releaseFence);
 
diff --git a/services/surfaceflinger/BufferQueueLayer.h b/services/surfaceflinger/BufferQueueLayer.h
index 4587b5e..6038083 100644
--- a/services/surfaceflinger/BufferQueueLayer.h
+++ b/services/surfaceflinger/BufferQueueLayer.h
@@ -42,7 +42,7 @@
     // Implements Layer.
     const char* getType() const override { return "BufferQueueLayer"; }
 
-    void onLayerDisplayed(std::shared_future<FenceResult>) override;
+    void onLayerDisplayed(ftl::SharedFuture<FenceResult>) override;
 
     // If a buffer was replaced this frame, release the former buffer
     void releasePendingBuffer(nsecs_t dequeueReadyTime) override;
diff --git a/services/surfaceflinger/BufferStateLayer.cpp b/services/surfaceflinger/BufferStateLayer.cpp
index c885110..b424a4a 100644
--- a/services/surfaceflinger/BufferStateLayer.cpp
+++ b/services/surfaceflinger/BufferStateLayer.cpp
@@ -73,7 +73,7 @@
 // -----------------------------------------------------------------------
 // Interface implementation for Layer
 // -----------------------------------------------------------------------
-void BufferStateLayer::onLayerDisplayed(std::shared_future<FenceResult> futureFenceResult) {
+void BufferStateLayer::onLayerDisplayed(ftl::SharedFuture<FenceResult> futureFenceResult) {
     // If we are displayed on multiple displays in a single composition cycle then we would
     // need to do careful tracking to enable the use of the mLastClientCompositionFence.
     //  For example we can only use it if all the displays are client comp, and we need
diff --git a/services/surfaceflinger/BufferStateLayer.h b/services/surfaceflinger/BufferStateLayer.h
index cc510d8..bce36d8 100644
--- a/services/surfaceflinger/BufferStateLayer.h
+++ b/services/surfaceflinger/BufferStateLayer.h
@@ -39,7 +39,7 @@
     // Implements Layer.
     const char* getType() const override { return "BufferStateLayer"; }
 
-    void onLayerDisplayed(std::shared_future<FenceResult>) override;
+    void onLayerDisplayed(ftl::SharedFuture<FenceResult>) override;
 
     void releasePendingBuffer(nsecs_t dequeueReadyTime) override;
 
diff --git a/services/surfaceflinger/CompositionEngine/include/compositionengine/LayerFE.h b/services/surfaceflinger/CompositionEngine/include/compositionengine/LayerFE.h
index b7fc62f..ec610c1 100644
--- a/services/surfaceflinger/CompositionEngine/include/compositionengine/LayerFE.h
+++ b/services/surfaceflinger/CompositionEngine/include/compositionengine/LayerFE.h
@@ -16,7 +16,6 @@
 
 #pragma once
 
-#include <future>
 #include <optional>
 #include <ostream>
 #include <unordered_set>
@@ -33,6 +32,7 @@
 // TODO(b/129481165): remove the #pragma below and fix conversion issues
 #pragma clang diagnostic pop // ignored "-Wconversion -Wextra"
 
+#include <ftl/future.h>
 #include <utils/RefBase.h>
 #include <utils/Timers.h>
 
@@ -157,7 +157,7 @@
             ClientCompositionTargetSettings&) = 0;
 
     // Called after the layer is displayed to update the presentation fence
-    virtual void onLayerDisplayed(std::shared_future<FenceResult>) = 0;
+    virtual void onLayerDisplayed(ftl::SharedFuture<FenceResult>) = 0;
 
     // Gets some kind of identifier for the layer for debug purposes.
     virtual const char* getDebugName() const = 0;
diff --git a/services/surfaceflinger/CompositionEngine/include/compositionengine/mock/LayerFE.h b/services/surfaceflinger/CompositionEngine/include/compositionengine/mock/LayerFE.h
index 871599d..1c5c10f 100644
--- a/services/surfaceflinger/CompositionEngine/include/compositionengine/mock/LayerFE.h
+++ b/services/surfaceflinger/CompositionEngine/include/compositionengine/mock/LayerFE.h
@@ -49,7 +49,7 @@
                  std::vector<compositionengine::LayerFE::LayerSettings>(
                          compositionengine::LayerFE::ClientCompositionTargetSettings&));
 
-    MOCK_METHOD1(onLayerDisplayed, void(std::shared_future<FenceResult>));
+    MOCK_METHOD(void, onLayerDisplayed, (ftl::SharedFuture<FenceResult>), (override));
 
     MOCK_CONST_METHOD0(getDebugName, const char*());
     MOCK_CONST_METHOD0(getSequence, int32_t());
diff --git a/services/surfaceflinger/CompositionEngine/tests/MockHWComposer.h b/services/surfaceflinger/CompositionEngine/tests/MockHWComposer.h
index ff2aa15..9b12b08 100644
--- a/services/surfaceflinger/CompositionEngine/tests/MockHWComposer.h
+++ b/services/surfaceflinger/CompositionEngine/tests/MockHWComposer.h
@@ -84,9 +84,9 @@
     MOCK_METHOD4(setDisplayContentSamplingEnabled, status_t(HalDisplayId, bool, uint8_t, uint64_t));
     MOCK_METHOD4(getDisplayedContentSample,
                  status_t(HalDisplayId, uint64_t, uint64_t, DisplayedFrameStats*));
-    MOCK_METHOD4(setDisplayBrightness,
-                 std::future<status_t>(PhysicalDisplayId, float, float,
-                                       const Hwc2::Composer::DisplayBrightnessOptions&));
+    MOCK_METHOD(ftl::Future<status_t>, setDisplayBrightness,
+                (PhysicalDisplayId, float, float, const Hwc2::Composer::DisplayBrightnessOptions&),
+                (override));
     MOCK_METHOD2(getDisplayBrightnessSupport, status_t(PhysicalDisplayId, bool*));
 
     MOCK_METHOD2(onHotplug,
diff --git a/services/surfaceflinger/CompositionEngine/tests/OutputTest.cpp b/services/surfaceflinger/CompositionEngine/tests/OutputTest.cpp
index 784abea..deaea87 100644
--- a/services/surfaceflinger/CompositionEngine/tests/OutputTest.cpp
+++ b/services/surfaceflinger/CompositionEngine/tests/OutputTest.cpp
@@ -3189,15 +3189,15 @@
     // would not survive certain calls like Fence::merge() which would return a
     // new instance.
     EXPECT_CALL(*mLayer1.layerFE, onLayerDisplayed(_))
-            .WillOnce([&layer1Fence](std::shared_future<FenceResult> futureFenceResult) {
+            .WillOnce([&layer1Fence](ftl::SharedFuture<FenceResult> futureFenceResult) {
                 EXPECT_EQ(FenceResult(layer1Fence), futureFenceResult.get());
             });
     EXPECT_CALL(*mLayer2.layerFE, onLayerDisplayed(_))
-            .WillOnce([&layer2Fence](std::shared_future<FenceResult> futureFenceResult) {
+            .WillOnce([&layer2Fence](ftl::SharedFuture<FenceResult> futureFenceResult) {
                 EXPECT_EQ(FenceResult(layer2Fence), futureFenceResult.get());
             });
     EXPECT_CALL(*mLayer3.layerFE, onLayerDisplayed(_))
-            .WillOnce([&layer3Fence](std::shared_future<FenceResult> futureFenceResult) {
+            .WillOnce([&layer3Fence](ftl::SharedFuture<FenceResult> futureFenceResult) {
                 EXPECT_EQ(FenceResult(layer3Fence), futureFenceResult.get());
             });
 
@@ -3256,15 +3256,15 @@
 
     // Each released layer should be given the presentFence.
     EXPECT_CALL(*releasedLayer1, onLayerDisplayed(_))
-            .WillOnce([&presentFence](std::shared_future<FenceResult> futureFenceResult) {
+            .WillOnce([&presentFence](ftl::SharedFuture<FenceResult> futureFenceResult) {
                 EXPECT_EQ(FenceResult(presentFence), futureFenceResult.get());
             });
     EXPECT_CALL(*releasedLayer2, onLayerDisplayed(_))
-            .WillOnce([&presentFence](std::shared_future<FenceResult> futureFenceResult) {
+            .WillOnce([&presentFence](ftl::SharedFuture<FenceResult> futureFenceResult) {
                 EXPECT_EQ(FenceResult(presentFence), futureFenceResult.get());
             });
     EXPECT_CALL(*releasedLayer3, onLayerDisplayed(_))
-            .WillOnce([&presentFence](std::shared_future<FenceResult> futureFenceResult) {
+            .WillOnce([&presentFence](ftl::SharedFuture<FenceResult> futureFenceResult) {
                 EXPECT_EQ(FenceResult(presentFence), futureFenceResult.get());
             });
 
diff --git a/services/surfaceflinger/DisplayHardware/HWC2.cpp b/services/surfaceflinger/DisplayHardware/HWC2.cpp
index adf4be3..c52e96d 100644
--- a/services/surfaceflinger/DisplayHardware/HWC2.cpp
+++ b/services/surfaceflinger/DisplayHardware/HWC2.cpp
@@ -27,7 +27,6 @@
 #include "HWC2.h"
 
 #include <android/configuration.h>
-#include <ftl/future.h>
 #include <ui/Fence.h>
 #include <ui/FloatRect.h>
 #include <ui/GraphicBuffer.h>
@@ -543,7 +542,7 @@
     return error;
 }
 
-std::future<Error> Display::setDisplayBrightness(
+ftl::Future<Error> Display::setDisplayBrightness(
         float brightness, float brightnessNits,
         const Hwc2::Composer::DisplayBrightnessOptions& options) {
     return ftl::defer([composer = &mComposer, id = mId, brightness, brightnessNits, options] {
diff --git a/services/surfaceflinger/DisplayHardware/HWC2.h b/services/surfaceflinger/DisplayHardware/HWC2.h
index cca20bd..24aef9b 100644
--- a/services/surfaceflinger/DisplayHardware/HWC2.h
+++ b/services/surfaceflinger/DisplayHardware/HWC2.h
@@ -18,6 +18,7 @@
 
 #include <android-base/expected.h>
 #include <android-base/thread_annotations.h>
+#include <ftl/future.h>
 #include <gui/HdrMetadata.h>
 #include <math/mat4.h>
 #include <ui/HdrCapabilities.h>
@@ -28,7 +29,6 @@
 #include <utils/Timers.h>
 
 #include <functional>
-#include <future>
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
@@ -147,7 +147,7 @@
                                                        uint32_t* outNumRequests,
                                                        android::sp<android::Fence>* outPresentFence,
                                                        uint32_t* state) = 0;
-    [[nodiscard]] virtual std::future<hal::Error> setDisplayBrightness(
+    [[nodiscard]] virtual ftl::Future<hal::Error> setDisplayBrightness(
             float brightness, float brightnessNits,
             const Hwc2::Composer::DisplayBrightnessOptions& options) = 0;
     [[nodiscard]] virtual hal::Error setActiveConfigWithConstraints(
@@ -229,7 +229,7 @@
                                  uint32_t* outNumRequests,
                                  android::sp<android::Fence>* outPresentFence,
                                  uint32_t* state) override;
-    std::future<hal::Error> setDisplayBrightness(
+    ftl::Future<hal::Error> setDisplayBrightness(
             float brightness, float brightnessNits,
             const Hwc2::Composer::DisplayBrightnessOptions& options) override;
     hal::Error setActiveConfigWithConstraints(hal::HWConfigId configId,
diff --git a/services/surfaceflinger/DisplayHardware/HWComposer.cpp b/services/surfaceflinger/DisplayHardware/HWComposer.cpp
index 79e4c75..0da8ece 100644
--- a/services/surfaceflinger/DisplayHardware/HWComposer.cpp
+++ b/services/surfaceflinger/DisplayHardware/HWComposer.cpp
@@ -30,7 +30,6 @@
 #include <compositionengine/Output.h>
 #include <compositionengine/OutputLayer.h>
 #include <compositionengine/impl/OutputLayerCompositionState.h>
-#include <ftl/future.h>
 #include <log/log.h>
 #include <ui/DebugUtils.h>
 #include <ui/GraphicBuffer.h>
@@ -720,13 +719,13 @@
     return NO_ERROR;
 }
 
-std::future<status_t> HWComposer::setDisplayBrightness(
+ftl::Future<status_t> HWComposer::setDisplayBrightness(
         PhysicalDisplayId displayId, float brightness, float brightnessNits,
         const Hwc2::Composer::DisplayBrightnessOptions& options) {
     RETURN_IF_INVALID_DISPLAY(displayId, ftl::yield<status_t>(BAD_INDEX));
     auto& display = mDisplayData[displayId].hwcDisplay;
 
-    return ftl::chain(display->setDisplayBrightness(brightness, brightnessNits, options))
+    return display->setDisplayBrightness(brightness, brightnessNits, options)
             .then([displayId](hal::Error error) -> status_t {
                 if (error == hal::Error::UNSUPPORTED) {
                     RETURN_IF_HWC_ERROR(error, displayId, INVALID_OPERATION);
diff --git a/services/surfaceflinger/DisplayHardware/HWComposer.h b/services/surfaceflinger/DisplayHardware/HWComposer.h
index 7dc10ea..4c0ecd8 100644
--- a/services/surfaceflinger/DisplayHardware/HWComposer.h
+++ b/services/surfaceflinger/DisplayHardware/HWComposer.h
@@ -17,7 +17,6 @@
 #pragma once
 
 #include <cstdint>
-#include <future>
 #include <memory>
 #include <mutex>
 #include <optional>
@@ -26,6 +25,7 @@
 #include <vector>
 
 #include <android-base/thread_annotations.h>
+#include <ftl/future.h>
 #include <ui/DisplayIdentification.h>
 #include <ui/FenceTime.h>
 
@@ -195,7 +195,7 @@
                                                DisplayedFrameStats* outStats) = 0;
 
     // Sets the brightness of a display.
-    virtual std::future<status_t> setDisplayBrightness(
+    virtual ftl::Future<status_t> setDisplayBrightness(
             PhysicalDisplayId, float brightness, float brightnessNits,
             const Hwc2::Composer::DisplayBrightnessOptions&) = 0;
 
@@ -372,7 +372,7 @@
                                               uint64_t maxFrames) override;
     status_t getDisplayedContentSample(HalDisplayId, uint64_t maxFrames, uint64_t timestamp,
                                        DisplayedFrameStats* outStats) override;
-    std::future<status_t> setDisplayBrightness(
+    ftl::Future<status_t> setDisplayBrightness(
             PhysicalDisplayId, float brightness, float brightnessNits,
             const Hwc2::Composer::DisplayBrightnessOptions&) override;
 
diff --git a/services/surfaceflinger/Layer.cpp b/services/surfaceflinger/Layer.cpp
index 2298f03..a4f25e2 100644
--- a/services/surfaceflinger/Layer.cpp
+++ b/services/surfaceflinger/Layer.cpp
@@ -213,7 +213,7 @@
  * Layer.  So, the implementation is done in BufferLayer.  When called on a
  * EffectLayer object, it's essentially a NOP.
  */
-void Layer::onLayerDisplayed(std::shared_future<FenceResult>) {}
+void Layer::onLayerDisplayed(ftl::SharedFuture<FenceResult>) {}
 
 void Layer::removeRelativeZ(const std::vector<Layer*>& layersInTree) {
     if (mDrawingState.zOrderRelativeOf == nullptr) {
diff --git a/services/surfaceflinger/Layer.h b/services/surfaceflinger/Layer.h
index 1007043..7475510 100644
--- a/services/surfaceflinger/Layer.h
+++ b/services/surfaceflinger/Layer.h
@@ -620,7 +620,7 @@
     void prepareCompositionState(compositionengine::LayerFE::StateSubset subset) override;
     std::vector<compositionengine::LayerFE::LayerSettings> prepareClientCompositionList(
             compositionengine::LayerFE::ClientCompositionTargetSettings&) override;
-    void onLayerDisplayed(std::shared_future<FenceResult>) override;
+    void onLayerDisplayed(ftl::SharedFuture<FenceResult>) override;
 
     void setWasClientComposed(const sp<Fence>& fence) override {
         mLastClientCompositionFence = fence;
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index d8a2696..d333ff8 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -1693,7 +1693,7 @@
     }
 
     const char* const whence = __func__;
-    return ftl::chain(mScheduler->schedule([=]() FTL_FAKE_GUARD(mStateLock) {
+    return ftl::Future(mScheduler->schedule([=]() FTL_FAKE_GUARD(mStateLock) {
                if (const auto display = getDisplayDeviceLocked(displayToken)) {
                    const bool supportsDisplayBrightnessCommand =
                            getHwComposer().getComposer()->isSupported(
@@ -1731,7 +1731,7 @@
                    return ftl::yield<status_t>(NAME_NOT_FOUND);
                }
            }))
-            .then([](std::future<status_t> task) { return task; })
+            .then([](ftl::Future<status_t> task) { return task; })
             .get();
 }
 
@@ -6577,7 +6577,7 @@
     return fenceStatus(future.get());
 }
 
-std::shared_future<FenceResult> SurfaceFlinger::captureScreenCommon(
+ftl::SharedFuture<FenceResult> SurfaceFlinger::captureScreenCommon(
         RenderAreaFuture renderAreaFuture, TraverseLayersFunction traverseLayers,
         ui::Size bufferSize, ui::PixelFormat reqPixelFormat, bool allowProtected, bool grayscale,
         const sp<IScreenCaptureListener>& captureListener) {
@@ -6629,7 +6629,7 @@
                                false /* regionSampling */, grayscale, captureListener);
 }
 
-std::shared_future<FenceResult> SurfaceFlinger::captureScreenCommon(
+ftl::SharedFuture<FenceResult> SurfaceFlinger::captureScreenCommon(
         RenderAreaFuture renderAreaFuture, TraverseLayersFunction traverseLayers,
         const std::shared_ptr<renderengine::ExternalTexture>& buffer, bool regionSampling,
         bool grayscale, const sp<IScreenCaptureListener>& captureListener) {
@@ -6638,7 +6638,7 @@
     bool canCaptureBlackoutContent = hasCaptureBlackoutContentPermission();
 
     auto future = mScheduler->schedule([=, renderAreaFuture = std::move(renderAreaFuture)]() mutable
-                                       -> std::shared_future<FenceResult> {
+                                       -> ftl::SharedFuture<FenceResult> {
         ScreenCaptureResults captureResults;
         std::unique_ptr<RenderArea> renderArea = renderAreaFuture.get();
         if (!renderArea) {
@@ -6648,7 +6648,7 @@
             return ftl::yield<FenceResult>(base::unexpected(NO_ERROR)).share();
         }
 
-        std::shared_future<FenceResult> renderFuture;
+        ftl::SharedFuture<FenceResult> renderFuture;
         renderArea->render([&] {
             renderFuture =
                     renderScreenImpl(*renderArea, traverseLayers, buffer, canCaptureBlackoutContent,
@@ -6675,15 +6675,14 @@
     }
 
     // Flatten nested futures.
-    std::future<FenceResult> chain =
-            ftl::chain(std::move(future)).then([](std::shared_future<FenceResult> future) {
-                return future.get();
-            });
+    auto chain = ftl::Future(std::move(future)).then([](ftl::SharedFuture<FenceResult> future) {
+        return future;
+    });
 
     return chain.share();
 }
 
-std::shared_future<FenceResult> SurfaceFlinger::renderScreenImpl(
+ftl::SharedFuture<FenceResult> SurfaceFlinger::renderScreenImpl(
         const RenderArea& renderArea, TraverseLayersFunction traverseLayers,
         const std::shared_ptr<renderengine::ExternalTexture>& buffer,
         bool canCaptureBlackoutContent, bool regionSampling, bool grayscale,
@@ -6824,10 +6823,10 @@
     getRenderEngine().useProtectedContext(useProtected);
 
     constexpr bool kUseFramebufferCache = false;
-    std::future<FenceResult> chain =
-            ftl::chain(getRenderEngine().drawLayers(clientCompositionDisplay,
-                                                    clientRenderEngineLayers, buffer,
-                                                    kUseFramebufferCache, std::move(bufferFence)))
+    auto chain =
+            ftl::Future(getRenderEngine().drawLayers(clientCompositionDisplay,
+                                                     clientRenderEngineLayers, buffer,
+                                                     kUseFramebufferCache, std::move(bufferFence)))
                     .then(&toFenceResult);
 
     const auto future = chain.share();
diff --git a/services/surfaceflinger/SurfaceFlinger.h b/services/surfaceflinger/SurfaceFlinger.h
index dc54ac2..52dd418 100644
--- a/services/surfaceflinger/SurfaceFlinger.h
+++ b/services/surfaceflinger/SurfaceFlinger.h
@@ -27,6 +27,7 @@
 #include <android/gui/DisplayState.h>
 #include <cutils/atomic.h>
 #include <cutils/compiler.h>
+#include <ftl/future.h>
 #include <ftl/small_map.h>
 #include <gui/BufferQueue.h>
 #include <gui/FrameTimestamps.h>
@@ -77,7 +78,6 @@
 #include <atomic>
 #include <cstdint>
 #include <functional>
-#include <future>
 #include <map>
 #include <memory>
 #include <mutex>
@@ -398,7 +398,7 @@
     using VsyncModulator = scheduler::VsyncModulator;
     using TransactionSchedule = scheduler::TransactionSchedule;
     using TraverseLayersFunction = std::function<void(const LayerVector::Visitor&)>;
-    using RenderAreaFuture = std::future<std::unique_ptr<RenderArea>>;
+    using RenderAreaFuture = ftl::Future<std::unique_ptr<RenderArea>>;
     using DumpArgs = Vector<String16>;
     using Dumper = std::function<void(const DumpArgs&, bool asProto, std::string&)>;
 
@@ -868,15 +868,15 @@
     // Boot animation, on/off animations and screen capture
     void startBootAnim();
 
-    std::shared_future<FenceResult> captureScreenCommon(RenderAreaFuture, TraverseLayersFunction,
-                                                        ui::Size bufferSize, ui::PixelFormat,
-                                                        bool allowProtected, bool grayscale,
-                                                        const sp<IScreenCaptureListener>&);
-    std::shared_future<FenceResult> captureScreenCommon(
+    ftl::SharedFuture<FenceResult> captureScreenCommon(RenderAreaFuture, TraverseLayersFunction,
+                                                       ui::Size bufferSize, ui::PixelFormat,
+                                                       bool allowProtected, bool grayscale,
+                                                       const sp<IScreenCaptureListener>&);
+    ftl::SharedFuture<FenceResult> captureScreenCommon(
             RenderAreaFuture, TraverseLayersFunction,
             const std::shared_ptr<renderengine::ExternalTexture>&, bool regionSampling,
             bool grayscale, const sp<IScreenCaptureListener>&);
-    std::shared_future<FenceResult> renderScreenImpl(
+    ftl::SharedFuture<FenceResult> renderScreenImpl(
             const RenderArea&, TraverseLayersFunction,
             const std::shared_ptr<renderengine::ExternalTexture>&, bool canCaptureBlackoutContent,
             bool regionSampling, bool grayscale, ScreenCaptureResults&) EXCLUDES(mStateLock);
diff --git a/services/surfaceflinger/TransactionCallbackInvoker.h b/services/surfaceflinger/TransactionCallbackInvoker.h
index b96444d..81d79f0 100644
--- a/services/surfaceflinger/TransactionCallbackInvoker.h
+++ b/services/surfaceflinger/TransactionCallbackInvoker.h
@@ -18,7 +18,6 @@
 
 #include <condition_variable>
 #include <deque>
-#include <future>
 #include <mutex>
 #include <queue>
 #include <thread>
@@ -26,9 +25,9 @@
 #include <unordered_set>
 
 #include <android-base/thread_annotations.h>
-
 #include <binder/IBinder.h>
 #include <compositionengine/FenceResult.h>
+#include <ftl/future.h>
 #include <gui/ITransactionCompletedListener.h>
 #include <ui/Fence.h>
 
@@ -46,7 +45,7 @@
     bool releasePreviousBuffer = false;
     std::string name;
     sp<Fence> previousReleaseFence;
-    std::vector<std::shared_future<FenceResult>> previousReleaseFences;
+    std::vector<ftl::SharedFuture<FenceResult>> previousReleaseFences;
     std::variant<nsecs_t, sp<Fence>> acquireTimeOrFence = -1;
     nsecs_t latchTime = -1;
     uint32_t transformHint = 0;
diff --git a/services/surfaceflinger/tests/unittests/mock/DisplayHardware/MockHWC2.h b/services/surfaceflinger/tests/unittests/mock/DisplayHardware/MockHWC2.h
index 3e0d6d3..07cd15d 100644
--- a/services/surfaceflinger/tests/unittests/mock/DisplayHardware/MockHWC2.h
+++ b/services/surfaceflinger/tests/unittests/mock/DisplayHardware/MockHWC2.h
@@ -80,7 +80,7 @@
     MOCK_METHOD(hal::Error, presentOrValidate,
                 (nsecs_t, uint32_t *, uint32_t *, android::sp<android::Fence> *, uint32_t *),
                 (override));
-    MOCK_METHOD(std::future<hal::Error>, setDisplayBrightness,
+    MOCK_METHOD(ftl::Future<hal::Error>, setDisplayBrightness,
                 (float, float, const Hwc2::Composer::DisplayBrightnessOptions &), (override));
     MOCK_METHOD(hal::Error, setActiveConfigWithConstraints,
                 (hal::HWConfigId, const hal::VsyncPeriodChangeConstraints &,