Optimize BLAST buffer releases via Unix sockets

Bug: 294133380
Flag: com.android.graphics.libgui.flags.buffer_release_channel
Test: BLASTBufferQueueTest
Change-Id: Ia183452198dadc7f8e540f7219bd44d8b5823458
diff --git a/libs/gui/Android.bp b/libs/gui/Android.bp
index 51d2e53..1243b21 100644
--- a/libs/gui/Android.bp
+++ b/libs/gui/Android.bp
@@ -255,6 +255,7 @@
         "BitTube.cpp",
         "BLASTBufferQueue.cpp",
         "BufferItemConsumer.cpp",
+        "BufferReleaseChannel.cpp",
         "Choreographer.cpp",
         "CompositorTiming.cpp",
         "ConsumerBase.cpp",
diff --git a/libs/gui/BLASTBufferQueue.cpp b/libs/gui/BLASTBufferQueue.cpp
index 739c3c2..f13d499 100644
--- a/libs/gui/BLASTBufferQueue.cpp
+++ b/libs/gui/BLASTBufferQueue.cpp
@@ -38,13 +38,17 @@
 #include <private/gui/ComposerService.h>
 #include <private/gui/ComposerServiceAIDL.h>
 
+#include <android-base/stringprintf.h>
 #include <android-base/thread_annotations.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
 #include <chrono>
 
 #include <com_android_graphics_libgui_flags.h>
 
 using namespace com::android::graphics::libgui;
 using namespace std::chrono_literals;
+using android::base::unique_fd;
 
 namespace {
 inline const char* boolToString(bool b) {
@@ -179,8 +183,6 @@
     // explicitly so that dequeueBuffer will block
     mProducer->setDequeueTimeout(std::numeric_limits<int64_t>::max());
 
-    // safe default, most producers are expected to override this
-    mProducer->setMaxDequeuedBufferCount(2);
     mBufferItemConsumer = new BLASTBufferItemConsumer(mConsumer,
                                                       GraphicBuffer::USAGE_HW_COMPOSER |
                                                               GraphicBuffer::USAGE_HW_TEXTURE,
@@ -210,6 +212,12 @@
             },
             this);
 
+#if COM_ANDROID_GRAPHICS_LIBGUI_FLAGS(BUFFER_RELEASE_CHANNEL)
+    std::unique_ptr<gui::BufferReleaseChannel::ConsumerEndpoint> bufferReleaseConsumer;
+    gui::BufferReleaseChannel::open(mName, bufferReleaseConsumer, mBufferReleaseProducer);
+    mBufferReleaseReader.emplace(std::move(bufferReleaseConsumer));
+#endif
+
     BQA_LOGV("BLASTBufferQueue created");
 }
 
@@ -259,6 +267,9 @@
     if (surfaceControlChanged) {
         t.setFlags(mSurfaceControl, layer_state_t::eEnableBackpressure,
                    layer_state_t::eEnableBackpressure);
+        if (mBufferReleaseProducer) {
+            t.setBufferReleaseChannel(mSurfaceControl, mBufferReleaseProducer);
+        }
         applyTransaction = true;
     }
     mTransformHint = mSurfaceControl->getTransformHint();
@@ -439,6 +450,21 @@
     BBQ_TRACE();
     releaseBufferCallbackLocked(id, releaseFence, currentMaxAcquiredBufferCount,
                                 false /* fakeRelease */);
+    if (!mBufferReleaseReader) {
+        return;
+    }
+    // Drain the buffer release channel socket
+    while (true) {
+        ReleaseCallbackId releaseCallbackId;
+        sp<Fence> releaseFence;
+        if (status_t status =
+                    mBufferReleaseReader->readNonBlocking(releaseCallbackId, releaseFence);
+            status != OK) {
+            break;
+        }
+        releaseBufferCallbackLocked(releaseCallbackId, releaseFence, std::nullopt,
+                                    false /* fakeRelease */);
+    }
 }
 
 void BLASTBufferQueue::releaseBufferCallbackLocked(
@@ -495,11 +521,11 @@
                                      const sp<Fence>& releaseFence) {
     auto it = mSubmitted.find(callbackId);
     if (it == mSubmitted.end()) {
-        BQA_LOGE("ERROR: releaseBufferCallback without corresponding submitted buffer %s",
-                 callbackId.to_string().c_str());
         return;
     }
     mNumAcquired--;
+    updateDequeueShouldBlockLocked();
+    mBufferReleaseReader->interruptBlockingRead();
     BBQ_TRACE("frame=%" PRIu64, callbackId.framenumber);
     BQA_LOGV("released %s", callbackId.to_string().c_str());
     mBufferItemConsumer->releaseBuffer(it->second, releaseFence);
@@ -564,6 +590,7 @@
 
     auto buffer = bufferItem.mGraphicBuffer;
     mNumFrameAvailable--;
+    updateDequeueShouldBlockLocked();
     BBQ_TRACE("frame=%" PRIu64, bufferItem.mFrameNumber);
 
     if (buffer == nullptr) {
@@ -582,6 +609,7 @@
     }
 
     mNumAcquired++;
+    updateDequeueShouldBlockLocked();
     mLastAcquiredFrameNumber = bufferItem.mFrameNumber;
     ReleaseCallbackId releaseCallbackId(buffer->getId(), mLastAcquiredFrameNumber);
     mSubmitted[releaseCallbackId] = bufferItem;
@@ -708,6 +736,7 @@
         return;
     }
     mNumFrameAvailable--;
+    updateDequeueShouldBlockLocked();
     mBufferItemConsumer->releaseBuffer(bufferItem, bufferItem.mFence);
 }
 
@@ -761,7 +790,9 @@
         }
 
         // add to shadow queue
+        mNumDequeued--;
         mNumFrameAvailable++;
+        updateDequeueShouldBlockLocked();
         if (waitForTransactionCallback && mNumFrameAvailable >= 2) {
             acquireAndReleaseBuffer();
         }
@@ -812,11 +843,21 @@
 void BLASTBufferQueue::onFrameDequeued(const uint64_t bufferId) {
     std::lock_guard _lock{mTimestampMutex};
     mDequeueTimestamps[bufferId] = systemTime();
-};
+    mNumDequeued++;
+}
 
 void BLASTBufferQueue::onFrameCancelled(const uint64_t bufferId) {
-    std::lock_guard _lock{mTimestampMutex};
-    mDequeueTimestamps.erase(bufferId);
+    {
+        std::lock_guard _lock{mTimestampMutex};
+        mDequeueTimestamps.erase(bufferId);
+    }
+
+    {
+        std::lock_guard lock{mMutex};
+        mNumDequeued--;
+        updateDequeueShouldBlockLocked();
+    }
+    mBufferReleaseReader->interruptBlockingRead();
 };
 
 bool BLASTBufferQueue::syncNextTransaction(
@@ -888,6 +929,22 @@
     return mSize != bufferSize;
 }
 
+void BLASTBufferQueue::updateDequeueShouldBlockLocked() {
+    int32_t buffersInUse = mNumDequeued + mNumFrameAvailable + mNumAcquired;
+    int32_t maxBufferCount = std::min(mMaxAcquiredBuffers + mMaxDequeuedBuffers, kMaxBufferCount);
+    bool bufferAvailable = buffersInUse < maxBufferCount;
+    // BLASTBufferQueueProducer should block until a buffer is released if
+    // (1) There are no free buffers available.
+    // (2) We're not in async mode. In async mode, BufferQueueProducer::dequeueBuffer returns
+    //     WOULD_BLOCK instead of blocking when there are no free buffers.
+    // (3) We're not in shared buffer mode. In shared buffer mode, both the producer and consumer
+    //     can access the same buffer simultaneously. BufferQueueProducer::dequeueBuffer returns
+    //     the shared buffer immediately instead of blocking.
+    mDequeueShouldBlock = !(bufferAvailable || mAsyncMode || mSharedBufferMode);
+    ATRACE_INT("Dequeued", mNumDequeued);
+    ATRACE_INT("DequeueShouldBlock", mDequeueShouldBlock);
+}
+
 class BBQSurface : public Surface {
 private:
     std::mutex mMutex;
@@ -1116,24 +1173,58 @@
                                             producerControlledByApp, output);
     }
 
+    status_t disconnect(int api, DisconnectMode mode) override {
+        if (status_t status = BufferQueueProducer::disconnect(api, mode); status != OK) {
+            return status;
+        }
+
+        sp<BLASTBufferQueue> bbq = mBLASTBufferQueue.promote();
+        if (!bbq) {
+            return OK;
+        }
+
+        {
+            std::lock_guard lock{bbq->mMutex};
+            bbq->mNumDequeued = 0;
+            bbq->mNumFrameAvailable = 0;
+            bbq->mNumAcquired = 0;
+            bbq->mSubmitted.clear();
+            bbq->updateDequeueShouldBlockLocked();
+        }
+        bbq->mBufferReleaseReader->interruptBlockingRead();
+
+        return OK;
+    }
+
     // We want to resize the frame history when changing the size of the buffer queue
     status_t setMaxDequeuedBufferCount(int maxDequeuedBufferCount) override {
         int maxBufferCount;
         status_t status = BufferQueueProducer::setMaxDequeuedBufferCount(maxDequeuedBufferCount,
                                                                          &maxBufferCount);
-        // if we can't determine the max buffer count, then just skip growing the history size
-        if (status == OK) {
-            size_t newFrameHistorySize = maxBufferCount + 2; // +2 because triple buffer rendering
-            // optimize away resizing the frame history unless it will grow
-            if (newFrameHistorySize > FrameEventHistory::INITIAL_MAX_FRAME_HISTORY) {
-                sp<BLASTBufferQueue> bbq = mBLASTBufferQueue.promote();
-                if (bbq != nullptr) {
-                    ALOGV("increasing frame history size to %zu", newFrameHistorySize);
-                    bbq->resizeFrameEventHistory(newFrameHistorySize);
-                }
-            }
+        if (status != OK) {
+            return status;
         }
-        return status;
+
+        sp<BLASTBufferQueue> bbq = mBLASTBufferQueue.promote();
+        if (!bbq) {
+            return OK;
+        }
+
+        {
+            std::lock_guard lock{bbq->mMutex};
+            bbq->mMaxDequeuedBuffers = maxDequeuedBufferCount;
+            bbq->updateDequeueShouldBlockLocked();
+        }
+        bbq->mBufferReleaseReader->interruptBlockingRead();
+
+        size_t newFrameHistorySize = maxBufferCount + 2; // +2 because triple buffer rendering
+        // optimize away resizing the frame history unless it will grow
+        if (newFrameHistorySize > FrameEventHistory::INITIAL_MAX_FRAME_HISTORY) {
+            ALOGV("increasing frame history size to %zu", newFrameHistorySize);
+            bbq->resizeFrameEventHistory(newFrameHistorySize);
+        }
+
+        return OK;
     }
 
     int query(int what, int* value) override {
@@ -1144,6 +1235,125 @@
         return BufferQueueProducer::query(what, value);
     }
 
+    status_t setAsyncMode(bool asyncMode) override {
+        if (status_t status = BufferQueueProducer::setAsyncMode(asyncMode); status != NO_ERROR) {
+            return status;
+        }
+
+        sp<BLASTBufferQueue> bbq = mBLASTBufferQueue.promote();
+        if (!bbq) {
+            return NO_ERROR;
+        }
+
+        {
+            std::lock_guard lock{bbq->mMutex};
+            bbq->mAsyncMode = asyncMode;
+            bbq->updateDequeueShouldBlockLocked();
+        }
+
+        bbq->mBufferReleaseReader->interruptBlockingRead();
+        return NO_ERROR;
+    }
+
+    status_t setSharedBufferMode(bool sharedBufferMode) override {
+        if (status_t status = BufferQueueProducer::setSharedBufferMode(sharedBufferMode);
+            status != NO_ERROR) {
+            return status;
+        }
+
+        sp<BLASTBufferQueue> bbq = mBLASTBufferQueue.promote();
+        if (!bbq) {
+            return NO_ERROR;
+        }
+
+        {
+            std::lock_guard lock{bbq->mMutex};
+            bbq->mSharedBufferMode = sharedBufferMode;
+            bbq->updateDequeueShouldBlockLocked();
+        }
+
+        bbq->mBufferReleaseReader->interruptBlockingRead();
+        return NO_ERROR;
+    }
+
+    status_t detachBuffer(int slot) override {
+        if (status_t status = BufferQueueProducer::detachBuffer(slot); status != NO_ERROR) {
+            return status;
+        }
+
+        sp<BLASTBufferQueue> bbq = mBLASTBufferQueue.promote();
+        if (!bbq) {
+            return NO_ERROR;
+        }
+
+        {
+            std::lock_guard lock{bbq->mMutex};
+            bbq->mNumDequeued--;
+            bbq->updateDequeueShouldBlockLocked();
+        }
+
+        bbq->mBufferReleaseReader->interruptBlockingRead();
+        return NO_ERROR;
+    }
+
+    // Override dequeueBuffer to block if there are no free buffers.
+    //
+    // Buffer releases are communicated via the BufferReleaseChannel. When dequeueBuffer determines
+    // a free buffer is not available, it blocks on an epoll file descriptor. Epoll is configured to
+    // detect messages on the BufferReleaseChannel's socket and an eventfd. The eventfd is signaled
+    // whenever an event other than a buffer release occurs that may change the number of free
+    // buffers. dequeueBuffer uses epoll in a similar manner as a condition variable by testing for
+    // the availability of a free buffer in a loop, breaking the loop once a free buffer is
+    // available.
+    //
+    // This is an optimization implemented to reduce thread scheduling delays in the previously
+    // existing binder release callback. The binder buffer release callback is still used and there
+    // are no guarantees around order between buffer releases via binder and the
+    // BufferReleaseChannel. If we attempt to a release a buffer here that has already been released
+    // via binder, the release is ignored.
+    status_t dequeueBuffer(int* outSlot, sp<Fence>* outFence, uint32_t width, uint32_t height,
+                           PixelFormat format, uint64_t usage, uint64_t* outBufferAge,
+                           FrameEventHistoryDelta* outTimestamps) {
+        sp<BLASTBufferQueue> bbq = mBLASTBufferQueue.promote();
+        if (!bbq || !bbq->mBufferReleaseReader) {
+            return BufferQueueProducer::dequeueBuffer(outSlot, outFence, width, height, format,
+                                                      usage, outBufferAge, outTimestamps);
+        }
+
+        if (bbq->mDequeueShouldBlock) {
+            ATRACE_FORMAT("waiting for free buffer");
+            auto maxWaitTime = std::chrono::steady_clock::now() + 1s;
+            do {
+                auto timeout = std::chrono::duration_cast<std::chrono::milliseconds>(
+                        maxWaitTime - std::chrono::steady_clock::now());
+                if (timeout <= 0ms) {
+                    break;
+                }
+
+                ReleaseCallbackId releaseCallbackId;
+                sp<Fence> releaseFence;
+                status_t status = bbq->mBufferReleaseReader->readBlocking(releaseCallbackId,
+                                                                          releaseFence, timeout);
+                if (status == WOULD_BLOCK) {
+                    // readBlocking was interrupted. The loop will test if we have a free buffer.
+                    continue;
+                }
+
+                if (status != OK) {
+                    // An error occurred or readBlocking timed out.
+                    break;
+                }
+
+                std::lock_guard lock{bbq->mMutex};
+                bbq->releaseBufferCallbackLocked(releaseCallbackId, releaseFence, std::nullopt,
+                                                 false);
+            } while (bbq->mDequeueShouldBlock);
+        }
+
+        return BufferQueueProducer::dequeueBuffer(outSlot, outFence, width, height, format, usage,
+                                                  outBufferAge, outTimestamps);
+    }
+
 private:
     const wp<BLASTBufferQueue> mBLASTBufferQueue;
 };
@@ -1173,6 +1383,16 @@
     *outConsumer = consumer;
 }
 
+void BLASTBufferQueue::onFirstRef() {
+    // safe default, most producers are expected to override this
+    //
+    // This is done in onFirstRef instead of BLASTBufferQueue's constructor because
+    // BBQBufferQueueProducer::setMaxDequeuedBufferCount promotes a weak pointer to BLASTBufferQueue
+    // to a strong pointer. If this is done in the constructor, then when the strong pointer goes
+    // out of scope, it's the last reference so BLASTBufferQueue is deleted.
+    mProducer->setMaxDequeuedBufferCount(2);
+}
+
 void BLASTBufferQueue::resizeFrameEventHistory(size_t newSize) {
     // This can be null during creation of the buffer queue, but resizing won't do anything at that
     // point in time, so just ignore. This can go away once the class relationships and lifetimes of
@@ -1222,4 +1442,72 @@
     mTransactionHangCallback = callback;
 }
 
+BLASTBufferQueue::BufferReleaseReader::BufferReleaseReader(
+        std::unique_ptr<gui::BufferReleaseChannel::ConsumerEndpoint> endpoint)
+      : mEndpoint(std::move(endpoint)) {
+    mEpollFd = android::base::unique_fd(epoll_create1(0));
+    if (!mEpollFd.ok()) {
+        ALOGE("Failed to create buffer release epoll file descriptor. errno=%d message='%s'", errno,
+              strerror(errno));
+    }
+
+    epoll_event event;
+    event.events = EPOLLIN;
+    event.data.fd = mEndpoint->getFd();
+    if (epoll_ctl(mEpollFd.get(), EPOLL_CTL_ADD, mEndpoint->getFd(), &event) == -1) {
+        ALOGE("Failed to register buffer release consumer file descriptor with epoll. errno=%d "
+              "message='%s'",
+              errno, strerror(errno));
+    }
+
+    mEventFd = android::base::unique_fd(eventfd(0, EFD_NONBLOCK));
+    event.data.fd = mEventFd.get();
+    if (epoll_ctl(mEpollFd.get(), EPOLL_CTL_ADD, mEventFd.get(), &event) == -1) {
+        ALOGE("Failed to register buffer release eventfd with epoll. errno=%d message='%s'", errno,
+              strerror(errno));
+    }
+}
+
+status_t BLASTBufferQueue::BufferReleaseReader::readNonBlocking(ReleaseCallbackId& outId,
+                                                                sp<Fence>& outFence) {
+    std::lock_guard lock{mMutex};
+    return mEndpoint->readReleaseFence(outId, outFence);
+}
+
+status_t BLASTBufferQueue::BufferReleaseReader::readBlocking(ReleaseCallbackId& outId,
+                                                             sp<Fence>& outFence,
+                                                             std::chrono::milliseconds timeout) {
+    epoll_event event;
+    int eventCount = epoll_wait(mEpollFd.get(), &event, 1 /* maxevents */, timeout.count());
+
+    if (eventCount == -1) {
+        ALOGE("epoll_wait error while waiting for buffer release. errno=%d message='%s'", errno,
+              strerror(errno));
+        return UNKNOWN_ERROR;
+    }
+
+    if (eventCount == 0) {
+        return TIMED_OUT;
+    }
+
+    if (event.data.fd == mEventFd.get()) {
+        uint64_t value;
+        if (read(mEventFd.get(), &value, sizeof(uint64_t)) == -1 && errno != EWOULDBLOCK) {
+            ALOGE("error while reading from eventfd. errno=%d message='%s'", errno,
+                  strerror(errno));
+        }
+        return WOULD_BLOCK;
+    }
+
+    std::lock_guard lock{mMutex};
+    return mEndpoint->readReleaseFence(outId, outFence);
+}
+
+void BLASTBufferQueue::BufferReleaseReader::interruptBlockingRead() {
+    uint64_t value = 1;
+    if (write(mEventFd.get(), &value, sizeof(uint64_t)) == -1) {
+        ALOGE("failed to notify dequeue event. errno=%d message='%s'", errno, strerror(errno));
+    }
+}
+
 } // namespace android
diff --git a/libs/gui/BufferReleaseChannel.cpp b/libs/gui/BufferReleaseChannel.cpp
new file mode 100644
index 0000000..183b25a
--- /dev/null
+++ b/libs/gui/BufferReleaseChannel.cpp
@@ -0,0 +1,364 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "BufferReleaseChannel"
+#define ATRACE_TAG ATRACE_TAG_GRAPHICS
+
+#include <errno.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <android-base/logging.h>
+#include <android-base/properties.h>
+#include <android-base/stringprintf.h>
+#include <android/binder_status.h>
+#include <binder/Parcel.h>
+#include <cutils/properties.h>
+#include <ftl/enum.h>
+#include <log/log.h>
+#include <utils/Trace.h>
+
+#include <gui/TraceUtils.h>
+#include <private/gui/ParcelUtils.h>
+
+#include <gui/BufferReleaseChannel.h>
+
+using android::base::Result;
+
+namespace android::gui {
+
+namespace {
+
+template <typename T>
+static void readAligned(const void*& buffer, size_t& size, T& value) {
+    size -= FlattenableUtils::align<alignof(T)>(buffer);
+    FlattenableUtils::read(buffer, size, value);
+}
+
+template <typename T>
+static void writeAligned(void*& buffer, size_t& size, T value) {
+    size -= FlattenableUtils::align<alignof(T)>(buffer);
+    FlattenableUtils::write(buffer, size, value);
+}
+
+template <typename T>
+static void addAligned(size_t& size, T /* value */) {
+    size = FlattenableUtils::align<sizeof(T)>(size);
+    size += sizeof(T);
+}
+
+template <typename T>
+static inline constexpr uint32_t low32(const T n) {
+    return static_cast<uint32_t>(static_cast<uint64_t>(n));
+}
+
+template <typename T>
+static inline constexpr uint32_t high32(const T n) {
+    return static_cast<uint32_t>(static_cast<uint64_t>(n) >> 32);
+}
+
+template <typename T>
+static inline constexpr T to64(const uint32_t lo, const uint32_t hi) {
+    return static_cast<T>(static_cast<uint64_t>(hi) << 32 | lo);
+}
+
+} // namespace
+
+size_t BufferReleaseChannel::Message::getPodSize() const {
+    size_t size = 0;
+    addAligned(size, low32(releaseCallbackId.bufferId));
+    addAligned(size, high32(releaseCallbackId.bufferId));
+    addAligned(size, low32(releaseCallbackId.framenumber));
+    addAligned(size, high32(releaseCallbackId.framenumber));
+    return size;
+}
+
+size_t BufferReleaseChannel::Message::getFlattenedSize() const {
+    size_t size = releaseFence->getFlattenedSize();
+    size = FlattenableUtils::align<4>(size);
+    size += getPodSize();
+    return size;
+}
+
+status_t BufferReleaseChannel::Message::flatten(void*& buffer, size_t& size, int*& fds,
+                                                size_t& count) const {
+    if (status_t err = releaseFence->flatten(buffer, size, fds, count); err != OK) {
+        return err;
+    }
+    size -= FlattenableUtils::align<4>(buffer);
+
+    // Check we still have enough space
+    if (size < getPodSize()) {
+        return NO_MEMORY;
+    }
+
+    writeAligned(buffer, size, low32(releaseCallbackId.bufferId));
+    writeAligned(buffer, size, high32(releaseCallbackId.bufferId));
+    writeAligned(buffer, size, low32(releaseCallbackId.framenumber));
+    writeAligned(buffer, size, high32(releaseCallbackId.framenumber));
+    return OK;
+}
+
+status_t BufferReleaseChannel::Message::unflatten(void const*& buffer, size_t& size,
+                                                  int const*& fds, size_t& count) {
+    releaseFence = new Fence();
+    if (status_t err = releaseFence->unflatten(buffer, size, fds, count); err != OK) {
+        return err;
+    }
+    size -= FlattenableUtils::align<4>(buffer);
+
+    // Check we still have enough space
+    if (size < getPodSize()) {
+        return OK;
+    }
+
+    uint32_t bufferIdLo = 0, bufferIdHi = 0;
+    uint32_t frameNumberLo = 0, frameNumberHi = 0;
+
+    readAligned(buffer, size, bufferIdLo);
+    readAligned(buffer, size, bufferIdHi);
+    releaseCallbackId.bufferId = to64<int64_t>(bufferIdLo, bufferIdHi);
+    readAligned(buffer, size, frameNumberLo);
+    readAligned(buffer, size, frameNumberHi);
+    releaseCallbackId.framenumber = to64<uint64_t>(frameNumberLo, frameNumberHi);
+
+    return NO_ERROR;
+}
+
+status_t BufferReleaseChannel::ConsumerEndpoint::readReleaseFence(
+        ReleaseCallbackId& outReleaseCallbackId, sp<Fence>& outReleaseFence) {
+    Message releaseBufferMessage;
+    mFlattenedBuffer.resize(releaseBufferMessage.getFlattenedSize());
+    std::array<uint8_t, CMSG_SPACE(sizeof(int))> controlMessageBuffer;
+
+    iovec iov{
+            .iov_base = mFlattenedBuffer.data(),
+            .iov_len = mFlattenedBuffer.size(),
+    };
+
+    msghdr msg{
+            .msg_iov = &iov,
+            .msg_iovlen = 1,
+            .msg_control = controlMessageBuffer.data(),
+            .msg_controllen = controlMessageBuffer.size(),
+    };
+
+    int result;
+    do {
+        result = recvmsg(mFd, &msg, 0);
+    } while (result == -1 && errno == EINTR);
+    if (result == -1) {
+        if (errno == EWOULDBLOCK || errno == EAGAIN) {
+            return WOULD_BLOCK;
+        }
+        ALOGE("Error reading release fence from socket: error %#x (%s)", errno, strerror(errno));
+        return -errno;
+    }
+
+    if (msg.msg_iovlen != 1) {
+        ALOGE("Error reading release fence from socket: bad data length");
+        return UNKNOWN_ERROR;
+    }
+
+    if (msg.msg_controllen % sizeof(int) != 0) {
+        ALOGE("Error reading release fence from socket: bad fd length");
+        return UNKNOWN_ERROR;
+    }
+
+    size_t dataLen = msg.msg_iov->iov_len;
+    const void* data = static_cast<const void*>(msg.msg_iov->iov_base);
+    if (!data) {
+        ALOGE("Error reading release fence from socket: no buffer data");
+        return UNKNOWN_ERROR;
+    }
+
+    size_t fdCount = 0;
+    const int* fdData = nullptr;
+    if (cmsghdr* cmsg = CMSG_FIRSTHDR(&msg)) {
+        fdData = reinterpret_cast<const int*>(CMSG_DATA(cmsg));
+        fdCount = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
+    }
+
+    if (status_t err = releaseBufferMessage.unflatten(data, dataLen, fdData, fdCount); err != OK) {
+        return err;
+    }
+
+    outReleaseCallbackId = releaseBufferMessage.releaseCallbackId;
+    outReleaseFence = std::move(releaseBufferMessage.releaseFence);
+
+    return OK;
+}
+
+int BufferReleaseChannel::ProducerEndpoint::writeReleaseFence(const ReleaseCallbackId& callbackId,
+                                                              const sp<Fence>& fence) {
+    Message releaseBufferMessage(callbackId, fence ? fence : Fence::NO_FENCE);
+    mFlattenedBuffer.resize(releaseBufferMessage.getFlattenedSize());
+    int flattenedFd;
+    {
+        // Make copies of needed items since flatten modifies them, and we don't
+        // want to send anything if there's an error during flatten.
+        void* flattenedBufferPtr = mFlattenedBuffer.data();
+        size_t flattenedBufferSize = mFlattenedBuffer.size();
+        int* flattenedFdPtr = &flattenedFd;
+        size_t flattenedFdCount = 1;
+        if (status_t err = releaseBufferMessage.flatten(flattenedBufferPtr, flattenedBufferSize,
+                                                        flattenedFdPtr, flattenedFdCount);
+            err != OK) {
+            ALOGE("Failed to flatten BufferReleaseChannel message.");
+            return err;
+        }
+    }
+
+    iovec iov{
+            .iov_base = mFlattenedBuffer.data(),
+            .iov_len = mFlattenedBuffer.size(),
+    };
+
+    msghdr msg{
+            .msg_iov = &iov,
+            .msg_iovlen = 1,
+    };
+
+    std::array<uint8_t, CMSG_SPACE(sizeof(int))> controlMessageBuffer;
+    if (fence && fence->isValid()) {
+        msg.msg_control = controlMessageBuffer.data();
+        msg.msg_controllen = controlMessageBuffer.size();
+
+        cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
+        cmsg->cmsg_level = SOL_SOCKET;
+        cmsg->cmsg_type = SCM_RIGHTS;
+        cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+        memcpy(CMSG_DATA(cmsg), &flattenedFd, sizeof(int));
+    }
+
+    int result;
+    do {
+        result = sendmsg(mFd, &msg, 0);
+    } while (result == -1 && errno == EINTR);
+    if (result == -1) {
+        ALOGD("Error writing release fence to socket: error %#x (%s)", errno, strerror(errno));
+        return -errno;
+    }
+
+    return OK;
+}
+
+status_t BufferReleaseChannel::ProducerEndpoint::readFromParcel(const android::Parcel* parcel) {
+    if (!parcel) return STATUS_BAD_VALUE;
+    SAFE_PARCEL(parcel->readUtf8FromUtf16, &mName);
+    SAFE_PARCEL(parcel->readUniqueFileDescriptor, &mFd);
+    return STATUS_OK;
+}
+
+status_t BufferReleaseChannel::ProducerEndpoint::writeToParcel(android::Parcel* parcel) const {
+    if (!parcel) return STATUS_BAD_VALUE;
+    SAFE_PARCEL(parcel->writeUtf8AsUtf16, mName);
+    SAFE_PARCEL(parcel->writeUniqueFileDescriptor, mFd);
+    return STATUS_OK;
+}
+
+status_t BufferReleaseChannel::open(std::string name,
+                                    std::unique_ptr<ConsumerEndpoint>& outConsumer,
+                                    std::shared_ptr<ProducerEndpoint>& outProducer) {
+    outConsumer.reset();
+    outProducer.reset();
+
+    int sockets[2];
+    if (socketpair(AF_UNIX, SOCK_SEQPACKET, 0, sockets)) {
+        ALOGE("[%s] Failed to create socket pair. errorno=%d message='%s'", name.c_str(), errno,
+              strerror(errno));
+        return -errno;
+    }
+
+    android::base::unique_fd consumerFd(sockets[0]);
+    android::base::unique_fd producerFd(sockets[1]);
+
+    // Socket buffer size.  The default is typically about 128KB, which is much larger than
+    // we really need.  So we make it smaller.  It just needs to be big enough to hold
+    // a few dozen release fences.
+    const int bufferSize = 32 * 1024;
+    if (setsockopt(consumerFd.get(), SOL_SOCKET, SO_SNDBUF, &bufferSize, sizeof(bufferSize)) ==
+        -1) {
+        ALOGE("[%s] Failed to set consumer socket send buffer size. errno=%d message='%s'",
+              name.c_str(), errno, strerror(errno));
+        return -errno;
+    }
+    if (setsockopt(consumerFd.get(), SOL_SOCKET, SO_RCVBUF, &bufferSize, sizeof(bufferSize)) ==
+        -1) {
+        ALOGE("[%s] Failed to set consumer socket receive buffer size. errno=%d "
+              "message='%s'",
+              name.c_str(), errno, strerror(errno));
+        return -errno;
+    }
+    if (setsockopt(producerFd.get(), SOL_SOCKET, SO_SNDBUF, &bufferSize, sizeof(bufferSize)) ==
+        -1) {
+        ALOGE("[%s] Failed to set producer socket send buffer size. errno=%d message='%s'",
+              name.c_str(), errno, strerror(errno));
+        return -errno;
+    }
+    if (setsockopt(producerFd.get(), SOL_SOCKET, SO_RCVBUF, &bufferSize, sizeof(bufferSize)) ==
+        -1) {
+        ALOGE("[%s] Failed to set producer socket receive buffer size. errno=%d "
+              "message='%s'",
+              name.c_str(), errno, strerror(errno));
+        return -errno;
+    }
+
+    // Configure the consumer socket to be non-blocking.
+    int flags = fcntl(consumerFd.get(), F_GETFL, 0);
+    if (flags == -1) {
+        ALOGE("[%s] Failed to get consumer socket flags. errno=%d message='%s'", name.c_str(),
+              errno, strerror(errno));
+        return -errno;
+    }
+    if (fcntl(consumerFd.get(), F_SETFL, flags | O_NONBLOCK) == -1) {
+        ALOGE("[%s] Failed to set consumer socket to non-blocking mode. errno=%d "
+              "message='%s'",
+              name.c_str(), errno, strerror(errno));
+        return -errno;
+    }
+
+    // Configure a timeout for the producer socket.
+    const timeval timeout{.tv_sec = 1, .tv_usec = 0};
+    if (setsockopt(producerFd.get(), SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof(timeval)) == -1) {
+        ALOGE("[%s] Failed to set producer socket timeout. errno=%d message='%s'", name.c_str(),
+              errno, strerror(errno));
+        return -errno;
+    }
+
+    // Make the consumer read-only
+    if (shutdown(consumerFd.get(), SHUT_WR) == -1) {
+        ALOGE("[%s] Failed to shutdown writing on consumer socket. errno=%d message='%s'",
+              name.c_str(), errno, strerror(errno));
+        return -errno;
+    }
+
+    // Make the producer write-only
+    if (shutdown(producerFd.get(), SHUT_RD) == -1) {
+        ALOGE("[%s] Failed to shutdown reading on producer socket. errno=%d message='%s'",
+              name.c_str(), errno, strerror(errno));
+        return -errno;
+    }
+
+    outConsumer = std::make_unique<ConsumerEndpoint>(name, std::move(consumerFd));
+    outProducer = std::make_shared<ProducerEndpoint>(std::move(name), std::move(producerFd));
+    return STATUS_OK;
+}
+
+} // namespace android::gui
diff --git a/libs/gui/LayerState.cpp b/libs/gui/LayerState.cpp
index 307ae39..adf1646 100644
--- a/libs/gui/LayerState.cpp
+++ b/libs/gui/LayerState.cpp
@@ -194,6 +194,12 @@
     SAFE_PARCEL(output.writeFloat, currentHdrSdrRatio);
     SAFE_PARCEL(output.writeFloat, desiredHdrSdrRatio);
     SAFE_PARCEL(output.writeInt32, static_cast<int32_t>(cachingHint));
+
+    const bool hasBufferReleaseChannel = (bufferReleaseChannel != nullptr);
+    SAFE_PARCEL(output.writeBool, hasBufferReleaseChannel);
+    if (hasBufferReleaseChannel) {
+        SAFE_PARCEL(output.writeParcelable, *bufferReleaseChannel);
+    }
     return NO_ERROR;
 }
 
@@ -339,6 +345,12 @@
     SAFE_PARCEL(input.readInt32, &tmpInt32);
     cachingHint = static_cast<gui::CachingHint>(tmpInt32);
 
+    bool hasBufferReleaseChannel;
+    SAFE_PARCEL(input.readBool, &hasBufferReleaseChannel);
+    if (hasBufferReleaseChannel) {
+        bufferReleaseChannel = std::make_shared<gui::BufferReleaseChannel::ProducerEndpoint>();
+        SAFE_PARCEL(input.readParcelable, bufferReleaseChannel.get());
+    }
     return NO_ERROR;
 }
 
@@ -718,6 +730,10 @@
     if (other.what & eFlushJankData) {
         what |= eFlushJankData;
     }
+    if (other.what & eBufferReleaseChannelChanged) {
+        what |= eBufferReleaseChannelChanged;
+        bufferReleaseChannel = other.bufferReleaseChannel;
+    }
     if ((other.what & what) != other.what) {
         ALOGE("Unmerged SurfaceComposer Transaction properties. LayerState::merge needs updating? "
               "other.what=0x%" PRIX64 " what=0x%" PRIX64 " unmerged flags=0x%" PRIX64,
@@ -797,6 +813,7 @@
     CHECK_DIFF(diff, eColorChanged, other, color.rgb);
     CHECK_DIFF(diff, eColorSpaceAgnosticChanged, other, colorSpaceAgnostic);
     CHECK_DIFF(diff, eDimmingEnabledChanged, other, dimmingEnabled);
+    if (other.what & eBufferReleaseChannelChanged) diff |= eBufferReleaseChannelChanged;
     return diff;
 }
 
diff --git a/libs/gui/SurfaceComposerClient.cpp b/libs/gui/SurfaceComposerClient.cpp
index 2821b51..5c87772 100644
--- a/libs/gui/SurfaceComposerClient.cpp
+++ b/libs/gui/SurfaceComposerClient.cpp
@@ -2394,6 +2394,22 @@
     return *this;
 }
 
+SurfaceComposerClient::Transaction& SurfaceComposerClient::Transaction::setBufferReleaseChannel(
+        const sp<SurfaceControl>& sc,
+        const std::shared_ptr<gui::BufferReleaseChannel::ProducerEndpoint>& channel) {
+    layer_state_t* s = getLayerState(sc);
+    if (!s) {
+        mStatus = BAD_INDEX;
+        return *this;
+    }
+
+    s->what |= layer_state_t::eBufferReleaseChannelChanged;
+    s->bufferReleaseChannel = channel;
+
+    registerSurfaceControlForCallback(sc);
+    return *this;
+}
+
 // ---------------------------------------------------------------------------
 
 DisplayState& SurfaceComposerClient::Transaction::getDisplayState(const sp<IBinder>& token) {
diff --git a/libs/gui/include/gui/BLASTBufferQueue.h b/libs/gui/include/gui/BLASTBufferQueue.h
index 0e1a505..e9a350c 100644
--- a/libs/gui/include/gui/BLASTBufferQueue.h
+++ b/libs/gui/include/gui/BLASTBufferQueue.h
@@ -19,7 +19,7 @@
 
 #include <gui/BufferItem.h>
 #include <gui/BufferItemConsumer.h>
-
+#include <gui/BufferReleaseChannel.h>
 #include <gui/IGraphicBufferProducer.h>
 #include <gui/SurfaceComposerClient.h>
 
@@ -28,7 +28,6 @@
 #include <utils/RefBase.h>
 
 #include <system/window.h>
-#include <thread>
 #include <queue>
 
 #include <com_android_graphics_libgui_flags.h>
@@ -131,6 +130,8 @@
 
     virtual ~BLASTBufferQueue();
 
+    void onFirstRef() override;
+
 private:
     friend class BLASTBufferQueueHelper;
     friend class BBQBufferQueueProducer;
@@ -170,11 +171,23 @@
 
     // BufferQueue internally allows 1 more than
     // the max to be acquired
-    int32_t mMaxAcquiredBuffers = 1;
+    int32_t mMaxAcquiredBuffers GUARDED_BY(mMutex) = 1;
+    int32_t mMaxDequeuedBuffers GUARDED_BY(mMutex) = 1;
+    static constexpr int32_t kMaxBufferCount = BufferQueueDefs::NUM_BUFFER_SLOTS;
 
+    // mNumDequeued is an atomic instead of being guarded by mMutex so that it can be incremented in
+    // onFrameDequeued while avoiding lock contention. updateDequeueShouldBlockLocked is not called
+    // after mNumDequeued is incremented for this reason. This means mDequeueShouldBlock may be
+    // temporarily false when it should be true. This can happen if multiple threads are dequeuing
+    // buffers or if dequeueBuffers is called multiple times in a row without queuing a buffer in
+    // between. As mDequeueShouldBlock is only used for optimization, this is OK.
+    std::atomic_int mNumDequeued;
     int32_t mNumFrameAvailable GUARDED_BY(mMutex) = 0;
     int32_t mNumAcquired GUARDED_BY(mMutex) = 0;
 
+    bool mAsyncMode GUARDED_BY(mMutex) = false;
+    bool mSharedBufferMode GUARDED_BY(mMutex) = false;
+
     // A value used to identify if a producer has been changed for the same SurfaceControl.
     // This is needed to know when the frame number has been reset to make sure we don't
     // latch stale buffers and that we don't wait on barriers from an old producer.
@@ -300,6 +313,41 @@
     std::function<void(const std::string&)> mTransactionHangCallback;
 
     std::unordered_set<uint64_t> mSyncedFrameNumbers GUARDED_BY(mMutex);
+
+    class BufferReleaseReader {
+    public:
+        BufferReleaseReader(std::unique_ptr<gui::BufferReleaseChannel::ConsumerEndpoint>);
+
+        BufferReleaseReader(const BufferReleaseReader&) = delete;
+        BufferReleaseReader& operator=(const BufferReleaseReader&) = delete;
+
+        // Block until we can release a buffer.
+        //
+        // Returns:
+        // * OK if a ReleaseCallbackId and Fence were successfully read.
+        // * TIMED_OUT if the specified timeout was reached.
+        // * WOULD_BLOCK if the blocking read was interrupted by interruptBlockingRead.
+        // * UNKNOWN_ERROR if something went wrong.
+        status_t readBlocking(ReleaseCallbackId&, sp<Fence>&, std::chrono::milliseconds timeout);
+
+        status_t readNonBlocking(ReleaseCallbackId&, sp<Fence>&);
+
+        void interruptBlockingRead();
+
+    private:
+        std::mutex mMutex;
+        std::unique_ptr<gui::BufferReleaseChannel::ConsumerEndpoint> mEndpoint GUARDED_BY(mMutex);
+        android::base::unique_fd mEpollFd;
+        android::base::unique_fd mEventFd;
+    };
+
+    // BufferReleaseChannel is used to communicate buffer releases from SurfaceFlinger to
+    // the client. See BBQBufferQueueProducer::dequeueBuffer for details.
+    std::optional<BufferReleaseReader> mBufferReleaseReader;
+    std::shared_ptr<gui::BufferReleaseChannel::ProducerEndpoint> mBufferReleaseProducer;
+
+    std::atomic_bool mDequeueShouldBlock{false};
+    void updateDequeueShouldBlockLocked() REQUIRES(mMutex);
 };
 
 } // namespace android
diff --git a/libs/gui/include/gui/BufferReleaseChannel.h b/libs/gui/include/gui/BufferReleaseChannel.h
new file mode 100644
index 0000000..cb0b261
--- /dev/null
+++ b/libs/gui/include/gui/BufferReleaseChannel.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <string>
+
+#include <android-base/chrono_utils.h>
+#include <android-base/result.h>
+#include <android-base/unique_fd.h>
+
+#include <binder/IBinder.h>
+#include <binder/Parcelable.h>
+#include <gui/ITransactionCompletedListener.h>
+#include <sys/stat.h>
+#include <ui/Fence.h>
+#include <ui/Transform.h>
+#include <utils/BitSet.h>
+#include <utils/Errors.h>
+#include <utils/RefBase.h>
+#include <utils/Timers.h>
+
+namespace android::gui {
+
+/**
+ * IPC wrapper to pass release fences from SurfaceFlinger to apps via a local unix domain socket.
+ */
+class BufferReleaseChannel {
+private:
+    class Endpoint {
+    public:
+        Endpoint(std::string name, android::base::unique_fd fd)
+              : mName(std::move(name)), mFd(std::move(fd)) {}
+        Endpoint() {}
+
+        Endpoint(Endpoint&&) noexcept = default;
+        Endpoint& operator=(Endpoint&&) noexcept = default;
+
+        Endpoint(const Endpoint&) = delete;
+        void operator=(const Endpoint&) = delete;
+
+        const android::base::unique_fd& getFd() const { return mFd; }
+
+    protected:
+        std::string mName;
+        android::base::unique_fd mFd;
+    };
+
+public:
+    class ConsumerEndpoint : public Endpoint {
+    public:
+        ConsumerEndpoint(std::string name, android::base::unique_fd fd)
+              : Endpoint(std::move(name), std::move(fd)) {}
+
+        /**
+         * Reads a release fence from the BufferReleaseChannel.
+         *
+         * Returns OK on success.
+         * Returns WOULD_BLOCK if there is no fence present.
+         * Other errors probably indicate that the channel is broken.
+         */
+        status_t readReleaseFence(ReleaseCallbackId& outReleaseCallbackId,
+                                  sp<Fence>& outReleaseFence);
+
+    private:
+        std::vector<uint8_t> mFlattenedBuffer;
+    };
+
+    class ProducerEndpoint : public Endpoint, public Parcelable {
+    public:
+        ProducerEndpoint(std::string name, android::base::unique_fd fd)
+              : Endpoint(std::move(name), std::move(fd)) {}
+        ProducerEndpoint() {}
+
+        status_t readFromParcel(const android::Parcel* parcel) override;
+        status_t writeToParcel(android::Parcel* parcel) const override;
+
+        status_t writeReleaseFence(const ReleaseCallbackId&, const sp<Fence>& releaseFence);
+
+    private:
+        std::vector<uint8_t> mFlattenedBuffer;
+    };
+
+    /**
+     * Create two endpoints that make up the BufferReleaseChannel.
+     *
+     * Return OK on success.
+     */
+    static status_t open(const std::string name, std::unique_ptr<ConsumerEndpoint>& outConsumer,
+                         std::shared_ptr<ProducerEndpoint>& outProducer);
+
+    struct Message : public Flattenable<Message> {
+        ReleaseCallbackId releaseCallbackId;
+        sp<Fence> releaseFence = Fence::NO_FENCE;
+
+        Message() = default;
+        Message(ReleaseCallbackId releaseCallbackId, sp<Fence> releaseFence)
+              : releaseCallbackId(releaseCallbackId), releaseFence(std::move(releaseFence)) {}
+
+        // Flattenable protocol
+        size_t getFlattenedSize() const;
+
+        size_t getFdCount() const { return releaseFence->getFdCount(); }
+
+        status_t flatten(void*& buffer, size_t& size, int*& fds, size_t& count) const;
+
+        status_t unflatten(void const*& buffer, size_t& size, int const*& fds, size_t& count);
+
+    private:
+        size_t getPodSize() const;
+    };
+};
+
+} // namespace android::gui
diff --git a/libs/gui/include/gui/LayerState.h b/libs/gui/include/gui/LayerState.h
index 3fb1894..d419945 100644
--- a/libs/gui/include/gui/LayerState.h
+++ b/libs/gui/include/gui/LayerState.h
@@ -34,6 +34,7 @@
 #include <android/gui/TrustedOverlay.h>
 
 #include <ftl/flags.h>
+#include <gui/BufferReleaseChannel.h>
 #include <gui/DisplayCaptureArgs.h>
 #include <gui/ISurfaceComposer.h>
 #include <gui/LayerCaptureArgs.h>
@@ -220,6 +221,7 @@
         eDropInputModeChanged = 0x8000'00000000,
         eExtendedRangeBrightnessChanged = 0x10000'00000000,
         eEdgeExtensionChanged = 0x20000'00000000,
+        eBufferReleaseChannelChanged = 0x40000'00000000,
     };
 
     layer_state_t();
@@ -412,6 +414,8 @@
 
     TrustedPresentationThresholds trustedPresentationThresholds;
     TrustedPresentationListener trustedPresentationListener;
+
+    std::shared_ptr<gui::BufferReleaseChannel::ProducerEndpoint> bufferReleaseChannel;
 };
 
 class ComposerState {
diff --git a/libs/gui/include/gui/SurfaceComposerClient.h b/libs/gui/include/gui/SurfaceComposerClient.h
index 95574ee..8c1d644 100644
--- a/libs/gui/include/gui/SurfaceComposerClient.h
+++ b/libs/gui/include/gui/SurfaceComposerClient.h
@@ -45,6 +45,7 @@
 #include <android/gui/BnJankListener.h>
 #include <android/gui/ISurfaceComposerClient.h>
 
+#include <gui/BufferReleaseChannel.h>
 #include <gui/CpuConsumer.h>
 #include <gui/ISurfaceComposer.h>
 #include <gui/ITransactionCompletedListener.h>
@@ -763,6 +764,10 @@
                                          const Rect& destinationFrame);
         Transaction& setDropInputMode(const sp<SurfaceControl>& sc, gui::DropInputMode mode);
 
+        Transaction& setBufferReleaseChannel(
+                const sp<SurfaceControl>& sc,
+                const std::shared_ptr<gui::BufferReleaseChannel::ProducerEndpoint>& channel);
+
         status_t setDisplaySurface(const sp<IBinder>& token,
                 const sp<IGraphicBufferProducer>& bufferProducer);
 
diff --git a/libs/gui/libgui_flags.aconfig b/libs/gui/libgui_flags.aconfig
index 74d806e..e8f2b76 100644
--- a/libs/gui/libgui_flags.aconfig
+++ b/libs/gui/libgui_flags.aconfig
@@ -51,3 +51,11 @@
   bug: "322036393"
   is_fixed_read_only: true
 } # edge_extension_shader
+
+flag {
+  name: "buffer_release_channel"
+  namespace: "window_surfaces"
+  description: "Enable BufferReleaseChannel to optimize buffer releases"
+  bug: "294133380"
+  is_fixed_read_only: true
+} # buffer_release_channel
diff --git a/libs/gui/tests/Android.bp b/libs/gui/tests/Android.bp
index ea8acbb..daaddfb 100644
--- a/libs/gui/tests/Android.bp
+++ b/libs/gui/tests/Android.bp
@@ -32,6 +32,7 @@
         "BLASTBufferQueue_test.cpp",
         "BufferItemConsumer_test.cpp",
         "BufferQueue_test.cpp",
+        "BufferReleaseChannel_test.cpp",
         "Choreographer_test.cpp",
         "CompositorTiming_test.cpp",
         "CpuConsumer_test.cpp",
diff --git a/libs/gui/tests/BufferReleaseChannel_test.cpp b/libs/gui/tests/BufferReleaseChannel_test.cpp
new file mode 100644
index 0000000..f3e962c
--- /dev/null
+++ b/libs/gui/tests/BufferReleaseChannel_test.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+#include <gui/BufferReleaseChannel.h>
+
+using namespace std::string_literals;
+using android::gui::BufferReleaseChannel;
+
+namespace android {
+
+namespace {
+
+// Helper function to check if two file descriptors point to the same file.
+bool is_same_file(int fd1, int fd2) {
+    struct stat stat1;
+    if (fstat(fd1, &stat1) != 0) {
+        return false;
+    }
+    struct stat stat2;
+    if (fstat(fd2, &stat2) != 0) {
+        return false;
+    }
+    return (stat1.st_dev == stat2.st_dev) && (stat1.st_ino == stat2.st_ino);
+}
+
+} // namespace
+
+TEST(BufferReleaseChannelTest, MessageFlattenable) {
+    ReleaseCallbackId releaseCallbackId{1, 2};
+    sp<Fence> releaseFence = sp<Fence>::make(memfd_create("fake-fence-fd", 0));
+
+    std::vector<uint8_t> dataBuffer;
+    std::vector<int> fdBuffer;
+
+    // Verify that we can flatten a message
+    {
+        BufferReleaseChannel::Message message{releaseCallbackId, releaseFence};
+
+        dataBuffer.resize(message.getFlattenedSize());
+        void* dataPtr = dataBuffer.data();
+        size_t dataSize = dataBuffer.size();
+
+        fdBuffer.resize(message.getFdCount());
+        int* fdPtr = fdBuffer.data();
+        size_t fdSize = fdBuffer.size();
+
+        ASSERT_EQ(OK, message.flatten(dataPtr, dataSize, fdPtr, fdSize));
+
+        // Fence's unique_fd uses fdsan to check ownership of the file descriptor. Normally the file
+        // descriptor is passed through the Unix socket and duplicated (and sent to another process)
+        // so there's no problem with duplicate file descriptor ownership. For this unit test, we
+        // need to set up a duplicate file descriptor to avoid crashing due to duplicate ownership.
+        ASSERT_EQ(releaseFence->get(), fdBuffer[0]);
+        fdBuffer[0] = message.releaseFence->dup();
+    }
+
+    // Verify that we can unflatten a message
+    {
+        BufferReleaseChannel::Message message;
+
+        const void* dataPtr = dataBuffer.data();
+        size_t dataSize = dataBuffer.size();
+
+        const int* fdPtr = fdBuffer.data();
+        size_t fdSize = fdBuffer.size();
+
+        ASSERT_EQ(OK, message.unflatten(dataPtr, dataSize, fdPtr, fdSize));
+        ASSERT_EQ(releaseCallbackId, message.releaseCallbackId);
+        ASSERT_TRUE(is_same_file(releaseFence->get(), message.releaseFence->get()));
+    }
+}
+
+// Verify that the BufferReleaseChannel consume returns WOULD_BLOCK when there's no message
+// available.
+TEST(BufferReleaseChannelTest, ConsumerEndpointIsNonBlocking) {
+    std::unique_ptr<BufferReleaseChannel::ConsumerEndpoint> consumer;
+    std::shared_ptr<BufferReleaseChannel::ProducerEndpoint> producer;
+    ASSERT_EQ(OK, BufferReleaseChannel::open("test-channel"s, consumer, producer));
+
+    ReleaseCallbackId releaseCallbackId;
+    sp<Fence> releaseFence;
+    ASSERT_EQ(WOULD_BLOCK, consumer->readReleaseFence(releaseCallbackId, releaseFence));
+}
+
+// Verify that we can write a message to the BufferReleaseChannel producer and read that message
+// using the BufferReleaseChannel consumer.
+TEST(BufferReleaseChannelTest, ProduceAndConsume) {
+    std::unique_ptr<BufferReleaseChannel::ConsumerEndpoint> consumer;
+    std::shared_ptr<BufferReleaseChannel::ProducerEndpoint> producer;
+    ASSERT_EQ(OK, BufferReleaseChannel::open("test-channel"s, consumer, producer));
+
+    ReleaseCallbackId producerReleaseCallbackId{1, 2};
+    sp<Fence> producerReleaseFence = sp<Fence>::make(memfd_create("fake-fence-fd", 0));
+    ASSERT_EQ(OK, producer->writeReleaseFence(producerReleaseCallbackId, producerReleaseFence));
+
+    ReleaseCallbackId consumerReleaseCallbackId;
+    sp<Fence> consumerReleaseFence;
+    ASSERT_EQ(OK, consumer->readReleaseFence(consumerReleaseCallbackId, consumerReleaseFence));
+
+    ASSERT_EQ(producerReleaseCallbackId, consumerReleaseCallbackId);
+    ASSERT_TRUE(is_same_file(producerReleaseFence->get(), consumerReleaseFence->get()));
+}
+
+} // namespace android
diff --git a/services/surfaceflinger/Layer.cpp b/services/surfaceflinger/Layer.cpp
index a6d2b15..082d8aa 100644
--- a/services/surfaceflinger/Layer.cpp
+++ b/services/surfaceflinger/Layer.cpp
@@ -2783,11 +2783,14 @@
         return;
     }
     SFTRACE_FORMAT_INSTANT("callReleaseBufferCallback %s - %" PRIu64, getDebugName(), framenumber);
+    ReleaseCallbackId callbackId{buffer->getId(), framenumber};
+    const sp<Fence>& fence = releaseFence ? releaseFence : Fence::NO_FENCE;
     uint32_t currentMaxAcquiredBufferCount =
             mFlinger->getMaxAcquiredBufferCountForCurrentRefreshRate(mOwnerUid);
-    listener->onReleaseBuffer({buffer->getId(), framenumber},
-                              releaseFence ? releaseFence : Fence::NO_FENCE,
-                              currentMaxAcquiredBufferCount);
+    listener->onReleaseBuffer(callbackId, fence, currentMaxAcquiredBufferCount);
+    if (mBufferReleaseChannel) {
+        mBufferReleaseChannel->writeReleaseFence(callbackId, fence);
+    }
 }
 
 sp<CallbackHandle> Layer::findCallbackHandle() {
@@ -2905,6 +2908,7 @@
 
 void Layer::releasePendingBuffer(nsecs_t dequeueReadyTime) {
     for (const auto& handle : mDrawingState.callbackHandles) {
+        handle->bufferReleaseChannel = mBufferReleaseChannel;
         handle->transformHint = mTransformHint;
         handle->dequeueReadyTime = dequeueReadyTime;
         handle->currentMaxAcquiredBufferCount =
@@ -4376,6 +4380,11 @@
     return haveTrustedPresentationListener;
 }
 
+void Layer::setBufferReleaseChannel(
+        const std::shared_ptr<gui::BufferReleaseChannel::ProducerEndpoint>& channel) {
+    mBufferReleaseChannel = channel;
+}
+
 void Layer::updateLastLatchTime(nsecs_t latchTime) {
     mLastLatchTime = latchTime;
 }
diff --git a/services/surfaceflinger/Layer.h b/services/surfaceflinger/Layer.h
index 52f169e..d470738 100644
--- a/services/surfaceflinger/Layer.h
+++ b/services/surfaceflinger/Layer.h
@@ -550,6 +550,7 @@
     };
 
     BufferInfo mBufferInfo;
+    std::shared_ptr<gui::BufferReleaseChannel::ProducerEndpoint> mBufferReleaseChannel;
 
     // implements compositionengine::LayerFE
     const compositionengine::LayerFECompositionState* getCompositionState() const;
@@ -813,6 +814,8 @@
 
     bool setTrustedPresentationInfo(TrustedPresentationThresholds const& thresholds,
                                     TrustedPresentationListener const& listener);
+    void setBufferReleaseChannel(
+            const std::shared_ptr<gui::BufferReleaseChannel::ProducerEndpoint>& channel);
 
     // Creates a new handle each time, so we only expect
     // this to be called once.
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index 1d35730..beb05b9 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -5951,6 +5951,10 @@
         }
     }
 
+    if (what & layer_state_t::eBufferReleaseChannelChanged) {
+        layer->setBufferReleaseChannel(s.bufferReleaseChannel);
+    }
+
     const auto& requestedLayerState = mLayerLifecycleManager.getLayerFromId(layer->getSequence());
     bool willPresentCurrentTransaction = requestedLayerState &&
             (requestedLayerState->hasReadyFrame() ||
diff --git a/services/surfaceflinger/TransactionCallbackInvoker.cpp b/services/surfaceflinger/TransactionCallbackInvoker.cpp
index 881bf35..9ea0f5f 100644
--- a/services/surfaceflinger/TransactionCallbackInvoker.cpp
+++ b/services/surfaceflinger/TransactionCallbackInvoker.cpp
@@ -149,6 +149,12 @@
                                                     handle->transformHint,
                                                     handle->currentMaxAcquiredBufferCount,
                                                     eventStats, handle->previousReleaseCallbackId);
+        if (handle->bufferReleaseChannel &&
+            handle->previousReleaseCallbackId != ReleaseCallbackId::INVALID_ID) {
+            mBufferReleases.emplace_back(handle->bufferReleaseChannel,
+                                         handle->previousReleaseCallbackId,
+                                         handle->previousReleaseFence);
+        }
     }
     return NO_ERROR;
 }
@@ -158,6 +164,11 @@
 }
 
 void TransactionCallbackInvoker::sendCallbacks(bool onCommitOnly) {
+    for (const auto& bufferRelease : mBufferReleases) {
+        bufferRelease.channel->writeReleaseFence(bufferRelease.callbackId, bufferRelease.fence);
+    }
+    mBufferReleases.clear();
+
     // For each listener
     auto completedTransactionsItr = mCompletedTransactions.begin();
     ftl::SmallVector<ListenerStats, 10> listenerStatsToSend;
diff --git a/services/surfaceflinger/TransactionCallbackInvoker.h b/services/surfaceflinger/TransactionCallbackInvoker.h
index 7853a9f..f74f964 100644
--- a/services/surfaceflinger/TransactionCallbackInvoker.h
+++ b/services/surfaceflinger/TransactionCallbackInvoker.h
@@ -28,6 +28,7 @@
 #include <android-base/thread_annotations.h>
 #include <binder/IBinder.h>
 #include <ftl/future.h>
+#include <gui/BufferReleaseChannel.h>
 #include <gui/ITransactionCompletedListener.h>
 #include <ui/Fence.h>
 #include <ui/FenceResult.h>
@@ -59,6 +60,7 @@
     uint64_t frameNumber = 0;
     uint64_t previousFrameNumber = 0;
     ReleaseCallbackId previousReleaseCallbackId = ReleaseCallbackId::INVALID_ID;
+    std::shared_ptr<gui::BufferReleaseChannel::ProducerEndpoint> bufferReleaseChannel;
 };
 
 class TransactionCallbackInvoker {
@@ -86,6 +88,13 @@
     std::unordered_map<sp<IBinder>, std::deque<TransactionStats>, IListenerHash>
         mCompletedTransactions;
 
+    struct BufferRelease {
+        std::shared_ptr<gui::BufferReleaseChannel::ProducerEndpoint> channel;
+        ReleaseCallbackId callbackId;
+        sp<Fence> fence;
+    };
+    std::vector<BufferRelease> mBufferReleases;
+
     sp<Fence> mPresentFence;
 };