Merge "Prevent onHandleDestroyed from being called w/ mStateLock" into qt-dev
diff --git a/libs/gui/Android.bp b/libs/gui/Android.bp
index fcbfba9..f435d98 100644
--- a/libs/gui/Android.bp
+++ b/libs/gui/Android.bp
@@ -61,6 +61,7 @@
         "DisplayEventReceiver.cpp",
         "FrameTimestamps.cpp",
         "GLConsumer.cpp",
+        "GLConsumerUtils.cpp",
         "GuiConfig.cpp",
         "HdrMetadata.cpp",
         "IDisplayEventConnection.cpp",
@@ -170,4 +171,107 @@
     ],
 }
 
+// Used by media codec services exclusively as a static lib for
+// core bufferqueue support only.
+cc_library_static {
+    name: "libgui_bufferqueue_static",
+    vendor_available: true,
+
+    clang: true,
+    cflags: [
+        "-Wall",
+        "-Werror",
+        "-DNO_BUFFERHUB",
+    ],
+
+    cppflags: [
+        "-Wextra",
+        "-DDEBUG_ONLY_CODE=0",
+    ],
+
+    product_variables: {
+        eng: {
+            cppflags: [
+                "-UDEBUG_ONLY_CODE",
+                "-DDEBUG_ONLY_CODE=1",
+            ],
+        },
+    },
+
+    srcs: [
+        "BufferItem.cpp",
+        "BufferQueue.cpp",
+        "BufferQueueConsumer.cpp",
+        "BufferQueueCore.cpp",
+        "BufferQueueProducer.cpp",
+        "BufferQueueThreadState.cpp",
+        "BufferSlot.cpp",
+        "FrameTimestamps.cpp",
+        "GLConsumerUtils.cpp",
+        "HdrMetadata.cpp",
+        "IConsumerListener.cpp",
+        "IGraphicBufferConsumer.cpp",
+        "IGraphicBufferProducer.cpp",
+        "IProducerListener.cpp",
+        "OccupancyTracker.cpp",
+        "bufferqueue/1.0/B2HProducerListener.cpp",
+        "bufferqueue/1.0/Conversion.cpp",
+        "bufferqueue/1.0/H2BGraphicBufferProducer.cpp",
+        "bufferqueue/1.0/H2BProducerListener.cpp",
+        "bufferqueue/1.0/WProducerListener.cpp",
+        "bufferqueue/2.0/B2HGraphicBufferProducer.cpp",
+        "bufferqueue/2.0/B2HProducerListener.cpp",
+        "bufferqueue/2.0/H2BGraphicBufferProducer.cpp",
+        "bufferqueue/2.0/H2BProducerListener.cpp",
+        "bufferqueue/2.0/types.cpp",
+    ],
+
+    shared_libs: [
+        "android.hardware.graphics.bufferqueue@1.0",
+        "android.hardware.graphics.bufferqueue@2.0",
+        "android.hardware.graphics.common@1.1",
+        "android.hardware.graphics.common@1.2",
+        "android.hidl.token@1.0-utils",
+        "libbase",
+        "libbinder",
+        "libcutils",
+        "libEGL",
+        "libGLESv2",
+        "libhidlbase",
+        "libhidltransport",
+        "libhwbinder",
+        "liblog",
+        "libnativewindow",
+        "libsync",
+        "libui",
+        "libutils",
+        "libvndksupport",
+    ],
+
+    header_libs: [
+        "libgui_headers",
+        "libnativebase_headers",
+    ],
+
+    export_shared_lib_headers: [
+        "libbinder",
+        "libEGL",
+        "libnativewindow",
+        "libui",
+        "android.hardware.graphics.bufferqueue@1.0",
+        "android.hardware.graphics.bufferqueue@2.0",
+        "android.hardware.graphics.common@1.1",
+        "android.hardware.graphics.common@1.2",
+        "android.hidl.token@1.0-utils",
+    ],
+
+    export_header_lib_headers: [
+        "libgui_headers",
+    ],
+
+    export_include_dirs: [
+        "include",
+    ],
+}
+
 subdirs = ["tests"]
diff --git a/libs/gui/BufferQueueConsumer.cpp b/libs/gui/BufferQueueConsumer.cpp
index e226136..528bfb1 100644
--- a/libs/gui/BufferQueueConsumer.cpp
+++ b/libs/gui/BufferQueueConsumer.cpp
@@ -94,8 +94,6 @@
         // Skip this if we're in shared buffer mode and the queue is empty,
         // since in that case we'll just return the shared buffer.
         if (expectedPresent != 0 && !mCore->mQueue.empty()) {
-            const int MAX_REASONABLE_NSEC = 1000000000ULL; // 1 second
-
             // The 'expectedPresent' argument indicates when the buffer is expected
             // to be presented on-screen. If the buffer's desired present time is
             // earlier (less) than expectedPresent -- meaning it will be displayed
diff --git a/libs/gui/GLConsumer.cpp b/libs/gui/GLConsumer.cpp
index faf02f3..8d66154 100644
--- a/libs/gui/GLConsumer.cpp
+++ b/libs/gui/GLConsumer.cpp
@@ -745,104 +745,6 @@
         mCurrentTransform, mFilteringEnabled);
 }
 
-void GLConsumer::computeTransformMatrix(float outTransform[16],
-        const sp<GraphicBuffer>& buf, const Rect& cropRect, uint32_t transform,
-        bool filtering) {
-    // Transform matrices
-    static const mat4 mtxFlipH(
-        -1, 0, 0, 0,
-        0, 1, 0, 0,
-        0, 0, 1, 0,
-        1, 0, 0, 1
-    );
-    static const mat4 mtxFlipV(
-        1, 0, 0, 0,
-        0, -1, 0, 0,
-        0, 0, 1, 0,
-        0, 1, 0, 1
-    );
-    static const mat4 mtxRot90(
-        0, 1, 0, 0,
-        -1, 0, 0, 0,
-        0, 0, 1, 0,
-        1, 0, 0, 1
-    );
-
-    mat4 xform;
-    if (transform & NATIVE_WINDOW_TRANSFORM_FLIP_H) {
-        xform *= mtxFlipH;
-    }
-    if (transform & NATIVE_WINDOW_TRANSFORM_FLIP_V) {
-        xform *= mtxFlipV;
-    }
-    if (transform & NATIVE_WINDOW_TRANSFORM_ROT_90) {
-        xform *= mtxRot90;
-    }
-
-    if (!cropRect.isEmpty()) {
-        float tx = 0.0f, ty = 0.0f, sx = 1.0f, sy = 1.0f;
-        float bufferWidth = buf->getWidth();
-        float bufferHeight = buf->getHeight();
-        float shrinkAmount = 0.0f;
-        if (filtering) {
-            // In order to prevent bilinear sampling beyond the edge of the
-            // crop rectangle we may need to shrink it by 2 texels in each
-            // dimension.  Normally this would just need to take 1/2 a texel
-            // off each end, but because the chroma channels of YUV420 images
-            // are subsampled we may need to shrink the crop region by a whole
-            // texel on each side.
-            switch (buf->getPixelFormat()) {
-                case PIXEL_FORMAT_RGBA_8888:
-                case PIXEL_FORMAT_RGBX_8888:
-                case PIXEL_FORMAT_RGBA_FP16:
-                case PIXEL_FORMAT_RGBA_1010102:
-                case PIXEL_FORMAT_RGB_888:
-                case PIXEL_FORMAT_RGB_565:
-                case PIXEL_FORMAT_BGRA_8888:
-                    // We know there's no subsampling of any channels, so we
-                    // only need to shrink by a half a pixel.
-                    shrinkAmount = 0.5;
-                    break;
-
-                default:
-                    // If we don't recognize the format, we must assume the
-                    // worst case (that we care about), which is YUV420.
-                    shrinkAmount = 1.0;
-                    break;
-            }
-        }
-
-        // Only shrink the dimensions that are not the size of the buffer.
-        if (cropRect.width() < bufferWidth) {
-            tx = (float(cropRect.left) + shrinkAmount) / bufferWidth;
-            sx = (float(cropRect.width()) - (2.0f * shrinkAmount)) /
-                    bufferWidth;
-        }
-        if (cropRect.height() < bufferHeight) {
-            ty = (float(bufferHeight - cropRect.bottom) + shrinkAmount) /
-                    bufferHeight;
-            sy = (float(cropRect.height()) - (2.0f * shrinkAmount)) /
-                    bufferHeight;
-        }
-
-        mat4 crop(
-            sx, 0, 0, 0,
-            0, sy, 0, 0,
-            0, 0, 1, 0,
-            tx, ty, 0, 1
-        );
-        xform = crop * xform;
-    }
-
-    // GLConsumer uses the GL convention where (0, 0) is the bottom-left
-    // corner and (1, 1) is the top-right corner.  Add an additional vertical
-    // flip after all other transforms to map from GL convention to buffer
-    // queue memory layout, where (0, 0) is the top-left corner.
-    xform = mtxFlipV * xform;
-
-    memcpy(outTransform, xform.asArray(), sizeof(xform));
-}
-
 Rect GLConsumer::scaleDownCrop(const Rect& crop, uint32_t bufferWidth, uint32_t bufferHeight) {
     Rect outCrop = crop;
 
diff --git a/libs/gui/GLConsumerUtils.cpp b/libs/gui/GLConsumerUtils.cpp
new file mode 100644
index 0000000..7a06c3d
--- /dev/null
+++ b/libs/gui/GLConsumerUtils.cpp
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "GLConsumerUtils"
+//#define LOG_NDEBUG 0
+
+#include <gui/GLConsumer.h>
+#include <math/mat4.h>
+#include <system/window.h>
+#include <utils/Log.h>
+
+namespace android {
+
+void GLConsumer::computeTransformMatrix(float outTransform[16],
+        const sp<GraphicBuffer>& buf, const Rect& cropRect, uint32_t transform,
+        bool filtering) {
+    // Transform matrices
+    static const mat4 mtxFlipH(
+        -1, 0, 0, 0,
+        0, 1, 0, 0,
+        0, 0, 1, 0,
+        1, 0, 0, 1
+    );
+    static const mat4 mtxFlipV(
+        1, 0, 0, 0,
+        0, -1, 0, 0,
+        0, 0, 1, 0,
+        0, 1, 0, 1
+    );
+    static const mat4 mtxRot90(
+        0, 1, 0, 0,
+        -1, 0, 0, 0,
+        0, 0, 1, 0,
+        1, 0, 0, 1
+    );
+
+    mat4 xform;
+    if (transform & NATIVE_WINDOW_TRANSFORM_FLIP_H) {
+        xform *= mtxFlipH;
+    }
+    if (transform & NATIVE_WINDOW_TRANSFORM_FLIP_V) {
+        xform *= mtxFlipV;
+    }
+    if (transform & NATIVE_WINDOW_TRANSFORM_ROT_90) {
+        xform *= mtxRot90;
+    }
+
+    if (!cropRect.isEmpty()) {
+        float tx = 0.0f, ty = 0.0f, sx = 1.0f, sy = 1.0f;
+        float bufferWidth = buf->getWidth();
+        float bufferHeight = buf->getHeight();
+        float shrinkAmount = 0.0f;
+        if (filtering) {
+            // In order to prevent bilinear sampling beyond the edge of the
+            // crop rectangle we may need to shrink it by 2 texels in each
+            // dimension.  Normally this would just need to take 1/2 a texel
+            // off each end, but because the chroma channels of YUV420 images
+            // are subsampled we may need to shrink the crop region by a whole
+            // texel on each side.
+            switch (buf->getPixelFormat()) {
+                case PIXEL_FORMAT_RGBA_8888:
+                case PIXEL_FORMAT_RGBX_8888:
+                case PIXEL_FORMAT_RGBA_FP16:
+                case PIXEL_FORMAT_RGBA_1010102:
+                case PIXEL_FORMAT_RGB_888:
+                case PIXEL_FORMAT_RGB_565:
+                case PIXEL_FORMAT_BGRA_8888:
+                    // We know there's no subsampling of any channels, so we
+                    // only need to shrink by a half a pixel.
+                    shrinkAmount = 0.5;
+                    break;
+
+                default:
+                    // If we don't recognize the format, we must assume the
+                    // worst case (that we care about), which is YUV420.
+                    shrinkAmount = 1.0;
+                    break;
+            }
+        }
+
+        // Only shrink the dimensions that are not the size of the buffer.
+        if (cropRect.width() < bufferWidth) {
+            tx = (float(cropRect.left) + shrinkAmount) / bufferWidth;
+            sx = (float(cropRect.width()) - (2.0f * shrinkAmount)) /
+                    bufferWidth;
+        }
+        if (cropRect.height() < bufferHeight) {
+            ty = (float(bufferHeight - cropRect.bottom) + shrinkAmount) /
+                    bufferHeight;
+            sy = (float(cropRect.height()) - (2.0f * shrinkAmount)) /
+                    bufferHeight;
+        }
+
+        mat4 crop(
+            sx, 0, 0, 0,
+            0, sy, 0, 0,
+            0, 0, 1, 0,
+            tx, ty, 0, 1
+        );
+        xform = crop * xform;
+    }
+
+    // GLConsumer uses the GL convention where (0, 0) is the bottom-left
+    // corner and (1, 1) is the top-right corner.  Add an additional vertical
+    // flip after all other transforms to map from GL convention to buffer
+    // queue memory layout, where (0, 0) is the top-left corner.
+    xform = mtxFlipV * xform;
+
+    memcpy(outTransform, xform.asArray(), sizeof(xform));
+}
+
+}; // namespace android
diff --git a/libs/gui/include/gui/BufferQueueConsumer.h b/libs/gui/include/gui/BufferQueueConsumer.h
index aa13c0c..7db69ec 100644
--- a/libs/gui/include/gui/BufferQueueConsumer.h
+++ b/libs/gui/include/gui/BufferQueueConsumer.h
@@ -171,6 +171,9 @@
 
     // End functions required for backwards compatibility
 
+    // Value used to determine if present time is valid.
+    constexpr static int MAX_REASONABLE_NSEC = 1'000'000'000ULL; // 1 second
+
 private:
     sp<BufferQueueCore> mCore;
 
diff --git a/libs/renderengine/gl/GLESRenderEngine.cpp b/libs/renderengine/gl/GLESRenderEngine.cpp
index 48cb620..efe4bdf 100644
--- a/libs/renderengine/gl/GLESRenderEngine.cpp
+++ b/libs/renderengine/gl/GLESRenderEngine.cpp
@@ -1311,7 +1311,9 @@
     StringAppendF(&result, "GLES: %s, %s, %s\n", extensions.getVendor(), extensions.getRenderer(),
                   extensions.getVersion());
     StringAppendF(&result, "%s\n", extensions.getExtensions());
-    StringAppendF(&result, "RenderEngine is in protected context : %d\n", mInProtectedContext);
+    StringAppendF(&result, "RenderEngine supports protected context: %d\n",
+                  supportsProtectedContent());
+    StringAppendF(&result, "RenderEngine is in protected context: %d\n", mInProtectedContext);
     StringAppendF(&result, "RenderEngine program cache size for unprotected context: %zu\n",
                   cache.getSize(mEGLContext));
     StringAppendF(&result, "RenderEngine program cache size for protected context: %zu\n",
diff --git a/libs/ui/GraphicBufferAllocator.cpp b/libs/ui/GraphicBufferAllocator.cpp
index 5a67dc4..0861a1f 100644
--- a/libs/ui/GraphicBufferAllocator.cpp
+++ b/libs/ui/GraphicBufferAllocator.cpp
@@ -60,6 +60,15 @@
 
 GraphicBufferAllocator::~GraphicBufferAllocator() {}
 
+size_t GraphicBufferAllocator::getTotalSize() const {
+    Mutex::Autolock _l(sLock);
+    size_t total = 0;
+    for (size_t i = 0; i < sAllocList.size(); ++i) {
+        total += sAllocList.valueAt(i).size;
+    }
+    return total;
+}
+
 void GraphicBufferAllocator::dump(std::string& result) const {
     Mutex::Autolock _l(sLock);
     KeyedVector<buffer_handle_t, alloc_rec_t>& list(sAllocList);
diff --git a/libs/ui/include/ui/GraphicBufferAllocator.h b/libs/ui/include/ui/GraphicBufferAllocator.h
index 3a547b6..25d4512 100644
--- a/libs/ui/include/ui/GraphicBufferAllocator.h
+++ b/libs/ui/include/ui/GraphicBufferAllocator.h
@@ -49,6 +49,8 @@
 
     status_t free(buffer_handle_t handle);
 
+    size_t getTotalSize() const;
+
     void dump(std::string& res) const;
     static void dumpToSystemLog();
 
diff --git a/services/surfaceflinger/BufferLayer.cpp b/services/surfaceflinger/BufferLayer.cpp
index 4ea587d..06caf1e 100644
--- a/services/surfaceflinger/BufferLayer.cpp
+++ b/services/surfaceflinger/BufferLayer.cpp
@@ -534,11 +534,13 @@
 
 // transaction
 void BufferLayer::notifyAvailableFrames() {
-    auto headFrameNumber = getHeadFrameNumber();
-    bool headFenceSignaled = fenceHasSignaled();
+    const auto headFrameNumber = getHeadFrameNumber();
+    const bool headFenceSignaled = fenceHasSignaled();
+    const bool presentTimeIsCurrent = framePresentTimeIsCurrent();
     Mutex::Autolock lock(mLocalSyncPointMutex);
     for (auto& point : mLocalSyncPoints) {
-        if (headFrameNumber >= point->getFrameNumber() && headFenceSignaled) {
+        if (headFrameNumber >= point->getFrameNumber() && headFenceSignaled &&
+            presentTimeIsCurrent) {
             point->setFrameAvailable();
         }
     }
diff --git a/services/surfaceflinger/BufferLayer.h b/services/surfaceflinger/BufferLayer.h
index dc103cb..b679380 100644
--- a/services/surfaceflinger/BufferLayer.h
+++ b/services/surfaceflinger/BufferLayer.h
@@ -114,6 +114,7 @@
     // -----------------------------------------------------------------------
 private:
     virtual bool fenceHasSignaled() const = 0;
+    virtual bool framePresentTimeIsCurrent() const = 0;
 
     virtual nsecs_t getDesiredPresentTime() = 0;
     virtual std::shared_ptr<FenceTime> getCurrentFenceTime() const = 0;
diff --git a/services/surfaceflinger/BufferQueueLayer.cpp b/services/surfaceflinger/BufferQueueLayer.cpp
index ff5f271..5d729f5 100644
--- a/services/surfaceflinger/BufferQueueLayer.cpp
+++ b/services/surfaceflinger/BufferQueueLayer.cpp
@@ -19,6 +19,7 @@
 #include <compositionengine/OutputLayer.h>
 #include <compositionengine/impl/LayerCompositionState.h>
 #include <compositionengine/impl/OutputLayerCompositionState.h>
+#include <gui/BufferQueueConsumer.h>
 #include <system/window.h>
 
 #include "BufferQueueLayer.h"
@@ -133,6 +134,15 @@
     return mQueueItems[0].mFenceTime->getSignalTime() != Fence::SIGNAL_TIME_PENDING;
 }
 
+bool BufferQueueLayer::framePresentTimeIsCurrent() const {
+    if (!hasFrameUpdate() || isRemovedFromCurrentState()) {
+        return true;
+    }
+
+    Mutex::Autolock lock(mQueueItemLock);
+    return mQueueItems[0].mTimestamp <= mFlinger->mScheduler->expectedPresentTime();
+}
+
 nsecs_t BufferQueueLayer::getDesiredPresentTime() {
     return mConsumer->getTimestamp();
 }
@@ -185,7 +195,37 @@
 
 uint64_t BufferQueueLayer::getFrameNumber() const {
     Mutex::Autolock lock(mQueueItemLock);
-    return mQueueItems[0].mFrameNumber;
+    uint64_t frameNumber = mQueueItems[0].mFrameNumber;
+
+    // The head of the queue will be dropped if there are signaled and timely frames behind it
+    nsecs_t expectedPresentTime = mFlinger->mScheduler->expectedPresentTime();
+
+    if (isRemovedFromCurrentState()) {
+        expectedPresentTime = 0;
+    }
+
+    for (size_t i = 1; i < mQueueItems.size(); i++) {
+        const bool fenceSignaled =
+                mQueueItems[i].mFenceTime->getSignalTime() != Fence::SIGNAL_TIME_PENDING;
+        if (!fenceSignaled) {
+            break;
+        }
+
+        // We don't drop frames without explicit timestamps
+        if (mQueueItems[i].mIsAutoTimestamp) {
+            break;
+        }
+
+        const nsecs_t desiredPresent = mQueueItems[i].mTimestamp;
+        if (desiredPresent < expectedPresentTime - BufferQueueConsumer::MAX_REASONABLE_NSEC ||
+            desiredPresent > expectedPresentTime) {
+            break;
+        }
+
+        frameNumber = mQueueItems[i].mFrameNumber;
+    }
+
+    return frameNumber;
 }
 
 bool BufferQueueLayer::getAutoRefresh() const {
diff --git a/services/surfaceflinger/BufferQueueLayer.h b/services/surfaceflinger/BufferQueueLayer.h
index a2aad17..7def33a 100644
--- a/services/surfaceflinger/BufferQueueLayer.h
+++ b/services/surfaceflinger/BufferQueueLayer.h
@@ -61,6 +61,7 @@
     // -----------------------------------------------------------------------
 public:
     bool fenceHasSignaled() const override;
+    bool framePresentTimeIsCurrent() const override;
 
 private:
     nsecs_t getDesiredPresentTime() override;
diff --git a/services/surfaceflinger/BufferStateLayer.cpp b/services/surfaceflinger/BufferStateLayer.cpp
index 6ef4518..a740afb 100644
--- a/services/surfaceflinger/BufferStateLayer.cpp
+++ b/services/surfaceflinger/BufferStateLayer.cpp
@@ -374,6 +374,14 @@
     return getDrawingState().acquireFence->getStatus() == Fence::Status::Signaled;
 }
 
+bool BufferStateLayer::framePresentTimeIsCurrent() const {
+    if (!hasFrameUpdate() || isRemovedFromCurrentState()) {
+        return true;
+    }
+
+    return mDesiredPresentTime <= mFlinger->mScheduler->expectedPresentTime();
+}
+
 nsecs_t BufferStateLayer::getDesiredPresentTime() {
     return mDesiredPresentTime;
 }
diff --git a/services/surfaceflinger/BufferStateLayer.h b/services/surfaceflinger/BufferStateLayer.h
index 13186dd..4e2bc45 100644
--- a/services/surfaceflinger/BufferStateLayer.h
+++ b/services/surfaceflinger/BufferStateLayer.h
@@ -101,6 +101,7 @@
     // Interface implementation for BufferLayer
     // -----------------------------------------------------------------------
     bool fenceHasSignaled() const override;
+    bool framePresentTimeIsCurrent() const override;
 
 private:
     nsecs_t getDesiredPresentTime() override;
diff --git a/services/surfaceflinger/CompositionEngine/src/Output.cpp b/services/surfaceflinger/CompositionEngine/src/Output.cpp
index 2893031..01b5781 100644
--- a/services/surfaceflinger/CompositionEngine/src/Output.cpp
+++ b/services/surfaceflinger/CompositionEngine/src/Output.cpp
@@ -89,14 +89,14 @@
 }
 
 void Output::setColorTransform(const mat4& transform) {
+    if (mState.colorTransformMat == transform) {
+        return;
+    }
+
     const bool isIdentity = (transform == mat4());
     const auto newColorTransform =
             isIdentity ? HAL_COLOR_TRANSFORM_IDENTITY : HAL_COLOR_TRANSFORM_ARBITRARY_MATRIX;
 
-    if (mState.colorTransform == newColorTransform) {
-        return;
-    }
-
     mState.colorTransform = newColorTransform;
     mState.colorTransformMat = transform;
 
diff --git a/services/surfaceflinger/CompositionEngine/tests/OutputTest.cpp b/services/surfaceflinger/CompositionEngine/tests/OutputTest.cpp
index a84af3a..fee0c11 100644
--- a/services/surfaceflinger/CompositionEngine/tests/OutputTest.cpp
+++ b/services/surfaceflinger/CompositionEngine/tests/OutputTest.cpp
@@ -172,16 +172,29 @@
     mOutput.setColorTransform(identity);
 
     EXPECT_EQ(HAL_COLOR_TRANSFORM_IDENTITY, mOutput.getState().colorTransform);
+    EXPECT_EQ(identity, mOutput.getState().colorTransformMat);
 
     // Since identity is the default, the dirty region should be unchanged (empty)
     EXPECT_THAT(mOutput.getState().dirtyRegion, RegionEq(Region()));
 
     // Non-identity matrix sets a non-identity state value
-    const mat4 nonIdentity = mat4() * 2;
+    const mat4 nonIdentityHalf = mat4() * 0.5;
 
-    mOutput.setColorTransform(nonIdentity);
+    mOutput.setColorTransform(nonIdentityHalf);
 
     EXPECT_EQ(HAL_COLOR_TRANSFORM_ARBITRARY_MATRIX, mOutput.getState().colorTransform);
+    EXPECT_EQ(nonIdentityHalf, mOutput.getState().colorTransformMat);
+
+    // Since this is a state change, the entire output should now be dirty.
+    EXPECT_THAT(mOutput.getState().dirtyRegion, RegionEq(Region(kDefaultDisplaySize)));
+
+    // A different non-identity matrix replaces the previous non-identity matrix
+    const mat4 nonIdentityQuarter = mat4() * 0.25;
+
+    mOutput.setColorTransform(nonIdentityQuarter);
+
+    EXPECT_EQ(HAL_COLOR_TRANSFORM_ARBITRARY_MATRIX, mOutput.getState().colorTransform);
+    EXPECT_EQ(nonIdentityQuarter, mOutput.getState().colorTransformMat);
 
     // Since this is a state change, the entire output should now be dirty.
     EXPECT_THAT(mOutput.getState().dirtyRegion, RegionEq(Region(kDefaultDisplaySize)));
diff --git a/services/surfaceflinger/Layer.cpp b/services/surfaceflinger/Layer.cpp
index 46ca0b6..cbe8b29 100644
--- a/services/surfaceflinger/Layer.cpp
+++ b/services/surfaceflinger/Layer.cpp
@@ -130,7 +130,7 @@
     }
 
     mFrameTracker.logAndResetStats(mName);
-    mFlinger->onLayerDestroyed();
+    mFlinger->onLayerDestroyed(this);
 }
 
 // ---------------------------------------------------------------------------
diff --git a/services/surfaceflinger/Scheduler/PhaseOffsets.cpp b/services/surfaceflinger/Scheduler/PhaseOffsets.cpp
index 16f6729..276bce1 100644
--- a/services/surfaceflinger/Scheduler/PhaseOffsets.cpp
+++ b/services/surfaceflinger/Scheduler/PhaseOffsets.cpp
@@ -65,6 +65,13 @@
     property_get("debug.sf.high_fps_late_sf_phase_offset_ns", value, "1000000");
     const int highFpsLateSfOffsetNs = atoi(value);
 
+    // Defines the threshold at which an offset is considered to be negative, i.e. targeting
+    // the N+2 vsync instead of N+1. This means that:
+    // For offset < threshold, SF wakes up (vsync_duration - offset) before HW vsync.
+    // For offset >= threshold, SF wakes up (2 * vsync_duration - offset) before HW vsync.
+    property_get("debug.sf.phase_offset_threshold_for_next_vsync_ns", value, "-1");
+    const int phaseOffsetThresholdForNextVsyncNs = atoi(value);
+
     mDefaultRefreshRateOffsets.early = {earlySfOffsetNs != -1 ? earlySfOffsetNs
                                                               : sfVsyncPhaseOffsetNs,
                                         earlyAppOffsetNs != -1 ? earlyAppOffsetNs
@@ -84,6 +91,10 @@
                                        highFpsEarlyGlAppOffsetNs != -1 ? highFpsEarlyGlAppOffsetNs
                                                                        : highFpsLateAppOffsetNs};
     mHighRefreshRateOffsets.late = {highFpsLateSfOffsetNs, highFpsLateAppOffsetNs};
+
+    mOffsetThresholdForNextVsync = phaseOffsetThresholdForNextVsyncNs != -1
+            ? phaseOffsetThresholdForNextVsyncNs
+            : std::numeric_limits<nsecs_t>::max();
 }
 
 PhaseOffsets::Offsets PhaseOffsets::getOffsetsForRefreshRate(
diff --git a/services/surfaceflinger/Scheduler/PhaseOffsets.h b/services/surfaceflinger/Scheduler/PhaseOffsets.h
index 08747a5..dc71e6e 100644
--- a/services/surfaceflinger/Scheduler/PhaseOffsets.h
+++ b/services/surfaceflinger/Scheduler/PhaseOffsets.h
@@ -46,6 +46,7 @@
             RefreshRateConfigs::RefreshRateType refreshRateType) const = 0;
     virtual Offsets getCurrentOffsets() const = 0;
     virtual void setRefreshRateType(RefreshRateConfigs::RefreshRateType refreshRateType) = 0;
+    virtual nsecs_t getOffsetThresholdForNextVsync() const = 0;
     virtual void dump(std::string& result) const = 0;
 };
 
@@ -72,6 +73,8 @@
         mRefreshRateType = refreshRateType;
     }
 
+    nsecs_t getOffsetThresholdForNextVsync() const override { return mOffsetThresholdForNextVsync; }
+
     // Returns current offsets in human friendly format.
     void dump(std::string& result) const override;
 
@@ -84,6 +87,7 @@
 
     Offsets mDefaultRefreshRateOffsets;
     Offsets mHighRefreshRateOffsets;
+    nsecs_t mOffsetThresholdForNextVsync;
 };
 } // namespace impl
 
diff --git a/services/surfaceflinger/Scheduler/VSyncModulator.h b/services/surfaceflinger/Scheduler/VSyncModulator.h
index 1a0de08..81a7864 100644
--- a/services/surfaceflinger/Scheduler/VSyncModulator.h
+++ b/services/surfaceflinger/Scheduler/VSyncModulator.h
@@ -139,6 +139,19 @@
         }
     }
 
+    Offsets getOffsets() {
+        // Early offsets are used if we're in the middle of a refresh rate
+        // change, or if we recently began a transaction.
+        if (mTransactionStart == Scheduler::TransactionStart::EARLY ||
+            mRemainingEarlyFrameCount > 0 || mRefreshRateChangePending) {
+            return mEarlyOffsets;
+        } else if (mLastFrameUsedRenderEngine) {
+            return mEarlyGlOffsets;
+        } else {
+            return mLateOffsets;
+        }
+    }
+
 private:
     void updateOffsets() {
         const Offsets desired = getOffsets();
@@ -167,19 +180,6 @@
         }
     }
 
-    Offsets getOffsets() {
-        // Early offsets are used if we're in the middle of a refresh rate
-        // change, or if we recently begin a transaction.
-        if (mTransactionStart == Scheduler::TransactionStart::EARLY ||
-            mRemainingEarlyFrameCount > 0 || mRefreshRateChangePending) {
-            return mEarlyOffsets;
-        } else if (mLastFrameUsedRenderEngine) {
-            return mEarlyGlOffsets;
-        } else {
-            return mLateOffsets;
-        }
-    }
-
     Offsets mLateOffsets;
     Offsets mEarlyOffsets;
     Offsets mEarlyGlOffsets;
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index 37912d8..fc0d5fc 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -977,8 +977,7 @@
 bool SurfaceFlinger::performSetActiveConfig() {
     ATRACE_CALL();
     if (mCheckPendingFence) {
-        if (mPreviousPresentFence != Fence::NO_FENCE &&
-            (mPreviousPresentFence->getStatus() == Fence::Status::Unsignaled)) {
+        if (previousFrameMissed()) {
             // fence has not signaled yet. wait for the next invalidate
             mEventQueue->invalidateForHWC();
             return true;
@@ -1587,12 +1586,23 @@
     setTransactionFlags(eDisplayTransactionNeeded);
 }
 
+bool SurfaceFlinger::previousFrameMissed() NO_THREAD_SAFETY_ANALYSIS {
+    // We store the last two present fences. If SF's phase offset makes it wake up
+    // before the actual vsync while targeting the following vsync, we need to check
+    // fence N-2 instead of fence N-1.
+    const sp<Fence>& fence =
+            mVsyncModulator.getOffsets().sf < mPhaseOffsets->getOffsetThresholdForNextVsync()
+            ? mPreviousPresentFences[0]
+            : mPreviousPresentFences[1];
+
+    return fence != Fence::NO_FENCE && (fence->getStatus() == Fence::Status::Unsignaled);
+}
+
 void SurfaceFlinger::onMessageReceived(int32_t what) NO_THREAD_SAFETY_ANALYSIS {
     ATRACE_CALL();
     switch (what) {
         case MessageQueue::INVALIDATE: {
-            bool frameMissed = mPreviousPresentFence != Fence::NO_FENCE &&
-                    (mPreviousPresentFence->getStatus() == Fence::Status::Unsignaled);
+            bool frameMissed = previousFrameMissed();
             bool hwcFrameMissed = mHadDeviceComposition && frameMissed;
             bool gpuFrameMissed = mHadClientComposition && frameMissed;
             ATRACE_INT("FrameMissed", static_cast<int>(frameMissed));
@@ -1982,9 +1992,11 @@
     }
 
     getBE().mDisplayTimeline.updateSignalTimes();
-    mPreviousPresentFence = displayDevice ? getHwComposer().getPresentFence(*displayDevice->getId())
-                                          : Fence::NO_FENCE;
-    auto presentFenceTime = std::make_shared<FenceTime>(mPreviousPresentFence);
+    mPreviousPresentFences[1] = mPreviousPresentFences[0];
+    mPreviousPresentFences[0] = displayDevice
+            ? getHwComposer().getPresentFence(*displayDevice->getId())
+            : Fence::NO_FENCE;
+    auto presentFenceTime = std::make_shared<FenceTime>(mPreviousPresentFences[0]);
     getBE().mDisplayTimeline.push(presentFenceTime);
 
     DisplayStatInfo stats;
@@ -2075,12 +2087,18 @@
         }
     }
 
-    mTransactionCompletedThread.addPresentFence(mPreviousPresentFence);
+    mTransactionCompletedThread.addPresentFence(mPreviousPresentFences[0]);
     mTransactionCompletedThread.sendCallbacks();
 
     if (mLumaSampling && mRegionSamplingThread) {
         mRegionSamplingThread->notifyNewContent();
     }
+
+    // Even though ATRACE_INT64 already checks if tracing is enabled, it doesn't prevent the
+    // side-effect of getTotalSize(), so we check that again here
+    if (ATRACE_ENABLED()) {
+        ATRACE_INT64("Total Buffer Size", GraphicBufferAllocator::get().getTotalSize());
+    }
 }
 
 void SurfaceFlinger::computeLayerBounds() {
@@ -2932,6 +2950,13 @@
             if (l->isRemovedFromCurrentState()) {
                 latchAndReleaseBuffer(l);
             }
+
+            // If the layer has been removed and has no parent, then it will not be reachable
+            // when traversing layers on screen. Add the layer to the offscreenLayers set to
+            // ensure we can copy its current to drawing state.
+            if (!l->getParent()) {
+                mOffscreenLayers.emplace(l.get());
+            }
         }
         mLayersPendingRemoval.clear();
     }
@@ -2945,7 +2970,17 @@
         // clear the "changed" flags in current state
         mCurrentState.colorMatrixChanged = false;
 
-        mDrawingState.traverseInZOrder([](Layer* layer) { layer->commitChildList(); });
+        mDrawingState.traverseInZOrder([&](Layer* layer) {
+            layer->commitChildList();
+
+            // If the layer can be reached when traversing mDrawingState, then the layer is no
+            // longer offscreen, so drop it from the mOffscreenLayers set. erase() by key is a
+            // no-op for layers that are not in the set, which makes a separate count() lookup
+            // (and a second hash of the pointer) unnecessary.
+            mOffscreenLayers.erase(layer);
+        });
+
+        commitOffscreenLayers();
     });
 
     mTransactionPending = false;
@@ -2973,6 +3008,18 @@
     }
 }
 
+void SurfaceFlinger::commitOffscreenLayers() {
+    // Offscreen layers are not reached via mDrawingState, so commit each of their subtrees here.
+    for (Layer* root : mOffscreenLayers) {
+        root->traverseInZOrder(LayerVector::StateSet::Drawing, [](Layer* node) {
+            if (node->getTransactionFlags(eTransactionNeeded)) {
+                node->doTransaction(0);
+                node->commitChildList();
+            }
+        });
+    }
+}
+
 void SurfaceFlinger::computeVisibleRegions(const sp<const DisplayDevice>& displayDevice,
                                            Region& outDirtyRegion, Region& outOpaqueRegion) {
     ATRACE_CALL();
diff --git a/services/surfaceflinger/SurfaceFlinger.h b/services/surfaceflinger/SurfaceFlinger.h
index 72e2ff9..5871774 100644
--- a/services/surfaceflinger/SurfaceFlinger.h
+++ b/services/surfaceflinger/SurfaceFlinger.h
@@ -308,7 +308,10 @@
         const sp<IGraphicBufferProducer>& bufferProducer) const;
 
     inline void onLayerCreated() { mNumLayers++; }
-    inline void onLayerDestroyed() { mNumLayers--; }
+    inline void onLayerDestroyed(Layer* layer) {  // NOTE(review): main-thread only? confirm
+        mNumLayers--;
+        mOffscreenLayers.erase(layer);  // the set holds raw pointers; never keep a destroyed one
+    }
 
     TransactionCompletedThread& getTransactionCompletedThread() {
         return mTransactionCompletedThread;
@@ -563,6 +566,7 @@
     uint32_t setTransactionFlags(uint32_t flags, Scheduler::TransactionStart transactionStart);
     void latchAndReleaseBuffer(const sp<Layer>& layer);
     void commitTransaction() REQUIRES(mStateLock);
+    void commitOffscreenLayers();
     bool containsAnyInvalidClientState(const Vector<ComposerState>& states);
     bool transactionIsReadyToBeApplied(int64_t desiredPresentTime,
                                        const Vector<ComposerState>& states);
@@ -824,6 +828,8 @@
         return hwcDisplayId ? getHwComposer().toPhysicalDisplayId(*hwcDisplayId) : std::nullopt;
     }
 
+    bool previousFrameMissed();
+
     /*
      * Debugging & dumpsys
      */
@@ -956,7 +962,7 @@
     std::vector<sp<Layer>> mLayersWithQueuedFrames;
     // Tracks layers that need to update a display's dirty region.
     std::vector<sp<Layer>> mLayersPendingRefresh;
-    sp<Fence> mPreviousPresentFence = Fence::NO_FENCE;
+    std::array<sp<Fence>, 2> mPreviousPresentFences = {Fence::NO_FENCE, Fence::NO_FENCE};
     // True if in the previous frame at least one layer was composed via the GPU.
     bool mHadClientComposition = false;
     // True if in the previous frame at least one layer was composed via HW Composer.
@@ -1150,6 +1156,12 @@
 
     // Flag used to set override allowed display configs from backdoor
     bool mDebugDisplayConfigSetByBackdoor = false;
+
+    // A set of layers that have no parent so they are not drawn on screen.
+    // Should only be accessed by the main thread.
+    // The Layer pointer is removed from the set when the destructor is called so there shouldn't
+    // be any issues with a raw pointer referencing an invalid object.
+    std::unordered_set<Layer*> mOffscreenLayers;
 };
 
 } // namespace android
diff --git a/services/surfaceflinger/tests/unittests/FakePhaseOffsets.h b/services/surfaceflinger/tests/unittests/FakePhaseOffsets.h
index cfa8337..96121bb 100644
--- a/services/surfaceflinger/tests/unittests/FakePhaseOffsets.h
+++ b/services/surfaceflinger/tests/unittests/FakePhaseOffsets.h
@@ -49,6 +49,8 @@
     // refresh rates, to properly update the offsets.
     void setRefreshRateType(RefreshRateConfigs::RefreshRateType /*refreshRateType*/) override {}
 
+    nsecs_t getOffsetThresholdForNextVsync() const override { return FAKE_PHASE_OFFSET_NS; }
+
     // Returns current offsets in human friendly format.
     void dump(std::string& /*result*/) const override {}
 };