Add tracing thread in RenderEngine.

Times GPU work done by RenderEngine by creating sync fences during
flush() and finish() calls, asynchronously waits, and writes to
systrace.

As implemented here a frame is double-reported because SurfaceFlinger
calls flush an extra time, but we should get rid of that call anyways.

Bug: 120992906
Test: systrace
Change-Id: I6e1446fdf389157bfd533a96263c8e0cd1ed8514
diff --git a/libs/renderengine/gl/GLESRenderEngine.cpp b/libs/renderengine/gl/GLESRenderEngine.cpp
index d0a0ac8..ebe514c 100644
--- a/libs/renderengine/gl/GLESRenderEngine.cpp
+++ b/libs/renderengine/gl/GLESRenderEngine.cpp
@@ -30,6 +30,7 @@
 #include <GLES2/gl2ext.h>
 #include <android-base/stringprintf.h>
 #include <cutils/compiler.h>
+#include <cutils/properties.h>
 #include <renderengine/Mesh.h>
 #include <renderengine/Texture.h>
 #include <renderengine/private/Description.h>
@@ -416,6 +417,13 @@
         mBt2020ToSrgb = mXyzToSrgb * mBt2020ToXyz;
         mBt2020ToDisplayP3 = mXyzToDisplayP3 * mBt2020ToXyz;
     }
+
+    char value[PROPERTY_VALUE_MAX];
+    property_get("debug.egl.traceGpuCompletion", value, "0");
+    if (atoi(value)) {
+        mTraceGpuCompletion = true;
+        mFlushTracer = std::make_unique<FlushTracer>(this);
+    }
 }
 
 GLESRenderEngine::~GLESRenderEngine() {
@@ -461,6 +469,12 @@
         ALOGW("failed to dup EGL native fence sync: %#x", eglGetError());
     }
 
+    // Only trace if we have a valid fence, as current usage falls back to
+    // calling finish() if the fence fd is invalid.
+    if (CC_UNLIKELY(mTraceGpuCompletion && mFlushTracer) && fenceFd.get() >= 0) {
+        mFlushTracer->queueSync(eglCreateSyncKHR(mEGLDisplay, EGL_SYNC_FENCE_KHR, nullptr));
+    }
+
     return fenceFd;
 }
 
@@ -476,8 +490,15 @@
         return false;
     }
 
-    EGLint result = eglClientWaitSyncKHR(mEGLDisplay, sync, EGL_SYNC_FLUSH_COMMANDS_BIT_KHR,
-                                         2000000000 /*2 sec*/);
+    if (CC_UNLIKELY(mTraceGpuCompletion && mFlushTracer)) {
+        mFlushTracer->queueSync(eglCreateSyncKHR(mEGLDisplay, EGL_SYNC_FENCE_KHR, nullptr));
+    }
+
+    return waitSync(sync, EGL_SYNC_FLUSH_COMMANDS_BIT_KHR);
+}
+
+bool GLESRenderEngine::waitSync(EGLSyncKHR sync, EGLint flags) {
+    EGLint result = eglClientWaitSyncKHR(mEGLDisplay, sync, flags, 2000000000 /*2 sec*/);
     EGLint error = eglGetError();
     eglDestroySyncKHR(mEGLDisplay, sync);
     if (result != EGL_CONDITION_SATISFIED_KHR) {
@@ -1200,6 +1221,61 @@
     return (isInputHdrDataSpace || isOutputHdrDataSpace) && inputTransfer != outputTransfer;
 }
 
+// FlushTracer implementation
+GLESRenderEngine::FlushTracer::FlushTracer(GLESRenderEngine* engine) : mEngine(engine) {
+    mThread = std::thread(&GLESRenderEngine::FlushTracer::loop, this);
+}
+
+GLESRenderEngine::FlushTracer::~FlushTracer() {
+    {
+        std::lock_guard<std::mutex> lock(mMutex);
+        mRunning = false;
+    }
+    mCondition.notify_all();
+    if (mThread.joinable()) {
+        mThread.join();
+    }
+}
+
+void GLESRenderEngine::FlushTracer::queueSync(EGLSyncKHR sync) {
+    std::lock_guard<std::mutex> lock(mMutex);
+    char name[64];
+    const uint64_t frameNum = mFramesQueued++;
+    snprintf(name, sizeof(name), "Queueing sync for frame: %lu",
+             static_cast<unsigned long>(frameNum));
+    ATRACE_NAME(name);
+    mQueue.push({sync, frameNum});
+    ATRACE_INT("GPU Frames Outstanding", mQueue.size());
+    mCondition.notify_one();
+}
+
+void GLESRenderEngine::FlushTracer::loop() {
+    while (mRunning) {
+        QueueEntry entry;
+        {
+            std::lock_guard<std::mutex> lock(mMutex);
+
+            mCondition.wait(mMutex,
+                            [&]() REQUIRES(mMutex) { return !mQueue.empty() && !mRunning; });
+
+            if (!mRunning) {
+                // if mRunning is false, then FlushTracer is being destroyed, so
+                // bail out now.
+                break;
+            }
+            entry = mQueue.front();
+            mQueue.pop();
+        }
+        {
+            char name[64];
+            snprintf(name, sizeof(name), "waiting for frame %lu",
+                     static_cast<unsigned long>(entry.mFrameNum));
+            ATRACE_NAME(name);
+            mEngine->waitSync(entry.mSync, 0);
+        }
+    }
+}
+
 } // namespace gl
 } // namespace renderengine
 } // namespace android
diff --git a/libs/renderengine/gl/GLESRenderEngine.h b/libs/renderengine/gl/GLESRenderEngine.h
index 2af2aed..cd19931 100644
--- a/libs/renderengine/gl/GLESRenderEngine.h
+++ b/libs/renderengine/gl/GLESRenderEngine.h
@@ -17,8 +17,13 @@
 #ifndef SF_GLESRENDERENGINE_H_
 #define SF_GLESRENDERENGINE_H_
 
+#include <android-base/thread_annotations.h>
 #include <stdint.h>
 #include <sys/types.h>
+#include <condition_variable>
+#include <mutex>
+#include <queue>
+#include <thread>
 
 #include <EGL/egl.h>
 #include <EGL/eglext.h>
@@ -119,6 +124,7 @@
                                        Protection protection);
     static EGLSurface createDummyEglPbufferSurface(EGLDisplay display, EGLConfig config,
                                                    int hwcFormat, Protection protection);
+    bool waitSync(EGLSyncKHR sync, EGLint flags);
 
     // A data space is considered HDR data space if it has BT2020 color space
     // with PQ or HLG transfer function.
@@ -156,6 +162,8 @@
     mat4 mBt2020ToDisplayP3;
 
     bool mInProtectedContext = false;
+    // If set to true, then enables tracing flush() and finish() to systrace.
+    bool mTraceGpuCompletion = false;
     int32_t mFboHeight = 0;
 
     // Current dataspace of layer being rendered
@@ -167,6 +175,30 @@
     // Whether device supports color management, currently color management
     // supports sRGB, DisplayP3 color spaces.
     const bool mUseColorManagement = false;
+
+    class FlushTracer {
+    public:
+        FlushTracer(GLESRenderEngine* engine);
+        ~FlushTracer();
+        void queueSync(EGLSyncKHR sync) EXCLUDES(mMutex);
+
+        struct QueueEntry {
+            EGLSyncKHR mSync = nullptr;
+            uint64_t mFrameNum = 0;
+        };
+
+    private:
+        void loop();
+        GLESRenderEngine* const mEngine;
+        std::thread mThread;
+        std::condition_variable_any mCondition;
+        std::mutex mMutex;
+        std::queue<QueueEntry> mQueue GUARDED_BY(mMutex);
+        uint64_t mFramesQueued GUARDED_BY(mMutex) = 0;
+        bool mRunning = true;
+    };
+    friend class FlushTracer;
+    std::unique_ptr<FlushTracer> mFlushTracer;
 };
 
 } // namespace gl