Merge "Performance optimizations" into rvc-dev
diff --git a/libs/renderengine/gl/filters/BlurFilter.cpp b/libs/renderengine/gl/filters/BlurFilter.cpp
index 59469d4..724877b 100644
--- a/libs/renderengine/gl/filters/BlurFilter.cpp
+++ b/libs/renderengine/gl/filters/BlurFilter.cpp
@@ -54,13 +54,13 @@
     static constexpr auto translation = 1.0f;
     const GLfloat vboData[] = {
         // Vertex data
-        translation-size, -translation-size,
-        translation-size, -translation+size,
-        translation+size, -translation+size,
+        translation - size, -translation - size,
+        translation - size, -translation + size,
+        translation + size, -translation + size,
         // UV data
-        0.0f, 0.0f-translation,
-        0.0f, size-translation,
-        size, size-translation
+        0.0f, 0.0f - translation,
+        0.0f, size - translation,
+        size, size - translation
     };
     mMeshBuffer.allocateBuffers(vboData, 12 /* size */);
 }
@@ -122,27 +122,35 @@
 
 status_t BlurFilter::prepare() {
     ATRACE_NAME("BlurFilter::prepare");
-    blit(mCompositionFbo, mPingFbo);
 
     // Kawase is an approximation of Gaussian, but it behaves differently from it.
     // A radius transformation is required for approximating them, and also to introduce
     // non-integer steps, necessary to smoothly interpolate large radii.
-    auto radius = mRadius / 6.0f;
+    const auto radius = mRadius / 6.0f;
 
     // Calculate how many passes we'll do, based on the radius.
     // Too many passes will make the operation expensive.
-    auto passes = min(kMaxPasses, (uint32_t)ceil(radius));
+    const auto passes = min(kMaxPasses, (uint32_t)ceil(radius));
 
-    // We'll ping pong between our textures, to accumulate the result of various offsets.
+    const float radiusByPasses = radius / (float)passes;
+    const float stepX = radiusByPasses / (float)mCompositionFbo.getBufferWidth();
+    const float stepY = radiusByPasses / (float)mCompositionFbo.getBufferHeight();
+
+    // Let's start by downsampling and blurring the composited frame simultaneously.
     mBlurProgram.useProgram();
-    GLFramebuffer* read = &mPingFbo;
-    GLFramebuffer* draw = &mPongFbo;
-    float stepX = radius / (float)mCompositionFbo.getBufferWidth() / (float)passes;
-    float stepY = radius / (float)mCompositionFbo.getBufferHeight() / (float)passes;
-    glViewport(0, 0, draw->getBufferWidth(), draw->getBufferHeight());
     glActiveTexture(GL_TEXTURE0);
     glUniform1i(mBTextureLoc, 0);
-    for (auto i = 0; i < passes; i++) {
+    glBindTexture(GL_TEXTURE_2D, mCompositionFbo.getTextureName());
+    glUniform2f(mBOffsetLoc, stepX, stepY);
+    glViewport(0, 0, mPingFbo.getBufferWidth(), mPingFbo.getBufferHeight());
+    mPingFbo.bind();
+    drawMesh(mBUvLoc, mBPosLoc);
+
+    // Now ping-pong between our textures to accumulate the result of the various offsets.
+    GLFramebuffer* read = &mPingFbo;
+    GLFramebuffer* draw = &mPongFbo;
+    glViewport(0, 0, draw->getBufferWidth(), draw->getBufferHeight());
+    for (auto i = 1; i < passes; i++) {
         ATRACE_NAME("BlurFilter::renderPass");
         draw->bind();
 
@@ -158,9 +166,6 @@
     }
     mLastDrawTarget = read;
 
-    // Cleanup
-    glBindFramebuffer(GL_FRAMEBUFFER, 0);
-
     return NO_ERROR;
 }
 
@@ -179,7 +184,6 @@
         glBlitFramebuffer(0, 0, mLastDrawTarget->getBufferWidth(),
                           mLastDrawTarget->getBufferHeight(), 0, 0, mDisplayWidth, mDisplayHeight,
                           GL_COLOR_BUFFER_BIT, GL_LINEAR);
-        glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
         return NO_ERROR;
     }
 
@@ -258,12 +262,12 @@
 }
 
 void BlurFilter::blit(GLFramebuffer& read, GLFramebuffer& draw) const {
+    ATRACE_NAME("BlurFilter::blit");
     read.bindAsReadBuffer();
     draw.bindAsDrawBuffer();
     glBlitFramebuffer(0, 0, read.getBufferWidth(), read.getBufferHeight(), 0, 0,
                       draw.getBufferWidth(), draw.getBufferHeight(), GL_COLOR_BUFFER_BIT,
                       GL_LINEAR);
-    glBindFramebuffer(GL_FRAMEBUFFER, 0);
 }
 
 } // namespace gl