Blur shader performance optimizations

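- removed mipmap generation in favor of a downscaling blit into an FBO
- moved the bell curve computation out of the shader (weights are precomputed
  on the CPU and passed in as a uniform array)
- type casting optimizations
- pre-computation of shader variables
- increased UV precision (mediump -> highp) to remove scaling artifacts
- moved the final copy operation to a blit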

Test: visual
Test: systrace
Test: SurfaceFlinger_test
Fixes: 148292484
Change-Id: I2ad38cde837b090cadbadbb79b941dc0524948a4
diff --git a/libs/renderengine/gl/GLESRenderEngine.cpp b/libs/renderengine/gl/GLESRenderEngine.cpp
index e257704..98605ba 100644
--- a/libs/renderengine/gl/GLESRenderEngine.cpp
+++ b/libs/renderengine/gl/GLESRenderEngine.cpp
@@ -1003,7 +1003,7 @@
         setViewportAndProjection(display.physicalDisplay, display.clip);
     } else {
         setViewportAndProjection(display.physicalDisplay, display.clip);
-        auto status = mBlurFilter->setAsDrawTarget(display);
+        auto status = mBlurFilter->setAsDrawTarget(display, blurLayer->backgroundBlurRadius);
         if (status != NO_ERROR) {
             ALOGE("Failed to prepare blur filter! Aborting GPU composition for buffer (%p).",
                   buffer->handle);
@@ -1037,7 +1037,7 @@
                         .build();
     for (auto const layer : layers) {
         if (blurLayer == layer) {
-            auto status = mBlurFilter->prepare(layer->backgroundBlurRadius);
+            auto status = mBlurFilter->prepare();
             if (status != NO_ERROR) {
                 ALOGE("Failed to render blur effect! Aborting GPU composition for buffer (%p).",
                       buffer->handle);
diff --git a/libs/renderengine/gl/GLFramebuffer.cpp b/libs/renderengine/gl/GLFramebuffer.cpp
index 091eac9..153935b 100644
--- a/libs/renderengine/gl/GLFramebuffer.cpp
+++ b/libs/renderengine/gl/GLFramebuffer.cpp
@@ -122,6 +122,14 @@
     glBindFramebuffer(GL_FRAMEBUFFER, mFramebufferName);
 }
 
+void GLFramebuffer::bindAsReadBuffer() const {
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, mFramebufferName);
+}
+
+void GLFramebuffer::bindAsDrawBuffer() const {
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, mFramebufferName);
+}
+
 void GLFramebuffer::unbind() const {
     glBindFramebuffer(GL_FRAMEBUFFER, 0);
 }
diff --git a/libs/renderengine/gl/GLFramebuffer.h b/libs/renderengine/gl/GLFramebuffer.h
index 668685a..69102d6 100644
--- a/libs/renderengine/gl/GLFramebuffer.h
+++ b/libs/renderengine/gl/GLFramebuffer.h
@@ -48,6 +48,8 @@
     int32_t getBufferWidth() const { return mBufferWidth; }
     GLenum getStatus() const { return mStatus; }
     void bind() const;
+    void bindAsReadBuffer() const;
+    void bindAsDrawBuffer() const;
     void unbind() const;
 
 private:
diff --git a/libs/renderengine/gl/filters/BlurFilter.cpp b/libs/renderengine/gl/filters/BlurFilter.cpp
index a18a999..48c2560 100644
--- a/libs/renderengine/gl/filters/BlurFilter.cpp
+++ b/libs/renderengine/gl/filters/BlurFilter.cpp
@@ -31,29 +31,24 @@
 namespace gl {
 
 BlurFilter::BlurFilter(GLESRenderEngine& engine)
-      : mEngine(engine), mCompositionFbo(engine), mBlurredFbo(engine), mSimpleProgram(engine) {
-    mSimpleProgram.compile(getVertexShader(), getSimpleFragShader());
-    mSPosLoc = mSimpleProgram.getAttributeLocation("aPosition");
-    mSUvLoc = mSimpleProgram.getAttributeLocation("aUV");
-    mSTextureLoc = mSimpleProgram.getUniformLocation("uTexture");
+      : mEngine(engine), mCompositionFbo(engine), mBlurredFbo(engine), mMixProgram(engine) {
+    mMixProgram.compile(getVertexShader(), getMixFragShader());
+    mMPosLoc = mMixProgram.getAttributeLocation("aPosition");
+    mMUvLoc = mMixProgram.getAttributeLocation("aUV");
+    mMTextureLoc = mMixProgram.getUniformLocation("uTexture");
+    mMCompositionTextureLoc = mMixProgram.getUniformLocation("uCompositionTexture");
+    mMMixLoc = mMixProgram.getUniformLocation("uMix");
 }
 
-status_t BlurFilter::setAsDrawTarget(const DisplaySettings& display) {
+status_t BlurFilter::setAsDrawTarget(const DisplaySettings& display, uint32_t radius) {
     ATRACE_NAME("BlurFilter::setAsDrawTarget");
+    mRadius = radius;
 
     if (!mTexturesAllocated) {
         mDisplayWidth = display.physicalDisplay.width();
         mDisplayHeight = display.physicalDisplay.height();
         mCompositionFbo.allocateBuffers(mDisplayWidth, mDisplayHeight);
 
-        // Let's use mimap filtering on the offscreen composition texture,
-        // this will drastically improve overall shader quality.
-        glBindTexture(GL_TEXTURE_2D, mCompositionFbo.getTextureName());
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST);
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0);
-        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 3);
-        glBindTexture(GL_TEXTURE_2D, 0);
-
         const uint32_t fboWidth = floorf(mDisplayWidth * kFboScale);
         const uint32_t fboHeight = floorf(mDisplayHeight * kFboScale);
         mBlurredFbo.allocateBuffers(fboWidth, fboHeight);
@@ -94,27 +89,41 @@
 status_t BlurFilter::render() {
     ATRACE_NAME("BlurFilter::render");
 
-    // Now let's scale our blur up
-    mSimpleProgram.useProgram();
+    // Now let's scale our blur up. It will be interpolated with the larger composited
+    // texture for the first frames, to hide downscaling artifacts.
+    GLfloat mix = fmin(1.0, mRadius / kMaxCrossFadeRadius);
+    if (mix >= 1) {
+        mBlurredFbo.bindAsReadBuffer();
+        glBlitFramebuffer(0, 0, mBlurredFbo.getBufferWidth(), mBlurredFbo.getBufferHeight(), 0, 0,
+                          mDisplayWidth, mDisplayHeight, GL_COLOR_BUFFER_BIT, GL_LINEAR);
+        glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
+        return NO_ERROR;
+    }
+
+    mMixProgram.useProgram();
+    glUniform1f(mMMixLoc, mix);
     glActiveTexture(GL_TEXTURE0);
     glBindTexture(GL_TEXTURE_2D, mBlurredFbo.getTextureName());
-    glUniform1i(mSTextureLoc, 0);
+    glUniform1i(mMTextureLoc, 0);
+    glActiveTexture(GL_TEXTURE1);
+    glBindTexture(GL_TEXTURE_2D, mCompositionFbo.getTextureName());
+    glUniform1i(mMCompositionTextureLoc, 1);
     mEngine.checkErrors("Setting final pass uniforms");
 
-    drawMesh(mSUvLoc, mSPosLoc);
+    drawMesh(mMUvLoc, mMPosLoc);
 
     glUseProgram(0);
+    glActiveTexture(GL_TEXTURE0);
     return NO_ERROR;
 }
 
 string BlurFilter::getVertexShader() const {
     return R"SHADER(
         #version 310 es
-        precision lowp float;
 
         in vec2 aPosition;
-        in mediump vec2 aUV;
-        out mediump vec2 vUV;
+        in highp vec2 aUV;
+        out highp vec2 vUV;
 
         void main() {
             vUV = aUV;
@@ -123,18 +132,22 @@
     )SHADER";
 }
 
-string BlurFilter::getSimpleFragShader() const {
+string BlurFilter::getMixFragShader() const {
     string shader = R"SHADER(
         #version 310 es
-        precision lowp float;
+        precision mediump float;
 
-        in mediump vec2 vUV;
+        in highp vec2 vUV;
         out vec4 fragColor;
 
+        uniform sampler2D uCompositionTexture;
         uniform sampler2D uTexture;
+        uniform float uMix;
 
         void main() {
-            fragColor = texture(uTexture, vUV);
+            vec4 blurred = texture(uTexture, vUV);
+            vec4 composition = texture(uCompositionTexture, vUV);
+            fragColor = mix(composition, blurred, uMix);
         }
     )SHADER";
     return shader;
diff --git a/libs/renderengine/gl/filters/BlurFilter.h b/libs/renderengine/gl/filters/BlurFilter.h
index e265b51..6889939 100644
--- a/libs/renderengine/gl/filters/BlurFilter.h
+++ b/libs/renderengine/gl/filters/BlurFilter.h
@@ -31,22 +31,25 @@
 public:
     // Downsample FBO to improve performance
     static constexpr float kFboScale = 0.25f;
+    // To avoid downscaling artifacts, we interpolate the blurred fbo with the full composited
+    // image, up to this radius.
+    static constexpr float kMaxCrossFadeRadius = 15.0f;
 
     explicit BlurFilter(GLESRenderEngine& engine);
     virtual ~BlurFilter(){};
 
     // Set up render targets, redirecting output to offscreen texture.
-    status_t setAsDrawTarget(const DisplaySettings&);
+    status_t setAsDrawTarget(const DisplaySettings&, uint32_t radius);
     // Allocate any textures needed for the filter.
     virtual void allocateTextures() = 0;
     // Execute blur passes, rendering to offscreen texture.
-    virtual status_t prepare(uint32_t radius) = 0;
+    virtual status_t prepare() = 0;
     // Render blur to the bound framebuffer (screen).
     status_t render();
 
 protected:
+    uint32_t mRadius;
     void drawMesh(GLuint uv, GLuint position);
-    string getSimpleFragShader() const;
     string getVertexShader() const;
 
     GLESRenderEngine& mEngine;
@@ -58,12 +61,15 @@
     uint32_t mDisplayHeight;
 
 private:
+    string getMixFragShader() const;
     bool mTexturesAllocated = false;
 
-    GenericProgram mSimpleProgram;
-    GLuint mSPosLoc;
-    GLuint mSUvLoc;
-    GLuint mSTextureLoc;
+    GenericProgram mMixProgram;
+    GLuint mMPosLoc;
+    GLuint mMUvLoc;
+    GLuint mMMixLoc;
+    GLuint mMTextureLoc;
+    GLuint mMCompositionTextureLoc;
 };
 
 } // namespace gl
diff --git a/libs/renderengine/gl/filters/GaussianBlurFilter.cpp b/libs/renderengine/gl/filters/GaussianBlurFilter.cpp
index f5ba02a..4d7bf44 100644
--- a/libs/renderengine/gl/filters/GaussianBlurFilter.cpp
+++ b/libs/renderengine/gl/filters/GaussianBlurFilter.cpp
@@ -26,6 +26,10 @@
 
 #include <utils/Trace.h>
 
+#define PI 3.14159265359
+#define THETA 0.352
+#define K 1.0 / (2.0 * THETA * THETA)
+
 namespace android {
 namespace renderengine {
 namespace gl {
@@ -39,22 +43,24 @@
     mVPosLoc = mVerticalProgram.getAttributeLocation("aPosition");
     mVUvLoc = mVerticalProgram.getAttributeLocation("aUV");
     mVTextureLoc = mVerticalProgram.getUniformLocation("uTexture");
-    mVSizeLoc = mVerticalProgram.getUniformLocation("uSize");
-    mVRadiusLoc = mVerticalProgram.getUniformLocation("uRadius");
+    mVIncrementLoc = mVerticalProgram.getUniformLocation("uIncrement");
+    mVNumSamplesLoc = mVerticalProgram.getUniformLocation("uSamples");
+    mVGaussianWeightLoc = mVerticalProgram.getUniformLocation("uGaussianWeights");
 
     mHorizontalProgram.compile(getVertexShader(), getFragmentShader(true));
     mHPosLoc = mHorizontalProgram.getAttributeLocation("aPosition");
     mHUvLoc = mHorizontalProgram.getAttributeLocation("aUV");
     mHTextureLoc = mHorizontalProgram.getUniformLocation("uTexture");
-    mHSizeLoc = mHorizontalProgram.getUniformLocation("uSize");
-    mHRadiusLoc = mHorizontalProgram.getUniformLocation("uRadius");
+    mHIncrementLoc = mHorizontalProgram.getUniformLocation("uIncrement");
+    mHNumSamplesLoc = mHorizontalProgram.getUniformLocation("uSamples");
+    mHGaussianWeightLoc = mHorizontalProgram.getUniformLocation("uGaussianWeights");
 }
 
 void GaussianBlurFilter::allocateTextures() {
     mVerticalPassFbo.allocateBuffers(mBlurredFbo.getBufferWidth(), mBlurredFbo.getBufferHeight());
 }
 
-status_t GaussianBlurFilter::prepare(uint32_t radius) {
+status_t GaussianBlurFilter::prepare() {
     ATRACE_NAME("GaussianBlurFilter::prepare");
 
     if (mVerticalPassFbo.getStatus() != GL_FRAMEBUFFER_COMPLETE) {
@@ -70,21 +76,38 @@
         return GL_INVALID_OPERATION;
     }
 
+    mCompositionFbo.bindAsReadBuffer();
+    mBlurredFbo.bindAsDrawBuffer();
+    glBlitFramebuffer(0, 0, mCompositionFbo.getBufferWidth(), mCompositionFbo.getBufferHeight(), 0,
+                      0, mBlurredFbo.getBufferWidth(), mBlurredFbo.getBufferHeight(),
+                      GL_COLOR_BUFFER_BIT, GL_LINEAR);
+    glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
+    glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+
     // First, we'll apply the vertical pass, that receives the flattened background layers.
     mVerticalPassFbo.bind();
     mVerticalProgram.useProgram();
 
+    // Precompute gaussian bell curve, and send it to the shader to avoid
+    // unnecessary computations.
+    auto samples = min(mRadius, kNumSamples);
+    GLfloat gaussianWeights[kNumSamples] = {};
+    for (size_t i = 0; i < samples; i++) {
+        float normalized = float(i) / samples;
+        gaussianWeights[i] = (float)exp(-K * normalized * normalized);
+    }
+
     // set uniforms
     auto width = mVerticalPassFbo.getBufferWidth();
     auto height = mVerticalPassFbo.getBufferHeight();
-    auto radiusF = fmax(1.0f, radius * kFboScale);
+    auto radiusF = fmax(1.0f, mRadius * kFboScale);
     glViewport(0, 0, width, height);
     glActiveTexture(GL_TEXTURE0);
-    glBindTexture(GL_TEXTURE_2D, mCompositionFbo.getTextureName());
-    glGenerateMipmap(GL_TEXTURE_2D);
+    glBindTexture(GL_TEXTURE_2D, mBlurredFbo.getTextureName());
     glUniform1i(mVTextureLoc, 0);
-    glUniform2f(mVSizeLoc, width, height);
-    glUniform1f(mVRadiusLoc, radiusF);
+    glUniform2f(mVIncrementLoc, radiusF / (width * 2.0f), radiusF / (height * 2.0f));
+    glUniform1i(mVNumSamplesLoc, samples);
+    glUniform1fv(mVGaussianWeightLoc, kNumSamples, gaussianWeights);
     mEngine.checkErrors("Setting vertical-diagonal pass uniforms");
 
     drawMesh(mVUvLoc, mVPosLoc);
@@ -97,8 +120,9 @@
     glActiveTexture(GL_TEXTURE0);
     glBindTexture(GL_TEXTURE_2D, mVerticalPassFbo.getTextureName());
     glUniform1i(mHTextureLoc, 0);
-    glUniform2f(mHSizeLoc, width, height);
-    glUniform1f(mHRadiusLoc, radiusF);
+    glUniform2f(mHIncrementLoc, radiusF / (width * 2.0f), radiusF / (height * 2.0f));
+    glUniform1i(mHNumSamplesLoc, samples);
+    glUniform1fv(mHGaussianWeightLoc, kNumSamples, gaussianWeights);
     mEngine.checkErrors("Setting vertical pass uniforms");
 
     drawMesh(mHUvLoc, mHPosLoc);
@@ -115,42 +139,31 @@
 }
 
 string GaussianBlurFilter::getFragmentShader(bool horizontal) const {
-    string shader = "#version 310 es\n#define DIRECTION ";
-    shader += (horizontal ? "1" : "0");
-    shader += R"SHADER(
-        precision lowp float;
+    stringstream shader;
+    shader << "#version 310 es\n"
+           << "#define DIRECTION " << (horizontal ? "1" : "0") << "\n"
+           << "#define NUM_SAMPLES " << kNumSamples <<
+            R"SHADER(
+        precision mediump float;
 
         uniform sampler2D uTexture;
-        uniform vec2 uSize;
-        uniform float uRadius;
+        uniform vec2 uIncrement;
+        uniform float[NUM_SAMPLES] uGaussianWeights;
+        uniform int uSamples;
 
-        mediump in vec2 vUV;
+        highp in vec2 vUV;
         out vec4 fragColor;
 
-        #define PI 3.14159265359
-        #define THETA 0.352
-        #define MU 0.0
-        #define A 1.0 / (THETA * sqrt(2.0 * PI))
-        #define K 1.0 / (2.0 * THETA * THETA)
-        #define MAX_SAMPLES 10
-
-        float gaussianBellCurve(float x) {
-            float tmp = (x - MU);
-            return exp(-K * tmp * tmp);
-        }
-
-        vec3 gaussianBlur(sampler2D texture, mediump vec2 uv, float size,
-                          mediump vec2 direction, float radius) {
+        vec3 gaussianBlur(sampler2D texture, highp vec2 uv, float inc, vec2 direction) {
             float totalWeight = 0.0;
-            vec3 blurred = vec3(0.);
-            int samples = min(int(ceil(radius / 2.0)), MAX_SAMPLES);
-            float inc = radius / (size * 2.0);
+            vec3 blurred = vec3(0.0);
+            float fSamples = 1.0 / float(uSamples);
 
-            for (int i = -samples; i <= samples; i++) {
-                float normalized = float(i) / float(samples);
-                float weight = gaussianBellCurve(normalized);
+            for (int i = -uSamples; i <= uSamples; i++) {
+                float weight = uGaussianWeights[abs(i)];
+                float normalized = float(i) * fSamples;
                 float radInc = inc * normalized;
-                blurred += weight * (texture(texture, uv + radInc * direction)).rgb;;
+                blurred += weight * (texture(texture, radInc * direction + uv, 0.0)).rgb;
                 totalWeight += weight;
             }
 
@@ -159,15 +172,15 @@
 
         void main() {
             #if DIRECTION == 1
-            vec3 color = gaussianBlur(uTexture, vUV, uSize.x, vec2(1.0, 0.0), uRadius);
+            vec3 color = gaussianBlur(uTexture, vUV, uIncrement.x, vec2(1.0, 0.0));
             #else
-            vec3 color = gaussianBlur(uTexture, vUV, uSize.y, vec2(0.0, 1.0), uRadius);
+            vec3 color = gaussianBlur(uTexture, vUV, uIncrement.y, vec2(0.0, 1.0));
             #endif
             fragColor = vec4(color, 1.0);
         }
 
     )SHADER";
-    return shader;
+    return shader.str();
 }
 
 } // namespace gl
diff --git a/libs/renderengine/gl/filters/GaussianBlurFilter.h b/libs/renderengine/gl/filters/GaussianBlurFilter.h
index acf0f07..8580522 100644
--- a/libs/renderengine/gl/filters/GaussianBlurFilter.h
+++ b/libs/renderengine/gl/filters/GaussianBlurFilter.h
@@ -30,8 +30,10 @@
 
 class GaussianBlurFilter : public BlurFilter {
 public:
+    static constexpr uint32_t kNumSamples = 12;
+
     explicit GaussianBlurFilter(GLESRenderEngine& engine);
-    status_t prepare(uint32_t radius) override;
+    status_t prepare() override;
     void allocateTextures() override;
 
 private:
@@ -45,16 +47,18 @@
     GLuint mVPosLoc;
     GLuint mVUvLoc;
     GLuint mVTextureLoc;
-    GLuint mVSizeLoc;
-    GLuint mVRadiusLoc;
+    GLuint mVIncrementLoc;
+    GLuint mVNumSamplesLoc;
+    GLuint mVGaussianWeightLoc;
 
     // Horizontal pass and its uniforms
     GenericProgram mHorizontalProgram;
     GLuint mHPosLoc;
     GLuint mHUvLoc;
     GLuint mHTextureLoc;
-    GLuint mHSizeLoc;
-    GLuint mHRadiusLoc;
+    GLuint mHIncrementLoc;
+    GLuint mHNumSamplesLoc;
+    GLuint mHGaussianWeightLoc;
 };
 
 } // namespace gl
diff --git a/libs/renderengine/gl/filters/LensBlurFilter.cpp b/libs/renderengine/gl/filters/LensBlurFilter.cpp
index 799deac..fb29fbb 100644
--- a/libs/renderengine/gl/filters/LensBlurFilter.cpp
+++ b/libs/renderengine/gl/filters/LensBlurFilter.cpp
@@ -62,7 +62,7 @@
                                              mBlurredFbo.getBufferHeight());
 }
 
-status_t LensBlurFilter::prepare(uint32_t radius) {
+status_t LensBlurFilter::prepare() {
     ATRACE_NAME("LensBlurFilter::prepare");
 
     if (mVerticalDiagonalPassFbo.getStatus() != GL_FRAMEBUFFER_COMPLETE) {
@@ -86,11 +86,10 @@
     // set uniforms
     auto width = mVerticalDiagonalPassFbo.getBufferWidth();
     auto height = mVerticalDiagonalPassFbo.getBufferHeight();
-    auto radiusF = fmax(1.0f, radius * kFboScale);
+    auto radiusF = fmax(1.0f, mRadius * kFboScale);
     glViewport(0, 0, width, height);
     glActiveTexture(GL_TEXTURE0);
     glBindTexture(GL_TEXTURE_2D, mCompositionFbo.getTextureName());
-    glGenerateMipmap(GL_TEXTURE_2D);
     glUniform1i(mVDTexture0Loc, 0);
     glUniform2f(mVDSizeLoc, mDisplayWidth, mDisplayHeight);
     glUniform1f(mVDRadiusLoc, radiusF);
@@ -134,8 +133,7 @@
     string shader = "#version 310 es\n#define DIRECTION ";
     shader += (forComposition ? "1" : "0");
     shader += R"SHADER(
-        precision lowp float;
-
+        precision mediump float;
         #define PI 3.14159265359
 
         uniform sampler2D uTexture0;
@@ -143,7 +141,7 @@
         uniform float uRadius;
         uniform int uNumSamples;
 
-        mediump in vec2 vUV;
+        highp in vec2 vUV;
 
         #if DIRECTION == 0
         layout(location = 0) out vec4 fragColor0;
diff --git a/libs/renderengine/gl/filters/LensBlurFilter.h b/libs/renderengine/gl/filters/LensBlurFilter.h
index 8543f0d..1620c5a 100644
--- a/libs/renderengine/gl/filters/LensBlurFilter.h
+++ b/libs/renderengine/gl/filters/LensBlurFilter.h
@@ -31,7 +31,7 @@
 class LensBlurFilter : public BlurFilter {
 public:
     explicit LensBlurFilter(GLESRenderEngine& engine);
-    status_t prepare(uint32_t radius) override;
+    status_t prepare() override;
     void allocateTextures() override;
 
 private: