Merge "SurfaceControl: C++ Binding Lifetime refactoring"
diff --git a/libs/renderengine/gl/filters/BlurFilter.cpp b/libs/renderengine/gl/filters/BlurFilter.cpp
index b1a84bd..eb66c8f 100644
--- a/libs/renderengine/gl/filters/BlurFilter.cpp
+++ b/libs/renderengine/gl/filters/BlurFilter.cpp
@@ -132,8 +132,7 @@
 }
 
 string BlurFilter::getVertexShader() const {
-    return R"SHADER(
-        #version 310 es
+    return R"SHADER(#version 310 es
 
         in vec2 aPosition;
         in highp vec2 aUV;
@@ -147,8 +146,7 @@
 }
 
 string BlurFilter::getMixFragShader() const {
-    string shader = R"SHADER(
-        #version 310 es
+    string shader = R"SHADER(#version 310 es
         precision mediump float;
 
         in highp vec2 vUV;
diff --git a/libs/renderengine/gl/filters/GaussianBlurFilter.cpp b/libs/renderengine/gl/filters/GaussianBlurFilter.cpp
index 4d7bf44..a0d7af8 100644
--- a/libs/renderengine/gl/filters/GaussianBlurFilter.cpp
+++ b/libs/renderengine/gl/filters/GaussianBlurFilter.cpp
@@ -43,7 +43,7 @@
     mVPosLoc = mVerticalProgram.getAttributeLocation("aPosition");
     mVUvLoc = mVerticalProgram.getAttributeLocation("aUV");
     mVTextureLoc = mVerticalProgram.getUniformLocation("uTexture");
-    mVIncrementLoc = mVerticalProgram.getUniformLocation("uIncrement");
+    mVGaussianOffsetLoc = mVerticalProgram.getUniformLocation("uGaussianOffsets");
     mVNumSamplesLoc = mVerticalProgram.getUniformLocation("uSamples");
     mVGaussianWeightLoc = mVerticalProgram.getUniformLocation("uGaussianWeights");
 
@@ -51,7 +51,7 @@
     mHPosLoc = mHorizontalProgram.getAttributeLocation("aPosition");
     mHUvLoc = mHorizontalProgram.getAttributeLocation("aUV");
     mHTextureLoc = mHorizontalProgram.getUniformLocation("uTexture");
-    mHIncrementLoc = mHorizontalProgram.getUniformLocation("uIncrement");
+    mHGaussianOffsetLoc = mHorizontalProgram.getUniformLocation("uGaussianOffsets");
     mHNumSamplesLoc = mHorizontalProgram.getUniformLocation("uSamples");
     mHGaussianWeightLoc = mHorizontalProgram.getUniformLocation("uGaussianWeights");
 }
@@ -60,6 +60,36 @@
     mVerticalPassFbo.allocateBuffers(mBlurredFbo.getBufferWidth(), mBlurredFbo.getBufferHeight());
 }
 
+static void calculateLinearGaussian(uint32_t samples, double dimension,
+                                    GLfloat* gaussianLinearOffsets, GLfloat* gaussianWeights,
+                                    GLfloat* gaussianLinearWeights) {
+    // The central point in the symmetric bell curve is not offset.
+    // This decision allows one less sampling in the GPU.
+    gaussianLinearWeights[0] = gaussianWeights[0];
+    gaussianLinearOffsets[0] = 0.0;
+
+    // Calculate the linear weights.
+    // This is a vector reduction where an element of the packed reduced array
+    // contains the sum of two adjacent members of the original packed array.
+    // We start preserving the element 1 of the array and then perform sum for
+    // every other (i+=2) element of the gaussianWeights array.
+    gaussianLinearWeights[1] = gaussianWeights[1];
+    const auto start = 1 + ((samples - 1) & 0x1);
+    for (size_t i = start; i < samples; i += 2) {
+        gaussianLinearWeights[start + i / 2] = gaussianWeights[i] + gaussianWeights[i + 1];
+    }
+
+    // Calculate the texture coordinates offsets as an average of the initial offsets,
+    // weighted by the Gaussian weights as described in the original article.
+    gaussianLinearOffsets[1] = 1.0 / dimension;
+    for (size_t i = start; i < samples; i += 2) {
+        GLfloat offset_1 = float(i) / dimension;
+        GLfloat offset_2 = float(i + 1) / dimension;
+        gaussianLinearOffsets[start + i / 2] =
+                (offset_1 * gaussianWeights[i] + offset_2 * gaussianWeights[i + 1]) /
+                gaussianLinearWeights[start + i / 2];
+    }
+}
 status_t GaussianBlurFilter::prepare() {
     ATRACE_NAME("GaussianBlurFilter::prepare");
 
@@ -88,27 +118,47 @@
     mVerticalPassFbo.bind();
     mVerticalProgram.useProgram();
 
-    // Precompute gaussian bell curve, and send it to the shader to avoid
-    // unnecessary computations.
-    auto samples = min(mRadius, kNumSamples);
+    // Precompute gaussian bell curve, and send it to the shader to avoid unnecessary computations.
+    double radiusD = fmax(1.0, mRadius * kFboScale);
+    auto samples = int(fmin(radiusD, kNumSamples));
     GLfloat gaussianWeights[kNumSamples] = {};
-    for (size_t i = 0; i < samples; i++) {
-        float normalized = float(i) / samples;
+
+    gaussianWeights[0] = 1.0f;
+    auto totalWeight = gaussianWeights[0];
+
+    // Gaussian weights calculation.
+    for (size_t i = 1; i < samples; i++) {
+        const double normalized = i / radiusD;
         gaussianWeights[i] = (float)exp(-K * normalized * normalized);
+        totalWeight += 2.0 * gaussianWeights[i];
     }
 
-    // set uniforms
+    // Gaussian weights normalization to avoid work in the GPU.
+    for (size_t i = 0; i < samples; i++) {
+        gaussianWeights[i] /= totalWeight;
+    }
+
     auto width = mVerticalPassFbo.getBufferWidth();
     auto height = mVerticalPassFbo.getBufferHeight();
-    auto radiusF = fmax(1.0f, mRadius * kFboScale);
     glViewport(0, 0, width, height);
+
+    // Allocate space for the corrected Gaussian weights and offsets.
+    // We could use less space, but let's keep the code simple.
+    GLfloat gaussianLinearWeights[kNumSamples] = {};
+    GLfloat gaussianLinearOffsets[kNumSamples] = {};
+
+    // Calculate the weights and offsets for the vertical pass.
+    // This only need to be called every time mRadius or height changes, so it could be optimized.
+    calculateLinearGaussian(samples, double(height), gaussianLinearOffsets, gaussianWeights,
+                            gaussianLinearWeights);
+    // set uniforms
     glActiveTexture(GL_TEXTURE0);
     glBindTexture(GL_TEXTURE_2D, mBlurredFbo.getTextureName());
     glUniform1i(mVTextureLoc, 0);
-    glUniform2f(mVIncrementLoc, radiusF / (width * 2.0f), radiusF / (height * 2.0f));
-    glUniform1i(mVNumSamplesLoc, samples);
-    glUniform1fv(mVGaussianWeightLoc, kNumSamples, gaussianWeights);
-    mEngine.checkErrors("Setting vertical-diagonal pass uniforms");
+    glUniform1i(mVNumSamplesLoc, 1 + (samples + 1) / 2);
+    glUniform1fv(mVGaussianWeightLoc, kNumSamples, gaussianLinearWeights);
+    glUniform1fv(mVGaussianOffsetLoc, kNumSamples, gaussianLinearOffsets);
+    mEngine.checkErrors("Setting vertical pass uniforms");
 
     drawMesh(mVUvLoc, mVPosLoc);
 
@@ -116,14 +166,18 @@
     mBlurredFbo.bind();
     mHorizontalProgram.useProgram();
 
+    // Calculate the weights and offsets for the horizontal pass.
+    // This only needs to be called every time mRadius or width change, so it could be optimized.
+    calculateLinearGaussian(samples, double(width), gaussianLinearOffsets, gaussianWeights,
+                            gaussianLinearWeights);
     // set uniforms
     glActiveTexture(GL_TEXTURE0);
     glBindTexture(GL_TEXTURE_2D, mVerticalPassFbo.getTextureName());
     glUniform1i(mHTextureLoc, 0);
-    glUniform2f(mHIncrementLoc, radiusF / (width * 2.0f), radiusF / (height * 2.0f));
-    glUniform1i(mHNumSamplesLoc, samples);
-    glUniform1fv(mHGaussianWeightLoc, kNumSamples, gaussianWeights);
-    mEngine.checkErrors("Setting vertical pass uniforms");
+    glUniform1i(mHNumSamplesLoc, 1 + (samples + 1) / 2);
+    glUniform1fv(mHGaussianWeightLoc, kNumSamples, gaussianLinearWeights);
+    glUniform1fv(mHGaussianOffsetLoc, kNumSamples, gaussianLinearOffsets);
+    mEngine.checkErrors("Setting horizontal pass uniforms");
 
     drawMesh(mHUvLoc, mHPosLoc);
 
@@ -142,43 +196,37 @@
     stringstream shader;
     shader << "#version 310 es\n"
            << "#define DIRECTION " << (horizontal ? "1" : "0") << "\n"
-           << "#define NUM_SAMPLES " << kNumSamples <<
+           << "#define NUM_SAMPLES " << 1 + (kNumSamples + 1) / 2 <<
             R"SHADER(
         precision mediump float;
 
         uniform sampler2D uTexture;
-        uniform vec2 uIncrement;
         uniform float[NUM_SAMPLES] uGaussianWeights;
+        uniform float[NUM_SAMPLES] uGaussianOffsets;
         uniform int uSamples;
 
         highp in vec2 vUV;
         out vec4 fragColor;
 
-        vec3 gaussianBlur(sampler2D texture, highp vec2 uv, float inc, vec2 direction) {
-            float totalWeight = 0.0;
-            vec3 blurred = vec3(0.0);
-            float fSamples = 1.0 / float(uSamples);
-
-            for (int i = -uSamples; i <= uSamples; i++) {
-                float weight = uGaussianWeights[abs(i)];
-                float normalized = float(i) * fSamples;
-                float radInc = inc * normalized;
-                blurred += weight * (texture(texture, radInc * direction + uv, 0.0)).rgb;
-                totalWeight += weight;
-            }
-
-            return blurred / totalWeight;
-        }
-
         void main() {
             #if DIRECTION == 1
-            vec3 color = gaussianBlur(uTexture, vUV, uIncrement.x, vec2(1.0, 0.0));
+            const vec2 direction = vec2(1.0, 0.0);
             #else
-            vec3 color = gaussianBlur(uTexture, vUV, uIncrement.y, vec2(0.0, 1.0));
+            const vec2 direction = vec2(0.0, 1.0);
             #endif
-            fragColor = vec4(color, 1.0);
-        }
 
+            // Iteration zero outside loop to avoid sampling the central point twice.
+            vec4 blurred = uGaussianWeights[0] * (texture(uTexture, vUV, 0.0));
+
+            // Iterate one side of the bell to halve the loop iterations.
+            for (int i = 1; i <= uSamples; i++) {
+                vec2 offset = uGaussianOffsets[i] * direction;
+                blurred += uGaussianWeights[i] * (texture(uTexture, vUV + offset, 0.0));
+                blurred += uGaussianWeights[i] * (texture(uTexture, vUV - offset, 0.0));
+            }
+
+            fragColor = vec4(blurred.rgb, 1.0);
+        }
     )SHADER";
     return shader.str();
 }
diff --git a/libs/renderengine/gl/filters/GaussianBlurFilter.h b/libs/renderengine/gl/filters/GaussianBlurFilter.h
index 8580522..44f5fde 100644
--- a/libs/renderengine/gl/filters/GaussianBlurFilter.h
+++ b/libs/renderengine/gl/filters/GaussianBlurFilter.h
@@ -28,9 +28,12 @@
 namespace renderengine {
 namespace gl {
 
+// Class that implements a Gaussian Filter that uses Linear Sampling
+// to halve the number of samples and reduce runtime by 40% as described in:
+// http://rastergrid.com/blog/2010/09/efficient-gaussian-blur-with-linear-sampling
 class GaussianBlurFilter : public BlurFilter {
 public:
-    static constexpr uint32_t kNumSamples = 12;
+    static constexpr uint32_t kNumSamples = 22;
 
     explicit GaussianBlurFilter(GLESRenderEngine& engine);
     status_t prepare() override;
@@ -47,7 +50,7 @@
     GLuint mVPosLoc;
     GLuint mVUvLoc;
     GLuint mVTextureLoc;
-    GLuint mVIncrementLoc;
+    GLuint mVGaussianOffsetLoc;
     GLuint mVNumSamplesLoc;
     GLuint mVGaussianWeightLoc;
 
@@ -56,7 +59,7 @@
     GLuint mHPosLoc;
     GLuint mHUvLoc;
     GLuint mHTextureLoc;
-    GLuint mHIncrementLoc;
+    GLuint mHGaussianOffsetLoc;
     GLuint mHNumSamplesLoc;
     GLuint mHGaussianWeightLoc;
 };