Add Kawase blur with dual filtering

Blurs are smoother between R=50 and R=200 with similar performance to
the previous version. We use between 1 and 5 passes at progressively
smaller resolutions depending on the blur radius that was requested.

This is currently not enabled in code, but will be enabled by a
follow-up CL.

Test: atest BlurTests
Bug: 185365391
Flag: EXEMPT already behind a non-aconfig flag
Change-Id: I2ad47d83b69b69215fe59a5ecda3b1766177a852
diff --git a/libs/renderengine/Android.bp b/libs/renderengine/Android.bp
index ecf98c6..7639fab 100644
--- a/libs/renderengine/Android.bp
+++ b/libs/renderengine/Android.bp
@@ -101,6 +101,7 @@
         "skia/debug/SkiaMemoryReporter.cpp",
         "skia/filters/BlurFilter.cpp",
         "skia/filters/GaussianBlurFilter.cpp",
+        "skia/filters/KawaseBlurDualFilter.cpp",
         "skia/filters/KawaseBlurFilter.cpp",
         "skia/filters/LinearEffect.cpp",
         "skia/filters/MouriMap.cpp",
diff --git a/libs/renderengine/benchmark/RenderEngineBench.cpp b/libs/renderengine/benchmark/RenderEngineBench.cpp
index 05a2063..326d1ce 100644
--- a/libs/renderengine/benchmark/RenderEngineBench.cpp
+++ b/libs/renderengine/benchmark/RenderEngineBench.cpp
@@ -64,14 +64,15 @@
     return std::pair<uint32_t, uint32_t>(width, height);
 }
 
-static std::unique_ptr<RenderEngine> createRenderEngine(RenderEngine::Threaded threaded,
-                                                        RenderEngine::GraphicsApi graphicsApi) {
+static std::unique_ptr<RenderEngine> createRenderEngine(
+        RenderEngine::Threaded threaded, RenderEngine::GraphicsApi graphicsApi,
+        RenderEngine::BlurAlgorithm blurAlgorithm = RenderEngine::BlurAlgorithm::KAWASE) {
     auto args = RenderEngineCreationArgs::Builder()
                         .setPixelFormat(static_cast<int>(ui::PixelFormat::RGBA_8888))
                         .setImageCacheSize(1)
                         .setEnableProtectedContext(true)
                         .setPrecacheToneMapperShaderOnly(false)
-                        .setBlurAlgorithm(renderengine::RenderEngine::BlurAlgorithm::KAWASE)
+                        .setBlurAlgorithm(blurAlgorithm)
                         .setContextPriority(RenderEngine::ContextPriority::REALTIME)
                         .setThreaded(threaded)
                         .setGraphicsApi(graphicsApi)
@@ -180,7 +181,8 @@
 void BM_blur(benchmark::State& benchState, Args&&... args) {
     auto args_tuple = std::make_tuple(std::move(args)...);
     auto re = createRenderEngine(static_cast<RenderEngine::Threaded>(std::get<0>(args_tuple)),
-                                 static_cast<RenderEngine::GraphicsApi>(std::get<1>(args_tuple)));
+                                 static_cast<RenderEngine::GraphicsApi>(std::get<1>(args_tuple)),
+                                 static_cast<RenderEngine::BlurAlgorithm>(std::get<2>(args_tuple)));
 
     // Initially use cpu access so we can decode into it with AImageDecoder.
     auto [width, height] = getDisplaySize();
@@ -224,5 +226,11 @@
     benchDrawLayers(*re, layers, benchState, "blurred");
 }
 
-BENCHMARK_CAPTURE(BM_blur, SkiaGLThreaded, RenderEngine::Threaded::YES,
-                  RenderEngine::GraphicsApi::GL);
+BENCHMARK_CAPTURE(BM_blur, gaussian, RenderEngine::Threaded::YES, RenderEngine::GraphicsApi::GL,
+                  RenderEngine::BlurAlgorithm::GAUSSIAN);
+
+BENCHMARK_CAPTURE(BM_blur, kawase, RenderEngine::Threaded::YES, RenderEngine::GraphicsApi::GL,
+                  RenderEngine::BlurAlgorithm::KAWASE);
+
+BENCHMARK_CAPTURE(BM_blur, kawase_dual_filter, RenderEngine::Threaded::YES,
+                  RenderEngine::GraphicsApi::GL, RenderEngine::BlurAlgorithm::KAWASE_DUAL_FILTER);
diff --git a/libs/renderengine/include/renderengine/RenderEngine.h b/libs/renderengine/include/renderengine/RenderEngine.h
index 7207394..9bc2c48 100644
--- a/libs/renderengine/include/renderengine/RenderEngine.h
+++ b/libs/renderengine/include/renderengine/RenderEngine.h
@@ -131,6 +131,7 @@
         NONE,
         GAUSSIAN,
         KAWASE,
+        KAWASE_DUAL_FILTER,
     };
 
     static std::unique_ptr<RenderEngine> create(const RenderEngineCreationArgs& args);
diff --git a/libs/renderengine/skia/SkiaRenderEngine.cpp b/libs/renderengine/skia/SkiaRenderEngine.cpp
index 9709cd1..d58f303 100644
--- a/libs/renderengine/skia/SkiaRenderEngine.cpp
+++ b/libs/renderengine/skia/SkiaRenderEngine.cpp
@@ -76,6 +76,7 @@
 #include "compat/SkiaGpuContext.h"
 #include "filters/BlurFilter.h"
 #include "filters/GaussianBlurFilter.h"
+#include "filters/KawaseBlurDualFilter.h"
 #include "filters/KawaseBlurFilter.h"
 #include "filters/LinearEffect.h"
 #include "filters/MouriMap.h"
@@ -285,6 +286,11 @@
             mBlurFilter = new KawaseBlurFilter();
             break;
         }
+        case BlurAlgorithm::KAWASE_DUAL_FILTER: {
+            ALOGD("Background Blurs Enabled (Kawase dual-filtering algorithm)");
+            mBlurFilter = new KawaseBlurDualFilter();
+            break;
+        }
         default: {
             mBlurFilter = nullptr;
             break;
diff --git a/libs/renderengine/skia/filters/KawaseBlurDualFilter.cpp b/libs/renderengine/skia/filters/KawaseBlurDualFilter.cpp
new file mode 100644
index 0000000..db0b133
--- /dev/null
+++ b/libs/renderengine/skia/filters/KawaseBlurDualFilter.cpp
@@ -0,0 +1,173 @@
+/*
+ * Copyright 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define ATRACE_TAG ATRACE_TAG_GRAPHICS
+
+#include "KawaseBlurDualFilter.h"
+#include <SkAlphaType.h>
+#include <SkBlendMode.h>
+#include <SkCanvas.h>
+#include <SkData.h>
+#include <SkPaint.h>
+#include <SkRRect.h>
+#include <SkRuntimeEffect.h>
+#include <SkShader.h>
+#include <SkSize.h>
+#include <SkString.h>
+#include <SkSurface.h>
+#include <SkTileMode.h>
+#include <include/gpu/GpuTypes.h>
+#include <include/gpu/ganesh/SkSurfaceGanesh.h>
+#include <log/log.h>
+#include <utils/Trace.h>
+
+namespace android {
+namespace renderengine {
+namespace skia {
+
+KawaseBlurDualFilter::KawaseBlurDualFilter() : BlurFilter() {
+    // A shader that averages the original fragment with samples at each vertex of a regular
+    // heptagon scaled by in_blurOffset. The seven offsets sum to zero, so no net shift is added.
+    SkString blurString(R"(
+        uniform shader child;
+        uniform float in_blurOffset;
+        uniform float in_crossFade;
+
+        const float2 STEP_0 = float2( 1.0, 0.0);
+        const float2 STEP_1 = float2( 0.623489802,  0.781831482);
+        const float2 STEP_2 = float2(-0.222520934,  0.974927912);
+        const float2 STEP_3 = float2(-0.900968868,  0.433883739);
+        const float2 STEP_4 = float2(-0.900968868, -0.433883739);
+        const float2 STEP_5 = float2(-0.222520934, -0.974927912);
+        const float2 STEP_6 = float2( 0.623489802, -0.781831482);
+
+        half4 main(float2 xy) {
+            half3 c = child.eval(xy).rgb;
+
+            c += child.eval(xy + STEP_0 * in_blurOffset).rgb;
+            c += child.eval(xy + STEP_1 * in_blurOffset).rgb;
+            c += child.eval(xy + STEP_2 * in_blurOffset).rgb;
+            c += child.eval(xy + STEP_3 * in_blurOffset).rgb;
+            c += child.eval(xy + STEP_4 * in_blurOffset).rgb;
+            c += child.eval(xy + STEP_5 * in_blurOffset).rgb;
+            c += child.eval(xy + STEP_6 * in_blurOffset).rgb;
+
+            return half4(c * 0.125 * in_crossFade, in_crossFade);
+        }
+    )");
+
+    auto [blurEffect, error] = SkRuntimeEffect::MakeForShader(blurString);
+    LOG_ALWAYS_FATAL_IF(!blurEffect, "RuntimeShader error: %s", error.c_str());
+    mBlurEffect = std::move(blurEffect);
+}
+
+static sk_sp<SkSurface> makeSurface(SkiaGpuContext* context, const SkRect& origRect, int scale) {
+    // Divide both dimensions of origRect by scale, rounding any fractional result up.
+    const auto scaledWidth = ceil(static_cast<float>(origRect.width()) / scale);
+    const auto scaledHeight = ceil(static_cast<float>(origRect.height()) / scale);
+    return context->createRenderTarget(SkImageInfo::MakeN32Premul(scaledWidth, scaledHeight));
+}
+
+void KawaseBlurDualFilter::blurInto(const sk_sp<SkSurface>& drawSurface,
+                                    const sk_sp<SkImage>& readImage, const float radius,
+                                    const float alpha) const {
+    const float dstWidth = static_cast<float>(drawSurface->width());
+    const float ratio = dstWidth / readImage->width();
+    // Wrap the image in a clamped, linearly-filtered shader scaled by the width ratio.
+    const SkMatrix toDst = SkMatrix::Scale(ratio, ratio);
+    const SkSamplingOptions bilinear(SkFilterMode::kLinear, SkMipmapMode::kNone);
+    auto src = readImage->makeShader(SkTileMode::kClamp, SkTileMode::kClamp, bilinear, toDst);
+    blurInto(drawSurface, std::move(src), readImage->width() / dstWidth, radius, alpha);
+}
+
+void KawaseBlurDualFilter::blurInto(const sk_sp<SkSurface>& drawSurface, sk_sp<SkShader> input,
+                                    const float inverseScale, const float radius,
+                                    const float alpha) const {
+    SkRuntimeShaderBuilder blurBuilder(mBlurEffect);
+    blurBuilder.child("child") = std::move(input);
+    static_cast<void>(inverseScale); // Unused: the shader declares no in_inverseScale uniform.
+    blurBuilder.uniform("in_blurOffset") = radius;
+    blurBuilder.uniform("in_crossFade") = alpha;
+    SkPaint paint;
+    paint.setShader(blurBuilder.makeShader(nullptr));
+    paint.setBlendMode(alpha == 1.0f ? SkBlendMode::kSrc : SkBlendMode::kSrcOver);
+    drawSurface->getCanvas()->drawPaint(paint);
+}
+
+sk_sp<SkImage> KawaseBlurDualFilter::generate(SkiaGpuContext* context, const uint32_t blurRadius,
+                                              const sk_sp<SkImage> input,
+                                              const SkRect& blurRect) const {
+    // Blur the blurRect area of input. A factor of (1 / sqrt(3)) is applied to the radius to match
+    // Skia's built-in blur as used by RenderEffect. See SkBlurMask.cpp for the reasoning.
+    const float radius = blurRadius * 0.57735f;
+
+    // Use a variable number of blur passes depending on the radius. The non-integer part of this
+    // calculation is used to mix the final pass into the second-last with an alpha blend.
+    constexpr int kMaxSurfaces = 4;
+    const float filterDepth =
+            std::min(kMaxSurfaces - 1.0f, 1.0f + std::max(0.0f, log2f(radius * kInputScale)));
+    const int filterPasses = std::min(kMaxSurfaces - 1, static_cast<int>(ceil(filterDepth)));
+
+    // Render into surfaces downscaled by 1x, 1x, 2x, and 4x from the initial downscale.
+    sk_sp<SkSurface> surfaces[kMaxSurfaces] =
+            {filterPasses >= 0 ? makeSurface(context, blurRect, 1 * kInverseInputScale) : nullptr,
+             filterPasses >= 1 ? makeSurface(context, blurRect, 1 * kInverseInputScale) : nullptr,
+             filterPasses >= 2 ? makeSurface(context, blurRect, 2 * kInverseInputScale) : nullptr,
+             filterPasses >= 3 ? makeSurface(context, blurRect, 4 * kInverseInputScale) : nullptr};
+
+    // These weights for scaling offsets per-pass are handpicked to look good at 1 <= radius <= 600.
+    static const float kWeights[7] = {1.0f, 2.0f, 3.5f, 1.0f, 2.0f, 2.0f, 2.0f};
+
+    // Kawase is an approximation of Gaussian, but behaves differently because it is made up of many
+    // simpler blurs. A transformation is required to approximate the same effect as Gaussian.
+    float sumSquaredR = powf(kWeights[0] * powf(2.0f, 1), 2.0f);
+    for (int i = 0; i < filterPasses; i++) {
+        const float alpha = std::min(1.0f, filterDepth - i);
+        sumSquaredR += powf(powf(2.0f, i + 1) * alpha * kWeights[1 + i], 2.0f);
+        sumSquaredR += powf(powf(2.0f, i + 1) * alpha * kWeights[6 - i], 2.0f);
+    }
+    // Solve for R = sqrt(sum(r_i^2)). Divide R by hypot(1,1) to find some (x,y) offsets.
+    const float step = M_SQRT1_2 *
+            sqrtf(std::max(0.0f, (powf(radius, 2.0f) - powf(kInverseInputScale, 2.0f)) / sumSquaredR));
+
+    // Start by downscaling and doing the first blur pass.
+    {
+        // Skia's sampling matrix is the inverse of the intuitive one: rather than
+        // Translate(blurRect.fLeft, blurRect.fTop) * Scale(kInverseInputScale), apply its inverse.
+        SkMatrix blurMatrix = SkMatrix::Translate(-blurRect.fLeft, -blurRect.fTop);
+        blurMatrix.postScale(kInputScale, kInputScale);
+        // Deliberately non-const so that std::move below moves the sk_sp instead of copying it.
+        auto sourceShader =
+                input->makeShader(SkTileMode::kClamp, SkTileMode::kClamp,
+                                  SkSamplingOptions(SkFilterMode::kLinear, SkMipmapMode::kNone),
+                                  blurMatrix);
+        blurInto(surfaces[0], std::move(sourceShader), kInputScale, kWeights[0] * step, 1.0f);
+    }
+    // Next the remaining downscale blur passes, each reading a snapshot of the previous surface.
+    for (int i = 0; i < filterPasses; i++) {
+        blurInto(surfaces[i + 1], surfaces[i]->makeImageSnapshot(), kWeights[1 + i] * step, 1.0f);
+    }
+    // Finally blur+upscale back to our original size.
+    for (int i = filterPasses - 1; i >= 0; i--) {
+        blurInto(surfaces[i], surfaces[i + 1]->makeImageSnapshot(), kWeights[6 - i] * step,
+                 std::min(1.0f, filterDepth - i));
+    }
+    return surfaces[0]->makeImageSnapshot();
+}
+
+} // namespace skia
+} // namespace renderengine
+} // namespace android
diff --git a/libs/renderengine/skia/filters/KawaseBlurDualFilter.h b/libs/renderengine/skia/filters/KawaseBlurDualFilter.h
new file mode 100644
index 0000000..6f4adbf
--- /dev/null
+++ b/libs/renderengine/skia/filters/KawaseBlurDualFilter.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <SkCanvas.h>
+#include <SkImage.h>
+#include <SkRuntimeEffect.h>
+#include <SkSurface.h>
+#include "BlurFilter.h"
+
+namespace android {
+namespace renderengine {
+namespace skia {
+
+/**
+ * This is an implementation of a Kawase blur with dual-filtering passes, as described here:
+ * https://community.arm.com/cfs-file/__key/communityserver-blogs-components-weblogfiles/00-00-00-20-66/siggraph2015_2D00_mmg_2D00_marius_2D00_slides.pdf
+ * https://community.arm.com/cfs-file/__key/communityserver-blogs-components-weblogfiles/00-00-00-20-66/siggraph2015_2D00_mmg_2D00_marius_2D00_notes.pdf
+ */
+class KawaseBlurDualFilter : public BlurFilter {
+public:
+    explicit KawaseBlurDualFilter();
+    virtual ~KawaseBlurDualFilter() = default;
+
+    // Runs the blur over blurRect of blurInput and returns the blurred result as an SkImage.
+    sk_sp<SkImage> generate(SkiaGpuContext* context, const uint32_t radius,
+                            const sk_sp<SkImage> blurInput, const SkRect& blurRect) const override;
+
+private:
+    // Draws one blur pass into drawSurface, sampling from an image.
+    void blurInto(const sk_sp<SkSurface>& drawSurface, const sk_sp<SkImage>& readImage,
+                  const float radius, const float alpha) const;
+    // Draws one blur pass into drawSurface, sampling from a shader.
+    void blurInto(const sk_sp<SkSurface>& drawSurface, const sk_sp<SkShader> input,
+                  const float inverseScale, const float radius, const float alpha) const;
+    sk_sp<SkRuntimeEffect> mBlurEffect; // Runtime effect used for every blur pass.
+};
+
+} // namespace skia
+} // namespace renderengine
+} // namespace android