surfaceflinger: add BT2020_PQ support to RenderEngine

Add Key::Y410_BT2020_ON which performs Y410/BT2020 YUV -> RGB
conversion.

Add Key::{INPUT,OUTPUT}_TF_ST2084 which performs ST2084 EOTF and
OETF conversions.

Add Key::TONE_MAPPING_ON which performs tone-mapping.

Turn on the corresponding bits when the source buffer has BT2020_PQ
as its dataspace.  The shader then performs the following steps:

  1. convert YUV to RGB
  2. apply ST 2084 EOTF
  3. apply tone-mapping
  4. convert to DisplayP3
  5. apply relative rendering intent (i.e., clamp)
  6. finally apply sRGB OETF

We still use hardcoded parameters rather than the ones from HDR
metadata.  It is also likely that we will need to switch to a LUT
for better perf.

Test: manual
Change-Id: I53556a2acfc34ef55e88c5b139000be94072dd3e
diff --git a/services/surfaceflinger/RenderEngine/Description.cpp b/services/surfaceflinger/RenderEngine/Description.cpp
index d70083f..5e79e7a 100644
--- a/services/surfaceflinger/RenderEngine/Description.cpp
+++ b/services/surfaceflinger/RenderEngine/Description.cpp
@@ -61,6 +61,10 @@
     return mColorMatrix;
 }
 
+void Description::setY410BT2020(bool enable) {  // true: sampled values are Y410/BT2020, not RGBA
+    mY410BT2020 = enable;  // read by ProgramCache to pick Key::Y410_BT2020_ON/OFF
+}
+
 void Description::setInputTransferFunction(TransferFunction transferFunction) {
     mInputTransferFunction = transferFunction;
 }
@@ -69,4 +73,8 @@
     mOutputTransferFunction = transferFunction;
 }
 
+void Description::enableToneMapping(bool enable) {  // requests the tone-mapping shader stage
+    mToneMappingEnabled = enable;  // ProgramCache only maps this to a key bit when a color matrix is used
+}
+
 } /* namespace android */
diff --git a/services/surfaceflinger/RenderEngine/Description.h b/services/surfaceflinger/RenderEngine/Description.h
index b0b08fe..75c1981 100644
--- a/services/surfaceflinger/RenderEngine/Description.h
+++ b/services/surfaceflinger/RenderEngine/Description.h
@@ -45,13 +45,18 @@
     void setColorMatrix(const mat4& mtx);
     const mat4& getColorMatrix() const;
 
+    void setY410BT2020(bool enable);
+
     enum class TransferFunction : int {
         LINEAR,
         SRGB,
+        ST2084,
     };
     void setInputTransferFunction(TransferFunction transferFunction);
     void setOutputTransferFunction(TransferFunction transferFunction);
 
+    void enableToneMapping(bool enable);
+
 private:
     friend class Program;
     friend class ProgramCache;
@@ -73,9 +78,15 @@
     bool mColorMatrixEnabled = false;
     mat4 mColorMatrix;
 
+    // true if the sampled pixel values are in Y410/BT2020 rather than RGBA
+    bool mY410BT2020 = false;
+
     // transfer functions for the input/output
     TransferFunction mInputTransferFunction = TransferFunction::LINEAR;
     TransferFunction mOutputTransferFunction = TransferFunction::LINEAR;
+
+    // true if the color is tone-mapped after the input transfer function
+    bool mToneMappingEnabled = false;
 };
 
 } /* namespace android */
diff --git a/services/surfaceflinger/RenderEngine/GLES20RenderEngine.cpp b/services/surfaceflinger/RenderEngine/GLES20RenderEngine.cpp
index 745929d..323cec7 100644
--- a/services/surfaceflinger/RenderEngine/GLES20RenderEngine.cpp
+++ b/services/surfaceflinger/RenderEngine/GLES20RenderEngine.cpp
@@ -132,6 +132,10 @@
         // Display-P3 only.
         mSrgbToDisplayP3 = mat4(
                 ColorSpaceConnector(ColorSpace::sRGB(), ColorSpace::DisplayP3()).getTransform());
+
+        // Compute BT2020 to DisplayP3 color transform
+        mBt2020ToDisplayP3 = mat4(
+                ColorSpaceConnector(ColorSpace::BT2020(), ColorSpace::DisplayP3()).getTransform());
     }
 }
 
@@ -229,6 +233,10 @@
     mDataSpace = source;
 }
 
+void GLES20RenderEngine::setSourceY410BT2020(bool enable) {
+    mState.setY410BT2020(enable);  // forwards the flag to mState (presumably the current Description)
+}
+
 void GLES20RenderEngine::setWideColor(bool hasWideColor) {
     ALOGV("setWideColor: %s", hasWideColor ? "true" : "false");
     mDisplayHasWideColor = hasWideColor;
@@ -322,6 +330,12 @@
             case HAL_DATASPACE_DISPLAY_P3:
                 // input matches output
                 break;
+            case HAL_DATASPACE_BT2020_PQ:
+                wideColorState.setColorMatrix(mState.getColorMatrix() * mBt2020ToDisplayP3);
+                wideColorState.setInputTransferFunction(Description::TransferFunction::ST2084);
+                wideColorState.setOutputTransferFunction(Description::TransferFunction::SRGB);
+                wideColorState.enableToneMapping(true);
+                break;
             default:
                 wideColorState.setColorMatrix(mState.getColorMatrix() * mSrgbToDisplayP3);
                 wideColorState.setInputTransferFunction(Description::TransferFunction::SRGB);
diff --git a/services/surfaceflinger/RenderEngine/GLES20RenderEngine.h b/services/surfaceflinger/RenderEngine/GLES20RenderEngine.h
index 5ee9326..f3af547 100644
--- a/services/surfaceflinger/RenderEngine/GLES20RenderEngine.h
+++ b/services/surfaceflinger/RenderEngine/GLES20RenderEngine.h
@@ -71,6 +71,7 @@
     // Color management related functions and state
     void setColorMode(android_color_mode mode);
     void setSourceDataSpace(android_dataspace source);
+    void setSourceY410BT2020(bool enable);
     void setWideColor(bool hasWideColor);
     bool usesWideColor();
 
@@ -85,8 +86,9 @@
     bool mUseWideColor = false;
     uint64_t mWideColorFrameCount = 0;
 
-    // Currently only supporting sRGB and DisplayP3 color spaces
+    // Currently only supporting sRGB, BT2020 and DisplayP3 color spaces
     mat4 mSrgbToDisplayP3;
+    mat4 mBt2020ToDisplayP3;
     bool mPlatformHasWideColor = false;
 
     virtual void setupLayerTexturing(const Texture& texture);
diff --git a/services/surfaceflinger/RenderEngine/ProgramCache.cpp b/services/surfaceflinger/RenderEngine/ProgramCache.cpp
index 30d2369..7a43ea9 100644
--- a/services/surfaceflinger/RenderEngine/ProgramCache.cpp
+++ b/services/surfaceflinger/RenderEngine/ProgramCache.cpp
@@ -128,6 +128,9 @@
             .set(Key::COLOR_MATRIX_MASK,
                  description.mColorMatrixEnabled ? Key::COLOR_MATRIX_ON : Key::COLOR_MATRIX_OFF);
 
+    needs.set(Key::Y410_BT2020_MASK,
+              description.mY410BT2020 ? Key::Y410_BT2020_ON : Key::Y410_BT2020_OFF);
+
     if (needs.hasColorMatrix()) {
         switch (description.mInputTransferFunction) {
             case Description::TransferFunction::LINEAR:
@@ -137,6 +140,9 @@
             case Description::TransferFunction::SRGB:
                 needs.set(Key::INPUT_TF_MASK, Key::INPUT_TF_SRGB);
                 break;
+            case Description::TransferFunction::ST2084:
+                needs.set(Key::INPUT_TF_MASK, Key::INPUT_TF_ST2084);
+                break;
         }
 
         switch (description.mOutputTransferFunction) {
@@ -147,7 +153,13 @@
             case Description::TransferFunction::SRGB:
                 needs.set(Key::OUTPUT_TF_MASK, Key::OUTPUT_TF_SRGB);
                 break;
+            case Description::TransferFunction::ST2084:
+                needs.set(Key::OUTPUT_TF_MASK, Key::OUTPUT_TF_ST2084);
+                break;
         }
+
+        needs.set(Key::TONE_MAPPING_MASK,
+                  description.mToneMappingEnabled ? Key::TONE_MAPPING_ON : Key::TONE_MAPPING_OFF);
     }
 
     return needs;
@@ -191,6 +203,20 @@
         fs << "uniform vec4 color;";
     }
 
+    if (needs.isY410BT2020()) {
+        fs << R"__SHADER__(
+            vec3 convertY410BT2020(const vec3 color) {
+                const vec3 offset = vec3(0.0625, 0.5, 0.5);
+                const mat3 transform = mat3(
+                    vec3(1.1678,  1.1678, 1.1678),
+                    vec3(   0.0, -0.1878, 2.1481),
+                    vec3(1.6836, -0.6523,   0.0));
+                // Y is in G, U is in R, and V is in B
+                return clamp(transform * (color.grb - offset), 0.0, 1.0);
+            }
+            )__SHADER__";
+    }
+
     if (needs.hasColorMatrix()) {
         fs << "uniform mat4 colorMatrix;";
 
@@ -218,6 +244,21 @@
                     }
                 )__SHADER__";
                 break;
+            case Key::INPUT_TF_ST2084:
+                fs << R"__SHADER__(
+                    vec3 EOTF(const highp vec3 color) {
+                        const highp float m1 = (2610.0 / 4096.0) / 4.0;
+                        const highp float m2 = (2523.0 / 4096.0) * 128.0;
+                        const highp float c1 = (3424.0 / 4096.0);
+                        const highp float c2 = (2413.0 / 4096.0) * 32.0;
+                        const highp float c3 = (2392.0 / 4096.0) * 32.0;
+
+                        highp vec3 tmp = pow(color, 1.0 / vec3(m2));
+                        tmp = max(tmp - c1, 0.0) / (c2 - c3 * tmp);
+                        return pow(tmp, 1.0 / vec3(m1));
+                    }
+                    )__SHADER__";
+                break;
         }
 
         switch (needs.getOutputTF()) {
@@ -245,12 +286,103 @@
                     }
                 )__SHADER__";
                 break;
+            case Key::OUTPUT_TF_ST2084:
+                fs << R"__SHADER__(
+                    vec3 OETF(const vec3 linear) {  // ST 2084 OETF, counterpart of the ST2084 EOTF
+                        const highp float m1 = (2610.0 / 4096.0) / 4.0;
+                        const highp float m2 = (2523.0 / 4096.0) * 128.0;
+                        const highp float c1 = (3424.0 / 4096.0);
+                        const highp float c2 = (2413.0 / 4096.0) * 32.0;
+                        const highp float c3 = (2392.0 / 4096.0) * 32.0;
+
+                        highp vec3 tmp = pow(linear, vec3(m1));  // highp, same as the ST2084 EOTF
+                        tmp = (c1 + c2 * tmp) / (1.0 + c3 * tmp);
+                        return pow(tmp, vec3(m2));
+                    }
+                )__SHADER__";
+                break;
+        }
+
+        if (needs.hasToneMapping()) {
+            fs << R"__SHADER__(
+                float ToneMapChannel(const float color) {
+                    const float maxLumi = 10000.0;
+                    const float maxMasteringLumi = 1000.0;
+                    const float maxContentLumi = 1000.0;
+                    const float maxInLumi = min(maxMasteringLumi, maxContentLumi);
+                    const float maxOutLumi = 500.0;
+
+                    // convert to nits first
+                    float nits = color * maxLumi;
+
+                    // clamp to max input luminance
+                    nits = clamp(nits, 0.0, maxInLumi);
+
+                    // scale [0.0, maxInLumi] to [0.0, maxOutLumi]
+                    if (maxInLumi <= maxOutLumi) {
+                        nits *= maxOutLumi / maxInLumi;
+                    } else {
+                        // three control points
+                        const float x0 = 10.0;
+                        const float y0 = 17.0;
+                        const float x1 = maxOutLumi * 0.75;
+                        const float y1 = x1;
+                        const float x2 = x1 + (maxInLumi - x1) / 2.0;
+                        const float y2 = y1 + (maxOutLumi - y1) * 0.75;
+
+                        // horizontal distances between the last three control points
+                        const float h12 = x2 - x1;
+                        const float h23 = maxInLumi - x2;
+                        // tangents at the last three control points
+                        const float m1 = (y2 - y1) / h12;
+                        const float m3 = (maxOutLumi - y2) / h23;
+                        const float m2 = (m1 + m3) / 2.0;
+
+                        if (nits < x0) {
+                            // scale [0.0, x0] to [0.0, y0] linearly
+                            const float slope = y0 / x0;
+                            nits *= slope;
+                        } else if (nits < x1) {
+                            // scale [x0, x1] to [y0, y1] linearly
+                            const float slope = (y1 - y0) / (x1 - x0);
+                            nits = y0 + (nits - x0) * slope;
+                        } else if (nits < x2) {
+                            // scale [x1, x2] to [y1, y2] using Hermite interp
+                            float t = (nits - x1) / h12;
+                            nits = (y1 * (1.0 + 2.0 * t) + h12 * m1 * t) * (1.0 - t) * (1.0 - t) +
+                                   (y2 * (3.0 - 2.0 * t) + h12 * m2 * (t - 1.0)) * t * t;
+                        } else {
+                            // scale [x2, maxInLumi] to [y2, maxOutLumi] using Hermite interp
+                            float t = (nits - x2) / h23;
+                            nits = (y2 * (1.0 + 2.0 * t) + h23 * m2 * t) * (1.0 - t) * (1.0 - t) +
+                                   (maxOutLumi * (3.0 - 2.0 * t) + h23 * m3 * (t - 1.0)) * t * t;
+                        }
+                    }
+
+                    // convert back to [0.0, 1.0]
+                    return nits / maxOutLumi;
+                }
+
+                vec3 ToneMap(const vec3 color) {
+                    return vec3(ToneMapChannel(color.r), ToneMapChannel(color.g),
+                                ToneMapChannel(color.b));
+                }
+            )__SHADER__";
+        } else {
+            fs << R"__SHADER__(
+                vec3 ToneMap(const vec3 color) {
+                    return color;
+                }
+            )__SHADER__";
         }
     }
 
     fs << "void main(void) {" << indent;
     if (needs.isTexturing()) {
         fs << "gl_FragColor = texture2D(sampler, outTexCoords);";
+        if (needs.isY410BT2020()) {
+            fs << "gl_FragColor.rgb = convertY410BT2020(gl_FragColor.rgb);";
+        }
     } else {
         fs << "gl_FragColor.rgb = color.rgb;";
         fs << "gl_FragColor.a = 1.0;";
@@ -274,7 +406,10 @@
             // avoid divide by 0 by adding 0.5/256 to the alpha channel
             fs << "gl_FragColor.rgb = gl_FragColor.rgb / (gl_FragColor.a + 0.0019);";
         }
-        fs << "vec4 transformed = colorMatrix * vec4(EOTF(gl_FragColor.rgb), 1);";
+        fs << "vec4 transformed = colorMatrix * vec4(ToneMap(EOTF(gl_FragColor.rgb)), 1);";
+        // the transformation from a wider colorspace to a narrower one can
+        // result in >1.0 or <0.0 pixel values
+        fs << "transformed.rgb = clamp(transformed.rgb, 0.0, 1.0);";
         // We assume the last row is always {0,0,0,1} and we skip the division by w
         fs << "gl_FragColor.rgb = OETF(transformed.rgb);";
         if (!needs.isOpaque() && needs.isPremultiplied()) {
diff --git a/services/surfaceflinger/RenderEngine/ProgramCache.h b/services/surfaceflinger/RenderEngine/ProgramCache.h
index 491fad3..dcc8cc6 100644
--- a/services/surfaceflinger/RenderEngine/ProgramCache.h
+++ b/services/surfaceflinger/RenderEngine/ProgramCache.h
@@ -80,11 +80,23 @@
             INPUT_TF_MASK = 3 << INPUT_TF_SHIFT,
             INPUT_TF_LINEAR = 0 << INPUT_TF_SHIFT,
             INPUT_TF_SRGB = 1 << INPUT_TF_SHIFT,
+            INPUT_TF_ST2084 = 2 << INPUT_TF_SHIFT,
 
             OUTPUT_TF_SHIFT = 8,
             OUTPUT_TF_MASK = 3 << OUTPUT_TF_SHIFT,
             OUTPUT_TF_LINEAR = 0 << OUTPUT_TF_SHIFT,
             OUTPUT_TF_SRGB = 1 << OUTPUT_TF_SHIFT,
+            OUTPUT_TF_ST2084 = 2 << OUTPUT_TF_SHIFT,
+
+            TONE_MAPPING_SHIFT = 10,
+            TONE_MAPPING_MASK = 1 << TONE_MAPPING_SHIFT,
+            TONE_MAPPING_OFF = 0 << TONE_MAPPING_SHIFT,
+            TONE_MAPPING_ON = 1 << TONE_MAPPING_SHIFT,
+
+            Y410_BT2020_SHIFT = 11,
+            Y410_BT2020_MASK = 1 << Y410_BT2020_SHIFT,
+            Y410_BT2020_OFF = 0 << Y410_BT2020_SHIFT,
+            Y410_BT2020_ON = 1 << Y410_BT2020_SHIFT,
         };
 
         inline Key() : mKey(0) {}
@@ -103,6 +115,8 @@
         inline bool hasColorMatrix() const { return (mKey & COLOR_MATRIX_MASK) == COLOR_MATRIX_ON; }
         inline int getInputTF() const { return (mKey & INPUT_TF_MASK); }
         inline int getOutputTF() const { return (mKey & OUTPUT_TF_MASK); }
+        inline bool hasToneMapping() const { return (mKey & TONE_MAPPING_MASK) == TONE_MAPPING_ON; }
+        inline bool isY410BT2020() const { return (mKey & Y410_BT2020_MASK) == Y410_BT2020_ON; }
 
         // this is the definition of a friend function -- not a method of class Needs
         friend inline int strictly_order_type(const Key& lhs, const Key& rhs) {
diff --git a/services/surfaceflinger/RenderEngine/RenderEngine.h b/services/surfaceflinger/RenderEngine/RenderEngine.h
index 737b1dd..67c0d1c 100644
--- a/services/surfaceflinger/RenderEngine/RenderEngine.h
+++ b/services/surfaceflinger/RenderEngine/RenderEngine.h
@@ -138,6 +138,7 @@
                                     const half4& color) = 0;
     virtual void setColorMode(android_color_mode mode) = 0;
     virtual void setSourceDataSpace(android_dataspace source) = 0;
+    virtual void setSourceY410BT2020(bool enable) = 0;
     virtual void setWideColor(bool hasWideColor) = 0;
     virtual bool usesWideColor() = 0;
     virtual void setupLayerTexturing(const Texture& texture) = 0;