sf: optimize luma sampling code
Removing some math from the luma-sampling
code reduces its running time by about half
(~.5ms -> ~.25ms at 1.1MHz on a little core
in some experiments).
Bug: 127973145
Bug: 134922154
Test: trace and compare sampleArea times when
scrolling in news app
Change-Id: Ie53d9595bea6685cf45f53972b42daa5e32fcc8e
diff --git a/services/surfaceflinger/RegionSamplingThread.cpp b/services/surfaceflinger/RegionSamplingThread.cpp
index 7fa33f5..fdc68aa 100644
--- a/services/surfaceflinger/RegionSamplingThread.cpp
+++ b/services/surfaceflinger/RegionSamplingThread.cpp
@@ -258,7 +258,7 @@
namespace {
// Using Rec. 709 primaries
-float getLuma(float r, float g, float b) {
+inline float getLuma(float r, float g, float b) {
constexpr auto rec709_red_primary = 0.2126f;
constexpr auto rec709_green_primary = 0.7152f;
constexpr auto rec709_blue_primary = 0.0722f;
@@ -289,10 +289,10 @@
const uint32_t* rowBase = data + row * stride;
for (int32_t column = area.left; column < area.right; ++column) {
uint32_t pixel = rowBase[column];
- const float r = (pixel & 0xFF) / 255.0f;
- const float g = ((pixel >> 8) & 0xFF) / 255.0f;
- const float b = ((pixel >> 16) & 0xFF) / 255.0f;
- const uint8_t luma = std::round(getLuma(r, g, b) * 255.0f);
+ const float r = pixel & 0xFF;
+ const float g = (pixel >> 8) & 0xFF;
+ const float b = (pixel >> 16) & 0xFF;
+ const uint8_t luma = std::round(getLuma(r, g, b));
++brightnessBuckets[luma];
if (brightnessBuckets[luma] > majoritySampleNum) return luma / 255.0f;
}