More benchmark tweaks

Report rendering memory usage (CPU + GPU Skia caches) as a benchmark
counter, purge the caches before each run instead of once per test, fold
the glyph cache into the CPU cache dump, and gate the full graphics
memory dump behind --report-gpu-memory=verbose.

Bug: 187718492
Test: this
Change-Id: Ie0cad91e1c7d1a02704fc4b1007ba2c28cb2b3e7
diff --git a/libs/hwui/pipeline/skia/SkiaMemoryTracer.cpp b/libs/hwui/pipeline/skia/SkiaMemoryTracer.cpp
index 0b995bc..1042703 100644
--- a/libs/hwui/pipeline/skia/SkiaMemoryTracer.cpp
+++ b/libs/hwui/pipeline/skia/SkiaMemoryTracer.cpp
@@ -86,12 +86,9 @@
             }
         }
 
-        // if we don't have a resource name then we don't know how to label the
-        // data and should abort.
+        // if we don't have a pretty name then use the dumpName
         if (resourceName == nullptr) {
-            mCurrentElement.clear();
-            mCurrentValues.clear();
-            return;
+            resourceName = mCurrentElement.c_str();
         }
 
         auto result = mResults.find(resourceName);
@@ -157,6 +154,14 @@
     }
 }
 
+size_t SkiaMemoryTracer::total() {
+    processElement();
+    if (!strcmp("bytes", mTotalSize.units)) {
+        return mTotalSize.value;
+    }
+    return 0;
+}
+
 void SkiaMemoryTracer::logTotals(String8& log) {
     TraceValue total = convertUnits(mTotalSize);
     TraceValue purgeable = convertUnits(mPurgeableSize);
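
The new total() accessor returns the accumulated dump size as a raw byte count, and 0 if the
totals were tracked in some other unit. A minimal sketch of the intended call pattern
(illustrative only, mirroring CacheManager::getMemoryUsage later in this change):

    // Illustrative only, not part of the patch: feed a Skia memory dump into a
    // tracer keyed by the "category" attribute, then read back the byte total.
    skiapipeline::SkiaMemoryTracer tracer("category", true);
    SkGraphics::DumpMemoryStatistics(&tracer);
    size_t cpuBytes = tracer.total();  // 0 unless the totals were reported in "bytes"
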
diff --git a/libs/hwui/pipeline/skia/SkiaMemoryTracer.h b/libs/hwui/pipeline/skia/SkiaMemoryTracer.h
index b393b07..cba3b04 100644
--- a/libs/hwui/pipeline/skia/SkiaMemoryTracer.h
+++ b/libs/hwui/pipeline/skia/SkiaMemoryTracer.h
@@ -37,6 +37,7 @@
     bool hasOutput();
     void logOutput(String8& log);
     void logTotals(String8& log);
+    size_t total();
 
     void dumpNumericValue(const char* dumpName, const char* valueName, const char* units,
                           uint64_t value) override;
diff --git a/libs/hwui/renderthread/CacheManager.cpp b/libs/hwui/renderthread/CacheManager.cpp
index 5047be9..46e8060 100644
--- a/libs/hwui/renderthread/CacheManager.cpp
+++ b/libs/hwui/renderthread/CacheManager.cpp
@@ -130,27 +130,43 @@
     mGrContext->purgeResourcesNotUsedInMs(std::chrono::seconds(30));
 }
 
+void CacheManager::getMemoryUsage(size_t* cpuUsage, size_t* gpuUsage) {
+    *cpuUsage = 0;
+    *gpuUsage = 0;
+    if (!mGrContext) {
+        return;
+    }
+
+    skiapipeline::SkiaMemoryTracer cpuTracer("category", true);
+    SkGraphics::DumpMemoryStatistics(&cpuTracer);
+    *cpuUsage += cpuTracer.total();
+
+    skiapipeline::SkiaMemoryTracer gpuTracer("category", true);
+    mGrContext->dumpMemoryStatistics(&gpuTracer);
+    *gpuUsage += gpuTracer.total();
+}
+
 void CacheManager::dumpMemoryUsage(String8& log, const RenderState* renderState) {
     if (!mGrContext) {
         log.appendFormat("No valid cache instance.\n");
         return;
     }
 
-    log.appendFormat("Font Cache (CPU):\n");
-    log.appendFormat("  Size: %.2f kB \n", SkGraphics::GetFontCacheUsed() / 1024.0f);
-    log.appendFormat("  Glyph Count: %d \n", SkGraphics::GetFontCacheCountUsed());
-
     std::vector<skiapipeline::ResourcePair> cpuResourceMap = {
             {"skia/sk_resource_cache/bitmap_", "Bitmaps"},
             {"skia/sk_resource_cache/rrect-blur_", "Masks"},
             {"skia/sk_resource_cache/rects-blur_", "Masks"},
             {"skia/sk_resource_cache/tessellated", "Shadows"},
+            {"skia/sk_glyph_cache", "Glyph Cache"},
     };
     skiapipeline::SkiaMemoryTracer cpuTracer(cpuResourceMap, false);
     SkGraphics::DumpMemoryStatistics(&cpuTracer);
     if (cpuTracer.hasOutput()) {
         log.appendFormat("CPU Caches:\n");
         cpuTracer.logOutput(log);
+        log.appendFormat("  Glyph Count: %d \n", SkGraphics::GetFontCacheCountUsed());
+        log.appendFormat("Total CPU memory usage:\n");
+        cpuTracer.logTotals(log);
     }
 
     skiapipeline::SkiaMemoryTracer gpuTracer("category", true);
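
getMemoryUsage() runs one tracer over the SkGraphics caches and one over the GrContext, returning
the two byte totals through out-parameters. A hedged sketch of a caller on the render thread
(cacheManager is an assumed instance; this is not part of the patch):

    // Illustrative only: read back the CPU and GPU cache totals in bytes.
    size_t cpuBytes = 0, gpuBytes = 0;
    cacheManager.getMemoryUsage(&cpuBytes, &gpuBytes);
    ALOGD("rendering memory: cpu=%zu gpu=%zu total=%zu",
          cpuBytes, gpuBytes, cpuBytes + gpuBytes);
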
diff --git a/libs/hwui/renderthread/CacheManager.h b/libs/hwui/renderthread/CacheManager.h
index 0a6b8dc..713ea99 100644
--- a/libs/hwui/renderthread/CacheManager.h
+++ b/libs/hwui/renderthread/CacheManager.h
@@ -47,6 +47,7 @@
     void trimMemory(TrimMemoryMode mode);
     void trimStaleResources();
     void dumpMemoryUsage(String8& log, const RenderState* renderState = nullptr);
+    void getMemoryUsage(size_t* cpuUsage, size_t* gpuUsage);
 
     size_t getCacheSize() const { return mMaxResourceBytes; }
     size_t getBackgroundCacheSize() const { return mBackgroundResourceBytes; }
diff --git a/libs/hwui/renderthread/RenderProxy.cpp b/libs/hwui/renderthread/RenderProxy.cpp
index ad325cf..95aa29d 100644
--- a/libs/hwui/renderthread/RenderProxy.cpp
+++ b/libs/hwui/renderthread/RenderProxy.cpp
@@ -195,6 +195,17 @@
     }
 }
 
+void RenderProxy::purgeCaches() {
+    if (RenderThread::hasInstance()) {
+        RenderThread& thread = RenderThread::getInstance();
+        thread.queue().post([&thread]() {
+            if (thread.getGrContext()) {
+                thread.cacheManager().trimMemory(CacheManager::TrimMemoryMode::Complete);
+            }
+        });
+    }
+}
+
 void RenderProxy::overrideProperty(const char* name, const char* value) {
     // expensive, but block here since name/value pointers owned by caller
     RenderThread::getInstance().queue().runSync(
@@ -256,6 +267,13 @@
     }
 }
 
+void RenderProxy::getMemoryUsage(size_t* cpuUsage, size_t* gpuUsage) {
+    if (RenderThread::hasInstance()) {
+        auto& thread = RenderThread::getInstance();
+        thread.queue().runSync([&]() { thread.getMemoryUsage(cpuUsage, gpuUsage); });
+    }
+}
+
 void RenderProxy::setProcessStatsBuffer(int fd) {
     auto& rt = RenderThread::getInstance();
     rt.queue().post([&rt, fd = dup(fd)]() {
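
Of the two new proxies, purgeCaches() is fire-and-forget (queue().post), so the trim runs
asynchronously on the RenderThread, while getMemoryUsage() uses queue().runSync because the
caller reads the out-parameters as soon as the call returns. A rough sketch of the macrobench
call sequence this enables (illustrative only, not part of the patch):

    // Illustrative only: asynchronous purge before the run, synchronous readback after it.
    RenderProxy::purgeCaches();                          // post: returns immediately
    // ... render the test scene ...
    size_t cpuUsage = 0, gpuUsage = 0;
    RenderProxy::getMemoryUsage(&cpuUsage, &gpuUsage);   // runSync: blocks until the values are filled
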
diff --git a/libs/hwui/renderthread/RenderProxy.h b/libs/hwui/renderthread/RenderProxy.h
index 662b445..0681dc5 100644
--- a/libs/hwui/renderthread/RenderProxy.h
+++ b/libs/hwui/renderthread/RenderProxy.h
@@ -98,6 +98,7 @@
 
     void destroyHardwareResources();
     static void trimMemory(int level);
+    static void purgeCaches();
     static void overrideProperty(const char* name, const char* value);
 
     void fence();
@@ -110,6 +111,7 @@
     void resetProfileInfo();
     uint32_t frameTimePercentile(int p);
     static void dumpGraphicsMemory(int fd, bool includeProfileData = true);
+    static void getMemoryUsage(size_t* cpuUsage, size_t* gpuUsage);
 
     static void rotateProcessStatsBuffer();
     static void setProcessStatsBuffer(int fd);
diff --git a/libs/hwui/renderthread/RenderThread.cpp b/libs/hwui/renderthread/RenderThread.cpp
index 308352d..0268bfd7 100644
--- a/libs/hwui/renderthread/RenderThread.cpp
+++ b/libs/hwui/renderthread/RenderThread.cpp
@@ -323,6 +323,10 @@
     dprintf(fd, "\nPipeline=%s\n%s\n", pipelineToString(), cachesOutput.string());
 }
 
+void RenderThread::getMemoryUsage(size_t* cpuUsage, size_t* gpuUsage) {
+    mCacheManager->getMemoryUsage(cpuUsage, gpuUsage);
+}
+
 Readback& RenderThread::readback() {
     if (!mReadback) {
         mReadback = new Readback(*this);
diff --git a/libs/hwui/renderthread/RenderThread.h b/libs/hwui/renderthread/RenderThread.h
index afd5750..5021085 100644
--- a/libs/hwui/renderthread/RenderThread.h
+++ b/libs/hwui/renderthread/RenderThread.h
@@ -151,6 +151,7 @@
 
     sk_sp<Bitmap> allocateHardwareBitmap(SkBitmap& skBitmap);
     void dumpGraphicsMemory(int fd, bool includeProfileData);
+    void getMemoryUsage(size_t* cpuUsage, size_t* gpuUsage);
 
     void requireGlContext();
     void requireVkContext();
diff --git a/libs/hwui/tests/common/TestScene.h b/libs/hwui/tests/common/TestScene.h
index 781884a..6b0be53 100644
--- a/libs/hwui/tests/common/TestScene.h
+++ b/libs/hwui/tests/common/TestScene.h
@@ -40,6 +40,7 @@
         int reportFrametimeWeight = 0;
         bool renderOffscreen = true;
         bool reportGpuMemoryUsage = false;
+        bool reportGpuMemoryUsageVerbose = false;
     };
 
     template <class T>
diff --git a/libs/hwui/tests/macrobench/TestSceneRunner.cpp b/libs/hwui/tests/macrobench/TestSceneRunner.cpp
index 9d3b732..b640b90 100644
--- a/libs/hwui/tests/macrobench/TestSceneRunner.cpp
+++ b/libs/hwui/tests/macrobench/TestSceneRunner.cpp
@@ -28,6 +28,20 @@
 #include <log/log.h>
 #include <ui/PixelFormat.h>
 
+// These are unstable internal APIs in google-benchmark. We should implement our own variant of
+// these instead, but this was quicker. Disabled by default to avoid breakage if a
+// google-benchmark update changes them.
+#if 0
+#define USE_SKETCHY_INTERNAL_STATS
+namespace benchmark {
+std::vector<BenchmarkReporter::Run> ComputeStats(
+        const std::vector<BenchmarkReporter::Run> &reports);
+double StatisticsMean(const std::vector<double>& v);
+double StatisticsMedian(const std::vector<double>& v);
+double StatisticsStdDev(const std::vector<double>& v);
+}
+#endif
+
 using namespace android;
 using namespace android::uirenderer;
 using namespace android::uirenderer::renderthread;
@@ -66,6 +80,7 @@
 
 void outputBenchmarkReport(const TestScene::Info& info, const TestScene::Options& opts,
                            double durationInS, int repetationIndex, BenchmarkResults* reports) {
+    using namespace benchmark;
     benchmark::BenchmarkReporter::Run report;
     report.repetitions = opts.repeatCount;
     report.repetition_index = repetationIndex;
@@ -73,12 +88,22 @@
     report.iterations = static_cast<int64_t>(opts.frameCount);
     report.real_accumulated_time = durationInS;
     report.cpu_accumulated_time = durationInS;
-    report.counters["items_per_second"] = opts.frameCount / durationInS;
+    report.counters["FPS"] = opts.frameCount / durationInS;
+    if (opts.reportGpuMemoryUsage) {
+        size_t cpuUsage, gpuUsage;
+        RenderProxy::getMemoryUsage(&cpuUsage, &gpuUsage);
+        report.counters["Rendering RAM"] = Counter{static_cast<double>(cpuUsage + gpuUsage),
+                                                   Counter::kDefaults, Counter::kIs1024};
+    }
     reports->push_back(report);
 }
 
 static void doRun(const TestScene::Info& info, const TestScene::Options& opts, int repetitionIndex,
                   BenchmarkResults* reports) {
+    if (opts.reportGpuMemoryUsage) {
+        // If we're reporting GPU memory usage, start each run from a clean slate
+        RenderProxy::purgeCaches();
+    }
     Properties::forceDrawFrame = true;
     TestContext testContext;
     testContext.setRenderOffscreen(opts.renderOffscreen);
@@ -162,11 +187,6 @@
 
 void run(const TestScene::Info& info, const TestScene::Options& opts,
          benchmark::BenchmarkReporter* reporter) {
-    if (opts.reportGpuMemoryUsage) {
-        // If we're reporting GPU memory usage we need to first start with a clean slate
-        // All repetitions of the same test will share a single memory usage report
-        RenderProxy::trimMemory(100);
-    }
     BenchmarkResults results;
     for (int i = 0; i < opts.repeatCount; i++) {
         doRun(info, opts, i, reporter ? &results : nullptr);
@@ -174,10 +194,21 @@
     if (reporter) {
         reporter->ReportRuns(results);
         if (results.size() > 1) {
-            // TODO: Report summary
+#ifdef USE_SKETCHY_INTERNAL_STATS
+            std::vector<benchmark::internal::Statistics> stats;
+            stats.reserve(3);
+            stats.emplace_back("mean", benchmark::StatisticsMean);
+            stats.emplace_back("median", benchmark::StatisticsMedian);
+            stats.emplace_back("stddev", benchmark::StatisticsStdDev);
+            for (auto& it : results) {
+                it.statistics = &stats;
+            }
+            auto summary = benchmark::ComputeStats(results);
+            reporter->ReportRuns(summary);
+#endif
         }
     }
-    if (opts.reportGpuMemoryUsage) {
+    if (opts.reportGpuMemoryUsageVerbose) {
         RenderProxy::dumpGraphicsMemory(STDOUT_FILENO, false);
     }
 }
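
The USE_SKETCHY_INTERNAL_STATS path above depends on google-benchmark internals that the comment
flags as unstable. As a rough illustration of the "implement our own variant" option mentioned
there (sketch only; the names are hypothetical and nothing below is part of the patch), a
self-contained summary over the per-repetition times could look like:

    // Illustrative only: mean/median/sample-stddev without google-benchmark internals.
    #include <algorithm>
    #include <cmath>
    #include <vector>

    struct RunStats {
        double mean = 0;
        double median = 0;
        double stddev = 0;
    };

    static RunStats summarize(std::vector<double> samples) {
        RunStats s;
        if (samples.empty()) return s;
        double sum = 0;
        for (double v : samples) sum += v;
        s.mean = sum / samples.size();
        std::sort(samples.begin(), samples.end());
        const size_t mid = samples.size() / 2;
        s.median = (samples.size() % 2) ? samples[mid]
                                        : (samples[mid - 1] + samples[mid]) / 2.0;
        double sq = 0;
        for (double v : samples) sq += (v - s.mean) * (v - s.mean);
        s.stddev = samples.size() > 1 ? std::sqrt(sq / (samples.size() - 1)) : 0.0;
        return s;
    }
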
diff --git a/libs/hwui/tests/macrobench/main.cpp b/libs/hwui/tests/macrobench/main.cpp
index e9e962a..f3f32eb 100644
--- a/libs/hwui/tests/macrobench/main.cpp
+++ b/libs/hwui/tests/macrobench/main.cpp
@@ -71,7 +71,7 @@
   --benchmark_format   Set output format. Possible values are tabular, json, csv
   --renderer=TYPE      Sets the render pipeline to use. May be skiagl or skiavk
   --skip-leak-check    Skips the memory leak check
-  --report-gpu-memory  Dumps the GPU memory usage after each test run
+  --report-gpu-memory[=verbose]  Reports memory usage; 'verbose' also dumps the full report after each test run
 )");
 }
 
@@ -142,7 +142,7 @@
     } else if (!strcmp(format, "json")) {
         gBenchmarkReporter.reset(new benchmark::JSONReporter());
     } else {
-        fprintf(stderr, "Unknown format '%s'", format);
+        fprintf(stderr, "Unknown format '%s'\n", format);
         return false;
     }
     return true;
@@ -154,7 +154,7 @@
     } else if (!strcmp(renderer, "skiavk")) {
         Properties::overrideRenderPipelineType(RenderPipelineType::SkiaVulkan);
     } else {
-        fprintf(stderr, "Unknown format '%s'", renderer);
+        fprintf(stderr, "Unknown format '%s'\n", renderer);
         return false;
     }
     return true;
@@ -191,7 +191,7 @@
         {"offscreen", no_argument, nullptr, LongOpts::Offscreen},
         {"renderer", required_argument, nullptr, LongOpts::Renderer},
         {"skip-leak-check", no_argument, nullptr, LongOpts::SkipLeakCheck},
-        {"report-gpu-memory", no_argument, nullptr, LongOpts::ReportGpuMemory},
+        {"report-gpu-memory", optional_argument, nullptr, LongOpts::ReportGpuMemory},
         {0, 0, 0, 0}};
 
 static const char* SHORT_OPTIONS = "c:r:h";
@@ -296,6 +296,14 @@
 
             case LongOpts::ReportGpuMemory:
                 gOpts.reportGpuMemoryUsage = true;
+                if (optarg) {
+                    if (!strcmp("verbose", optarg)) {
+                        gOpts.reportGpuMemoryUsageVerbose = true;
+                    } else {
+                        fprintf(stderr, "Invalid report gpu memory option '%s'\n", optarg);
+                        error = true;
+                    }
+                }
                 break;
 
             case 'h':
@@ -313,7 +321,7 @@
     }
 
     if (error) {
-        fprintf(stderr, "Try 'hwuitest --help' for more information.\n");
+        fprintf(stderr, "Try '%s --help' for more information.\n", argv[0]);
         exit(EXIT_FAILURE);
     }