MediaMetrics: Add HeatMap to track items with status.

Audio items with status can be automatically tracked by HeatMap,
which accumulates error statistics.

Test: atest mediametrics_tests
Bug: 199763036
Change-Id: I6dd3d475400aac725e58890181ec309598525c03
diff --git a/media/libmediametrics/MediaMetricsItem.cpp b/media/libmediametrics/MediaMetricsItem.cpp
index a7ec975..36ab8c3 100644
--- a/media/libmediametrics/MediaMetricsItem.cpp
+++ b/media/libmediametrics/MediaMetricsItem.cpp
@@ -57,6 +57,7 @@
     // This may be found in frameworks/av/media/libmediametrics/include/MediaMetricsConstants.h
     static std::unordered_map<std::string, int32_t> map{
         {"",                                      NO_ERROR},
+        {AMEDIAMETRICS_PROP_ERROR_VALUE_OK,       NO_ERROR},
         {AMEDIAMETRICS_PROP_ERROR_VALUE_ARGUMENT, BAD_VALUE},
         {AMEDIAMETRICS_PROP_ERROR_VALUE_IO,       DEAD_OBJECT},
         {AMEDIAMETRICS_PROP_ERROR_VALUE_MEMORY,   NO_MEMORY},
diff --git a/media/libmediametrics/include/MediaMetricsConstants.h b/media/libmediametrics/include/MediaMetricsConstants.h
index aeaa49c..7d120b5 100644
--- a/media/libmediametrics/include/MediaMetricsConstants.h
+++ b/media/libmediametrics/include/MediaMetricsConstants.h
@@ -238,8 +238,9 @@
 // https://cs.android.com/android/platform/superproject/+/master:frameworks/base/media/java/android/media/AudioSystem.java;drc=3ac246c43294d7f7012bdcb0ccb7bae1aa695bd4;l=785
 // https://cs.android.com/android/platform/superproject/+/master:frameworks/av/media/libaaudio/include/aaudio/AAudio.h;drc=cfd3a6fa3aaaf712a890dc02452b38ef401083b8;l=120
 
-// Error category:
-// An empty error string indicates no error.
+// Status errors:
+// An empty status string or "ok" is interpreted as no error.
+#define AMEDIAMETRICS_PROP_ERROR_VALUE_OK                 "ok"
 
 // Error category: argument
 //   IllegalArgumentException
diff --git a/services/mediametrics/AudioAnalytics.h b/services/mediametrics/AudioAnalytics.h
index 2b41a95..bcc407a 100644
--- a/services/mediametrics/AudioAnalytics.h
+++ b/services/mediametrics/AudioAnalytics.h
@@ -20,6 +20,7 @@
 #include "AnalyticsActions.h"
 #include "AnalyticsState.h"
 #include "AudioPowerUsage.h"
+#include "HeatMap.h"
 #include "StatsdLog.h"
 #include "TimedAction.h"
 #include "Wrap.h"
@@ -73,11 +74,23 @@
     std::pair<std::string, int32_t> dump(
             int32_t lines = INT32_MAX, int64_t sinceNs = 0, const char *prefix = nullptr) const;
 
+    /**
+     * Dumps the HeatMap by forwarding to mHeatMap.dump(lines).
+     *
+     * Returns a pair consisting of the dump string and the number of lines in the string.
+     */
+    std::pair<std::string, int32_t> dumpHeatMap(int32_t lines = INT32_MAX) const {
+        return mHeatMap.dump(lines);
+    }
+
     void clear() {
         // underlying state is locked.
         mPreviousAnalyticsState->clear();
         mAnalyticsState->clear();
 
+        // Clear the heat map of accumulated status counts.
+        mHeatMap.clear();
+
         // Clear power usage state.
         mAudioPowerUsage.clear();
     }
@@ -124,6 +137,9 @@
     TimedAction mTimedAction; // locked internally
     const std::shared_ptr<StatsdLog> mStatsdLog; // locked internally, ok for multiple threads.
 
+    static constexpr size_t kHeatEntries = 100;
+    HeatMap mHeatMap{kHeatEntries}; // locked internally, ok for multiple threads.
+
     // DeviceUse is a nested class which handles audio device usage accounting.
     // We define this class at the end to ensure prior variables all properly constructed.
     // TODO: Track / Thread interaction
diff --git a/services/mediametrics/HeatMap.h b/services/mediametrics/HeatMap.h
new file mode 100644
index 0000000..60a79b3
--- /dev/null
+++ b/services/mediametrics/HeatMap.h
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2021 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <iomanip>
+#include <map>
+#include <sstream>
+#include "MediaMetricsConstants.h"
+
+namespace android::mediametrics {
+
+/**
+ * HeatData accumulates statistics on the status reported for a given key.
+ *
+ * HeatData is a helper class used by HeatMap to represent statistics.  We expose it
+ * here for testing purposes currently.
+ *
+ * Note: This class is not thread safe, so mutual exclusion should be obtained by the caller
+ * which in this case is HeatMap.  HeatMap getData() returns a local copy of HeatData, so use
+ * of that is thread-safe.
+ */
+class HeatData {
+    /* HeatData for a key is stored in a map keyed by the event (e.g. "start", "pause",
+     * "create"), whose value is another map keyed by the status (e.g. "ok", "argument", "state").
+     */
+    std::map<std::string /* event */,
+             std::map<std::string /* status name */, size_t /* count, nonzero */>> mMap;
+
+public:
+    /**
+     * Adds status data by incrementing the count stored for (event, status).
+     * \param suffix  (ignored) the suffix to the key that was stripped, if any.
+     * \param event             the event (e.g. create, start, pause, stop, etc.).
+     * \param status            the status name (e.g. "ok", "argument", "state").
+     * \param uid     (ignored) the uid associated with the error.
+     * \param message (ignored) the status message, if any.
+     * \param subCode (ignored) the status subcode, if any.
+     */
+    void add(const std::string& suffix, const std::string& event, const std::string& status,
+            uid_t uid, const std::string& message, int32_t subCode) {
+        // Only event and status are counted; perhaps there could be a more detailed print.
+        (void)suffix;
+        (void)uid;
+        (void)message;
+        (void)subCode;
+        ++mMap[event][status];
+    }
+
+    /** Returns the number of distinct event names for which a status has been recorded. */
+    size_t size() const {
+        return mMap.size();
+    }
+
+    /**
+     * Returns a deque of {ok, error} count pairs; "ok" and the empty status count as ok.
+     * The first pair is the total, the other pairs are in the iteration order of mMap.
+     *
+     * Example return value of {ok, error} pairs:
+     *     total    event1    event2
+     * { { 2, 1 }, { 1, 0 }, { 1, 1 } }
+     */
+    std::deque<std::pair<size_t /* oks */, size_t /* errors */>> heatCount() const {
+        size_t totalOk = 0;
+        size_t totalError = 0;
+        std::deque<std::pair<size_t /* oks */, size_t /* errors */>> heat;
+        for (const auto &eventPair : mMap) {
+            size_t ok = 0;
+            size_t error = 0;
+            for (const auto &[name, count] : eventPair.second) {
+                if (name.empty() || name == AMEDIAMETRICS_PROP_ERROR_VALUE_OK) {
+                    ok += count;
+                } else {
+                    error += count;
+                }
+            }
+            totalOk += ok;
+            totalError += error;
+            heat.emplace_back(ok, error);
+        }
+        heat.emplace_front(totalOk, totalError);
+        return heat;
+    }
+
+    /** Returns the error fraction of <oks, errors>, 0.f to 1.f; 0.f if no errors (avoids 0/0). */
+    static float fraction(const std::pair<size_t, size_t>& count) {
+        return count.second == 0 ? 0.f : (float)count.second / (count.first + count.second);
+    }
+
+    /** Returns the HeatData information in a single line string. */
+    std::string dump() const {
+        const auto heat = heatCount();
+        auto it = heat.begin();
+        std::stringstream ss;
+        ss << "{ ";
+        float errorFraction = fraction(*it++);  // the first entry is the total
+        if (errorFraction > 0.f) {  // the fraction is printed only when it is positive
+            ss << std::fixed << std::setprecision(2) << errorFraction << " ";
+        }
+        for (const auto &eventPair : mMap) {
+            ss << eventPair.first << ": { ";
+            errorFraction = fraction(*it++);  // remaining entries follow the order of mMap
+            if (errorFraction > 0.f) {
+                ss << std::fixed << std::setprecision(2) << errorFraction << " ";
+            }
+            for (const auto &[name, count]: eventPair.second) {
+                ss << "[ " << name << " : " << count << " ] ";
+            }
+            ss << "} ";
+        }
+        ss << " }";
+        return ss.str();
+    }
+};
+
+/**
+ * HeatMap is a thread-safe collection that counts activity of status errors per key.
+ *
+ * The classic heat map is a 2D picture with intensity shown by color.
+ * Here we accumulate the status results from keys to see if there are consistent
+ * failures in the system.
+ *
+ * TODO(b/210855555): Heatmap improvements.
+ *   1) heat decays in intensity in time for past events, currently we don't decay.
+ */
+
+class HeatMap {
+    const size_t mMaxSize;  // maximum number of keys held in mMap
+    mutable std::mutex mLock;
+    size_t mRejected GUARDED_BY(mLock) = 0;  // add() calls dropped because mMap was full
+    std::map<std::string, HeatData> mMap GUARDED_BY(mLock);  // key -> status statistics
+
+public:
+    /**
+     * Constructs a HeatMap.
+     *
+     * \param maxSize the maximum number of keys tracked; add() rejects new keys once reached.
+     */
+    explicit HeatMap(size_t maxSize) : mMaxSize(maxSize) {
+    }
+
+    /** Returns the number of keys currently tracked. */
+    size_t size() const {
+        std::lock_guard l(mLock);
+        return mMap.size();
+    }
+
+    /** Clears error history. NOTE(review): mRejected is not reset here; confirm this is intended. */
+    void clear() {
+        std::lock_guard l(mLock);
+        return mMap.clear();
+    }
+
+    /** Returns the number of add() calls rejected because the map was full. */
+    size_t rejected() const {
+        std::lock_guard l(mLock);
+        return mRejected;
+    }
+
+    /** Returns a copy of the heat data associated with key, or an empty HeatData if absent. */
+    HeatData getData(const std::string& key) const {
+        std::lock_guard l(mLock);
+        return mMap.count(key) == 0 ? HeatData{} : mMap.at(key);
+    }
+
+    /** Adds a new entry; when the map is full, an entry for a new key is rejected and counted.
+     * \param key               the key category (e.g. audio.track).
+     * \param suffix  (ignored) the suffix to the key that was stripped, if any.
+     * \param event             the event (e.g. create, start, pause, stop, etc.).
+     * \param status            the status name (e.g. "ok", "argument", "state").
+     * \param uid     (ignored) the uid associated with the error.
+     * \param message (ignored) the status message, if any.
+     * \param subCode (ignored) the status subcode, if any.
+     */
+    void add(const std::string& key, const std::string& suffix, const std::string& event,
+            const std::string& status, uid_t uid, const std::string& message, int32_t subCode) {
+        std::lock_guard l(mLock);
+
+        // Hard limit on heat map entries.
+        // TODO: have better GC.
+        if (mMap.size() == mMaxSize && mMap.count(key) == 0) {
+            ++mRejected;
+            return;
+        }
+        mMap[key].add(suffix, event, status, uid, message, subCode);
+    }
+
+    /**
+     * Returns the dump string (at most `lines` lines, header included) and its line count.
+     */
+    std::pair<std::string, int32_t> dump(int32_t lines = INT32_MAX) const {
+        std::stringstream ss;
+        int32_t ll = lines;  // remaining line budget
+        std::lock_guard l(mLock);
+        auto it = mMap.begin();
+        if (ll > 0) {
+            ss << "Error Heat Map (rejected: " << mRejected << "):\n";
+            --ll;
+        }
+        // TODO: restriction is implemented alphabetically not on priority.
+        for (int32_t count = 0; it != mMap.end() && count < lines; ++count, ++it) {
+            if (ll <= 0) break;  // line budget exhausted
+            ss << it->first << ": " << it->second.dump() << "\n";
+            --ll;
+        }
+        return { ss.str(), lines - ll };  // lines - ll is the number of lines written
+    }
+};
+
+} // namespace android::mediametrics
diff --git a/services/mediametrics/MediaMetricsService.cpp b/services/mediametrics/MediaMetricsService.cpp
index 35e0ae4..636b343 100644
--- a/services/mediametrics/MediaMetricsService.cpp
+++ b/services/mediametrics/MediaMetricsService.cpp
@@ -319,11 +319,19 @@
                 result << "-- some lines may be truncated --\n";
             }
 
-            result << "LogSessionId:\n"
+            const int32_t heatLinesToDump = all ? INT32_MAX : 20;
+            const auto [ heatDumpString, heatLines] =
+                    mAudioAnalytics.dumpHeatMap(heatLinesToDump);
+            result << "\n" << heatDumpString;
+            if (heatLines == heatLinesToDump) {
+                result << "-- some lines may be truncated --\n";
+            }
+
+            result << "\nLogSessionId:\n"
                    << mediametrics::ValidateId::get()->dump();
 
             // Dump the statsd atoms we sent out.
-            result << "Statsd atoms:\n"
+            result << "\nStatsd atoms:\n"
                    << mStatsdLog->dumpToString("  " /* prefix */,
                            all ? STATSD_LOG_LINES_MAX : STATSD_LOG_LINES_DUMP);
         }
diff --git a/services/mediametrics/tests/mediametrics_tests.cpp b/services/mediametrics/tests/mediametrics_tests.cpp
index cd6af9f..d334676 100644
--- a/services/mediametrics/tests/mediametrics_tests.cpp
+++ b/services/mediametrics/tests/mediametrics_tests.cpp
@@ -1251,3 +1251,46 @@
     // Status errors specially considered.
     ASSERT_EQ(DEAD_OBJECT, roundTrip(FAILED_TRANSACTION));
 }
+
+TEST(mediametrics_tests, HeatMap) {
+    constexpr size_t SIZE = 2;  // maximum number of keys the heat map will hold
+    android::mediametrics::HeatMap heatMap{SIZE};
+    constexpr uid_t UID = 0;
+    constexpr int32_t SUBCODE = 1;
+
+    ASSERT_EQ((size_t)0, heatMap.size());
+    heatMap.add("someKey", "someSuffix", "someEvent",
+            AMEDIAMETRICS_PROP_ERROR_VALUE_OK, UID, "message", SUBCODE);
+    ASSERT_EQ((size_t)1, heatMap.size());
+    heatMap.add("someKey", "someSuffix", "someEvent",
+            AMEDIAMETRICS_PROP_ERROR_VALUE_OK, UID, "message", SUBCODE);
+    heatMap.add("someKey", "someSuffix", "anotherEvent",
+            AMEDIAMETRICS_PROP_ERROR_VALUE_ARGUMENT, UID, "message", SUBCODE);
+    ASSERT_EQ((size_t)1, heatMap.size());  // same key, so the key count is unchanged
+    heatMap.add("anotherKey", "someSuffix", "someEvent",
+            AMEDIAMETRICS_PROP_ERROR_VALUE_OK, UID, "message", SUBCODE);
+    ASSERT_EQ((size_t)2, heatMap.size());
+    ASSERT_EQ((size_t)0, heatMap.rejected());
+
+    heatMap.add("thirdKey", "someSuffix", "someEvent",
+            AMEDIAMETRICS_PROP_ERROR_VALUE_OK, UID, "message", SUBCODE);
+    ASSERT_EQ((size_t)2, heatMap.size());  // map is full, so the third key is not added
+    ASSERT_EQ((size_t)1, heatMap.rejected());
+
+    android::mediametrics::HeatData heatData = heatMap.getData("someKey");
+    ASSERT_EQ((size_t)2, heatData.size());
+    auto count = heatData.heatCount();
+    ASSERT_EQ((size_t)3, count.size()); // pairs in order { total, "anotherEvent", "someEvent" }
+    // check total value
+    ASSERT_EQ((size_t)2, count[0].first);  // OK
+    ASSERT_EQ((size_t)1, count[0].second); // ERROR
+    // first event "anotherEvent"
+    ASSERT_EQ((size_t)0, count[1].first);  // OK
+    ASSERT_EQ((size_t)1, count[1].second); // ERROR
+    // second event "someEvent"
+    ASSERT_EQ((size_t)2, count[2].first);  // OK
+    ASSERT_EQ((size_t)0, count[2].second); // ERROR
+
+    heatMap.clear();
+    ASSERT_EQ((size_t)0, heatMap.size());
+}