Andy Hung | f1a2383 | 2021-12-13 09:31:55 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2021 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #pragma once |
| 18 | |
#include <sys/types.h>  // uid_t

#include <cstdint>
#include <deque>
#include <iomanip>
#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <utility>

#include "MediaMetricsConstants.h"
| 23 | |
| 24 | namespace android::mediametrics { |
| 25 | |
/**
 * HeatData accumulates statistics on the status reported for a given key.
 *
 * HeatData is a helper class used by HeatMap to represent statistics.  It is exposed
 * here for testing purposes.
 *
 * Note: This class is not thread safe, so mutual exclusion must be provided by the caller,
 * which in this case is HeatMap.  HeatMap getData() returns a local copy of HeatData, so use
 * of that copy is thread-safe.
 */
class HeatData {
    /* HeatData for a key is stored in a map based on the event (e.g. "start", "pause", "create")
     * and then another map based on the status (e.g. "ok", "argument", "state").
     */
    std::map<std::string /* event */,
             std::map<std::string /* status name */, size_t /* count, nonzero */>> mMap;

public:
    /**
     * Adds status data, incrementing the count kept for the {event, status} pair.
     *
     * \param suffix  (ignored) the suffix to the key that was stripped, if any.
     * \param event   the event (e.g. create, start, pause, stop, etc.).
     * \param status  the status name (e.g. "ok", "argument", "state").
     * \param uid     (ignored) the uid associated with the error.
     * \param message (ignored) the status message, if any.
     * \param subCode (ignored) the status subcode, if any.
     */
    void add(const std::string& suffix, const std::string& event, const std::string& status,
            uid_t uid, const std::string& message, int32_t subCode) {
        // Only event and status are tallied; perhaps there could be a more detailed print.
        (void)suffix;
        (void)uid;
        (void)message;
        (void)subCode;
        // operator[] creates the event entry and a zero count on first use.
        ++mMap[event][status];
    }

    /** Returns the number of event names with status. */
    size_t size() const {
        return mMap.size();
    }

    /**
     * Returns a deque with pairs indicating the count of Oks and Errors.
     * The first pair is the total, the other pairs follow the iteration order of mMap.
     * Any status name other than AMEDIAMETRICS_PROP_STATUS_VALUE_OK is counted as an error.
     *
     * Example return value of {ok, error} pairs:
     *     total      key1       key2
     * { { 2, 1 }, { 1, 0 }, { 1, 1 } }
     */
    std::deque<std::pair<size_t /* oks */, size_t /* errors */>> heatCount() const {
        std::deque<std::pair<size_t /* oks */, size_t /* errors */>> heat;
        size_t totalOk = 0;
        size_t totalError = 0;
        for (const auto& eventPair : mMap) {
            size_t ok = 0;
            size_t error = 0;
            for (const auto& [name, count] : eventPair.second) {
                if (name == AMEDIAMETRICS_PROP_STATUS_VALUE_OK) {
                    ok += count;
                } else {
                    error += count;
                }
            }
            totalOk += ok;
            totalError += error;
            heat.emplace_back(ok, error);
        }
        // The grand total goes first; a deque makes the front insertion cheap.
        heat.emplace_front(totalOk, totalError);
        return heat;
    }

    /**
     * Returns the error fraction from a pair <oks, errors>, a float between 0.f to 1.f.
     *
     * An empty pair {0, 0} yields 0.f (no errors) rather than the NaN that a 0/0
     * division would produce.
     */
    static float fraction(const std::pair<size_t, size_t>& count) {
        const size_t total = count.first + count.second;
        if (total == 0) {
            return 0.f;
        }
        return static_cast<float>(count.second) / total;
    }

    /**
     * Returns the HeatMap information in a single line string.
     *
     * The total error fraction and each per-event error fraction are printed with
     * two decimals, and only when they are greater than zero.
     */
    std::string dump() const {
        const auto heat = heatCount();
        auto it = heat.begin();  // First the total, then one pair per event in mMap order.
        std::stringstream ss;

        // Appends the error fraction for a pair, omitting it when there are no errors.
        const auto appendErrorFraction = [&ss](const std::pair<size_t, size_t>& count) {
            const float errorFraction = fraction(count);
            if (errorFraction > 0.f) {
                ss << std::fixed << std::setprecision(2) << errorFraction << " ";
            }
        };

        ss << "{ ";
        appendErrorFraction(*it);
        ++it;
        for (const auto& eventPair : mMap) {
            ss << eventPair.first << ": { ";
            appendErrorFraction(*it);
            ++it;
            for (const auto& [name, count] : eventPair.second) {
                ss << "[ " << name << " : " << count << " ] ";
            }
            ss << "} ";
        }
        ss << " }";
        return ss.str();
    }
};
| 128 | |
| 129 | /** |
| 130 | * HeatMap is a thread-safe collection that counts activity of status errors per key. |
| 131 | * |
| 132 | * The classic heat map is a 2D picture with intensity shown by color. |
| 133 | * Here we accumulate the status results from keys to see if there are consistent |
| 134 | * failures in the system. |
| 135 | * |
| 136 | * TODO(b/210855555): Heatmap improvements. |
| 137 | * 1) heat decays in intensity in time for past events, currently we don't decay. |
| 138 | */ |
| 139 | |
| 140 | class HeatMap { |
| 141 | const size_t mMaxSize; |
| 142 | mutable std::mutex mLock; |
| 143 | size_t mRejected GUARDED_BY(mLock) = 0; |
| 144 | std::map<std::string, HeatData> mMap GUARDED_BY(mLock); |
| 145 | |
| 146 | public: |
| 147 | /** |
| 148 | * Constructs a HeatMap. |
| 149 | * |
| 150 | * \param maxSize the maximum number of elements that are tracked. |
| 151 | */ |
| 152 | explicit HeatMap(size_t maxSize) : mMaxSize(maxSize) { |
| 153 | } |
| 154 | |
| 155 | /** Returns the number of keys. */ |
| 156 | size_t size() const { |
| 157 | std::lock_guard l(mLock); |
| 158 | return mMap.size(); |
| 159 | } |
| 160 | |
| 161 | /** Clears error history. */ |
| 162 | void clear() { |
| 163 | std::lock_guard l(mLock); |
| 164 | return mMap.clear(); |
| 165 | } |
| 166 | |
| 167 | /** Returns number of keys rejected due to space. */ |
| 168 | size_t rejected() const { |
| 169 | std::lock_guard l(mLock); |
| 170 | return mRejected; |
| 171 | } |
| 172 | |
| 173 | /** Returns a copy of the heat data associated with key. */ |
| 174 | HeatData getData(const std::string& key) const { |
| 175 | std::lock_guard l(mLock); |
| 176 | return mMap.count(key) == 0 ? HeatData{} : mMap.at(key); |
| 177 | } |
| 178 | |
| 179 | /** |
| 180 | * Adds a new entry. |
| 181 | * \param key the key category (e.g. audio.track). |
| 182 | * \param suffix (ignored) the suffix to the key that was stripped, if any. |
| 183 | * \param event the event (e.g. create, start, pause, stop, etc.). |
| 184 | * \param uid (ignored) the uid associated with the error. |
| 185 | * \param message (ignored) the status message, if any. |
| 186 | * \param subCode (ignored) the status subcode, if any. |
| 187 | */ |
| 188 | void add(const std::string& key, const std::string& suffix, const std::string& event, |
| 189 | const std::string& status, uid_t uid, const std::string& message, int32_t subCode) { |
| 190 | std::lock_guard l(mLock); |
| 191 | |
| 192 | // Hard limit on heat map entries. |
| 193 | // TODO: have better GC. |
| 194 | if (mMap.size() == mMaxSize && mMap.count(key) == 0) { |
| 195 | ++mRejected; |
| 196 | return; |
| 197 | } |
| 198 | mMap[key].add(suffix, event, status, uid, message, subCode); |
| 199 | } |
| 200 | |
| 201 | /** |
| 202 | * Returns a pair consisting of the dump string and the number of lines in the string. |
| 203 | */ |
| 204 | std::pair<std::string, int32_t> dump(int32_t lines = INT32_MAX) const { |
| 205 | std::stringstream ss; |
| 206 | int32_t ll = lines; |
| 207 | std::lock_guard l(mLock); |
Andy Hung | f1a2383 | 2021-12-13 09:31:55 -0800 | [diff] [blame] | 208 | if (ll > 0) { |
| 209 | ss << "Error Heat Map (rejected: " << mRejected << "):\n"; |
| 210 | --ll; |
| 211 | } |
| 212 | // TODO: restriction is implemented alphabetically not on priority. |
Andy Hung | 579f41f | 2021-12-16 08:58:07 -0800 | [diff] [blame] | 213 | for (const auto& [name, data] : mMap) { |
Andy Hung | f1a2383 | 2021-12-13 09:31:55 -0800 | [diff] [blame] | 214 | if (ll <= 0) break; |
Andy Hung | 579f41f | 2021-12-16 08:58:07 -0800 | [diff] [blame] | 215 | ss << name << ": " << data.dump() << "\n"; |
Andy Hung | f1a2383 | 2021-12-13 09:31:55 -0800 | [diff] [blame] | 216 | --ll; |
| 217 | } |
| 218 | return { ss.str(), lines - ll }; |
| 219 | } |
| 220 | }; |
| 221 | |
| 222 | } // namespace android::mediametrics |