blob: 230d1ad2ec4685a9693bc395eae99f8ff0574d28 [file] [log] [blame]
Andy Hung137a99f2021-12-13 09:31:55 -08001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
19#include <iomanip>
20#include <map>
21#include <sstream>
22#include "MediaMetricsConstants.h"
23
24namespace android::mediametrics {
25
26/**
27 * HeatData accumulates statistics on the status reported for a given key.
28 *
29 * HeatData is a helper class used by HeatMap to represent statistics. We expose it
30 * here for testing purposes currently.
31 *
32 * Note: This class is not thread safe, so mutual exclusion should be obtained by the caller
33 * which in this case is HeatMap. HeatMap getData() returns a local copy of HeatData, so use
34 * of that is thread-safe.
35 */
36class HeatData {
37 /* HeatData for a key is stored in a map based on the event (e.g. "start", "pause", create)
38 * and then another map based on the status (e.g. "ok", "argument", "state").
39 */
40 std::map<std::string /* event */,
41 std::map<std::string /* status name */, size_t /* count, nonzero */>> mMap;
42
43public:
44 /**
45 * Add status data.
46 *
47 * \param suffix (ignored) the suffix to the key that was stripped, if any.
48 * \param event the event (e.g. create, start, pause, stop, etc.).
49 * \param uid (ignored) the uid associated with the error.
50 * \param message (ignored) the status message, if any.
51 * \param subCode (ignored) the status subcode, if any.
52 */
53 void add(const std::string& suffix, const std::string& event, const std::string& status,
54 uid_t uid, const std::string& message, int32_t subCode) {
55 // Perhaps there could be a more detailed print.
56 (void)suffix;
57 (void)uid;
58 (void)message;
59 (void)subCode;
60 ++mMap[event][status];
61 }
62
63 /** Returns the number of event names with status. */
64 size_t size() const {
65 return mMap.size();
66 }
67
68 /**
69 * Returns a deque with pairs indicating the count of Oks and Errors.
70 * The first pair is total, the other pairs are in order of mMap.
71 *
72 * Example return value of {ok, error} pairs:
73 * total key1 key2
74 * { { 2, 1 }, { 1, 0 }, { 1, 1 } }
75 */
76 std::deque<std::pair<size_t /* oks */, size_t /* errors */>> heatCount() const {
77 size_t totalOk = 0;
78 size_t totalError = 0;
79 std::deque<std::pair<size_t /* oks */, size_t /* errors */>> heat;
80 for (const auto &eventPair : mMap) {
81 size_t ok = 0;
82 size_t error = 0;
83 for (const auto &[name, count] : eventPair.second) {
84 if (name == AMEDIAMETRICS_PROP_ERROR_VALUE_OK) {
85 ok += count;
86 } else {
87 error += count;
88 }
89 }
90 totalOk += ok;
91 totalError += error;
92 heat.emplace_back(ok, error);
93 }
94 heat.emplace_front(totalOk, totalError);
95 return heat;
96 }
97
98 /** Returns the error fraction from a pair <oks, errors>, a float between 0.f to 1.f. */
99 static float fraction(const std::pair<size_t, size_t>& count) {
100 return (float)count.second / (count.first + count.second);
101 }
102
103 /** Returns the HeatMap information in a single line string. */
104 std::string dump() const {
105 const auto heat = heatCount();
106 auto it = heat.begin();
107 std::stringstream ss;
108 ss << "{ ";
109 float errorFraction = fraction(*it++);
110 if (errorFraction > 0.f) {
111 ss << std::fixed << std::setprecision(2) << errorFraction << " ";
112 }
113 for (const auto &eventPair : mMap) {
114 ss << eventPair.first << ": { ";
115 errorFraction = fraction(*it++);
116 if (errorFraction > 0.f) {
117 ss << std::fixed << std::setprecision(2) << errorFraction << " ";
118 }
119 for (const auto &[name, count]: eventPair.second) {
120 ss << "[ " << name << " : " << count << " ] ";
121 }
122 ss << "} ";
123 }
124 ss << " }";
125 return ss.str();
126 }
127};
128
129/**
130 * HeatMap is a thread-safe collection that counts activity of status errors per key.
131 *
132 * The classic heat map is a 2D picture with intensity shown by color.
133 * Here we accumulate the status results from keys to see if there are consistent
134 * failures in the system.
135 *
136 * TODO(b/210855555): Heatmap improvements.
137 * 1) heat decays in intensity in time for past events, currently we don't decay.
138 */
139
140class HeatMap {
141 const size_t mMaxSize;
142 mutable std::mutex mLock;
143 size_t mRejected GUARDED_BY(mLock) = 0;
144 std::map<std::string, HeatData> mMap GUARDED_BY(mLock);
145
146public:
147 /**
148 * Constructs a HeatMap.
149 *
150 * \param maxSize the maximum number of elements that are tracked.
151 */
152 explicit HeatMap(size_t maxSize) : mMaxSize(maxSize) {
153 }
154
155 /** Returns the number of keys. */
156 size_t size() const {
157 std::lock_guard l(mLock);
158 return mMap.size();
159 }
160
161 /** Clears error history. */
162 void clear() {
163 std::lock_guard l(mLock);
164 return mMap.clear();
165 }
166
167 /** Returns number of keys rejected due to space. */
168 size_t rejected() const {
169 std::lock_guard l(mLock);
170 return mRejected;
171 }
172
173 /** Returns a copy of the heat data associated with key. */
174 HeatData getData(const std::string& key) const {
175 std::lock_guard l(mLock);
176 return mMap.count(key) == 0 ? HeatData{} : mMap.at(key);
177 }
178
179 /**
180 * Adds a new entry.
181 * \param key the key category (e.g. audio.track).
182 * \param suffix (ignored) the suffix to the key that was stripped, if any.
183 * \param event the event (e.g. create, start, pause, stop, etc.).
184 * \param uid (ignored) the uid associated with the error.
185 * \param message (ignored) the status message, if any.
186 * \param subCode (ignored) the status subcode, if any.
187 */
188 void add(const std::string& key, const std::string& suffix, const std::string& event,
189 const std::string& status, uid_t uid, const std::string& message, int32_t subCode) {
190 std::lock_guard l(mLock);
191
192 // Hard limit on heat map entries.
193 // TODO: have better GC.
194 if (mMap.size() == mMaxSize && mMap.count(key) == 0) {
195 ++mRejected;
196 return;
197 }
198 mMap[key].add(suffix, event, status, uid, message, subCode);
199 }
200
201 /**
202 * Returns a pair consisting of the dump string and the number of lines in the string.
203 */
204 std::pair<std::string, int32_t> dump(int32_t lines = INT32_MAX) const {
205 std::stringstream ss;
206 int32_t ll = lines;
207 std::lock_guard l(mLock);
Andy Hung137a99f2021-12-13 09:31:55 -0800208 if (ll > 0) {
209 ss << "Error Heat Map (rejected: " << mRejected << "):\n";
210 --ll;
211 }
212 // TODO: restriction is implemented alphabetically not on priority.
Andy Hung579f41f2021-12-16 08:58:07 -0800213 for (const auto& [name, data] : mMap) {
Andy Hung137a99f2021-12-13 09:31:55 -0800214 if (ll <= 0) break;
Andy Hung579f41f2021-12-16 08:58:07 -0800215 ss << name << ": " << data.dump() << "\n";
Andy Hung137a99f2021-12-13 09:31:55 -0800216 --ll;
217 }
218 return { ss.str(), lines - ll };
219 }
220};
221
222} // namespace android::mediametrics