blob: bad2dc37f8c506a8f59fa708740c4198152da00b [file] [log] [blame]
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
Gilad Arnold553b0ec2013-01-26 01:00:39 -080016
17"""Histogram generation tools."""
18
Andrew Lassalle165843c2019-11-05 13:30:34 -080019from __future__ import absolute_import
20from __future__ import division
21
Gilad Arnold553b0ec2013-01-26 01:00:39 -080022from collections import defaultdict
23
Amin Hassanib05a65a2017-12-18 15:15:32 -080024from update_payload import format_utils
Gilad Arnold553b0ec2013-01-26 01:00:39 -080025
26
class Histogram(object):
  """A histogram generating object.

  This object serves the sole purpose of formatting (key, val) pairs as an
  ASCII histogram, including bars and percentage markers, and taking care of
  label alignment, scaling, etc. In addition to the standard __init__
  interface, two static methods are provided for conveniently converting data
  in different formats into a histogram. Histogram generation is exported via
  its __str__ method, and looks as follows:

    Yes |################    | 5 (83.3%)
    No  |###                 | 1 (16.6%)

  TODO(garnold) we may want to add actual methods for adding data or tweaking
  the output layout and formatting. For now, though, this is fine.

  """

  def __init__(self, data, scale=20, formatter=None):
    """Initialize a histogram object.

    Empty data is accepted: the histogram then has a zero total, a zero key
    width, and renders as an empty string.

    Args:
      data: list of (key, count) pairs constituting the histogram
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values; defaults
        to str when omitted

    """
    self.data = data
    self.scale = scale
    self.formatter = formatter or str
    # max() raises ValueError on an empty sequence, so fall back to a width of
    # zero when there are no keys to align.
    key_lens = [len(str(key)) for key, _ in self.data]
    self.max_key_len = max(key_lens) if key_lens else 0
    self.total = sum(count for _, count in self.data)

  @staticmethod
  def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
    """Takes a dictionary of counts and returns a histogram object.

    This simply converts a mapping from names to counts into a list of (key,
    count) pairs, optionally translating keys into name strings, then
    generating and returning a histogram for them. This is a useful convenience
    call for clients that update a dictionary of counters as they (say) scan a
    data stream.

    Args:
      count_dict: dictionary mapping keys to occurrence counts
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings; when given (and
        non-empty) every key in count_dict must be present in it

    Returns:
      A histogram object based on the given data.

    Raises:
      KeyError: if key_names is given but lacks one of the keys in count_dict.

    """
    if key_names:
      hist = [(key_names[key], count) for key, count in count_dict.items()]
    else:
      hist = list(count_dict.items())
    return Histogram(hist, scale, formatter)

  @staticmethod
  def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
    """Takes a list of (possibly recurring) keys and returns a histogram object.

    This converts the list into a dictionary of counters, then uses
    FromCountDict() to generate the actual histogram. For example:

      ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

    Args:
      key_list: list of (possibly recurring) keys
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings

    Returns:
      A histogram object based on the given data.

    """
    count_dict = defaultdict(int)  # Unset items default to zero
    for key in key_list:
      count_dict[key] += 1
    return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

  def __str__(self):
    """Renders the histogram, one line per (key, count) pair.

    Each line holds the key padded to the widest key, a bar, the formatted
    count and, when format_utils.NumToPercent() yields a non-empty value, that
    value in parentheses.

    Returns:
      The newline-joined histogram lines; an empty string if there is no data.

    """
    hist_lines = []
    # With a zero total no proportion can be drawn, so every line gets this
    # bare placeholder instead of a scaled bar.
    hist_bar = '|'
    for key, count in self.data:
      if self.total:
        # Floor division keeps the bar length an integer number of characters.
        bar_len = count * self.scale // self.total
        hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale)

      line = '%s %s %s' % (
          str(key).ljust(self.max_key_len),
          hist_bar,
          self.formatter(count))
      percent_str = format_utils.NumToPercent(count, self.total)
      if percent_str:
        line += ' (%s)' % percent_str
      hist_lines.append(line)

    return '\n'.join(hist_lines)

  def GetKeys(self):
    """Returns the keys of the histogram."""
    return [key for key, _ in self.data]