blob: 1ac2ab5d48f4203296e9dfc658a6dc739c5b751e [file] [log] [blame]
Amin Hassanif94b6432018-01-26 17:39:47 -08001#
2# Copyright (C) 2013 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8# http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
Gilad Arnold553b0ec2013-01-26 01:00:39 -080016
17"""Histogram generation tools."""
18
19from collections import defaultdict
20
Amin Hassanib05a65a2017-12-18 15:15:32 -080021from update_payload import format_utils
Gilad Arnold553b0ec2013-01-26 01:00:39 -080022
23
class Histogram(object):
  """A histogram generating object.

  This object serves the sole purpose of formatting (key, val) pairs as an
  ASCII histogram, including bars and percentage markers, and taking care of
  label alignment, scaling, etc. In addition to the standard __init__
  interface, two static methods are provided for conveniently converting data
  in different formats into a histogram. Histogram generation is exported via
  its __str__ method, and looks as follows:

    Yes |################    | 5 (83.3%)
    No  |###                 | 1 (16.6%)

  TODO(garnold) we may want to add actual methods for adding data or tweaking
  the output layout and formatting. For now, though, this is fine.

  """

  def __init__(self, data, scale=20, formatter=None):
    """Initialize a histogram object.

    Args:
      data: list of (key, count) pairs constituting the histogram; may be
        empty, in which case the histogram renders as an empty string
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values; defaults
        to str when not given

    """
    self.data = data
    self.scale = scale
    self.formatter = formatter or str
    # max() raises ValueError on an empty sequence, so fall back to a zero
    # label width when there is no data at all.
    self.max_key_len = max([len(str(key)) for key, _ in self.data] or [0])
    self.total = sum(count for _, count in self.data)

  @staticmethod
  def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
    """Takes a dictionary of counts and returns a histogram object.

    This simply converts a mapping from names to counts into a list of (key,
    count) pairs, optionally translating keys into name strings, then
    generating and returning a histogram for them. This is a useful convenience
    call for clients that update a dictionary of counters as they (say) scan a
    data stream.

    Args:
      count_dict: dictionary mapping keys to occurrence counts
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings; when given (and
        non-empty), every key of count_dict must be present in it

    Returns:
      A histogram object based on the given data.

    Raises:
      KeyError: if key_names is given but lacks a key found in count_dict.

    """
    if key_names:
      namer = key_names.__getitem__
    else:
      # No translation table: keys are used as their own labels.
      namer = lambda key: key

    hist = [(namer(key), count) for key, count in count_dict.items()]
    return Histogram(hist, scale, formatter)

  @staticmethod
  def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
    """Takes a list of (possibly recurring) keys and returns a histogram object.

    This converts the list into a dictionary of counters, then uses
    FromCountDict() to generate the actual histogram. For example:

      ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

    Args:
      key_list: list of (possibly recurring) keys
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings

    Returns:
      A histogram object based on the given data.

    """
    count_dict = defaultdict(int)  # Unset items default to zero
    for key in key_list:
      count_dict[key] += 1
    return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

  def __str__(self):
    """Renders the histogram, one line per (key, count) pair.

    Each line holds the left-justified key, a bar of '#' characters scaled so
    that self.scale characters correspond to the total count, the formatted
    count and, when format_utils.NumToPercent() yields a non-empty string, that
    string in parentheses.

    Returns:
      The rendered lines joined by newlines; an empty string if there is no
      data.

    """
    hist_lines = []
    # When the total is zero no proportion can be computed, so every row
    # keeps this bare delimiter instead of a scaled bar.
    hist_bar = '|'
    for key, count in self.data:
      if self.total:
        # Floor division keeps bar_len an integer; true division would yield
        # a float under Python 3 and make the string repetition below fail.
        bar_len = count * self.scale // self.total
        hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale)

      line = '%s %s %s' % (
          str(key).ljust(self.max_key_len),
          hist_bar,
          self.formatter(count))
      percent_str = format_utils.NumToPercent(count, self.total)
      if percent_str:
        line += ' (%s)' % percent_str
      hist_lines.append(line)

    return '\n'.join(hist_lines)

  def GetKeys(self):
    """Returns the keys of the histogram."""
    return [key for key, _ in self.data]