Amin Hassani | f94b643 | 2018-01-26 17:39:47 -0800 | [diff] [blame] | 1 | # |
| 2 | # Copyright (C) 2013 The Android Open Source Project |
| 3 | # |
| 4 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | # you may not use this file except in compliance with the License. |
| 6 | # You may obtain a copy of the License at |
| 7 | # |
| 8 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | # |
| 10 | # Unless required by applicable law or agreed to in writing, software |
| 11 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | # See the License for the specific language governing permissions and |
| 14 | # limitations under the License. |
| 15 | # |
Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 16 | |
| 17 | """Histogram generation tools.""" |
| 18 | |
Andrew Lassalle | 165843c | 2019-11-05 13:30:34 -0800 | [diff] [blame] | 19 | from __future__ import absolute_import |
| 20 | from __future__ import division |
| 21 | |
Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 22 | from collections import defaultdict |
| 23 | |
Amin Hassani | b05a65a | 2017-12-18 15:15:32 -0800 | [diff] [blame] | 24 | from update_payload import format_utils |
Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 25 | |
| 26 | |
class Histogram(object):
  """A histogram generating object.

  This object serves the sole purpose of formatting (key, val) pairs as an
  ASCII histogram, including bars and percentage markers, and taking care of
  label alignment, scaling, etc. In addition to the standard __init__
  interface, two static methods are provided for conveniently converting data
  in different formats into a histogram. Histogram generation is exported via
  its __str__ method, and looks as follows:

    Yes |################    | 5 (83.3%)
    No  |###                 | 1 (16.6%)

  TODO(garnold) we may want to add actual methods for adding data or tweaking
  the output layout and formatting. For now, though, this is fine.

  """

  def __init__(self, data, scale=20, formatter=None):
    """Initialize a histogram object.

    Args:
      data: iterable of (key, count) pairs constituting the histogram; may be
        empty, in which case the histogram renders as an empty string
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values; defaults
        to str when omitted

    """
    # Materialize once: the pairs are traversed here (twice) and again on
    # every __str__ call, which a one-shot iterable would not survive.
    self.data = list(data)
    self.scale = scale
    self.formatter = formatter or str
    # max() raises ValueError on an empty sequence; an empty histogram simply
    # has no labels to align.
    key_lens = [len(str(key)) for key, _ in self.data]
    self.max_key_len = max(key_lens) if key_lens else 0
    self.total = sum(count for _, count in self.data)

  @staticmethod
  def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
    """Takes a dictionary of counts and returns a histogram object.

    This simply converts a mapping from names to counts into a list of (key,
    count) pairs, optionally translating keys into name strings, then
    generating and returning a histogram for them. This is a useful convenience
    call for clients that update a dictionary of counters as they (say) scan a
    data stream.

    Args:
      count_dict: dictionary mapping keys to occurrence counts
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings; when given (and
        non-empty), every key of count_dict must be present in it

    Returns:
      A histogram object based on the given data.

    Raises:
      KeyError: if key_names is given but lacks an entry for some key.

    """
    if key_names:
      hist = [(key_names[key], count) for key, count in count_dict.items()]
    else:
      hist = list(count_dict.items())
    return Histogram(hist, scale, formatter)

  @staticmethod
  def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
    """Takes a list of (possibly recurring) keys and returns a histogram object.

    This converts the list into a dictionary of counters, then uses
    FromCountDict() to generate the actual histogram. For example:

      ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

    Args:
      key_list: list of (possibly recurring) keys
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings

    Returns:
      A histogram object based on the given data.

    """
    count_dict = defaultdict(int)  # Unset items default to zero
    for key in key_list:
      count_dict[key] += 1
    return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

  def __str__(self):
    """Renders the histogram, one line per (key, count) pair.

    Each line holds the left-justified key, a bar of '#' characters scaled so
    that self.scale characters correspond to the total count, the formatted
    count and, when format_utils.NumToPercent() returns a non-empty string,
    that string in parentheses.

    Returns:
      The lines joined by newlines; an empty string if there is no data.

    """
    hist_lines = []
    for key, count in self.data:
      if self.total:
        # int() keeps the repeat count valid when counts are floats, for
        # which floor division yields a float.
        bar_len = int(count * self.scale // self.total)
        hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale)
      else:
        # Nothing to scale against; emit a bare separator instead of a bar.
        hist_bar = '|'

      line = '%s %s %s' % (
          str(key).ljust(self.max_key_len),
          hist_bar,
          self.formatter(count))
      percent_str = format_utils.NumToPercent(count, self.total)
      if percent_str:
        line += ' (%s)' % percent_str
      hist_lines.append(line)

    return '\n'.join(hist_lines)

  def GetKeys(self):
    """Returns the keys of the histogram."""
    return [key for key, _ in self.data]