| Amin Hassani | f94b643 | 2018-01-26 17:39:47 -0800 | [diff] [blame] | 1 | # | 
|  | 2 | # Copyright (C) 2013 The Android Open Source Project | 
|  | 3 | # | 
|  | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | # you may not use this file except in compliance with the License. | 
|  | 6 | # You may obtain a copy of the License at | 
|  | 7 | # | 
|  | 8 | #      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | # | 
|  | 10 | # Unless required by applicable law or agreed to in writing, software | 
|  | 11 | # distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | # See the License for the specific language governing permissions and | 
|  | 14 | # limitations under the License. | 
|  | 15 | # | 
| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 16 |  | 
|  | 17 | """Histogram generation tools.""" | 
|  | 18 |  | 
| Andrew Lassalle | 165843c | 2019-11-05 13:30:34 -0800 | [diff] [blame] | 19 | from __future__ import absolute_import | 
|  | 20 | from __future__ import division | 
|  | 21 |  | 
| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 22 | from collections import defaultdict | 
|  | 23 |  | 
| Amin Hassani | b05a65a | 2017-12-18 15:15:32 -0800 | [diff] [blame] | 24 | from update_payload import format_utils | 
| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 25 |  | 
|  | 26 |  | 
class Histogram(object):
    """A histogram generating object.

    This object serves the sole purpose of formatting (key, val) pairs as an
    ASCII histogram, including bars and percentage markers, and taking care
    of label alignment, scaling, etc. In addition to the standard __init__
    interface, two static methods are provided for conveniently converting
    data in different formats into a histogram. Histogram generation is
    exported via its __str__ method, and looks as follows:

      Yes |################    | 5 (83.3%)
      No  |###                 | 1 (16.6%)

    TODO(garnold) we may want to add actual methods for adding data or
    tweaking the output layout and formatting. For now, though, this is fine.

    """

    def __init__(self, data, scale=20, formatter=None):
        """Initialize a histogram object.

        Args:
          data: iterable of (key, count) pairs constituting the histogram;
            it may be empty, and it is copied into a list so that one-shot
            iterators are supported
          scale: number of characters used to indicate 100%
          formatter: function used for formatting raw histogram values;
            defaults to str

        """
        # Materialize once: the pairs are walked here (twice) and again by
        # __str__() and GetKeys(), which would exhaust a one-shot iterator.
        self.data = list(data)
        self.scale = scale
        self.formatter = formatter or str
        # max() raises ValueError on an empty sequence; fall back to a zero
        # width so that an empty histogram is valid (it renders as '').
        key_lengths = [len(str(key)) for key, _ in self.data]
        self.max_key_len = max(key_lengths or [0])
        self.total = sum([count for _, count in self.data])

    @staticmethod
    def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
        """Takes a dictionary of counts and returns a histogram object.

        This simply converts a mapping from names to counts into a list of
        (key, count) pairs, optionally translating keys into name strings,
        then generating and returning a histogram for them. This is a useful
        convenience call for clients that update a dictionary of counters as
        they (say) scan a data stream.

        Args:
          count_dict: dictionary mapping keys to occurrence counts
          scale: number of characters used to indicate 100%
          formatter: function used for formatting raw histogram values
          key_names: dictionary mapping keys to name strings; when given and
            non-empty, every key of count_dict must be present in it
        Returns:
          A histogram object based on the given data.
        Raises:
          KeyError: if key_names is non-empty and lacks a key of count_dict.

        """
        if key_names:
            hist = [(key_names[key], count)
                    for key, count in count_dict.items()]
        else:
            hist = list(count_dict.items())
        return Histogram(hist, scale, formatter)

    @staticmethod
    def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
        """Takes a list of (possibly recurring) keys and returns a histogram.

        This converts the list into a dictionary of counters, then uses
        FromCountDict() to generate the actual histogram. For example:

          ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

        Args:
          key_list: list of (possibly recurring) keys
          scale: number of characters used to indicate 100%
          formatter: function used for formatting raw histogram values
          key_names: dictionary mapping keys to name strings
        Returns:
          A histogram object based on the given data.

        """
        count_dict = defaultdict(int)  # Unset items default to zero
        for key in key_list:
            count_dict[key] += 1
        return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

    def __str__(self):
        """Renders the histogram, one line per (key, count) pair.

        Each line holds the key padded to the longest key length, a bar of
        '#' characters scaled so that the sum of all counts fills self.scale
        columns, and the formatted count. A parenthesized percentage is
        appended whenever format_utils.NumToPercent() returns a non-empty
        string. An empty histogram renders as the empty string.

        """
        hist_lines = []
        for key, count in self.data:
            if self.total:
                # Floor division keeps the bar length integral.
                bar_len = count * self.scale // self.total
                hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale)
            else:
                # Nothing to scale against; emit a bare separator instead.
                hist_bar = '|'

            line = '%s %s %s' % (
                str(key).ljust(self.max_key_len),
                hist_bar,
                self.formatter(count))
            percent_str = format_utils.NumToPercent(count, self.total)
            if percent_str:
                line += ' (%s)' % percent_str
            hist_lines.append(line)

        return '\n'.join(hist_lines)

    def GetKeys(self):
        """Returns the keys of the histogram, in data order."""
        return [key for key, _ in self.data]