| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 1 | # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. | 
|  | 2 | # Use of this source code is governed by a BSD-style license that can be | 
|  | 3 | # found in the LICENSE file. | 
|  | 4 |  | 
|  | 5 | """Histogram generation tools.""" | 
|  | 6 |  | 
|  | 7 | from collections import defaultdict | 
|  | 8 |  | 
| Amin Hassani | b05a65a | 2017-12-18 15:15:32 -0800 | [diff] [blame] | 9 | from update_payload import format_utils | 
| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 10 |  | 
|  | 11 |  | 
class Histogram(object):
  """A histogram generating object.

  This object serves the sole purpose of formatting (key, val) pairs as an
  ASCII histogram, including bars and percentage markers, and taking care of
  label alignment, scaling, etc. In addition to the standard __init__
  interface, two static methods are provided for conveniently converting data
  in different formats into a histogram. Histogram generation is exported via
  its __str__ method, and looks as follows:

    Yes |################    | 5 (83.3%)
    No  |###                 | 1 (16.6%)

  TODO(garnold) we may want to add actual methods for adding data or tweaking
  the output layout and formatting. For now, though, this is fine.

  """

  def __init__(self, data, scale=20, formatter=None):
    """Initialize a histogram object.

    Args:
      data: list of (key, count) pairs constituting the histogram; may be
        empty, in which case the histogram renders as an empty string
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values; defaults
        to str when omitted

    """
    self.data = data
    self.scale = scale
    self.formatter = formatter or str
    # An empty histogram is legitimate (e.g. nothing was counted); fall back
    # to a zero-width label column instead of letting max() raise ValueError.
    self.max_key_len = max([len(str(key)) for key, _ in self.data] or [0])
    self.total = sum([count for _, count in self.data])

  @staticmethod
  def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
    """Takes a dictionary of counts and returns a histogram object.

    This simply converts a mapping from names to counts into a list of (key,
    count) pairs, optionally translating keys into name strings, then
    generating and returning a histogram for them. This is a useful convenience
    call for clients that update a dictionary of counters as they (say) scan a
    data stream.

    Args:
      count_dict: dictionary mapping keys to occurrence counts
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings; when given (and
        non-empty), every key of count_dict must be present in it

    Returns:
      A histogram object based on the given data.

    Raises:
      KeyError: if key_names is given but lacks one of the counted keys.

    """
    # Translate keys only when a (non-empty) name mapping was supplied;
    # otherwise the raw keys serve as labels.
    hist = [(key_names[key] if key_names else key, count)
            for key, count in count_dict.items()]
    return Histogram(hist, scale, formatter)

  @staticmethod
  def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
    """Takes a list of (possibly recurring) keys and returns a histogram object.

    This converts the list into a dictionary of counters, then uses
    FromCountDict() to generate the actual histogram. For example:

      ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

    Args:
      key_list: list of (possibly recurring) keys
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings

    Returns:
      A histogram object based on the given data.

    """
    count_dict = defaultdict(int)  # Unset items default to zero
    for key in key_list:
      count_dict[key] += 1
    return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

  def _FormatBar(self, count):
    """Returns the bar segment for an entry with the given count.

    Args:
      count: the entry's count

    Returns:
      A string of the form '|####    |' that is self.scale characters wide
      between the delimiters, or a lone '|' when the histogram total is zero
      (no meaningful proportion can be drawn).

    """
    if not self.total:
      return '|'
    # Floor division keeps the length integral on both Python 2 and 3; true
    # division would yield a float, which cannot be used to repeat a string.
    bar_len = int(count * self.scale // self.total)
    return '|%s|' % ('#' * bar_len).ljust(self.scale)

  def __str__(self):
    """Returns the histogram rendered as text, one line per (key, count) pair.

    Each line consists of the key left-justified to the widest key, the bar,
    the formatted count and, when format_utils.NumToPercent() returns a
    non-empty string, that string in parentheses.

    """
    hist_lines = []
    for key, count in self.data:
      line = '%s %s %s' % (
          str(key).ljust(self.max_key_len),
          self._FormatBar(count),
          self.formatter(count))
      percent_str = format_utils.NumToPercent(count, self.total)
      if percent_str:
        line += ' (%s)' % percent_str
      hist_lines.append(line)

    return '\n'.join(hist_lines)

  def GetKeys(self):
    """Returns the keys of the histogram, in data order."""
    return [key for key, _ in self.data]