Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 1 | # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| 2 | # Use of this source code is governed by a BSD-style license that can be |
| 3 | # found in the LICENSE file. |
| 4 | |
| 5 | """Histogram generation tools.""" |
| 6 | |
| 7 | from collections import defaultdict |
| 8 | |
Amin Hassani | b05a65a | 2017-12-18 15:15:32 -0800 | [diff] [blame^] | 9 | from update_payload import format_utils |
Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 10 | |
| 11 | |
class Histogram(object):
  """A histogram generating object.

  This object serves the sole purpose of formatting (key, val) pairs as an
  ASCII histogram, including bars and percentage markers, and taking care of
  label alignment, scaling, etc. In addition to the standard __init__
  interface, two static methods are provided for conveniently converting data
  in different formats into a histogram. Histogram generation is exported via
  its __str__ method, and looks as follows:

    Yes |################    | 5 (83.3%)
    No  |###                 | 1 (16.6%)

  TODO(garnold) we may want to add actual methods for adding data or tweaking
  the output layout and formatting. For now, though, this is fine.

  """

  def __init__(self, data, scale=20, formatter=None):
    """Initialize a histogram object.

    Args:
      data: list of (key, count) pairs constituting the histogram
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values; defaults
        to str when omitted

    """
    self.data = data
    self.scale = scale
    self.formatter = formatter or str
    # Fall back to a zero-width label column for an empty histogram; a bare
    # max() over an empty list would raise ValueError.
    self.max_key_len = max([len(str(key)) for key, _ in self.data] or [0])
    self.total = sum(count for _, count in self.data)

  @staticmethod
  def FromCountDict(count_dict, scale=20, formatter=None, key_names=None):
    """Takes a dictionary of counts and returns a histogram object.

    This simply converts a mapping from names to counts into a list of (key,
    count) pairs, optionally translating keys into name strings, then
    generating and returning a histogram for them. This is a useful convenience
    call for clients that update a dictionary of counters as they (say) scan a
    data stream.

    Args:
      count_dict: dictionary mapping keys to occurrence counts
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings
    Returns:
      A histogram object based on the given data.
    Raises:
      KeyError: if key_names is given but lacks a key found in count_dict.

    """
    if key_names:
      hist = [(key_names[key], count) for key, count in count_dict.items()]
    else:
      hist = list(count_dict.items())
    return Histogram(hist, scale, formatter)

  @staticmethod
  def FromKeyList(key_list, scale=20, formatter=None, key_names=None):
    """Takes a list of (possibly recurring) keys and returns a histogram object.

    This converts the list into a dictionary of counters, then uses
    FromCountDict() to generate the actual histogram. For example:

      ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ...

    Args:
      key_list: list of (possibly recurring) keys
      scale: number of characters used to indicate 100%
      formatter: function used for formatting raw histogram values
      key_names: dictionary mapping keys to name strings
    Returns:
      A histogram object based on the given data.

    """
    count_dict = defaultdict(int)  # Unset items default to zero
    for key in key_list:
      count_dict[key] += 1
    return Histogram.FromCountDict(count_dict, scale, formatter, key_names)

  def _FormatBar(self, count):
    """Returns the ASCII bar representing a single count.

    Args:
      count: the raw value whose share of the total is being drawn
    Returns:
      A string of the form '|###   |' that is self.scale characters wide
      between the delimiters, or a bare '|' when the histogram total is zero
      (no proportion can be computed).

    """
    if not self.total:
      return '|'
    # Floor division keeps the bar length an integer under both Python 2 and
    # Python 3; true division would yield a float, which cannot be used to
    # repeat a string.
    bar_len = int(count * self.scale // self.total)
    return '|%s|' % ('#' * bar_len).ljust(self.scale)

  def __str__(self):
    """Returns the histogram as newline-separated text, one line per key."""
    hist_lines = []
    for key, count in self.data:
      line = '%s %s %s' % (
          str(key).ljust(self.max_key_len),
          self._FormatBar(count),
          self.formatter(count))
      # The percentage suffix is appended only when the helper returns a
      # non-empty string.
      percent_str = format_utils.NumToPercent(count, self.total)
      if percent_str:
        line += ' (%s)' % percent_str
      hist_lines.append(line)

    return '\n'.join(hist_lines)

  def GetKeys(self):
    """Returns the keys of the histogram."""
    return [key for key, _ in self.data]