| Amin Hassani | f94b643 | 2018-01-26 17:39:47 -0800 | [diff] [blame] | 1 | # | 
 | 2 | # Copyright (C) 2013 The Android Open Source Project | 
 | 3 | # | 
 | 4 | # Licensed under the Apache License, Version 2.0 (the "License"); | 
 | 5 | # you may not use this file except in compliance with the License. | 
 | 6 | # You may obtain a copy of the License at | 
 | 7 | # | 
 | 8 | #      http://www.apache.org/licenses/LICENSE-2.0 | 
 | 9 | # | 
 | 10 | # Unless required by applicable law or agreed to in writing, software | 
 | 11 | # distributed under the License is distributed on an "AS IS" BASIS, | 
 | 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | 13 | # See the License for the specific language governing permissions and | 
 | 14 | # limitations under the License. | 
 | 15 | # | 
| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 16 |  | 
 | 17 | """Histogram generation tools.""" | 
 | 18 |  | 
| Andrew Lassalle | 165843c | 2019-11-05 13:30:34 -0800 | [diff] [blame] | 19 | from __future__ import absolute_import | 
 | 20 | from __future__ import division | 
 | 21 |  | 
| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 22 | from collections import defaultdict | 
 | 23 |  | 
| Amin Hassani | b05a65a | 2017-12-18 15:15:32 -0800 | [diff] [blame] | 24 | from update_payload import format_utils | 
| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 25 |  | 
 | 26 |  | 
 | 27 | class Histogram(object): | 
 | 28 |   """A histogram generating object. | 
 | 29 |  | 
 | 30 |   This object serves the sole purpose of formatting (key, val) pairs as an | 
 | 31 |   ASCII histogram, including bars and percentage markers, and taking care of | 
 | 32 |   label alignment, scaling, etc. In addition to the standard __init__ | 
 | 33 |   interface, two static methods are provided for conveniently converting data | 
 | 34 |   in different formats into a histogram. Histogram generation is exported via | 
 | 35 |   its __str__ method, and looks as follows: | 
 | 36 |  | 
 | 37 |     Yes |################    | 5 (83.3%) | 
 | 38 |     No  |###                 | 1 (16.6%) | 
 | 39 |  | 
 | 40 |   TODO(garnold) we may want to add actual methods for adding data or tweaking | 
 | 41 |   the output layout and formatting. For now, though, this is fine. | 
 | 42 |  | 
 | 43 |   """ | 
 | 44 |  | 
 | 45 |   def __init__(self, data, scale=20, formatter=None): | 
 | 46 |     """Initialize a histogram object. | 
 | 47 |  | 
 | 48 |     Args: | 
 | 49 |       data: list of (key, count) pairs constituting the histogram | 
 | 50 |       scale: number of characters used to indicate 100% | 
 | 51 |       formatter: function used for formatting raw histogram values | 
 | 52 |  | 
 | 53 |     """ | 
 | 54 |     self.data = data | 
 | 55 |     self.scale = scale | 
 | 56 |     self.formatter = formatter or str | 
 | 57 |     self.max_key_len = max([len(str(key)) for key, count in self.data]) | 
 | 58 |     self.total = sum([count for key, count in self.data]) | 
 | 59 |  | 
 | 60 |   @staticmethod | 
 | 61 |   def FromCountDict(count_dict, scale=20, formatter=None, key_names=None): | 
 | 62 |     """Takes a dictionary of counts and returns a histogram object. | 
 | 63 |  | 
 | 64 |     This simply converts a mapping from names to counts into a list of (key, | 
 | 65 |     count) pairs, optionally translating keys into name strings, then | 
 | 66 |     generating and returning a histogram for them. This is a useful convenience | 
 | 67 |     call for clients that update a dictionary of counters as they (say) scan a | 
 | 68 |     data stream. | 
 | 69 |  | 
 | 70 |     Args: | 
 | 71 |       count_dict: dictionary mapping keys to occurrence counts | 
 | 72 |       scale: number of characters used to indicate 100% | 
 | 73 |       formatter: function used for formatting raw histogram values | 
 | 74 |       key_names: dictionary mapping keys to name strings | 
 | 75 |     Returns: | 
 | 76 |       A histogram object based on the given data. | 
 | 77 |  | 
 | 78 |     """ | 
 | 79 |     namer = None | 
 | 80 |     if key_names: | 
 | 81 |       namer = lambda key: key_names[key] | 
 | 82 |     else: | 
 | 83 |       namer = lambda key: key | 
 | 84 |  | 
 | 85 |     hist = [(namer(key), count) for key, count in count_dict.items()] | 
 | 86 |     return Histogram(hist, scale, formatter) | 
 | 87 |  | 
 | 88 |   @staticmethod | 
 | 89 |   def FromKeyList(key_list, scale=20, formatter=None, key_names=None): | 
 | 90 |     """Takes a list of (possibly recurring) keys and returns a histogram object. | 
 | 91 |  | 
 | 92 |     This converts the list into a dictionary of counters, then uses | 
 | 93 |     FromCountDict() to generate the actual histogram. For example: | 
 | 94 |  | 
 | 95 |       ['a', 'a', 'b', 'a', 'b'] --> {'a': 3, 'b': 2} --> ... | 
 | 96 |  | 
 | 97 |     Args: | 
 | 98 |       key_list: list of (possibly recurring) keys | 
 | 99 |       scale: number of characters used to indicate 100% | 
 | 100 |       formatter: function used for formatting raw histogram values | 
 | 101 |       key_names: dictionary mapping keys to name strings | 
 | 102 |     Returns: | 
 | 103 |       A histogram object based on the given data. | 
 | 104 |  | 
 | 105 |     """ | 
 | 106 |     count_dict = defaultdict(int)  # Unset items default to zero | 
 | 107 |     for key in key_list: | 
 | 108 |       count_dict[key] += 1 | 
 | 109 |     return Histogram.FromCountDict(count_dict, scale, formatter, key_names) | 
 | 110 |  | 
 | 111 |   def __str__(self): | 
 | 112 |     hist_lines = [] | 
 | 113 |     hist_bar = '|' | 
 | 114 |     for key, count in self.data: | 
 | 115 |       if self.total: | 
| Andrew Lassalle | 165843c | 2019-11-05 13:30:34 -0800 | [diff] [blame] | 116 |         bar_len = count * self.scale // self.total | 
| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 117 |         hist_bar = '|%s|' % ('#' * bar_len).ljust(self.scale) | 
 | 118 |  | 
| Gilad Arnold | 6a3a387 | 2013-10-04 18:18:45 -0700 | [diff] [blame] | 119 |       line = '%s %s %s' % ( | 
| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 120 |           str(key).ljust(self.max_key_len), | 
 | 121 |           hist_bar, | 
| Gilad Arnold | 6a3a387 | 2013-10-04 18:18:45 -0700 | [diff] [blame] | 122 |           self.formatter(count)) | 
 | 123 |       percent_str = format_utils.NumToPercent(count, self.total) | 
 | 124 |       if percent_str: | 
 | 125 |         line += ' (%s)' % percent_str | 
| Gilad Arnold | 553b0ec | 2013-01-26 01:00:39 -0800 | [diff] [blame] | 126 |       hist_lines.append(line) | 
 | 127 |  | 
 | 128 |     return '\n'.join(hist_lines) | 
 | 129 |  | 
 | 130 |   def GetKeys(self): | 
 | 131 |     """Returns the keys of the histogram.""" | 
 | 132 |     return [key for key, _ in self.data] |