Chih-Hung Hsieh | 949205a | 2020-01-10 10:33:40 -0800 | [diff] [blame] | 1 | # python3 |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 2 | # Copyright (C) 2019 The Android Open Source Project |
| 3 | # |
| 4 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | # you may not use this file except in compliance with the License. |
| 6 | # You may obtain a copy of the License at |
| 7 | # |
| 8 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | # |
| 10 | # Unless required by applicable law or agreed to in writing, software |
| 11 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | # See the License for the specific language governing permissions and |
| 14 | # limitations under the License. |
| 15 | |
| 16 | """Grep warnings messages and output HTML tables or warning counts in CSV. |
| 17 | |
| 18 | Default is to output warnings in HTML tables grouped by warning severity. |
| 19 | Use option --byproject to output tables grouped by source file projects. |
| 20 | Use option --gencsv to output warning counts in CSV format. |
Chih-Hung Hsieh | 5ae5519 | 2020-02-24 10:20:36 -0800 | [diff] [blame] | 21 | |
| 22 | Default input file is build.log, which can be changed with the --log flag. |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 23 | """ |
| 24 | |
| 25 | # List of important data structures and functions in this script. |
| 26 | # |
| 27 | # To parse and keep warning message in the input file: |
| 28 | # severity: classification of message severity |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 29 | # warn_patterns: |
| 30 | # warn_patterns[w]['category'] tool that issued the warning, not used now |
| 31 | # warn_patterns[w]['description'] table heading |
| 32 | # warn_patterns[w]['members'] matched warnings from input |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 33 | # warn_patterns[w]['patterns'] regular expressions to match warnings |
| 34 | # warn_patterns[w]['projects'][p] number of warnings of pattern w in p |
Chih-Hung Hsieh | 949205a | 2020-01-10 10:33:40 -0800 | [diff] [blame] | 35 | # warn_patterns[w]['severity'] severity tuple |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 36 | # project_list[p][0] project name |
| 37 | # project_list[p][1] regular expression to match a project path |
| 38 | # project_patterns[p] re.compile(project_list[p][1]) |
| 39 | # project_names[p] project_list[p][0] |
| 40 | # warning_messages array of each warning message, without source url |
Chih-Hung Hsieh | 5ae5519 | 2020-02-24 10:20:36 -0800 | [diff] [blame] | 41 | # warning_links array of each warning code search link; for 'chrome' |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 42 | # warning_records array of [idx to warn_patterns, |
| 43 | # idx to project_names, |
Chih-Hung Hsieh | 5ae5519 | 2020-02-24 10:20:36 -0800 | [diff] [blame] | 44 | # idx to warning_messages, |
| 45 | # idx to warning_links] |
Chih-Hung Hsieh | 949205a | 2020-01-10 10:33:40 -0800 | [diff] [blame] | 46 | # parse_input_file |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 47 | # |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 48 | import argparse |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 49 | import io |
| 50 | import multiprocessing |
| 51 | import os |
| 52 | import re |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 53 | import sys |
| 54 | |
Chih-Hung Hsieh | 98b285d | 2021-04-28 14:49:32 -0700 | [diff] [blame] | 55 | # pylint:disable=relative-beyond-top-level,no-name-in-module |
| 56 | # suppress false positive of no-name-in-module warnings |
Chih-Hung Hsieh | 5ae5519 | 2020-02-24 10:20:36 -0800 | [diff] [blame] | 57 | from . import android_project_list |
| 58 | from . import chrome_project_list |
| 59 | from . import cpp_warn_patterns as cpp_patterns |
Chih-Hung Hsieh | 3cce2bc | 2020-02-27 15:39:18 -0800 | [diff] [blame] | 60 | from . import html_writer |
Chih-Hung Hsieh | 5ae5519 | 2020-02-24 10:20:36 -0800 | [diff] [blame] | 61 | from . import java_warn_patterns as java_patterns |
| 62 | from . import make_warn_patterns as make_patterns |
| 63 | from . import other_warn_patterns as other_patterns |
| 64 | from . import tidy_warn_patterns as tidy_patterns |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 65 | |
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 66 | |
def parse_args(use_google3):
  """Declare the command line flags, parse sys.argv and return the result.

  Args:
    use_google3: when true, the check that the log file exists on the local
      file system is skipped.

  Returns:
    The argparse namespace. flags.log always holds the log path to read: the
    --log value when given, otherwise the positional build.log argument.
  """
  parser = argparse.ArgumentParser(
      formatter_class=argparse.RawDescriptionHelpFormatter,
      description=__doc__)
  parser.add_argument(
      '--capacitor_path',
      default='',
      help='Save capacitor warning file to the passed absolute path')
  # The original Android script called this flag csvpath and other scripts
  # still depend on that spelling, hence the naming mismatch with the flag
  # above.
  parser.add_argument(
      '--csvpath',
      default='',
      help='Save CSV warning file to the passed path')
  parser.add_argument(
      '--gencsv',
      action='store_true',
      help='Generate CSV file with number of various warnings')
  parser.add_argument('--csvwithdescription', default='',
                      help="""Save CSV warning file to the passed path this csv
                      will contain all the warning descriptions""")
  parser.add_argument(
      '--byproject',
      action='store_true',
      help='Separate warnings in HTML output by project names')
  parser.add_argument(
      '--url',
      default='',
      help='Root URL of an Android source code tree prefixed before files in '
      'warnings')
  parser.add_argument(
      '--separator',
      default='?l=',
      help='Separator between the end of a URL and the line number argument. '
      'e.g. #')
  parser.add_argument(
      '--processes',
      type=int,
      default=multiprocessing.cpu_count(),
      help='Number of parallel processes to process warnings')
  # 'android' is the default because old Android build scripts invoke warn.py
  # without any --platform flag.
  parser.add_argument(
      '--platform',
      choices=['chrome', 'android'],
      default='android',
      help='Platform of the build log')
  # Old Android build scripts pass nothing but a build.log path, so the
  # positional form is kept next to --log.
  parser.add_argument('--log', help='Path to build log file')
  parser.add_argument(
      dest='buildlog',
      metavar='build.log',
      nargs='?',
      default='build.log',
      help='Path to build.log file')

  flags = parser.parse_args()
  # --log wins; fall back to the positional argument (or its default).
  flags.log = flags.log or flags.buildlog
  if not (use_google3 or os.path.exists(flags.log)):
    sys.exit('Cannot find log file: ' + flags.log)
  return flags
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 110 | |
| 111 | |
def get_project_names(project_list):
  """Return the first element (the name) of every project_list entry."""
  names = []
  for entry in project_list:
    names.append(entry[0])
  return names
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 115 | |
| 116 | |
def find_project_index(line, project_patterns):
  """Return the index of the first project pattern matching line, else -1.

  Args:
    line: text to test; each pattern is applied with match(), i.e. anchored
      at the start of the text.
    project_patterns: sequence of compiled regular expressions.
  """
  hits = (position
          for position, regex in enumerate(project_patterns)
          if regex.match(line))
  return next(hits, -1)
| 123 | |
| 124 | |
def classify_one_warning(warning, link, results, project_patterns,
                         warn_patterns):
  """Append the classification of one warning line to results.

  The first entry of warn_patterns that has a compiled pattern matching the
  warning wins; [warning, link, warn pattern index, project index] is then
  appended to results. Nothing is appended when no pattern matches.
  """
  for pattern_idx, warn_pattern in enumerate(warn_patterns):
    regexes = warn_pattern['compiled_patterns']
    if not any(regex.match(warning) for regex in regexes):
      continue
    results.append([warning, link, pattern_idx,
                    find_project_index(warning, project_patterns)])
    return
  # Reaching this point means the line matched no known pattern, i.e. there
  # was a problem parsing the log, probably caused by 'make -j' mixing the
  # output from 2 or more concurrent compiles.
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 137 | |
| 138 | |
def remove_prefix(src, sub):
  """Drop everything in src that precedes the last occurrence of sub.

  The returned string starts with sub itself. src is returned unchanged when
  it does not contain sub.
  """
  last_pos = src.rfind(sub)
  return src if last_pos < 0 else src[last_pos:]
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 145 | |
| 146 | |
# TODO(emmavukelj): Don't have any generate_*_cs_link functions call
# normalize_path a second time (the first time being in parse_input_file)
def generate_cs_link(warning_line, flags, android_root=None):
  """Return a code search link for warning_line, chosen by flags.platform.

  Args:
    warning_line: one warning line from the build log.
    flags: parsed command line flags; flags.platform selects the generator.
    android_root: Android source root, only forwarded for 'android'.

  Returns:
    The platform specific link, or the generic code search home page URL for
    any platform other than 'chrome' and 'android'.
  """
  platform = flags.platform
  if platform == 'android':
    return generate_android_cs_link(warning_line, flags, android_root)
  if platform == 'chrome':
    return generate_chrome_cs_link(warning_line, flags)
  return 'https://cs.corp.google.com/'
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 156 | |
Chih-Hung Hsieh | 5ae5519 | 2020-02-24 10:20:36 -0800 | [diff] [blame] | 157 | |
def generate_android_cs_link(warning_line, flags, android_root):
  """Build the code search link for one Android warning line.

  Args:
    warning_line: text of the form 'path:line_or_text:rest'; it must contain
      at least two ':' or the unpacking below raises ValueError.
    flags: parsed flags; url and separator are read here.
    android_root: source root passed on to normalize_path.

  Returns:
    The normalized path alone when flags.url is empty. Otherwise
    flags.url + '/' + path, followed by flags.separator and the line number
    when the second field consists of digits only.
  """
  # maxsplit=2 keeps any further ':' inside the third, unused field.
  raw_path, line_number_str, _ = warning_line.split(':', 2)
  path = normalize_path(raw_path, flags, android_root)
  if not flags.url:
    return path
  line_suffix = ''
  if line_number_str.isdigit():
    line_suffix = flags.separator + line_number_str
  return flags.url + '/' + path + line_suffix
| 169 | |
| 170 | |
def generate_chrome_cs_link(warning_line, flags):
  """Build the code search link for one Chrome warning line.

  Args:
    warning_line: text of the form 'path:line:...'.
    flags: parsed flags, only forwarded to normalize_path.

  Returns:
    Either a direct link '<base><repo><path>?l=<line>' or, when no specific
    location can be derived (or the path contains '/gen/'), a file query link
    '<base>?q=file:<path>'.

  Raises:
    IndexError, ValueError: a direct link is built and the second ':' field
      is missing or is not an integer.
  """
  fields = warning_line.split(':')
  raw_path = fields[0]
  normalized_path = normalize_path(raw_path, flags)
  link_base = 'https://cs.chromium.org/'
  link_add = 'chromium'
  link_path = None

  # The checks below are a list of directory cases gathered by manually
  # going through warnings. They are deliberately independent "if"s and not
  # an "elif" chain: a path may fall under several cases (e.g.
  # third_party/lib/nghttp2_frame.c) and a later, more specific case must be
  # able to overwrite the result of an earlier one. Do not reorder them.
  if '/src/third_party' in raw_path:
    link_path = remove_prefix(raw_path, '/src/third_party/')
  for marker in ('/chrome_root/src_internal/', '/chrome_root/src/'):
    if marker in raw_path:
      # keep the part starting at the marker, then drop '/chrome_root'
      link_path = remove_prefix(raw_path, marker)[len('/chrome_root'):]
  if '/libassistant/' in raw_path:
    link_add = 'eureka_internal/chromium/src'
    link_base = 'https://cs.corp.google.com/'  # internal data
    link_path = remove_prefix(normalized_path, '/libassistant/')
  if raw_path.startswith('gen/'):
    link_path = '/src/out/Debug/gen/' + normalized_path
  if '/gen/' in raw_path:
    return link_base + '?q=file:' + remove_prefix(normalized_path, '/gen/')

  if not link_path and raw_path.startswith(('src/', 'src_internal/')):
    link_path = '/' + raw_path

  if not link_path:  # can't find specific link, send a query
    return link_base + '?q=file:' + normalized_path

  line_number = int(fields[1])
  return link_base + link_add + link_path + '?l=' + str(line_number)
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 218 | |
| 219 | |
def find_warn_py_and_android_root(path):
  """Return the ancestor directory of path that holds warn.py, or ''.

  Ancestors are tried from the deepest one upwards; a candidate is accepted
  when '<candidate>/build/make/tools/warn.py' exists on the file system.
  Candidates made of fewer than two '/'-separated parts are never tried.
  """
  parts = path.split('/')
  keep = len(parts) - 1
  while keep >= 2:
    candidate = '/'.join(parts[:keep])
    # Android root directory should contain this script.
    if os.path.exists(candidate + '/build/make/tools/warn.py'):
      return candidate
    keep -= 1
  return ''
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 229 | |
| 230 | |
def find_android_root(buildlog):
  """Guess the Android source root from the warning lines of a build log.

  Args:
    buildlog: iterable of log lines.

  Returns:
    The directory found through warn.py for one of the first 99 usable
    warning lines; otherwise the common prefix of up to 10000 usable warning
    lines (without its trailing '/') when there are more than 10 of them and
    the prefix ends with '/'; otherwise ''.
  """
  warning_pattern = re.compile('^/[^ ]*/[^ ]*: warning: .*')
  candidates = []
  for line in buildlog:
    if not warning_pattern.match(line):
      continue
    # We want the android_root of a local build machine, so skip RBE warning
    # lines (they carry a '/b/f/w/' path prefix) and /tmp/ file warnings.
    if '/b/f/w' in line or line.startswith('/tmp/'):
      continue
    candidates.append(line)
    seen = len(candidates)
    if seen > 9999:
      break
    if seen < 100:
      # Try to locate warn.py above the warned file; its location gives the
      # source tree root directly.
      source_path = os.path.normpath(re.sub(':.*$', '', line))
      found_root = find_warn_py_and_android_root(source_path)
      if found_root:
        return found_root
  # Do not use common prefix of a small number of paths.
  if len(candidates) > 10:
    # pytype: disable=wrong-arg-types
    shared = os.path.commonprefix(candidates)
    # pytype: enable=wrong-arg-types
    if len(shared) > 2 and shared.endswith('/'):
      return shared[:-1]
  return ''
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 263 | |
| 264 | |
def remove_android_root_prefix(path, android_root):
  """Return path relative to android_root when path lies inside that root.

  Args:
    path: a file path, normally already normalized.
    android_root: source tree root; a trailing '/' is tolerated.

  Returns:
    The part of path after 'android_root/' when path is located under
    android_root, '' when path is the root itself, and path unchanged in
    every other case (including an empty android_root).
  """
  if not android_root:
    return path
  root = android_root.rstrip('/')
  if path == root:
    return ''
  # Compare against the root plus '/' so that a sibling directory sharing
  # the same leading characters (root '/a/b', path '/a/bc/x.c') is not
  # mistaken for a path inside the root.
  prefix = root + '/'
  if path.startswith(prefix):
    return path[len(prefix):]
  return path
| 270 | |
| 271 | |
def normalize_path(path, flags, android_root=None):
  """Normalize a file path and strip the known source root prefix.

  Args:
    path: file path taken from a warning line.
    flags: parsed flags; only flags.platform is read.
    android_root: Android source root, used for the 'android' platform only.

  Returns:
    For 'android': the os.path.normpath() form of path, made relative to
    android_root when one is given. For any other platform: the normalized
    path with everything up to and including the first 'chrome_root/'
    removed, or the normalized path itself when that marker is absent.
  """
  path = os.path.normpath(path)

  if flags.platform != 'android':
    # We want the path relative to chrome_root/, so drop that directory and
    # whatever precedes it.
    _, marker, below_root = path.partition('chrome_root/')
    return below_root if marker else path

  if android_root:
    return remove_android_root_prefix(path, android_root)
  return path
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 287 | |
| 288 | |
def normalize_warning_line(line, flags, android_root=None):
  """Clean up a warning line and normalize the file path it starts with.

  Curly single quotes become "'", every other non-ASCII character becomes a
  space and surrounding whitespace is stripped. The text before the first
  ':' is then run through normalize_path; the rest is kept as is.
  """
  ascii_quotes = re.sub(u'[\u2018\u2019]', '\'', line)
  # replace the remaining non-ASCII chars with spaces
  cleaned = re.sub(u'[^\x00-\x7f]', ' ', ascii_quotes).strip()
  colon = cleaned.find(':')
  path_part = cleaned[:colon]
  remainder = cleaned[colon:]
  return normalize_path(path_part, flags, android_root) + remainder
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 298 | |
| 299 | |
def parse_input_file_chrome(infile, flags):
  """Parse a Chrome build log; collect build parameters and warning lines.

  Args:
    infile: iterable of log lines.
    flags: parsed flags, forwarded to the normalization and link helpers.

  Returns:
    A (unique_warnings, header_str) tuple. unique_warnings maps each
    normalized warning line to its code search link; header_str is
    'platform_version - board_name - architecture', with 'unknown' for any
    value that was not found in the log.
  """
  # Only warning lines of format 'file_path:line_no:col_no: warning: ...'
  # are handled.
  # Bug: http://198657613, This might need change to handle RBE output.
  warning_pattern = re.compile(r'^[^ ]*/[^ ]*:[0-9]+:[0-9]+: warning: .*')
  package_pattern = re.compile(
      r'.+Package:.+chromeos-base/chromeos-chrome-')
  unpacked_pattern = re.compile(r'.+Source\sunpacked\sin\s(.+)')
  use_pattern = re.compile(r'.+USE:\s*([^\s]*).*')

  platform_version = board_name = architecture = 'unknown'

  # Duplicated warnings are collected only once; skipping them saves ~8% of
  # the time needed to parse one typical build log.
  unique_warnings = {}
  for line in infile:
    if warning_pattern.match(line):
      key = normalize_warning_line(line, flags)
      if key not in unique_warnings:
        unique_warnings[key] = generate_cs_link(line, flags)
      continue

    # Build parameters are only searched for while one is still missing.
    if 'unknown' not in (platform_version, board_name, architecture):
      continue

    if package_pattern.match(line) is not None:
      platform_version = 'R' + line.split('chrome-')[1].split('_')[0]
      continue
    unpacked = unpacked_pattern.match(line)
    if unpacked is not None:
      board_name = unpacked.group(1).split('/')[2]
      continue
    use_line = use_pattern.match(line)
    if use_line is not None:
      architecture = use_line.group(1)
      continue

  header_str = ' - '.join((platform_version, board_name, architecture))
  return unique_warnings, header_str
| 338 | |
| 339 | |
def add_normalized_line_to_warnings(line, flags, android_root, unique_warnings):
  """Record one warning line in unique_warnings unless it is already there.

  The dictionary key is the normalized form of line and the value is the
  code search link generated from the original line. unique_warnings is
  updated in place and also returned.
  """
  key = normalize_warning_line(line, flags, android_root)
  if key in unique_warnings:
    return unique_warnings
  unique_warnings[key] = generate_cs_link(line, flags, android_root)
  return unique_warnings
| 347 | |
| 348 | |
def parse_input_file_android(infile, flags):
  """Parse an Android build log; collect build parameters and warning lines.

  Args:
    infile: seekable text file object of the build log. It is read once to
      guess the source root and, after seek(0), a second time to collect
      the warnings.
    flags: parsed flags, forwarded to the normalization and link helpers.

  Returns:
    A (unique_warnings, header_str) tuple. unique_warnings maps each
    normalized warning line to its code search link; header_str is
    'PLATFORM_VERSION - TARGET_PRODUCT - TARGET_BUILD_VARIANT (BUILD_ID)',
    with 'unknown' for any value that was not found in the log.
  """
  # pylint:disable=too-many-locals,too-many-branches
  platform_version = 'unknown'
  target_product = 'unknown'
  target_variant = 'unknown'
  build_id = 'unknown'
  use_rbe = False
  android_root = find_android_root(infile)
  infile.seek(0)

  # rustc warning messages have two lines that should be combined:
  #     warning: description
  #        --> file_path:line_number:column_number
  # Some warning messages have no file name:
  #     warning: macro replacement list ... [bugprone-macro-parentheses]
  # Some makefile warning messages have no line number:
  #     some/path/file.mk: warning: description
  # C/C++ compiler warning messages have line and column numbers:
  #     some/path/file.c:line_number:column_number: warning: description
  warning_pattern = re.compile('(^[^ ]*/[^ ]*: warning: .*)|(^warning: .*)')
  warning_without_file = re.compile('^warning: .*')
  rustc_file_position = re.compile('^[ ]+--> [^ ]*/[^ ]*:[0-9]+:[0-9]+')

  # If RBE was used, try to reclaim some warning lines mixed with some
  # leading chars from other concurrent job's stderr output.
  # The leading characters can be any character, including digits and spaces.
  # It's impossible to correctly identify the starting point of the source
  # file path without the file directory name knowledge.
  # Here we can only be sure to recover lines containing "/b/f/w/".
  rbe_warning_pattern = re.compile('.*/b/f/w/[^ ]*: warning: .*')

  # Collect all unique warning lines.
  # Removing the duplicated warnings saves ~8% of the time needed to parse
  # one typical build log.
  unique_warnings = {}
  line_counter = 0
  prev_warning = ''
  for line in infile:
    if prev_warning:
      if rustc_file_position.match(line):
        # must be a rustc warning, combine 2 lines into one warning
        line = line.strip().replace('--> ', '') + ': ' + prev_warning
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
        prev_warning = ''
        continue
      # add prev_warning, and then process the current line
      prev_warning = 'unknown_source_file: ' + prev_warning
      unique_warnings = add_normalized_line_to_warnings(
          prev_warning, flags, android_root, unique_warnings)
      prev_warning = ''

    if use_rbe and rbe_warning_pattern.match(line):
      cleaned_up_line = re.sub('.*/b/f/w/', '', line)
      unique_warnings = add_normalized_line_to_warnings(
          cleaned_up_line, flags, android_root, unique_warnings)
      continue

    if warning_pattern.match(line):
      if warning_without_file.match(line):
        # save this line and combine it with the next line
        prev_warning = line
      else:
        unique_warnings = add_normalized_line_to_warnings(
            line, flags, android_root, unique_warnings)
      continue

    if line_counter < 100:
      # save a little bit of time by only doing this for the first few lines
      line_counter += 1
      result = re.search('(?<=^PLATFORM_VERSION=).*', line)
      if result is not None:
        platform_version = result.group(0)
        continue
      result = re.search('(?<=^TARGET_PRODUCT=).*', line)
      if result is not None:
        target_product = result.group(0)
        continue
      result = re.search('(?<=^TARGET_BUILD_VARIANT=).*', line)
      if result is not None:
        target_variant = result.group(0)
        continue
      result = re.search('(?<=^BUILD_ID=).*', line)
      if result is not None:
        build_id = result.group(0)
        continue
      result = re.search('(?<=^TOP=).*', line)
      if result is not None:
        # The pattern has no capture group; the value after 'TOP=' is the
        # whole match, i.e. group 0. Asking for group 1 raises IndexError.
        android_root = result.group(0)
        continue
      if re.search('USE_RBE=', line) is not None:
        use_rbe = True
        continue

  if prev_warning:
    # The log ended right after a warning without file name, so no following
    # line triggered its registration; record it now instead of dropping it.
    unique_warnings = add_normalized_line_to_warnings(
        'unknown_source_file: ' + prev_warning, flags, android_root,
        unique_warnings)

  if android_root:
    # android_root may have been replaced by a TOP= line after some warnings
    # were already stored, so normalize every stored line once more.
    new_unique_warnings = {}
    for warning_line in unique_warnings:
      normalized_line = normalize_warning_line(warning_line, flags,
                                               android_root)
      new_unique_warnings[normalized_line] = generate_android_cs_link(
          warning_line, flags, android_root)
    unique_warnings = new_unique_warnings

  header_str = '%s - %s - %s (%s)' % (
      platform_version, target_product, target_variant, build_id)
  return unique_warnings, header_str
| 456 | |
| 457 | |
def parse_input_file(infile, flags):
  """Parse one build log with the parser that matches flags.platform.

  Returns:
    The (unique_warnings, header_str) tuple of the platform parser.

  Raises:
    RuntimeError: flags.platform is neither 'chrome' nor 'android'.
  """
  platform = flags.platform
  if platform == 'android':
    return parse_input_file_android(infile, flags)
  if platform == 'chrome':
    return parse_input_file_chrome(infile, flags)
  raise RuntimeError('parse_input_file not defined for platform %s' %
                     platform)
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 466 | |
| 467 | |
def parse_compiler_output(compiler_output):
  """Split 'path:line:col[ ...]:message' compiler output into its parts.

  Returns:
    A (file_path, line_number, col_number, warning_message) tuple; the two
    numbers are ints and warning_message is everything after the third ':'.

  Raises:
    ValueError: the line or column field is not an integer.
    IndexError: the text has fewer than three ':' separators.
  """
  fields = compiler_output.split(':', 3)  # at most 3 splits -> 4 fields
  line_number = int(fields[1])
  # only the text before the first space of the column field is the number
  col_number = int(fields[2].partition(' ')[0])
  return fields[0], line_number, col_number, fields[3]
Chih-Hung Hsieh | 888d143 | 2019-12-09 19:32:03 -0800 | [diff] [blame] | 476 | |
| 477 | |
def get_warn_patterns(platform):
  """Return the warn_patterns list of a platform, reset for a new run.

  Every returned pattern gets an empty 'members' list and an empty
  'projects' dictionary; the pattern dictionaries of the pattern modules
  are modified in place.

  Raises:
    Exception: platform is neither 'chrome' nor 'android'.
  """
  if platform not in ('chrome', 'android'):
    raise Exception('platform name %s is not valid' % platform)

  if platform == 'chrome':
    selected = cpp_patterns.warn_patterns
  else:
    selected = (make_patterns.warn_patterns
                + cpp_patterns.warn_patterns
                + java_patterns.warn_patterns
                + tidy_patterns.warn_patterns
                + other_patterns.warn_patterns)

  for entry in selected:
    entry['members'] = []
    # 'projects' maps a project name to the number of warnings of this
    # pattern found in that project.
    entry['projects'] = {}
  return selected
| 495 | |
| 496 | |
def get_project_list(platform):
  """Look up the project list that belongs to a platform.

  Args:
    platform: 'chrome' or 'android'.

  Returns:
    chrome_project_list.project_list or android_project_list.project_list.

  Raises:
    Exception: if platform is neither 'chrome' nor 'android'.
  """
  if platform == 'chrome':
    projects = chrome_project_list.project_list
  elif platform == 'android':
    projects = android_project_list.project_list
  else:
    raise Exception('platform name %s is not valid' % platform)
  return projects
| 504 | |
| 505 | |
def parallel_classify_warnings(warning_data, args, project_names,
                               project_patterns, warn_patterns,
                               use_google3, create_launch_subprocs_fn,
                               classify_warnings_fn):
  """Classify every warning, using args.processes workers when > 1.

  Args:
    warning_data: mapping whose items() yields (warning, link) pairs.
    args: object with a 'processes' attribute (number of workers).
    project_names: list indexed by project_idx to get a project name.
    project_patterns: passed through to the classifier.
    warn_patterns: list of pattern dicts; each must already have a
      'members' list and a 'projects' dict, both updated in place here.
    use_google3: when true, the classifier output is wrapped in one extra
      list level before it is consumed.
    create_launch_subprocs_fn: called as fn(num_cpu, classify_warnings_fn,
      arg_groups, []) when more than one process is requested.
    classify_warnings_fn: worker function handed to the launcher.

  Returns:
    (warning_messages, warning_links, warning_records), where each record is
    [pattern_idx, project_idx, message_idx, link_idx].
  """
  # pylint:disable=too-many-arguments,too-many-locals
  num_cpu = args.processes

  if num_cpu > 1:
    # Deal the warnings out round-robin, one bucket per worker.
    buckets = [[] for _ in range(num_cpu)]
    for position, (warning, link) in enumerate(warning_data.items()):
      buckets[position % num_cpu].append((warning, link))
    # Each worker receives a one-element list holding its argument dict.
    arg_groups = [
        [{
            'group': bucket,
            'project_patterns': project_patterns,
            'warn_patterns': warn_patterns,
            'num_processes': num_cpu
        }]
        for bucket in buckets
    ]
    group_results = create_launch_subprocs_fn(
        num_cpu, classify_warnings_fn, arg_groups, [])
  else:
    # Single process: classify in place and mimic the nested result shape.
    classified = []
    for warning, link in warning_data.items():
      classify_one_warning(warning, link, classified,
                           project_patterns, warn_patterns)
    group_results = [classified]

  if use_google3:
    # One more nesting level so the loops below see the expected shape.
    # NOTE(review): presumably the google3 launcher returns a flatter
    # result -- confirm against that caller.
    group_results = [group_results]

  warning_messages = []
  warning_links = []
  warning_records = []
  for group_result in group_results:
    for result in group_result:
      for line, link, pattern_idx, project_idx in result:
        pattern = warn_patterns[pattern_idx]
        pattern['members'].append(line)
        # The record stores the positions the message and link are about
        # to occupy in their respective lists.
        warning_records.append([pattern_idx, project_idx,
                                len(warning_messages), len(warning_links)])
        warning_messages.append(line)
        warning_links.append(link)
        # A negative project index means no project matched.
        pname = project_names[project_idx] if project_idx >= 0 else '???'
        # Count warnings by project.
        per_project = pattern['projects']
        per_project[pname] = per_project.get(pname, 0) + 1
  return warning_messages, warning_links, warning_records
| 565 | |
| 566 | |
def process_log(logfile, flags, project_names, project_patterns, warn_patterns,
                html_path, use_google3, create_launch_subprocs_fn,
                classify_warnings_fn, logfile_object):
  # pylint does not recognize g-doc-*
  # pylint: disable=bad-option-value,g-doc-args
  # pylint: disable=bad-option-value,g-doc-return-or-yield
  # pylint: disable=too-many-arguments,too-many-locals
  """Parse one log, classify its warnings and write the HTML report.

  This lives in its own function (instead of inside main) so that both
  warn.py and the borg job process_gs_logs.py can share it.
  Note that if the arguments to this function change, process_gs_logs.py must
  be updated accordingly.

  The log is read from logfile_object when one is given; otherwise the file
  named by logfile is opened as UTF-8 text.

  Returns a (warning_messages, warning_links, warning_records, header_str)
  tuple.
  """
  if logfile_object is not None:
    parsed = parse_input_file(logfile_object, flags)
  else:
    # No open stream was supplied, so read the log from disk.
    with io.open(logfile, encoding='utf-8') as log:
      parsed = parse_input_file(log, flags)
  warning_lines_and_links, header_str = parsed

  classified = parallel_classify_warnings(
      warning_lines_and_links, flags, project_names, project_patterns,
      warn_patterns, use_google3, create_launch_subprocs_fn,
      classify_warnings_fn)
  warning_messages, warning_links, warning_records = classified

  html_writer.write_html(flags, project_names, warn_patterns, html_path,
                         warning_messages, warning_links, warning_records,
                         header_str)

  return warning_messages, warning_links, warning_records, header_str
| 598 | |
| 599 | |
def common_main(use_google3, create_launch_subprocs_fn, classify_warnings_fn,
                logfile_object=None):
  """Entry point shared by the Google3 and non-Google3 versions of warn.py.

  Parses the command line flags, loads the warning patterns and project list
  for flags.platform, processes the log and writes the CSV output.

  Args:
    use_google3: forwarded to parse_args and process_log.
    create_launch_subprocs_fn: forwarded to process_log.
    classify_warnings_fn: forwarded to process_log.
    logfile_object: optional already-open log; forwarded to process_log.

  Returns:
    A (flags, warning_messages, warning_records, warn_patterns) tuple.
  """
  flags = parse_args(use_google3)
  platform = flags.platform
  warn_patterns = get_warn_patterns(platform)
  project_list = get_project_list(platform)

  project_names = get_project_names(project_list)
  # The second field of each project entry is its regular expression.
  project_patterns = [re.compile(entry[1]) for entry in project_list]

  # html_path=None because we output html below if not outputting CSV
  processed = process_log(
      flags.log, flags, project_names, project_patterns, warn_patterns,
      html_path=None, use_google3=use_google3,
      create_launch_subprocs_fn=create_launch_subprocs_fn,
      classify_warnings_fn=classify_warnings_fn,
      logfile_object=logfile_object)
  warning_messages, warning_links, warning_records, header_str = processed

  html_writer.write_out_csv(flags, warn_patterns, warning_messages,
                            warning_links, warning_records, header_str,
                            project_names)

  # Return these values, so that caller can use them, if desired.
  return flags, warning_messages, warning_records, warn_patterns