Fix pylint warnings and now prefer python3

* Read/write warning messages as utf-8 strings.
* Remove raw UTF-8 characters embedded in the file; use \u escapes instead.

Test: ./warn.py --csvpath warnings.csv build.log > warnings.html
Change-Id: Ie51700721a32bede1d3b250f4a42fd8facd6af75
diff --git a/tools/warn.py b/tools/warn.py
index c710164..9389b7d 100755
--- a/tools/warn.py
+++ b/tools/warn.py
@@ -1,5 +1,5 @@
 #!/usr/bin/python
-# This file uses the following encoding: utf-8
+# Prefer python3, but also works with python2.
 
 """Grep warnings messages and output HTML tables or warning counts in CSV.
 
@@ -74,9 +74,11 @@
 #   escape_string, strip_escape_string, emit_warning_arrays
 #   emit_js_data():
 
+from __future__ import print_function
 import argparse
 import cgi
 import csv
+import io
 import multiprocessing
 import os
 import re
@@ -540,7 +542,7 @@
     {'category': 'java',
      'severity': Severity.LOW,
      'description':
-         'Java: Use Java\'s utility functional interfaces instead of Function\u003cA, B> for primitive types.',
+         u'Java: Use Java\'s utility functional interfaces instead of Function\u003cA, B> for primitive types.',
      'patterns': [r".*: warning: \[LambdaFunctionalInterface\] .+"]},
     {'category': 'java',
      'severity': Severity.LOW,
@@ -1270,7 +1272,7 @@
     {'category': 'java',
      'severity': Severity.MEDIUM,
      'description':
-         'Java: Prefer the short-circuiting boolean operators \u0026\u0026 and || to \u0026 and |.',
+         u'Java: Prefer the short-circuiting boolean operators \u0026\u0026 and || to \u0026 and |.',
      'patterns': [r".*: warning: \[ShortCircuitBoolean\] .+"]},
     {'category': 'java',
      'severity': Severity.MEDIUM,
@@ -1535,7 +1537,7 @@
     {'category': 'java',
      'severity': Severity.HIGH,
      'description':
-         'Java:  Implementing \'Comparable\u003cT>\' where T is not compatible with the implementing class.',
+         u'Java:  Implementing \'Comparable\u003cT>\' where T is not compatible with the implementing class.',
      'patterns': [r".*: warning: \[ComparableType\] .+"]},
     {'category': 'java',
      'severity': Severity.HIGH,
@@ -1790,7 +1792,7 @@
     {'category': 'java',
      'severity': Severity.HIGH,
      'description':
-         'Java: Path implements Iterable\u003cPath>; prefer Collection\u003cPath> for clarity',
+         u'Java: Path implements Iterable\u003cPath>; prefer Collection\u003cPath> for clarity',
      'patterns': [r".*: warning: \[IterablePathParameter\] .+"]},
     {'category': 'java',
      'severity': Severity.HIGH,
@@ -2922,17 +2924,17 @@
 
 
 def dump_html_prologue(title):
-  print '<html>\n<head>'
-  print '<title>' + title + '</title>'
-  print html_head_scripts
+  print('<html>\n<head>')
+  print('<title>' + title + '</title>')
+  print(html_head_scripts)
   emit_stats_by_project()
-  print '</head>\n<body>'
-  print html_big(title)
-  print '<p>'
+  print('</head>\n<body>')
+  print(html_big(title))
+  print('<p>')
 
 
 def dump_html_epilogue():
-  print '</body>\n</head>\n</html>'
+  print('</body>\n</head>\n</html>')
 
 
 def sort_warnings():
@@ -2943,6 +2945,7 @@
 def emit_stats_by_project():
   """Dump a google chart table of warnings per project and severity."""
   # warnings[p][s] is number of warnings in project p of severity s.
+  # pylint:disable=g-complex-comprehension
   warnings = {p: {s: 0 for s in Severity.range} for p in project_names}
   for i in warn_patterns:
     s = i['severity']
@@ -2988,11 +2991,11 @@
       total_all_severities += total_by_severity[s]
   one_row.append(total_all_projects)
   stats_rows.append(one_row)
-  print '<script>'
+  print('<script>')
   emit_const_string_array('StatsHeader', stats_header)
   emit_const_object_array('StatsRows', stats_rows)
-  print draw_table_javascript
-  print '</script>'
+  print(draw_table_javascript)
+  print('</script>')
 
 
 def dump_stats():
@@ -3008,14 +3011,14 @@
       skipped += len(i['members'])
     else:
       known += len(i['members'])
-  print 'Number of classified warnings: <b>' + str(known) + '</b><br>'
-  print 'Number of skipped warnings: <b>' + str(skipped) + '</b><br>'
-  print 'Number of unclassified warnings: <b>' + str(unknown) + '</b><br>'
+  print('Number of classified warnings: <b>' + str(known) + '</b><br>')
+  print('Number of skipped warnings: <b>' + str(skipped) + '</b><br>')
+  print('Number of unclassified warnings: <b>' + str(unknown) + '</b><br>')
   total = unknown + known + skipped
   extra_msg = ''
   if total < 1000:
     extra_msg = ' (low count may indicate incremental build)'
-  print 'Total number of warnings: <b>' + str(total) + '</b>' + extra_msg
+  print('Total number of warnings: <b>' + str(total) + '</b>' + extra_msg)
 
 
 # New base table of warnings, [severity, warn_id, project, warning_message]
@@ -3029,14 +3032,14 @@
 #     id for each warning pattern
 #     sort by project, severity, warn_id, warning_message
 def emit_buttons():
-  print ('<button class="button" onclick="expandCollapse(1);">'
-         'Expand all warnings</button>\n'
-         '<button class="button" onclick="expandCollapse(0);">'
-         'Collapse all warnings</button>\n'
-         '<button class="button" onclick="groupBySeverity();">'
-         'Group warnings by severity</button>\n'
-         '<button class="button" onclick="groupByProject();">'
-         'Group warnings by project</button><br>')
+  print('<button class="button" onclick="expandCollapse(1);">'
+        'Expand all warnings</button>\n'
+        '<button class="button" onclick="expandCollapse(0);">'
+        'Collapse all warnings</button>\n'
+        '<button class="button" onclick="groupBySeverity();">'
+        'Group warnings by severity</button>\n'
+        '<button class="button" onclick="groupByProject();">'
+        'Group warnings by project</button><br>')
 
 
 def all_patterns(category):
@@ -3051,14 +3054,14 @@
   """Show which warnings no longer occur."""
   anchor = 'fixed_warnings'
   mark = anchor + '_mark'
-  print ('\n<br><p style="background-color:lightblue"><b>'
-         '<button id="' + mark + '" '
-         'class="bt" onclick="expand(\'' + anchor + '\');">'
-         '&#x2295</button> Fixed warnings. '
-         'No more occurrences. Please consider turning these into '
-         'errors if possible, before they are reintroduced in to the build'
-         ':</b></p>')
-  print '<blockquote>'
+  print('\n<br><p style="background-color:lightblue"><b>'
+        '<button id="' + mark + '" '
+        'class="bt" onclick="expand(\'' + anchor + '\');">'
+        '&#x2295</button> Fixed warnings. '
+        'No more occurrences. Please consider turning these into '
+        'errors if possible, before they are reintroduced in to the build'
+        ':</b></p>')
+  print('<blockquote>')
   fixed_patterns = []
   for i in warn_patterns:
     if not i['members']:
@@ -3066,16 +3069,16 @@
                             all_patterns(i) + ')')
     if i['option']:
       fixed_patterns.append(' ' + i['option'])
-  fixed_patterns.sort()
-  print '<div id="' + anchor + '" style="display:none;"><table>'
+  fixed_patterns = sorted(fixed_patterns)
+  print('<div id="' + anchor + '" style="display:none;"><table>')
   cur_row_class = 0
   for text in fixed_patterns:
     cur_row_class = 1 - cur_row_class
     # remove last '\n'
     t = text[:-1] if text[-1] == '\n' else text
-    print '<tr><td class="c' + str(cur_row_class) + '">' + t + '</td></tr>'
-  print '</table></div>'
-  print '</blockquote>'
+    print('<tr><td class="c' + str(cur_row_class) + '">' + t + '</td></tr>')
+  print('</table></div>')
+  print('</blockquote>')
 
 
 def find_project_index(line):
@@ -3187,8 +3190,9 @@
 def normalize_warning_line(line):
   """Normalize file path relative to android_root in a warning line."""
   # replace fancy quotes with plain ol' quotes
-  line = line.replace('‘', "'")
-  line = line.replace('’', "'")
+  line = re.sub(u'[\u2018\u2019]', '\'', line)
+  # replace non-ASCII chars with spaces
+  line = re.sub(u'[^\x00-\x7f]', ' ', line)
   line = line.strip()
   first_column = line.find(':')
   if first_column > 0:
@@ -3246,21 +3250,22 @@
 
 
 def emit_warning_array(name):
-  print 'var warning_{} = ['.format(name)
+  print('var warning_{} = ['.format(name))
   for i in range(len(warn_patterns)):
-    print '{},'.format(warn_patterns[i][name])
-  print '];'
+    print('{},'.format(warn_patterns[i][name]))
+  print('];')
 
 
 def emit_warning_arrays():
   emit_warning_array('severity')
-  print 'var warning_description = ['
+  print('var warning_description = [')
   for i in range(len(warn_patterns)):
     if warn_patterns[i]['members']:
-      print '"{}",'.format(escape_string(warn_patterns[i]['description']))
+      print('"{}",'.format(escape_string(warn_patterns[i]['description'])))
     else:
-      print '"",'  # no such warning
-  print '];'
+      print('"",')  # no such warning
+  print('];')
+
 
 scripts_for_warning_groups = """
   function compareMessages(x1, x2) { // of the same warning type
@@ -3393,39 +3398,42 @@
 
 # Emit a JavaScript const string
 def emit_const_string(name, value):
-  print 'const ' + name + ' = "' + escape_string(value) + '";'
+  print('const ' + name + ' = "' + escape_string(value) + '";')
 
 
 # Emit a JavaScript const integer array.
 def emit_const_int_array(name, array):
-  print 'const ' + name + ' = ['
+  print('const ' + name + ' = [')
   for n in array:
-    print str(n) + ','
-  print '];'
+    print(str(n) + ',')
+  print('];')
 
 
 # Emit a JavaScript const string array.
 def emit_const_string_array(name, array):
-  print 'const ' + name + ' = ['
+  print('const ' + name + ' = [')
   for s in array:
-    print '"' + strip_escape_string(s) + '",'
-  print '];'
+    print('"' + strip_escape_string(s) + '",')
+  print('];')
 
 
 # Emit a JavaScript const string array for HTML.
 def emit_const_html_string_array(name, array):
-  print 'const ' + name + ' = ['
+  print('const ' + name + ' = [')
   for s in array:
-    print '"' + cgi.escape(strip_escape_string(s)) + '",'
-  print '];'
+    # Not using html.escape yet, to work for both python 2 and 3,
+    # until all users switch to python 3.
+    # pylint:disable=deprecated-method
+    print('"' + cgi.escape(strip_escape_string(s)) + '",')
+  print('];')
 
 
 # Emit a JavaScript const object array.
 def emit_const_object_array(name, array):
-  print 'const ' + name + ' = ['
+  print('const ' + name + ' = [')
   for x in array:
-    print str(x) + ','
-  print '];'
+    print(str(x) + ',')
+  print('];')
 
 
 def emit_js_data():
@@ -3471,18 +3479,18 @@
   dump_html_prologue('Warnings for ' + platform_version + ' - ' +
                      target_product + ' - ' + target_variant)
   dump_stats()
-  print '<br><div id="stats_table"></div><br>'
-  print '\n<script>'
+  print('<br><div id="stats_table"></div><br>')
+  print('\n<script>')
   emit_js_data()
-  print scripts_for_warning_groups
-  print '</script>'
+  print(scripts_for_warning_groups)
+  print('</script>')
   emit_buttons()
   # Warning messages are grouped by severities or project names.
-  print '<br><div id="warning_groups"></div>'
+  print('<br><div id="warning_groups"></div>')
   if args.byproject:
-    print '<script>groupByProject();</script>'
+    print('<script>groupByProject();</script>')
   else:
-    print '<script>groupBySeverity();</script>'
+    print('<script>groupBySeverity();</script>')
   dump_fixed()
   dump_html_epilogue()
 
@@ -3506,8 +3514,7 @@
       warning = kind + ': ' + description_for_csv(i)
       writer.writerow([n, '', warning])
       # print number of warnings for each project, ordered by project name.
-      projects = i['projects'].keys()
-      projects.sort()
+      projects = sorted(i['projects'].keys())
       for p in projects:
         writer.writerow([i['projects'][p], p, warning])
   writer.writerow([total, '', kind + ' warnings'])
@@ -3526,7 +3533,9 @@
 
 
 def main():
-  warning_lines = parse_input_file(open(args.buildlog, 'r'))
+  # We must use the 'utf-8' codec to parse non-ASCII characters in warnings.
+  warning_lines = parse_input_file(
+      io.open(args.buildlog, mode='r', encoding='utf-8'))
   parallel_classify_warnings(warning_lines)
   # If a user pases a csv path, save the fileoutput to the path
   # If the user also passed gencsv write the output to stdout