Convert genseccomp.py to Python 3.

The genseccomp tests haven't been run since at least 2018. Deleted the
ones that are testing APIs that no longer exist or have been
refactored to take very different inputs.

Test: treehugger
Test: pytest tools
Bug: None
Change-Id: Iaf6b6b6a2e922b181a457a74eb4b5abe90425dfb
diff --git a/libc/tools/Android.bp b/libc/tools/Android.bp
index 2efb8a6..c93e004 100644
--- a/libc/tools/Android.bp
+++ b/libc/tools/Android.bp
@@ -16,15 +16,6 @@
     data: [
         ":all_kernel_uapi_headers",
     ],
-
-    version: {
-        py2: {
-            enabled: true,
-        },
-        py3: {
-            enabled: false,
-        },
-    },
 }
 
 python_binary_host {
diff --git a/libc/tools/genseccomp.py b/libc/tools/genseccomp.py
index 89eeb44..a78f6c1 100755
--- a/libc/tools/genseccomp.py
+++ b/libc/tools/genseccomp.py
@@ -1,11 +1,10 @@
 #!/usr/bin/env python
 
 import argparse
-import collections
 import logging
+import operator
 import os
 import re
-import subprocess
 import textwrap
 
 from gensyscalls import SupportedArchitectures, SysCallsTxtParser
@@ -16,7 +15,7 @@
 BPF_ALLOW = "BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW)"
 
 
-class SyscallRange(object):
+class SyscallRange:
   def __init__(self, name, value):
     self.names = [name]
     self.begin = value
@@ -35,23 +34,23 @@
 def load_syscall_names_from_file(file_path, architecture):
   parser = SysCallsTxtParser()
   parser.parse_open_file(open(file_path))
-  return set([x["name"] for x in parser.syscalls if x.get(architecture)])
+  return {x["name"] for x in parser.syscalls if x.get(architecture)}
 
 
 def load_syscall_priorities_from_file(file_path):
   format_re = re.compile(r'^\s*([A-Za-z_][A-Za-z0-9_]+)\s*$')
   priorities = []
-  with open(file_path) as f:
-    for line in f:
-      m = format_re.match(line)
-      if not m:
+  with open(file_path) as priority_file:
+    for line in priority_file:
+      match = format_re.match(line)
+      if match is None:
         continue
       try:
-        name = m.group(1)
+        name = match.group(1)
         priorities.append(name)
-      except:
-        logging.debug('Failed to parse %s from %s', (line, file_path))
-        pass
+      except IndexError:
+        # TODO: This should be impossible because it wouldn't have matched?
+        logging.exception('Failed to parse %s from %s', line, file_path)
 
   return priorities
 
@@ -93,7 +92,7 @@
   with open(names_path) as f:
     for line in f:
       m = constant_re.match(line)
-      if not m:
+      if m is None:
         continue
       try:
         name = m.group(1)
@@ -102,12 +101,21 @@
                                   m.group(2)))
 
         constants[name] = value
-      except:
+      except:  # pylint: disable=bare-except
+        # TODO: This seems wrong.
+        # KeyError doesn't seem like the error the original author was trying
+        # to catch. It looks like the intent was to catch IndexError from
+        # match.group() for non-matching lines, but that's impossible because
+        # the match object is checked and continued if not matched. What
+        # actually happens is that KeyError is thrown by constants[x.group(0)]
+        # on at least the first run because the dict is empty.
+        #
+        # It's also catching syntax errors because not all C integer literals
+        # are valid Python integer literals, e.g. 10L.
         logging.debug('Failed to parse %s', line)
-        pass
 
   syscalls = {}
-  for name, value in constants.iteritems():
+  for name, value in constants.items():
     if not name.startswith("__NR_") and not name.startswith("__ARM_NR"):
       continue
     if name.startswith("__NR_"):
@@ -120,7 +128,7 @@
 
 def convert_NRs_to_ranges(syscalls):
   # Sort the values so we convert to ranges and binary chop
-  syscalls = sorted(syscalls, lambda x, y: cmp(x[1], y[1]))
+  syscalls = sorted(syscalls, key=operator.itemgetter(1))
 
   # Turn into a list of ranges. Keep the names for the comments
   ranges = []
@@ -148,12 +156,12 @@
     # We will replace {fail} and {allow} with appropriate range jumps later
     return [BPF_JGE.format(ranges[0].end, "{fail}", "{allow}") +
             ", //" + "|".join(ranges[0].names)]
-  else:
-    half = (len(ranges) + 1) / 2
-    first = convert_to_intermediate_bpf(ranges[:half])
-    second = convert_to_intermediate_bpf(ranges[half:])
-    jump = [BPF_JGE.format(ranges[half].begin, len(first), 0) + ","]
-    return jump + first + second
+
+  half = (len(ranges) + 1) // 2
+  first = convert_to_intermediate_bpf(ranges[:half])
+  second = convert_to_intermediate_bpf(ranges[half:])
+  jump = [BPF_JGE.format(ranges[half].begin, len(first), 0) + ","]
+  return jump + first + second
 
 
 # Converts the prioritized syscalls to a bpf list that  is prepended to the
@@ -162,7 +170,7 @@
 # immediately
 def convert_priority_to_intermediate_bpf(priority_syscalls):
   result = []
-  for i, syscall in enumerate(priority_syscalls):
+  for syscall in priority_syscalls:
     result.append(BPF_JEQ.format(syscall[1], "{allow}", 0) +
                   ", //" + syscall[0])
   return result
@@ -227,7 +235,8 @@
   return convert_bpf_to_output(bpf, architecture, name_modifier)
 
 
-def gen_policy(name_modifier, out_dir, base_syscall_file, syscall_files, syscall_NRs, priority_file):
+def gen_policy(name_modifier, out_dir, base_syscall_file, syscall_files,
+               syscall_NRs, priority_file):
   for arch in SupportedArchitectures:
     base_names = load_syscall_names_from_file(base_syscall_file, arch)
     allowlist_names = set()
@@ -251,7 +260,6 @@
     output = construct_bpf(allowed_syscalls, arch, name_modifier, priorities)
 
     # And output policy
-    existing = ""
     filename_modifier = "_" + name_modifier if name_modifier else ""
     output_path = os.path.join(out_dir,
                                "{}{}_policy.cpp".format(arch, filename_modifier))
@@ -274,8 +282,8 @@
                       help=("The path of the input files. In order to "
                             "simplify the build rules, it can take any of the "
                             "following files: \n"
-                            "* /blocklist.*\.txt$/ syscall blocklist.\n"
-                            "* /allowlist.*\.txt$/ syscall allowlist.\n"
+                            "* /blocklist.*\\.txt$/ syscall blocklist.\n"
+                            "* /allowlist.*\\.txt$/ syscall allowlist.\n"
                             "* /priority.txt$/ priorities for bpf rules.\n"
                             "* otherwise, syscall name-number mapping.\n"))
   args = parser.parse_args()
diff --git a/libc/tools/gensyscalls.py b/libc/tools/gensyscalls.py
index 0e0e25f..d8d4302 100755
--- a/libc/tools/gensyscalls.py
+++ b/libc/tools/gensyscalls.py
@@ -5,7 +5,6 @@
 # makefiles used to build all the stubs.
 
 import atexit
-import commands
 import filecmp
 import glob
 import re
@@ -315,7 +314,7 @@
         self.lineno   = 0
 
     def E(self, msg):
-        print "%d: %s" % (self.lineno, msg)
+        print("%d: %s" % (self.lineno, msg))
 
     def parse_line(self, line):
         """ parse a syscall spec line.
@@ -340,7 +339,7 @@
             return
 
         syscall_func = return_type[-1]
-        return_type  = string.join(return_type[:-1],' ')
+        return_type  = ' '.join(return_type[:-1])
         socketcall_id = -1
 
         pos_colon = syscall_func.find(':')
@@ -372,13 +371,13 @@
             alias_delim = syscall_name.find('|')
             if alias_delim > 0:
                 syscall_name = syscall_name[:alias_delim]
-            syscall_aliases = string.split(alias_list, ',')
+            syscall_aliases = alias_list.split(',')
         else:
             syscall_aliases = []
 
         if pos_rparen > pos_lparen+1:
             syscall_params = line[pos_lparen+1:pos_rparen].split(',')
-            params         = string.join(syscall_params,',')
+            params         = ','.join(syscall_params)
         else:
             syscall_params = []
             params         = "void"
@@ -398,7 +397,7 @@
             for arch in SupportedArchitectures:
                 t[arch] = True
         else:
-            for arch in string.split(arch_list, ','):
+            for arch in arch_list.split(','):
                 if arch == "lp32":
                     for arch in SupportedArchitectures:
                         if "64" not in arch:
@@ -464,7 +463,7 @@
 
 if __name__ == "__main__":
     if len(sys.argv) < 2:
-      print "Usage: gensyscalls.py ARCH SOURCE_FILE"
+      print("Usage: gensyscalls.py ARCH SOURCE_FILE")
       sys.exit(1)
 
     arch = sys.argv[1]
diff --git a/libc/tools/mypy.ini b/libc/tools/mypy.ini
new file mode 100644
index 0000000..0269354
--- /dev/null
+++ b/libc/tools/mypy.ini
@@ -0,0 +1,3 @@
+[mypy]
+# TODO: Enable.
+# disallow_untyped_defs = True
diff --git a/libc/tools/pylintrc b/libc/tools/pylintrc
new file mode 100644
index 0000000..df319e3
--- /dev/null
+++ b/libc/tools/pylintrc
@@ -0,0 +1,8 @@
+[MESSAGES CONTROL]
+disable=
+    eval-used,
+    design,
+    fixme,
+    invalid-name,
+    logging-fstring-interpolation,
+    missing-docstring
diff --git a/libc/tools/test_genseccomp.py b/libc/tools/test_genseccomp.py
index 812218e..8bd3517 100755
--- a/libc/tools/test_genseccomp.py
+++ b/libc/tools/test_genseccomp.py
@@ -1,176 +1,65 @@
 #!/usr/bin/env python
 # Unit tests for genseccomp.py
 
-import cStringIO
 import textwrap
 import unittest
 
 import genseccomp
 
 class TestGenseccomp(unittest.TestCase):
-  def setUp(self):
-    genseccomp.set_dir()
-
-  def get_config(self, arch):
-    for i in genseccomp.POLICY_CONFIGS:
-      if i[0] == arch:
-        return i
-    self.fail("No such architecture")
-
-  def get_headers(self, arch):
-    return self.get_config(arch)[1]
-
-  def get_switches(self, arch):
-    return self.get_config(arch)[2]
-
-  def test_get_names(self):
-    bionic = cStringIO.StringIO(textwrap.dedent("""\
-int __llseek:_llseek(int, unsigned long, unsigned long, off64_t*, int) arm,x86
-int         fchown:fchown(int, uid_t, gid_t)    arm64,x86_64
-    """))
-
-    allowlist = cStringIO.StringIO(textwrap.dedent("""\
-ssize_t     read(int, void*, size_t)        all
-    """))
-
-    empty = cStringIO.StringIO(textwrap.dedent("""\
-    """))
-
-    names = genseccomp.get_names([bionic, allowlist, empty], "arm")
-    bionic.seek(0)
-    allowlist.seek(0)
-    empty.seek(0)
-    names64 = genseccomp.get_names([bionic, allowlist, empty], "arm64")
-    bionic.seek(0)
-    allowlist.seek(0)
-    empty.seek(0)
-
-    self.assertIn("fchown", names64)
-    self.assertNotIn("fchown", names)
-    self.assertIn("_llseek", names)
-    self.assertNotIn("_llseek", names64)
-    self.assertIn("read", names)
-    self.assertIn("read", names64)
-
-    # Blocklist item must be in bionic
-    blocklist = cStringIO.StringIO(textwrap.dedent("""\
-int         fchown2:fchown2(int, uid_t, gid_t)    arm64,x86_64
-    """))
-    with self.assertRaises(RuntimeError):
-      genseccomp.get_names([bionic, allowlist, blocklist], "arm")
-    bionic.seek(0)
-    allowlist.seek(0)
-    blocklist.seek(0)
-
-    # Test blocklist item is removed
-    blocklist = cStringIO.StringIO(textwrap.dedent("""\
-int         fchown:fchown(int, uid_t, gid_t)    arm64,x86_64
-    """))
-    names = genseccomp.get_names([bionic, allowlist, blocklist], "arm64")
-    bionic.seek(0)
-    allowlist.seek(0)
-    blocklist.seek(0)
-    self.assertIn("read", names)
-    self.assertNotIn("fchown", names)
-
-    # Blocklist item must not be in allowlist
-    allowlist = cStringIO.StringIO(textwrap.dedent("""\
-int         fchown:fchown(int, uid_t, gid_t)    arm64,x86_64
-    """))
-    with self.assertRaises(RuntimeError):
-      genseccomp.get_names([empty, allowlist, blocklist], "arm")
-    empty.seek(0)
-    allowlist.seek(0)
-    blocklist.seek(0)
-
-    # No dups in bionic and allowlist
-    allowlist = cStringIO.StringIO(textwrap.dedent("""\
-int __llseek:_llseek(int, unsigned long, unsigned long, off64_t*, int) arm,x86
-    """))
-    with self.assertRaises(RuntimeError):
-      genseccomp.get_names([bionic, allowlist, empty], "arm")
-    bionic.seek(0)
-    allowlist.seek(0)
-    empty.seek(0)
-
-  def test_convert_names_to_NRs(self):
-    self.assertEquals(genseccomp.convert_names_to_NRs(["open"],
-                                                      self.get_headers("arm"),
-                                                      self.get_switches("arm")),
-                      [("open", 5)])
-
-    self.assertEquals(genseccomp.convert_names_to_NRs(["__ARM_NR_set_tls"],
-                                                      self.get_headers("arm"),
-                                                      self.get_switches("arm")),
-                      [('__ARM_NR_set_tls', 983045)])
-
-    self.assertEquals(genseccomp.convert_names_to_NRs(["openat"],
-                                                      self.get_headers("arm64"),
-                                                      self.get_switches("arm64")),
-                      [("openat", 56)])
-
-    self.assertEquals(genseccomp.convert_names_to_NRs(["openat"],
-                                                      self.get_headers("x86"),
-                                                      self.get_switches("x86")),
-                      [("openat", 295)])
-
-    self.assertEquals(genseccomp.convert_names_to_NRs(["openat"],
-                                                      self.get_headers("x86_64"),
-                                                      self.get_switches("x86_64")),
-                      [("openat", 257)])
-
-
   def test_convert_NRs_to_ranges(self):
     ranges = genseccomp.convert_NRs_to_ranges([("b", 2), ("a", 1)])
-    self.assertEquals(len(ranges), 1)
-    self.assertEquals(ranges[0].begin, 1)
-    self.assertEquals(ranges[0].end, 3)
-    self.assertItemsEqual(ranges[0].names, ["a", "b"])
+    self.assertEqual(len(ranges), 1)
+    self.assertEqual(ranges[0].begin, 1)
+    self.assertEqual(ranges[0].end, 3)
+    self.assertEqual(set(ranges[0].names), {"a", "b"})
 
     ranges = genseccomp.convert_NRs_to_ranges([("b", 3), ("a", 1)])
-    self.assertEquals(len(ranges), 2)
-    self.assertEquals(ranges[0].begin, 1)
-    self.assertEquals(ranges[0].end, 2)
-    self.assertItemsEqual(ranges[0].names, ["a"])
-    self.assertEquals(ranges[1].begin, 3)
-    self.assertEquals(ranges[1].end, 4)
-    self.assertItemsEqual(ranges[1].names, ["b"])
+    self.assertEqual(len(ranges), 2)
+    self.assertEqual(ranges[0].begin, 1)
+    self.assertEqual(ranges[0].end, 2)
+    self.assertEqual(set(ranges[0].names), {"a"})
+    self.assertEqual(ranges[1].begin, 3)
+    self.assertEqual(ranges[1].end, 4)
+    self.assertEqual(set(ranges[1].names), {"b"})
 
   def test_convert_to_intermediate_bpf(self):
     ranges = genseccomp.convert_NRs_to_ranges([("b", 2), ("a", 1)])
     bpf = genseccomp.convert_to_intermediate_bpf(ranges)
-    self.assertEquals(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, {fail}, {allow}), //a|b'])
+    self.assertEqual(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, {fail}, {allow}), //a|b'])
 
     ranges = genseccomp.convert_NRs_to_ranges([("b", 3), ("a", 1)])
     bpf = genseccomp.convert_to_intermediate_bpf(ranges)
-    self.assertEquals(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, 1, 0),',
+    self.assertEqual(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, 1, 0),',
                             'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 2, {fail}, {allow}), //a',
                             'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 4, {fail}, {allow}), //b'])
 
   def test_convert_ranges_to_bpf(self):
     ranges = genseccomp.convert_NRs_to_ranges([("b", 2), ("a", 1)])
-    bpf = genseccomp.convert_ranges_to_bpf(ranges)
-    self.assertEquals(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 1, 0, 2),',
+    bpf = genseccomp.convert_ranges_to_bpf(ranges, priority_syscalls=[])
+    self.assertEqual(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 1, 0, 2),',
                             'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, 1, 0), //a|b',
                             'BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),'])
 
     ranges = genseccomp.convert_NRs_to_ranges([("b", 3), ("a", 1)])
-    bpf = genseccomp.convert_ranges_to_bpf(ranges)
-    self.assertEquals(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 1, 0, 4),',
+    bpf = genseccomp.convert_ranges_to_bpf(ranges, priority_syscalls=[])
+    self.assertEqual(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 1, 0, 4),',
                             'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, 1, 0),',
                             'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 2, 2, 1), //a',
                             'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 4, 1, 0), //b',
                             'BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),'])
 
   def test_convert_bpf_to_output(self):
-    output = genseccomp.convert_bpf_to_output(["line1", "line2"], "arm")
+    output = genseccomp.convert_bpf_to_output(["line1", "line2"],
+                                              "arm",
+                                              name_modifier="")
     expected_output = textwrap.dedent("""\
-    // Autogenerated file - edit at your peril!!
+    // File autogenerated by genseccomp.py - edit at your peril!!
 
     #include <linux/filter.h>
     #include <errno.h>
 
-    #include "seccomp_bpfs.h"
+    #include "seccomp/seccomp_bpfs.h"
     const sock_filter arm_filter[] = {
     line1
     line2
@@ -178,43 +67,7 @@
 
     const size_t arm_filter_size = sizeof(arm_filter) / sizeof(struct sock_filter);
     """)
-    self.assertEquals(output, expected_output)
-
-  def test_construct_bpf(self):
-    syscalls = cStringIO.StringIO(textwrap.dedent("""\
-    int __llseek:_llseek(int, unsigned long, unsigned long, off64_t*, int) arm,x86
-    int         fchown:fchown(int, uid_t, gid_t)    arm64,x86_64
-    """))
-
-    allowlist = cStringIO.StringIO(textwrap.dedent("""\
-    ssize_t     read(int, void*, size_t)        all
-    """))
-
-    blocklist = cStringIO.StringIO(textwrap.dedent("""\
-    """))
-
-    syscall_files = [syscalls, allowlist, blocklist]
-    output = genseccomp.construct_bpf(syscall_files, "arm", self.get_headers("arm"),
-                                      self.get_switches("arm"))
-
-    expected_output = textwrap.dedent("""\
-    // Autogenerated file - edit at your peril!!
-
-    #include <linux/filter.h>
-    #include <errno.h>
-
-    #include "seccomp_bpfs.h"
-    const sock_filter arm_filter[] = {
-    BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, 0, 4),
-    BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 140, 1, 0),
-    BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 4, 2, 1), //read
-    BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 141, 1, 0), //_llseek
-    BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
-    };
-
-    const size_t arm_filter_size = sizeof(arm_filter) / sizeof(struct sock_filter);
-    """)
-    self.assertEquals(output, expected_output)
+    self.assertEqual(output, expected_output)
 
 
 if __name__ == '__main__':