seccomp: Generate the policy files at compile time

This change avoids having to run the genseccomp.py script by hand every
time a syscall list is edited; the per-architecture policy files are
instead generated at compile time.

Bug: None
Test: m
Test: find out/soong/ -name x86_64_global_policy.cpp  # Shows files
Test: generated policies are equivalent to original policies
Change-Id: I12461fe0c5fb02c008c1b2503fbb994b8aa2f56b
diff --git a/libc/tools/genseccomp.py b/libc/tools/genseccomp.py
index bb887d6..365e198 100755
--- a/libc/tools/genseccomp.py
+++ b/libc/tools/genseccomp.py
@@ -1,10 +1,14 @@
 #!/usr/bin/env python
+
+import argparse
 import collections
+import logging
 import os
 import re
+import subprocess
 import textwrap
+
 from gensyscalls import SysCallsTxtParser
-from subprocess import Popen, PIPE
 
 
 BPF_JGE = "BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, {0}, {1}, {2})"
@@ -36,72 +40,47 @@
 def merge_names(base_names, whitelist_names, blacklist_names):
   if bool(blacklist_names - base_names):
     raise RuntimeError("Blacklist item not in bionic - aborting " + str(
-        blacklist_name - base_names))
+        blacklist_names - base_names))
 
   return (base_names - blacklist_names) | whitelist_names
 
 
-def get_clang_path():
-  # Inspect the global soong config to figure out the default version of clang.
-  global_go_path = os.path.join(os.environ["ANDROID_BUILD_TOP"],
-                                "build/soong/cc/config/global.go")
-  clang_default_version = None
-  CLANG_DEFAULT_VERSION_RE = re.compile(
-      r'^\s*ClangDefaultVersion\s*=\s*"([^"]+)"\s*$')
-  with open(global_go_path) as f:
-    for line in f:
-      m = CLANG_DEFAULT_VERSION_RE.match(line)
-      if not m:
-        continue
-      clang_default_version = m.group(1)
-      break
-    else:
-      raise Exception('Could not find ClangDefaultVersion in %s' %
-                      global_go_path)
-
-  # Gets the path of the clang prebuilt binary.
-  return os.path.join(os.environ["ANDROID_BUILD_TOP"],
-                      "prebuilts/clang/host/linux-x86", clang_default_version,
-                      "bin/clang")
-
-
-def convert_names_to_NRs(names, header_dir, extra_switches):
-  # Run preprocessor over the __NR_syscall symbols, including unistd.h,
-  # to get the actual numbers
-  prefix = "__SECCOMP_"  # prefix to ensure no name collisions
-  cpp = Popen([get_clang_path(),
-               "-E", "-nostdinc", "-I" + header_dir, "-Ikernel/uapi/"]
-               + extra_switches
-               + ["-"],
-              stdin=PIPE, stdout=PIPE)
-  cpp.stdin.write("#include <asm/unistd.h>\n")
-  for name in names:
-    # In SYSCALLS.TXT, there are two arm-specific syscalls whose names start
-    # with __ARM__NR_. These we must simply write out as is.
-    if not name.startswith("__ARM_NR_"):
-      cpp.stdin.write(prefix + name + ", __NR_" + name + "\n")
-    else:
-      cpp.stdin.write(prefix + name + ", " + name + "\n")
-  content = cpp.communicate()[0].split("\n")
-
+def parse_syscall_NRs(names_path):
   # The input is now the preprocessed source file. This will contain a lot
   # of junk from the preprocessor, but our lines will be in the format:
   #
-  #     __SECCOMP_${NAME}, (0 + value)
+  #    #define __(ARM_)?NR_${NAME} ${VALUE}
+  #
+  # Where ${VALUE} is a preprocessor expression.
 
-  syscalls = []
-  for line in content:
-    if not line.startswith(prefix):
+  constant_re = re.compile(
+      r'^\s*#define\s+([A-Za-z_][A-Za-z0-9_]+)\s+(.+)\s*$')
+  token_re = re.compile(r'\b[A-Za-z_][A-Za-z0-9_]+\b')
+  constants = {}
+  with open(names_path) as f:
+    for line in f:
+      m = constant_re.match(line)
+      if not m:
+        continue
+      try:
+        name = m.group(1)
+        # eval() folds the arithmetic. NOTE: only safe on build-generated input.
+        value = eval(token_re.sub(lambda x: str(constants[x.group(0)]),
+                                  m.group(2)))
+
+        constants[name] = value
+      except Exception:
+        logging.debug('Failed to parse %s', line)
+        # Best effort: a #define that does not evaluate here is skipped.
+
+  syscalls = {}
+  for name, value in constants.iteritems():
+    if not name.startswith("__NR_") and not name.startswith("__ARM_NR"):
       continue
-
-    # We might pick up extra whitespace during preprocessing, so best to strip.
-    name, value = [w.strip() for w in line.split(",")]
-    name = name[len(prefix):]
-
-    # Note that some of the numbers were expressed as base + offset, so we
-    # need to eval, not just int
-    value = eval(value)
-    syscalls.append((name, value))
+    if name.startswith("__NR_"):
+      # Remove the __NR_ prefix
+      name = name[len("__NR_"):]
+    syscalls[name] = value
 
   return syscalls
 
@@ -160,7 +139,6 @@
       bpf[i] = statement.format(fail=str(len(bpf) - i),
                                 allow=str(len(bpf) - i - 1))
 
-
   # Add the allow calls at the end. If the syscall is not matched, we will
   # continue. This allows the user to choose to match further syscalls, and
   # also to choose the action when we want to block
@@ -177,14 +155,15 @@
   else:
     name_modifier = ""
   header = textwrap.dedent("""\
-    // Autogenerated file - edit at your peril!!
+    // File autogenerated by {self_path} - edit at your peril!!
 
     #include <linux/filter.h>
     #include <errno.h>
 
-    #include "seccomp_bpfs.h"
+    #include "seccomp/seccomp_bpfs.h"
     const sock_filter {architecture}_{suffix}filter[] = {{
-    """).format(architecture=architecture,suffix=name_modifier)
+    """).format(self_path=os.path.basename(__file__), architecture=architecture,
+                suffix=name_modifier)
 
   footer = textwrap.dedent("""\
 
@@ -195,89 +174,78 @@
   return header + "\n".join(bpf) + footer
 
 
-def construct_bpf(names, architecture, header_dir, extra_switches,
-                  name_modifier):
-  syscalls = convert_names_to_NRs(names, header_dir, extra_switches)
+def construct_bpf(syscalls, architecture, name_modifier):
   ranges = convert_NRs_to_ranges(syscalls)
   bpf = convert_ranges_to_bpf(ranges)
   return convert_bpf_to_output(bpf, architecture, name_modifier)
 
 
-# final syscalls = base - blacklists + whitelists
-ANDROID_SYSTEM_SYSCALL_FILES = {
-    "base": "SYSCALLS.TXT",
-    "whitelists": [
-        "SECCOMP_WHITELIST_COMMON.TXT",
-        "SECCOMP_WHITELIST_SYSTEM.TXT"],
-    "blacklists": ["SECCOMP_BLACKLIST_COMMON.TXT"]
-}
-
-ANDROID_APP_SYSCALL_FILES = {
-    "base": "SYSCALLS.TXT",
-    "whitelists": [
-        "SECCOMP_WHITELIST_COMMON.TXT",
-        "SECCOMP_WHITELIST_APP.TXT"],
-    "blacklists": [
-        "SECCOMP_BLACKLIST_COMMON.TXT",
-        "SECCOMP_BLACKLIST_APP.TXT"]
-}
-
-ANDROID_GLOBAL_SYSCALL_FILES = {
-    "base": "SYSCALLS.TXT",
-    "whitelists": [
-        "SECCOMP_WHITELIST_COMMON.TXT",
-        "SECCOMP_WHITELIST_SYSTEM.TXT",
-        "SECCOMP_WHITELIST_APP.TXT",
-        "SECCOMP_WHITELIST_GLOBAL.TXT"],
-    "blacklists": ["SECCOMP_BLACKLIST_COMMON.TXT"]
-}
-
-
-POLICY_CONFIGS = [("arm", "kernel/uapi/asm-arm", []),
-                  ("arm64", "kernel/uapi/asm-arm64", []),
-                  ("x86", "kernel/uapi/asm-x86", ["-D__i386__"]),
-                  ("x86_64", "kernel/uapi/asm-x86", []),
-                  ("mips", "kernel/uapi/asm-mips", ["-D_MIPS_SIM=_MIPS_SIM_ABI32"]),
-                  ("mips64", "kernel/uapi/asm-mips", ["-D_MIPS_SIM=_MIPS_SIM_ABI64"])]
-
-
-def set_dir():
-  # Set working directory for predictable results
-  os.chdir(os.path.join(os.environ["ANDROID_BUILD_TOP"], "bionic/libc"))
-
-
-def gen_policy(syscall_files, name_modifier):
-  for arch, header_path, switches in POLICY_CONFIGS:
-    base_names = load_syscall_names_from_file(syscall_files["base"], arch)
+def gen_policy(name_modifier, out_dir, base_syscall_file, syscall_files, syscall_NRs):
+  for arch in ('arm', 'arm64', 'mips', 'mips64', 'x86', 'x86_64'):
+    base_names = load_syscall_names_from_file(base_syscall_file, arch)
     whitelist_names = set()
-    for f in syscall_files["whitelists"]:
-      whitelist_names |= load_syscall_names_from_file(f, arch)
     blacklist_names = set()
-    for f in syscall_files["blacklists"]:
-      blacklist_names |= load_syscall_names_from_file(f, arch)
+    for f in syscall_files:
+      if "blacklist" in f.lower():
+        blacklist_names |= load_syscall_names_from_file(f, arch)
+      else:
+        whitelist_names |= load_syscall_names_from_file(f, arch)
 
-    names = merge_names(base_names, whitelist_names, blacklist_names)
-    output = construct_bpf(names, arch, header_path, switches, name_modifier)
+    allowed_syscalls = []
+    for name in merge_names(base_names, whitelist_names, blacklist_names):
+      try:
+        allowed_syscalls.append((name, syscall_NRs[arch][name]))
+      except:
+        logging.exception("Failed to find %s in %s", name, arch)
+        raise
+    output = construct_bpf(allowed_syscalls, arch, name_modifier)
 
     # And output policy
     existing = ""
     filename_modifier = "_" + name_modifier if name_modifier else ""
-    output_path = "seccomp/{}{}_policy.cpp".format(arch, filename_modifier)
-    if os.path.isfile(output_path):
-      existing = open(output_path).read()
-    if output == existing:
-      print "File " + output_path + " not changed."
-    else:
-      with open(output_path, "w") as output_file:
-        output_file.write(output)
-      print "Generated file " + output_path
+    output_path = os.path.join(out_dir,
+                               "{}{}_policy.cpp".format(arch, filename_modifier))
+    with open(output_path, "w") as output_file:
+      output_file.write(output)
 
 
 def main():
-  set_dir()
-  gen_policy(ANDROID_SYSTEM_SYSCALL_FILES, 'system')
-  gen_policy(ANDROID_APP_SYSCALL_FILES, 'app')
-  gen_policy(ANDROID_GLOBAL_SYSCALL_FILES, 'global')
+  """Parse arguments, load the syscall number maps and generate policies."""
+  parser = argparse.ArgumentParser(description="Generates a seccomp-bpf policy")
+  parser.add_argument("--verbose", "-v", action="store_true",
+                      help="Enables verbose logging.")
+  parser.add_argument("--name-modifier",
+                      help=("Specifies the name modifier for the policy. "
+                            "One of {app,global,system}."))
+  parser.add_argument("--out-dir",
+                      help="The output directory for the policy files")
+  parser.add_argument("base_file", metavar="base-file", type=str,
+                      help="The path of the base syscall list (SYSCALLS.TXT).")
+  parser.add_argument("files", metavar="FILE", type=str, nargs="+",
+                      help=("The path of the input files. In order to "
+                            "simplify the build rules, it can take any of the "
+                            "following files: \n"
+                            "* /blacklist.*\.txt$/ syscall blacklist.\n"
+                            "* /whitelist.*\.txt$/ syscall whitelist.\n"
+                            "* otherwise, syscall name-number mapping.\n"))
+  args = parser.parse_args()
+
+  logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)
+
+  syscall_files = []
+  syscall_NRs = {}
+  for filename in args.files:
+    if filename.lower().endswith('.txt'):
+      syscall_files.append(filename)
+    else:
+      m = re.search(r"libseccomp_gen_syscall_nrs_([^/]+)", filename)
+      if not m:
+        parser.error("Cannot infer the architecture from %r" % filename)
+      syscall_NRs[m.group(1)] = parse_syscall_NRs(filename)
+
+  gen_policy(name_modifier=args.name_modifier, out_dir=args.out_dir,
+             syscall_NRs=syscall_NRs, base_syscall_file=args.base_file,
+             syscall_files=syscall_files)  # only the .txt lists, not NR maps
 
 
 if __name__ == "__main__":