Autogenerate single policy from syscalls and whitelist

Bug: 35392119
Bug: 34465958
Test: Check that the device boots and that the same syscalls are blocked as before

Change-Id: I9efa97032c59aebbbfd32e6f0d2d491f6254f0a2
diff --git a/libc/tools/genseccomp.py b/libc/tools/genseccomp.py
index fa6e7e3..7d2b1da 100755
--- a/libc/tools/genseccomp.py
+++ b/libc/tools/genseccomp.py
@@ -5,7 +5,8 @@
 from gensyscalls import SysCallsTxtParser
 
 
-syscall_file = "SYSCALLS.TXT"
+BPF_JGE = "BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, {0}, {1}, {2})"
+BPF_ALLOW = "BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW)"
 
 
 class SyscallRange(object):
@@ -14,6 +15,9 @@
     self.begin = value
     self.end = self.begin + 1
 
+  def __str__(self):
+    return "(%s, %s, %s)" % (self.begin, self.end, self.names)
+
   def add(self, name, value):
     if value != self.end:
       raise ValueError
@@ -21,39 +25,21 @@
     self.names.append(name)
 
 
-def generate_bpf_jge(value, ge_target, less_target):
-  return "BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, {0}, {1}, {2})".format(value, ge_target, less_target)
-
-
-# Converts the sorted ranges of allowed syscalls to a binary tree bpf
-# For a single range, output a simple jump to {fail} or {allow}. We can't set
-# the jump ranges yet, since we don't know the size of the filter, so use a
-# placeholder
-# For multiple ranges, split into two, convert the two halves and output a jump
-# to the correct half
-def convert_to_bpf(ranges):
-  if len(ranges) == 1:
-    # We will replace {fail} and {allow} with appropriate range jumps later
-    return [generate_bpf_jge(ranges[0].end, "{fail}", "{allow}") +
-            ", //" + "|".join(ranges[0].names)]
-  else:
-    half = (len(ranges) + 1) / 2
-    first = convert_to_bpf(ranges[:half])
-    second = convert_to_bpf(ranges[half:])
-    return [generate_bpf_jge(ranges[half].begin, len(first), 0) + ","] + first + second
-
-
-def construct_bpf(architecture, header_dir, output_path):
-  parser = SysCallsTxtParser()
-  parser.parse_file(syscall_file)
-  syscalls = parser.syscalls
+def get_names(syscall_files, architecture):
+  syscalls = []
+  for syscall_file in syscall_files:
+    parser = SysCallsTxtParser()
+    parser.parse_open_file(syscall_file)
+    syscalls += parser.syscalls
 
   # Select only elements matching required architecture
   syscalls = [x for x in syscalls if architecture in x and x[architecture]]
 
   # We only want the name
-  names = [x["name"] for x in syscalls]
+  return [x["name"] for x in syscalls]
 
+
+def convert_names_to_NRs(names, header_dir):
   # Run preprocessor over the __NR_syscall symbols, including unistd.h,
   # to get the actual numbers
   prefix = "__SECCOMP_"  # prefix to ensure no name collisions
@@ -89,6 +75,10 @@
     value = eval(value)
     syscalls.append((name, value))
 
+  return syscalls
+
+
+def convert_NRs_to_ranges(syscalls):
   # Sort the values so we convert to ranges and binary chop
   syscalls = sorted(syscalls, lambda x, y: cmp(x[1], y[1]))
 
@@ -104,8 +94,30 @@
       last_range.add(name, value)
     else:
       ranges.append(SyscallRange(name, value))
+  return ranges
 
-  bpf = convert_to_bpf(ranges)
+
+# Converts the sorted ranges of allowed syscalls to a binary tree of BPF jumps.
+# For a single range, output a simple jump to {fail} or {allow}. We can't set
+# the jump offsets yet, since we don't know the size of the filter, so use
+# placeholders.
+# For multiple ranges, split into two, convert the two halves and output a jump
+# to the correct half.
+def convert_to_intermediate_bpf(ranges):
+  if len(ranges) == 1:
+    # We will replace {fail} and {allow} with appropriate range jumps later
+    return [BPF_JGE.format(ranges[0].end, "{fail}", "{allow}") +
+            ", //" + "|".join(ranges[0].names)]
+  else:
+    half = (len(ranges) + 1) / 2
+    first = convert_to_intermediate_bpf(ranges[:half])
+    second = convert_to_intermediate_bpf(ranges[half:])
+    jump = [BPF_JGE.format(ranges[half].begin, len(first), 0) + ","]
+    return jump + first + second
+
+
+def convert_ranges_to_bpf(ranges):
+  bpf = convert_to_intermediate_bpf(ranges)
 
   # Now we know the size of the tree, we can substitute the {fail} and {allow}
   # placeholders
@@ -121,16 +133,16 @@
                                 allow=str(len(bpf) - i - 1))
 
   # Add check that we aren't off the bottom of the syscalls
-  bpf.insert(0,
-             "BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, " + str(ranges[0].begin) +
-             ", 0, " + str(len(bpf)) + "),")
+  bpf.insert(0, BPF_JGE.format(ranges[0].begin, 0, str(len(bpf))) + ',')
 
   # Add the allow calls at the end. If the syscall is not matched, we will
   # continue. This allows the user to choose to match further syscalls, and
   # also to choose the action when we want to block
-  bpf.append("BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),")
+  bpf.append(BPF_ALLOW + ",")
+  return bpf
 
-  # And output policy
+
+def convert_bpf_to_output(bpf, architecture):
   header = textwrap.dedent("""\
     // Autogenerated file - edit at your peril!!
 
@@ -147,25 +159,52 @@
 
     const size_t {architecture}_filter_size = sizeof({architecture}_filter) / sizeof(struct sock_filter);
     """).format(architecture=architecture)
-  output = header + "\n".join(bpf) + footer
+  return header + "\n".join(bpf) + footer
 
-  existing = ""
-  if os.path.isfile(output_path):
-    existing = open(output_path).read()
-  if output == existing:
-    print "File " + output_path + " not changed."
-  else:
-    with open(output_path, "w") as output_file:
-      output_file.write(output)
 
-    print "Generated file " + output_path
+def construct_bpf(syscall_files, architecture, header_dir):
+  names = get_names(syscall_files, architecture)
+  syscalls = convert_names_to_NRs(names, header_dir)
+  ranges = convert_NRs_to_ranges(syscalls)
+  bpf = convert_ranges_to_bpf(ranges)
+  return convert_bpf_to_output(bpf, architecture)
+
+
+android_syscall_files = ["SYSCALLS.TXT", "SECCOMP_WHITELIST.TXT"]
+arm_headers = "kernel/uapi/asm-arm"
+arm64_headers = "kernel/uapi/asm-arm64"
+arm_architecture = "arm"
+arm64_architecture = "arm64"
+
+
+ANDROID_SYSCALL_FILES = ["SYSCALLS.TXT", "SECCOMP_WHITELIST.TXT"]
+
+POLICY_CONFIGS = [("arm", "kernel/uapi/asm-arm"),
+                  ("arm64", "kernel/uapi/asm-arm64")]
+
+
+def set_dir():
+  # Set working directory for predictable results
+  os.chdir(os.path.join(os.environ["ANDROID_BUILD_TOP"], "bionic/libc"))
 
 
 def main():
-  # Set working directory for predictable results
-  os.chdir(os.path.join(os.environ["ANDROID_BUILD_TOP"], "bionic/libc"))
-  construct_bpf("arm", "kernel/uapi/asm-arm", "seccomp/arm_policy.c")
-  construct_bpf("arm64", "kernel/uapi/asm-arm64", "seccomp/arm64_policy.c")
+  set_dir()
+  for arch, header_path in POLICY_CONFIGS:
+    files = [open(filename) for filename in ANDROID_SYSCALL_FILES]
+    output = construct_bpf(files, arch, header_path)
+
+    # And output policy
+    existing = ""
+    output_path = "seccomp/{}_policy.c".format(arch)
+    if os.path.isfile(output_path):
+      existing = open(output_path).read()
+    if output == existing:
+      print "File " + output_path + " not changed."
+    else:
+      with open(output_path, "w") as output_file:
+        output_file.write(output)
+      print "Generated file " + output_path
 
 
 if __name__ == "__main__":
diff --git a/libc/tools/gensyscalls.py b/libc/tools/gensyscalls.py
index 329184f..f50445c 100755
--- a/libc/tools/gensyscalls.py
+++ b/libc/tools/gensyscalls.py
@@ -500,18 +500,18 @@
 
         logging.debug(t)
 
-
-    def parse_file(self, file_path):
-        logging.debug("parse_file: %s" % file_path)
-        fp = open(file_path)
-        for line in fp.xreadlines():
+    def parse_open_file(self, fp):
+        for line in fp:
             self.lineno += 1
             line = line.strip()
             if not line: continue
             if line[0] == '#': continue
             self.parse_line(line)
 
-        fp.close()
+    def parse_file(self, file_path):
+        logging.debug("parse_file: %s" % file_path)
+        with open(file_path) as fp:
+            self.parse_open_file(fp)
 
 
 class State:
diff --git a/libc/tools/test_genseccomp.py b/libc/tools/test_genseccomp.py
new file mode 100755
index 0000000..de1e5fe
--- /dev/null
+++ b/libc/tools/test_genseccomp.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python
+# Unit tests for genseccomp.py
+
+import cStringIO
+import textwrap
+import unittest
+
+import genseccomp
+
+class TestGenseccomp(unittest.TestCase):
+  def setUp(self):
+    genseccomp.set_dir()
+
+  def get_config(self, arch):
+    for i in genseccomp.POLICY_CONFIGS:
+      if i[0] == arch:
+        return i
+    self.fail("No such architecture")
+
+  def get_headers(self, arch):
+    return self.get_config(arch)[1]
+
+  def test_get_names(self):
+    syscalls = cStringIO.StringIO(textwrap.dedent("""\
+int __llseek:_llseek(int, unsigned long, unsigned long, off64_t*, int) arm,mips,x86
+int         fchown:fchown(int, uid_t, gid_t)    arm64,mips,mips64,x86_64
+    """))
+
+    whitelist = cStringIO.StringIO(textwrap.dedent("""\
+ssize_t     read(int, void*, size_t)        all
+    """))
+
+    syscall_files = [syscalls, whitelist]
+    names = genseccomp.get_names(syscall_files, "arm")
+    for f in syscall_files:
+      f.seek(0)
+    names64 = genseccomp.get_names(syscall_files, "arm64")
+
+    self.assertIn("fchown", names64)
+    self.assertNotIn("fchown", names)
+    self.assertIn("_llseek", names)
+    self.assertNotIn("_llseek", names64)
+    self.assertIn("read", names)
+    self.assertIn("read", names64)
+
+  def test_convert_names_to_NRs(self):
+    self.assertEquals(genseccomp.convert_names_to_NRs(["open"],
+                                                      self.get_headers("arm")),
+                      [("open", 5)])
+
+    self.assertEquals(genseccomp.convert_names_to_NRs(["__ARM_NR_set_tls"],
+                                                      self.get_headers("arm")),
+                      [('__ARM_NR_set_tls', 983045)])
+
+    self.assertEquals(genseccomp.convert_names_to_NRs(["openat"],
+                                                      self.get_headers("arm64")),
+                      [("openat", 56)])
+
+
+  def test_convert_NRs_to_ranges(self):
+    ranges = genseccomp.convert_NRs_to_ranges([("b", 2), ("a", 1)])
+    self.assertEquals(len(ranges), 1)
+    self.assertEquals(ranges[0].begin, 1)
+    self.assertEquals(ranges[0].end, 3)
+    self.assertItemsEqual(ranges[0].names, ["a", "b"])
+
+    ranges = genseccomp.convert_NRs_to_ranges([("b", 3), ("a", 1)])
+    self.assertEquals(len(ranges), 2)
+    self.assertEquals(ranges[0].begin, 1)
+    self.assertEquals(ranges[0].end, 2)
+    self.assertItemsEqual(ranges[0].names, ["a"])
+    self.assertEquals(ranges[1].begin, 3)
+    self.assertEquals(ranges[1].end, 4)
+    self.assertItemsEqual(ranges[1].names, ["b"])
+
+  def test_convert_to_intermediate_bpf(self):
+    ranges = genseccomp.convert_NRs_to_ranges([("b", 2), ("a", 1)])
+    bpf = genseccomp.convert_to_intermediate_bpf(ranges)
+    self.assertEquals(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, {fail}, {allow}), //a|b'])
+
+    ranges = genseccomp.convert_NRs_to_ranges([("b", 3), ("a", 1)])
+    bpf = genseccomp.convert_to_intermediate_bpf(ranges)
+    self.assertEquals(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, 1, 0),',
+                            'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 2, {fail}, {allow}), //a',
+                            'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 4, {fail}, {allow}), //b'])
+
+  def test_convert_ranges_to_bpf(self):
+    ranges = genseccomp.convert_NRs_to_ranges([("b", 2), ("a", 1)])
+    bpf = genseccomp.convert_ranges_to_bpf(ranges)
+    self.assertEquals(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 1, 0, 1),',
+                            'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, 1, 0), //a|b',
+                            'BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),'])
+
+    ranges = genseccomp.convert_NRs_to_ranges([("b", 3), ("a", 1)])
+    bpf = genseccomp.convert_ranges_to_bpf(ranges)
+    self.assertEquals(bpf, ['BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 1, 0, 3),',
+                            'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, 1, 0),',
+                            'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 2, 2, 1), //a',
+                            'BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 4, 1, 0), //b',
+                            'BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),'])
+
+  def test_convert_bpf_to_output(self):
+    output = genseccomp.convert_bpf_to_output(["line1", "line2"], "arm")
+    expected_output = textwrap.dedent("""\
+    // Autogenerated file - edit at your peril!!
+
+    #include <linux/filter.h>
+    #include <errno.h>
+
+    #include "seccomp_policy.h"
+    const struct sock_filter arm_filter[] = {
+    line1
+    line2
+    };
+
+    const size_t arm_filter_size = sizeof(arm_filter) / sizeof(struct sock_filter);
+    """)
+    self.assertEquals(output, expected_output)
+
+  def test_construct_bpf(self):
+    syscalls = cStringIO.StringIO(textwrap.dedent("""\
+    int __llseek:_llseek(int, unsigned long, unsigned long, off64_t*, int) arm,mips,x86
+    int         fchown:fchown(int, uid_t, gid_t)    arm64,mips,mips64,x86_64
+    """))
+
+    whitelist = cStringIO.StringIO(textwrap.dedent("""\
+    ssize_t     read(int, void*, size_t)        all
+    """))
+
+    syscall_files = [syscalls, whitelist]
+    output = genseccomp.construct_bpf(syscall_files, "arm", self.get_headers("arm"))
+
+    expected_output = textwrap.dedent("""\
+    // Autogenerated file - edit at your peril!!
+
+    #include <linux/filter.h>
+    #include <errno.h>
+
+    #include "seccomp_policy.h"
+    const struct sock_filter arm_filter[] = {
+    BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 3, 0, 3),
+    BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 140, 1, 0),
+    BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 4, 2, 1), //read
+    BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 141, 1, 0), //_llseek
+    BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+    };
+
+    const size_t arm_filter_size = sizeof(arm_filter) / sizeof(struct sock_filter);
+    """)
+    self.assertEquals(output, expected_output)
+
+
+if __name__ == '__main__':
+  unittest.main()