Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 1 | #!/usr/bin/env python |
Paul Lawrence | 89fa81f | 2017-02-17 10:22:03 -0800 | [diff] [blame] | 2 | import collections |
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 3 | import os |
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 4 | import textwrap |
| 5 | from gensyscalls import SysCallsTxtParser |
Paul Lawrence | 89fa81f | 2017-02-17 10:22:03 -0800 | [diff] [blame] | 6 | from subprocess import Popen, PIPE |
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 7 | |
| 8 | |
# Template for a BPF "jump if greater or equal" statement. str.format fills
# in, in order, the constant compared against and the two jump distances.
BPF_JGE = "BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, {0}, {1}, {2})"
# Statement that makes the filter return SECCOMP_RET_ALLOW.
BPF_ALLOW = "BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW)"
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 11 | |
| 12 | |
class SyscallRange(object):
    """A contiguous, half-open range ``[begin, end)`` of syscall numbers.

    ``names`` holds the syscall names covered by the range, in the order they
    were added; ``begin`` is the first number in the range and ``end`` is one
    past the last number.
    """

    def __init__(self, name, value):
        """Start a range containing the single syscall ``name`` = ``value``."""
        self.names = [name]
        self.begin = value
        self.end = self.begin + 1

    def __str__(self):
        return "(%s, %s, %s)" % (self.begin, self.end, self.names)

    def add(self, name, value):
        """Extend the range by one syscall at its upper end.

        Raises:
            ValueError: if ``value`` is not exactly ``self.end``, i.e. adding
                it would leave a gap or overlap. The range is left unchanged.
        """
        if value != self.end:
            raise ValueError(
                "cannot add %s (%s) to range %s-%s: next value must be %s"
                % (name, value, self.begin, self.end - 1, self.end))
        self.end += 1
        self.names.append(name)
| 27 | |
| 28 | |
def get_names(syscall_files, architecture):
    """Return the names of the syscalls to allow on ``architecture``.

    Args:
        syscall_files: open files, each parsed with SysCallsTxtParser. The
            first three are taken, in order, as the bionic syscall list, the
            whitelist and the blacklist.
        architecture: key looked up in every parsed entry; only entries that
            contain it with a truthy value are kept.

    Returns:
        A list of unique syscall names, in no particular order.

    Raises:
        RuntimeError: if a blacklist entry is missing from the bionic list or
            is also present in the whitelist, or if the selected syscalls
            contain duplicate names (other than "socketcall" on x86).
    """
    syscall_lists = []
    for syscall_file in syscall_files:
        parser = SysCallsTxtParser()
        parser.parse_open_file(syscall_file)
        syscall_lists.append(parser.syscalls)

    bionic, whitelist, blacklist = syscall_lists[0], syscall_lists[1], syscall_lists[2]
    for x in blacklist:
        if x not in bionic:
            raise RuntimeError("Blacklist item not in bionic - aborting " + str(x))

        if x in whitelist:
            raise RuntimeError("Blacklist item in whitelist - aborting " + str(x))

    bionic_minus_blacklist = [x for x in bionic if x not in blacklist]
    syscalls = bionic_minus_blacklist + whitelist

    # Select only elements matching required architecture
    syscalls = [x for x in syscalls if architecture in x and x[architecture]]

    # We only want the name
    names = [x["name"] for x in syscalls]

    # Check for duplicates
    dups = [name for name, count in collections.Counter(names).items() if count > 1]

    # x86 has duplicate socketcall entries, so hard code for this. Filter
    # instead of list.remove() so that an input without the duplicate does
    # not blow up with ValueError.
    if architecture == "x86":
        dups = [name for name in dups if name != "socketcall"]

    if dups:
        raise RuntimeError("Duplicate entries found - aborting " + str(dups))

    # Remove remaining duplicates
    return list(set(names))
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 65 | |
Paul Lawrence | 7ea4090 | 2017-02-14 13:32:23 -0800 | [diff] [blame] | 66 | |
def convert_names_to_NRs(names, header_dir, extra_switches):
    """Resolve syscall names to their numbers using the C preprocessor.

    Args:
        names: iterable of syscall names.
        header_dir: directory passed to the preprocessor with -I.
        extra_switches: list of additional preprocessor command-line switches.

    Returns:
        A list of (name, value) tuples, one per recognised output line.
    """
    # Run preprocessor over the __NR_syscall symbols, including unistd.h,
    # to get the actual numbers
    prefix = "__SECCOMP_"  # prefix to ensure no name collisions
    lines = ["#include <asm/unistd.h>"]
    for name in names:
        # In SYSCALLS.TXT, there are two arm-specific syscalls whose names
        # start with __ARM_NR_. These we must simply write out as is.
        if name.startswith("__ARM_NR_"):
            lines.append(prefix + name + ", " + name)
        else:
            lines.append(prefix + name + ", __NR_" + name)
    source = "\n".join(lines) + "\n"

    # universal_newlines=True puts the pipes in text mode so str input and
    # output work on both Python 2 and Python 3. Handing the whole input to
    # communicate() lets subprocess feed stdin and drain stdout together
    # instead of writing everything up front.
    cpp = Popen(["../../prebuilts/clang/host/linux-x86/clang-stable/bin/clang",
                 "-E", "-nostdinc", "-I" + header_dir, "-Ikernel/uapi/"]
                + extra_switches
                + ["-"],
                stdin=PIPE, stdout=PIPE, universal_newlines=True)
    content = cpp.communicate(source)[0].split("\n")

    # The input is now the preprocessed source file. This will contain a lot
    # of junk from the preprocessor, but our lines will be in the format:
    #
    #   __SECCOMP_${NAME}, (0 + value)

    syscalls = []
    for line in content:
        if not line.startswith(prefix):
            continue

        # We might pick up extra whitespace during preprocessing, so best to
        # strip.
        name, value = [w.strip() for w in line.split(",")]
        name = name[len(prefix):]

        # Note that some of the numbers were expressed as base + offset, so we
        # need to eval, not just int.
        # NOTE(review): eval() runs on preprocessor output, so the headers are
        # trusted input. Builtins are withheld so only plain arithmetic can
        # be evaluated.
        value = eval(value, {"__builtins__": {}}, {})
        syscalls.append((name, value))

    return syscalls
| 106 | |
| 107 | |
def convert_NRs_to_ranges(syscalls):
    """Group (name, value) pairs into runs of consecutive syscall numbers.

    Args:
        syscalls: iterable of (name, value) tuples.

    Returns:
        A list of SyscallRange objects ordered by ascending value. An empty
        input yields an empty list.
    """
    # Sort the values so we convert to ranges and binary chop. A key function
    # is used rather than a cmp function so this runs on Python 2 and 3.
    syscalls = sorted(syscalls, key=lambda entry: entry[1])

    # Turn into a list of ranges. Keep the names for the comments
    ranges = []
    for name, value in syscalls:
        if ranges and ranges[-1].end == value:
            # Directly follows the previous range, so grow it.
            ranges[-1].add(name, value)
        else:
            ranges.append(SyscallRange(name, value))
    return ranges
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 125 | |
Paul Lawrence | 7ea4090 | 2017-02-14 13:32:23 -0800 | [diff] [blame] | 126 | |
# Converts the sorted ranges of allowed syscalls to a binary tree bpf.
# For a single range, output a simple jump to {fail} or {allow}. We can't set
# the jump distances yet, since we don't know the size of the filter, so use
# placeholders.
# For multiple ranges, split into two, convert the two halves and output a
# jump to the correct half.
def convert_to_intermediate_bpf(ranges):
    """Build the BPF statement list for ``ranges`` with unresolved leaf jumps.

    Args:
        ranges: non-empty list of range objects (``begin``, ``end``,
            ``names``) sorted by ascending value.

    Returns:
        A list of statement strings. Leaf statements still contain the
        literal ``{fail}`` and ``{allow}`` placeholders.

    Raises:
        ValueError: if ``ranges`` is empty (previously this recursed without
            bound).
    """
    if not ranges:
        raise ValueError("cannot build a bpf tree from an empty list of ranges")

    if len(ranges) == 1:
        # We will replace {fail} and {allow} with appropriate range jumps later
        return [BPF_JGE.format(ranges[0].end, "{fail}", "{allow}") +
                ", //" + "|".join(ranges[0].names)]

    # Floor division keeps the split index an int on Python 3 as well.
    half = (len(ranges) + 1) // 2
    first = convert_to_intermediate_bpf(ranges[:half])
    second = convert_to_intermediate_bpf(ranges[half:])
    # Values at or above the start of the second half skip over the first
    # half's statements; everything else falls through into the first half.
    jump = [BPF_JGE.format(ranges[half].begin, len(first), 0) + ","]
    return jump + first + second
| 144 | |
| 145 | |
def convert_ranges_to_bpf(ranges):
    """Turn sorted syscall ranges into the final list of BPF statements.

    The result is, in order: a guard jump for values below the first range,
    the binary-search tree with its placeholders resolved, and a single
    allow statement.
    """
    tree = convert_to_intermediate_bpf(ranges)
    tree_size = len(tree)

    # Now that the size of the tree is known, the {fail} and {allow}
    # placeholders can be replaced with real jump distances. A jump of 0
    # means the next statement, so from statement i the allow statement
    # placed right after the tree is tree_size - i - 1 away, and the failure
    # target is the statement after that one.
    resolved = []
    for position, statement in enumerate(tree):
        if "{fail}" in statement and "{allow}" in statement:
            statement = statement.format(fail=str(tree_size - position),
                                         allow=str(tree_size - position - 1))
        resolved.append(statement)

    # Add the allow statement at the end. If the syscall is not matched,
    # execution continues past it, which lets the user match further
    # syscalls and choose the action taken when blocking.
    resolved.append(BPF_ALLOW + ",")

    # Guard against values that are off the bottom of the syscalls: those
    # jump over everything emitted so far.
    lower_bound_check = BPF_JGE.format(ranges[0].begin, 0, str(len(resolved))) + ","
    return [lower_bound_check] + resolved
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 171 | |
Paul Lawrence | 7ea4090 | 2017-02-14 13:32:23 -0800 | [diff] [blame] | 172 | |
def convert_bpf_to_output(bpf, architecture):
    """Wrap BPF statements in the C++ source text for one architecture.

    Args:
        bpf: list of statement strings, emitted one per line.
        architecture: name used as the prefix of the generated
            ``<architecture>_filter`` and ``<architecture>_filter_size``.

    Returns:
        The complete file contents as a single string.
    """
    prologue = textwrap.dedent("""\
        // Autogenerated file - edit at your peril!!

        #include <linux/filter.h>
        #include <errno.h>

        #include "seccomp_bpfs.h"
        const sock_filter {architecture}_filter[] = {{
        """)
    epilogue = textwrap.dedent("""\

        }};

        const size_t {architecture}_filter_size = sizeof({architecture}_filter) / sizeof(struct sock_filter);
        """)

    # Only the wrappers go through format(); the statements are inserted
    # verbatim.
    pieces = [
        prologue.format(architecture=architecture),
        "\n".join(bpf),
        epilogue.format(architecture=architecture),
    ]
    return "".join(pieces)
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 191 | |
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 192 | |
def construct_bpf(syscall_files, architecture, header_dir, extra_switches):
    """Run the whole pipeline from syscall list files to policy source text.

    Names are read from ``syscall_files``, resolved to numbers with the
    headers in ``header_dir`` (plus ``extra_switches``), merged into ranges,
    converted to BPF statements and finally rendered as source text for
    ``architecture``.
    """
    numbered = convert_names_to_NRs(get_names(syscall_files, architecture),
                                    header_dir,
                                    extra_switches)
    statements = convert_ranges_to_bpf(convert_NRs_to_ranges(numbered))
    return convert_bpf_to_output(statements, architecture)
| 199 | |
| 200 | |
# Input files, opened by main() after set_dir() has changed directory.
# Order matters: get_names() treats index 0 as the bionic syscall list,
# index 1 as the whitelist and index 2 as the blacklist.
ANDROID_SYSCALL_FILES = ["SYSCALLS.TXT",
                         "SECCOMP_WHITELIST.TXT",
                         "SECCOMP_BLACKLIST.TXT"]


# One (architecture, header directory, extra preprocessor switches) tuple per
# generated policy; main() produces one output file for each entry.
POLICY_CONFIGS = [("arm", "kernel/uapi/asm-arm", []),
                  ("arm64", "kernel/uapi/asm-arm64", []),
                  ("x86", "kernel/uapi/asm-x86", ["-D__i386__"]),
                  ("x86_64", "kernel/uapi/asm-x86", []),
                  ("mips", "kernel/uapi/asm-mips", ["-D_MIPS_SIM=_MIPS_SIM_ABI32"]),
                  ("mips64", "kernel/uapi/asm-mips", ["-D_MIPS_SIM=_MIPS_SIM_ABI64"])]
Paul Lawrence | 7ea4090 | 2017-02-14 13:32:23 -0800 | [diff] [blame] | 212 | |
| 213 | |
def set_dir():
    """Change the working directory to bionic/libc under $ANDROID_BUILD_TOP."""
    # A fixed working directory gives predictable results for the relative
    # paths used by the rest of the script.
    build_top = os.environ["ANDROID_BUILD_TOP"]
    libc_dir = os.path.join(build_top, "bionic/libc")
    os.chdir(libc_dir)
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 217 | |
| 218 | |
def main():
    """Regenerate seccomp/<arch>_policy.cpp for every entry in POLICY_CONFIGS.

    An output file is rewritten only when the generated text differs from
    what is already on disk; a one-line status is printed for each one.
    """
    set_dir()
    for arch, header_path, switches in POLICY_CONFIGS:
        # Open the inputs afresh for each architecture and always close them
        # again, even if generation fails part-way through.
        files = []
        try:
            for filename in ANDROID_SYSCALL_FILES:
                files.append(open(filename))
            output = construct_bpf(files, arch, header_path, switches)
        finally:
            for syscall_file in files:
                syscall_file.close()

        # And output policy
        existing = ""
        output_path = "seccomp/{}_policy.cpp".format(arch)
        if os.path.isfile(output_path):
            with open(output_path) as existing_file:
                existing = existing_file.read()
        # Single-argument print(...) behaves the same on Python 2 and 3.
        if output == existing:
            print("File " + output_path + " not changed.")
        else:
            with open(output_path, "w") as output_file:
                output_file.write(output)
            print("Generated file " + output_path)
Paul Lawrence | eabc352 | 2016-11-11 11:33:42 -0800 | [diff] [blame] | 236 | |
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()