Paul Lawrence | db929bf | 2016-10-21 13:13:02 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2016 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "seccomp.h" |
| 18 | |
| 19 | #include <vector> |
| 20 | |
| 21 | #include <sys/prctl.h> |
| 22 | |
| 23 | #include <linux/unistd.h> |
| 24 | #include <linux/audit.h> |
| 25 | #include <linux/filter.h> |
| 26 | #include <linux/seccomp.h> |
| 27 | |
| 28 | #include "log.h" |
| 29 | #include "seccomp_policy.h" |
| 30 | |
// Byte offsets, within struct seccomp_data, of the fields the filter loads.
#define syscall_nr (offsetof(struct seccomp_data, nr))
#define arch_nr (offsetof(struct seccomp_data, arch))

// AUDIT_ARCH_NR is the audit architecture value for the target being compiled.
// AUDIT_ARCH_NR32 is defined in addition on the 64-bit targets below, naming
// the 32-bit architecture that gets its own policy.
#if defined(__arm__)
#define AUDIT_ARCH_NR AUDIT_ARCH_ARM
#elif defined(__aarch64__)
#define AUDIT_ARCH_NR AUDIT_ARCH_AARCH64
#define AUDIT_ARCH_NR32 AUDIT_ARCH_ARM
#elif defined(__i386__)
#define AUDIT_ARCH_NR AUDIT_ARCH_I386
#elif defined(__x86_64__)
#define AUDIT_ARCH_NR AUDIT_ARCH_X86_64
#define AUDIT_ARCH_NR32 AUDIT_ARCH_I386
#elif defined(__mips64__)
#define AUDIT_ARCH_NR AUDIT_ARCH_MIPS64
#define AUDIT_ARCH_NR32 AUDIT_ARCH_MIPS
#elif defined(__mips__) && !defined(__mips64__)
#define AUDIT_ARCH_NR AUDIT_ARCH_MIPS
#else
#error "Could not determine AUDIT_ARCH_NR for this architecture"
#endif

// A BPF program under construction: an ordered list of instructions.
using filter = std::vector<sock_filter>;
| 54 | |
| 55 | // We want to keep the below inline functions for debugging and future |
| 56 | // development even though they are not used currently. |
| 57 | #pragma clang diagnostic push |
| 58 | #pragma clang diagnostic ignored "-Wunused-function" |
| 59 | |
| 60 | static inline void Kill(filter& f) { |
| 61 | f.push_back(BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL)); |
| 62 | } |
| 63 | |
| 64 | static inline void Trap(filter& f) { |
| 65 | f.push_back(BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP)); |
| 66 | } |
| 67 | |
| 68 | static inline void Error(filter& f, __u16 retcode) { |
| 69 | f.push_back(BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO + retcode)); |
| 70 | } |
| 71 | |
| 72 | inline static void Trace(filter& f) { |
| 73 | f.push_back(BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE)); |
| 74 | } |
| 75 | |
| 76 | inline static void Allow(filter& f) { |
| 77 | f.push_back(BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW)); |
| 78 | } |
| 79 | |
| 80 | inline static void AllowSyscall(filter& f, __u32 num) { |
| 81 | f.push_back(BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, num, 0, 1)); |
| 82 | f.push_back(BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW)); |
| 83 | } |
| 84 | |
| 85 | inline static void ExamineSyscall(filter& f) { |
| 86 | f.push_back(BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_nr)); |
| 87 | } |
| 88 | |
| 89 | #ifdef AUDIT_ARCH_NR32 |
| 90 | inline static int SetValidateArchitectureJumpTarget(size_t offset, filter& f) { |
| 91 | auto jump_length = f.size() - offset - 1; |
| 92 | auto u8_jump_length = (__u8) jump_length; |
| 93 | if (u8_jump_length != jump_length) { |
| 94 | LOG(ERROR) << "Can't set jump greater than 255 - actual jump is " << jump_length; |
| 95 | return -1; |
| 96 | } |
| 97 | f[offset] = BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, AUDIT_ARCH_NR32, u8_jump_length, 0); |
| 98 | return 0; |
| 99 | } |
| 100 | #endif |
| 101 | |
| 102 | inline static size_t ValidateArchitectureAndJumpIfNeeded(filter& f) { |
| 103 | f.push_back(BPF_STMT(BPF_LD|BPF_W|BPF_ABS, arch_nr)); |
| 104 | |
| 105 | #ifdef AUDIT_ARCH_NR32 |
| 106 | f.push_back(BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, AUDIT_ARCH_NR, 2, 0)); |
| 107 | f.push_back(BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, AUDIT_ARCH_NR32, 1, 0)); |
| 108 | Kill(f); |
| 109 | return f.size() - 2; |
| 110 | #else |
| 111 | f.push_back(BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, AUDIT_ARCH_NR, 1, 0)); |
| 112 | Kill(f); |
| 113 | return 0; |
| 114 | #endif |
| 115 | } |
| 116 | |
| 117 | #pragma clang diagnostic pop |
| 118 | |
| 119 | static bool install_filter(filter const& f) { |
| 120 | struct sock_fprog prog = { |
| 121 | (unsigned short) f.size(), |
| 122 | (struct sock_filter*) &f[0], |
| 123 | }; |
| 124 | |
| 125 | if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog) < 0) { |
| 126 | PLOG(ERROR) << "SECCOMP: Could not set seccomp filter"; |
| 127 | return false; |
| 128 | } |
| 129 | |
| 130 | LOG(INFO) << "SECCOMP: Global filter installed"; |
| 131 | return true; |
| 132 | } |
| 133 | |
| 134 | bool set_seccomp_filter() { |
| 135 | filter f; |
| 136 | |
| 137 | // Note that for mixed 64/32 bit architectures, ValidateArchitecture inserts a |
| 138 | // jump that must be changed to point to the start of the 32-bit policy |
| 139 | // 32 bit syscalls will not hit the policy between here and the call to SetJump |
| 140 | #ifdef AUDIT_ARCH_NR32 |
| 141 | auto offset_to_32bit_filter = |
| 142 | #endif |
| 143 | ValidateArchitectureAndJumpIfNeeded(f); |
| 144 | |
| 145 | // Native filter |
| 146 | ExamineSyscall(f); |
| 147 | |
| 148 | #ifdef __aarch64__ |
| 149 | // Syscalls needed to boot Android |
| 150 | AllowSyscall(f, __NR_pivot_root); |
| 151 | AllowSyscall(f, __NR_ioprio_get); |
| 152 | AllowSyscall(f, __NR_ioprio_set); |
| 153 | AllowSyscall(f, __NR_gettid); |
| 154 | AllowSyscall(f, __NR_futex); |
| 155 | AllowSyscall(f, __NR_clone); |
| 156 | AllowSyscall(f, __NR_rt_sigreturn); |
| 157 | AllowSyscall(f, __NR_rt_tgsigqueueinfo); |
| 158 | AllowSyscall(f, __NR_add_key); |
| 159 | AllowSyscall(f, __NR_request_key); |
| 160 | AllowSyscall(f, __NR_keyctl); |
| 161 | AllowSyscall(f, __NR_restart_syscall); |
| 162 | AllowSyscall(f, __NR_getrandom); |
| 163 | |
| 164 | // Needed for performance tools |
| 165 | AllowSyscall(f, __NR_perf_event_open); |
| 166 | |
| 167 | // Needed for treble |
| 168 | AllowSyscall(f, __NR_finit_module); |
| 169 | |
| 170 | // Needed for trusty |
| 171 | AllowSyscall(f, __NR_syncfs); |
| 172 | |
Colin Cross | 2e253cb | 2017-01-23 17:06:14 -0800 | [diff] [blame] | 173 | // Needed for strace |
| 174 | AllowSyscall(f, __NR_tkill); // __NR_tkill |
| 175 | |
Colin Cross | 47afc6b | 2017-01-23 16:09:04 -0800 | [diff] [blame] | 176 | // Needed for kernel to restart syscalls |
| 177 | AllowSyscall(f, __NR_restart_syscall); |
| 178 | |
Paul Lawrence | db929bf | 2016-10-21 13:13:02 -0700 | [diff] [blame] | 179 | // arm64-only filter - autogenerated from bionic syscall usage |
| 180 | for (size_t i = 0; i < arm64_filter_size; ++i) |
| 181 | f.push_back(arm64_filter[i]); |
| 182 | #else |
| 183 | // Generic policy |
| 184 | Allow(f); |
| 185 | #endif |
| 186 | |
| 187 | #ifdef AUDIT_ARCH_NR32 |
| 188 | if (SetValidateArchitectureJumpTarget(offset_to_32bit_filter, f) != 0) |
| 189 | return -1; |
| 190 | |
| 191 | // 32-bit filter for 64-bit platforms |
| 192 | ExamineSyscall(f); |
| 193 | |
| 194 | #ifdef __aarch64__ |
| 195 | // Syscalls needed to boot android |
| 196 | AllowSyscall(f, 120); // __NR_clone |
| 197 | AllowSyscall(f, 240); // __NR_futex |
| 198 | AllowSyscall(f, 119); // __NR_sigreturn |
| 199 | AllowSyscall(f, 173); // __NR_rt_sigreturn |
| 200 | AllowSyscall(f, 363); // __NR_rt_tgsigqueueinfo |
| 201 | AllowSyscall(f, 224); // __NR_gettid |
| 202 | |
| 203 | // Syscalls needed to run Chrome |
| 204 | AllowSyscall(f, 383); // __NR_seccomp - needed to start Chrome |
| 205 | AllowSyscall(f, 384); // __NR_getrandom - needed to start Chrome |
| 206 | |
| 207 | // Syscalls needed to run GFXBenchmark |
| 208 | AllowSyscall(f, 190); // __NR_vfork |
| 209 | |
Colin Cross | 2e253cb | 2017-01-23 17:06:14 -0800 | [diff] [blame] | 210 | // Needed for strace |
Paul Lawrence | e4ddaa4 | 2017-01-27 10:23:21 -0800 | [diff] [blame] | 211 | AllowSyscall(f, 238); // __NR_tkill |
Colin Cross | 2e253cb | 2017-01-23 17:06:14 -0800 | [diff] [blame] | 212 | |
Colin Cross | 47afc6b | 2017-01-23 16:09:04 -0800 | [diff] [blame] | 213 | // Needed for kernel to restart syscalls |
Paul Lawrence | e4ddaa4 | 2017-01-27 10:23:21 -0800 | [diff] [blame] | 214 | AllowSyscall(f, 0); // __NR_restart_syscall |
Colin Cross | 47afc6b | 2017-01-23 16:09:04 -0800 | [diff] [blame] | 215 | |
Paul Lawrence | 31b2a9d | 2017-01-26 12:27:03 -0800 | [diff] [blame] | 216 | // Needed for debugging 32-bit Chrome |
Paul Lawrence | e4ddaa4 | 2017-01-27 10:23:21 -0800 | [diff] [blame] | 217 | AllowSyscall(f, 42); // __NR_pipe |
Paul Lawrence | 31b2a9d | 2017-01-26 12:27:03 -0800 | [diff] [blame] | 218 | |
Paul Lawrence | 73f6e44 | 2017-01-27 08:24:40 -0800 | [diff] [blame] | 219 | // b/34732712 |
| 220 | AllowSyscall(f, 364); // __NR_perf_event_open |
| 221 | |
Paul Lawrence | e4ddaa4 | 2017-01-27 10:23:21 -0800 | [diff] [blame] | 222 | // b/34651972 |
| 223 | AllowSyscall(f, 33); // __NR_access |
| 224 | AllowSyscall(f, 195); // __NR_stat64 |
| 225 | |
Paul Lawrence | ae3f906 | 2017-01-30 15:43:53 -0800 | [diff] [blame] | 226 | // b/34813887 |
| 227 | AllowSyscall(f, 5); // __NR_open |
| 228 | AllowSyscall(f, 141); // __NR_getdents |
| 229 | AllowSyscall(f, 217); // __NR_getdents64 |
| 230 | |
| 231 | // b/34719286 |
| 232 | AllowSyscall(f, 351); // __NR_eventfd |
| 233 | |
Paul Lawrence | 8afdd2a | 2017-01-31 14:49:50 -0800 | [diff] [blame] | 234 | // b/34817266 |
| 235 | AllowSyscall(f, 252); // __NR_epoll_wait |
| 236 | |
Alex Shlyapnikov | 2e71ceb | 2017-02-01 18:03:08 -0800 | [diff] [blame] | 237 | // Needed by sanitizers (b/34606909) |
| 238 | // 5 (__NR_open) and 195 (__NR_stat64) are also required, but they are |
| 239 | // already allowed. |
| 240 | AllowSyscall(f, 85); // __NR_readlink |
| 241 | |
Paul Lawrence | db929bf | 2016-10-21 13:13:02 -0700 | [diff] [blame] | 242 | // arm32-on-arm64 only filter - autogenerated from bionic syscall usage |
| 243 | for (size_t i = 0; i < arm_filter_size; ++i) |
| 244 | f.push_back(arm_filter[i]); |
| 245 | #else |
| 246 | // Generic policy |
| 247 | Allow(f); |
| 248 | #endif |
| 249 | #endif |
| 250 | return install_filter(f); |
| 251 | } |