blob: 9e5710e60024fa2f3437553929852fd8d4427ed8 [file] [log] [blame]
Lorenzo Colittieb92f482019-01-04 14:59:11 +09001/*
2 * Copyright 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * main.c - main function
17 */
18
Lorenzo Colitti27da0ad2020-06-01 12:15:20 +090019#include <arpa/inet.h>
Lorenzo Colittieb92f482019-01-04 14:59:11 +090020#include <errno.h>
Maciej Żenczykowski5ae193a2025-03-14 12:44:50 -070021#include <linux/audit.h>
22#include <linux/filter.h>
23#include <linux/seccomp.h>
24#include <linux/unistd.h>
Lorenzo Colittieb92f482019-01-04 14:59:11 +090025#include <netinet/in.h>
Maciej Żenczykowski88cb78e2023-01-30 22:30:55 +000026#include <stdbool.h>
Lorenzo Colittieb92f482019-01-04 14:59:11 +090027#include <stdint.h>
28#include <stdlib.h>
29#include <string.h>
Maciej Żenczykowski88cb78e2023-01-30 22:30:55 +000030#include <sys/personality.h>
Maciej Żenczykowski5ae193a2025-03-14 12:44:50 -070031#include <sys/prctl.h>
Maciej Żenczykowski88cb78e2023-01-30 22:30:55 +000032#include <sys/utsname.h>
Lorenzo Colittieb92f482019-01-04 14:59:11 +090033#include <unistd.h>
34
Lorenzo Colittieb92f482019-01-04 14:59:11 +090035#include "clatd.h"
36#include "common.h"
37#include "config.h"
38#include "logging.h"
Lorenzo Colittieb92f482019-01-04 14:59:11 +090039
// Prefix prepended to the uplink interface name to build the clat tun device name
// (see the snprintf() into tunnel.device4 in main()).
#define DEVICEPREFIX "v4-"
41
Maciej Żenczykowskib984f312025-03-14 17:32:48 -070042/* function: handle_sigterm
Maciej Żenczykowski8ab7e132021-02-03 17:15:41 -080043 * signal handler: stop the event loop
44 */
Maciej Żenczykowskib984f312025-03-14 17:32:48 -070045static void handle_sigterm(__attribute__((unused)) int unused) { sigterm = 1; };
Maciej Żenczykowski8ab7e132021-02-03 17:15:41 -080046
/* function: print_help
 * writes the list of supported command line options to stdout,
 * for the benefit of someone running the binary by hand
 */
void print_help() {
  // One entry per output line; each entry carries its own trailing newline.
  static const char *const usage_lines[] = {
    "android-clat arguments:\n",
    "-i [uplink interface]\n",
    "-p [plat prefix]\n",
    "-4 [IPv4 address]\n",
    "-6 [IPv6 address]\n",
    "-t [tun file descriptor number]\n",
    "-r [read socket descriptor number]\n",
    "-w [write socket descriptor number]\n",
  };
  const size_t line_count = sizeof(usage_lines) / sizeof(usage_lines[0]);

  for (size_t idx = 0; idx < line_count; idx++) {
    fputs(usage_lines[idx], stdout);
  }
}
60
// Helper macros for building a classic-BPF seccomp filter program.
// Each BPF_SECCOMP_* macro expands to exactly one 'struct sock_filter' initializer,
// each BPF2_SECCOMP_* macro expands to exactly two (comma separated).

// Load the architecture identifier (AUDIT_ARCH_* constant)
#define BPF_SECCOMP_LOAD_AUDIT_ARCH \
  BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, arch))

// Load the system call number
#define BPF_SECCOMP_LOAD_SYSCALL_NR \
  BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, nr))

// The split of a 64-bit argument into LO32/HI32 words below is only valid on little endian.
// Use the compiler's predefined macros and require them to be defined: a plain
// '#if __BYTE_ORDER == __LITTLE_ENDIAN' silently evaluates to '0 == 0' (ie. true)
// whenever <endian.h> happens not to have been included.
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
    (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
// Load the low/high 32 bits of system call argument n, where n is [0..5]
#define BPF_SECCOMP_LOAD_SYSCALL_ARG_LO32(n) \
  BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[(n)]))
#define BPF_SECCOMP_LOAD_SYSCALL_ARG_HI32(n) \
  BPF_STMT(BPF_LD | BPF_W | BPF_ABS, offsetof(struct seccomp_data, args[(n)]) + 4)
#else
#error "Not a little endian architecture?"
#endif

// Allow the system call
#define BPF_SECCOMP_ALLOW BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW)

// Allow (but 'audit' log) the system call
#define BPF_SECCOMP_LOG BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_LOG)

// Reject the system call (kill thread)
#define BPF_SECCOMP_KILL BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL)

// Note arguments to BPF_JUMP(opcode, operand, true_offset, false_offset)

// If not equal, jump over count instructions
#define BPF_JUMP_IF_NOT_EQUAL(v, count) \
  BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (v), 0, (count))

// If equal, jump over count instructions
#define BPF_JUMP_IF_EQUAL(v, count) \
  BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (v), (count), 0)

// *TWO* instructions: compare and if not equal jump over the allow statement
#define BPF2_SECCOMP_ALLOW_IF_EQUAL(v) \
  BPF_JUMP_IF_NOT_EQUAL((v), 1), \
  BPF_SECCOMP_ALLOW

// *TWO* instructions: compare and if not equal jump over the log statement
#define BPF2_SECCOMP_LOG_IF_EQUAL(v) \
  BPF_JUMP_IF_NOT_EQUAL((v), 1), \
  BPF_SECCOMP_LOG

// *TWO* instructions: compare and if equal jump over the kill statement
#define BPF2_SECCOMP_KILL_IF_NOT_EQUAL(v) \
  BPF_JUMP_IF_EQUAL((v), 1), \
  BPF_SECCOMP_KILL

// Android only supports the following 5 little endian architectures.
// MY_AUDIT_ARCH is the AUDIT_ARCH_* value matching the ABI this file is compiled for.
#if defined(__aarch64__) && defined(__LP64__)
  #define MY_AUDIT_ARCH AUDIT_ARCH_AARCH64
#elif defined(__arm__) && defined(__ILP32__)
  #define MY_AUDIT_ARCH AUDIT_ARCH_ARM
#elif defined(__i386__) && defined(__ILP32__)
  #define MY_AUDIT_ARCH AUDIT_ARCH_I386
#elif defined(__x86_64__) && defined(__LP64__)
  #define MY_AUDIT_ARCH AUDIT_ARCH_X86_64
#elif defined(__riscv) && defined(__LP64__)
  #define MY_AUDIT_ARCH AUDIT_ARCH_RISCV64
#else
  #error "Unknown AUDIT_ARCH_* architecture."
#endif
127
128void enable_seccomp(void) {
129 static const struct sock_filter filter[] = {
130 BPF_SECCOMP_LOAD_AUDIT_ARCH,
131 BPF2_SECCOMP_KILL_IF_NOT_EQUAL(MY_AUDIT_ARCH),
132
133 BPF_SECCOMP_LOAD_SYSCALL_NR, // aarch64
134
135 // main event loop:
136 // ppoll ( read sendmsg | recvmsg writev )
137 BPF2_SECCOMP_ALLOW_IF_EQUAL(__NR_ppoll), // 73
138 BPF2_SECCOMP_ALLOW_IF_EQUAL(__NR_read), // 63
139 BPF2_SECCOMP_ALLOW_IF_EQUAL(__NR_sendmsg), // 211
140 BPF2_SECCOMP_ALLOW_IF_EQUAL(__NR_recvmsg), // 212
141 BPF2_SECCOMP_ALLOW_IF_EQUAL(__NR_writev), // 66
142
143 // logging: getuid writev
144 BPF2_SECCOMP_ALLOW_IF_EQUAL(__NR_getuid), // 174
145
146 // inbound signal (SIGTERM) processing
147 BPF2_SECCOMP_ALLOW_IF_EQUAL(__NR_rt_sigreturn), // 139
148
149 // sleep(n)
150 BPF2_SECCOMP_ALLOW_IF_EQUAL(__NR_nanosleep), // 101
151
152 // _exit(0)
153 BPF2_SECCOMP_ALLOW_IF_EQUAL(__NR_exit_group), // 94
154
155#if defined(__aarch64__)
156 // Pixels are aarch64 - if we break clatd functionality on them,
157 // we *will* notice on GoogleGuest WiFi network (which is ipv6 only)
158 BPF_SECCOMP_KILL,
159#else
160 // All other architectures: generate audit lines visible in dmesg and logcat
161 BPF_SECCOMP_LOG,
162#endif
163 };
164 static const struct sock_fprog prog = {
165 .len = (unsigned short)ARRAY_SIZE(filter),
166 .filter = (struct sock_filter *)filter,
167 };
168
169 // https://man7.org/linux/man-pages/man2/PR_SET_NO_NEW_PRIVS.2const.html
170 // required to allow non-privileged seccomp filter installation
171 int rv = prctl(PR_SET_NO_NEW_PRIVS, 1L, 0L, 0L, 0L);
172 if (rv) {
173 logmsg(ANDROID_LOG_FATAL, "prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) = %d [%d]", rv, errno);
174 exit(1);
175 }
176
177 // https://man7.org/linux/man-pages/man2/PR_SET_SECCOMP.2const.html
178 // but see also https://man7.org/linux/man-pages/man2/seccomp.2.html
179 rv = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0L, 0L);
180 if (rv) {
181 logmsg(ANDROID_LOG_FATAL, "prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0) = %d [%d]", rv, errno);
182 exit(1);
183 }
184}
185
Lorenzo Colittieb92f482019-01-04 14:59:11 +0900186/* function: main
187 * allocate and setup the tun device, then run the event loop
188 */
189int main(int argc, char **argv) {
190 struct tun_data tunnel;
191 int opt;
Hungming Chen9e718f42022-01-04 22:48:54 +0800192 char *uplink_interface = NULL, *plat_prefix = NULL;
Hungming Chen06367f32021-11-24 17:22:52 +0800193 char *v4_addr = NULL, *v6_addr = NULL, *tunfd_str = NULL, *read_sock_str = NULL,
194 *write_sock_str = NULL;
Lorenzo Colittieb92f482019-01-04 14:59:11 +0900195 unsigned len;
196
Maciej Żenczykowski19199ca2025-03-14 12:08:05 -0700197 // Clatd binary is setuid/gid CLAT, thus when we reach here we have:
198 // $ adb shell ps | grep clat
199 // [pid] [ppid]
200 // clat 7650 1393 10785364 2612 do_sys_poll 0 S clatd-wlan0
201 // $ adb shell cat /proc/7650/status | egrep -i '^(Uid:|Gid:|Groups:)'
202 // [real][effective][saved][filesystem]
203 // [uid] [euid] [suid] [fsuid]
204 // Uid: 1000 1029 1029 1029
205 // [gid] [egid] [sgid] [fsgid]
206 // Gid: 1000 1029 1029 1029
207 // Groups: 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1018 1021 1023 1024 1032 1065 3001 3002 3003 3005 3006 3007 3009 3010 3011 3012
208 // This mismatch between uid & euid appears to cause periodic (every 5 minutes):
209 // objhash pid ppid uid
210 // W ActivityManager: Stale PhantomProcessRecord {xxxxxxx 7650:1393:clatd-wlan0/1000}, removing
211 // This is due to:
212 // $ adbz shell ls -ld /proc/7650
213 // dr-xr-xr-x 9 clat clat 0 2025-03-14 11:37 /proc/7650
214 // which is used by
215 // //frameworks/base/core/java/com/android/internal/os/ProcessCpuTracker.java
216 // which thus returns the uid 'clat' vs
217 // //frameworks/base/core/java/android/os/Process.java
218 // getUidForPid() which grabs *real* 'uid' from /proc/<pid>/status and is used in:
219 // //frameworks/base/services/core/java/com/android/server/am/PhantomProcessList.java
220 // (perhaps this should grab euid instead? unclear)
221 //
222 // However, we want to drop as many privs as possible, hence:
223 gid_t egid = getegid(); // documented to never fail, hence should return AID_CLAT == 1029
224 uid_t euid = geteuid(); // (ditto)
225 setresgid(egid, egid, egid); // ignore any failure
226 setresuid(euid, euid, euid); // ignore any failure
227 // ideally we'd somehow drop supplementary groups too...
228 // but for historical reasons that actually requires CAP_SETGID which we don't have
229 // (see man 2 setgroups)
230 //
231 // Now we (should) have:
232 // $ adb shell ps | grep clat
233 // clat 5370 1479 10785364 2528 do_sys_poll 0 S clatd-wlan0
234 // # adb shell cat /proc/5370/status | egrep -i '^(Uid:|Gid:|Groups:)'
235 // Uid: 1029 1029 1029 1029
236 // Gid: 1029 1029 1029 1029
237 // Groups: 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1018 1021 1023 1024 1032 1065 3001 3002 3003 3005 3006 3007 3009 3010 3011 3012
238
Hungming Chen9e718f42022-01-04 22:48:54 +0800239 while ((opt = getopt(argc, argv, "i:p:4:6:t:r:w:h")) != -1) {
Lorenzo Colittieb92f482019-01-04 14:59:11 +0900240 switch (opt) {
241 case 'i':
242 uplink_interface = optarg;
243 break;
244 case 'p':
245 plat_prefix = optarg;
246 break;
Lorenzo Colittif0fac862019-01-11 18:10:11 +0900247 case '4':
248 v4_addr = optarg;
249 break;
250 case '6':
251 v6_addr = optarg;
252 break;
Maciej Żenczykowski716518d2019-04-08 17:46:48 -0700253 case 't':
254 tunfd_str = optarg;
255 break;
Hungming Chen06367f32021-11-24 17:22:52 +0800256 case 'r':
257 read_sock_str = optarg;
258 break;
Nucca Chen0714a182021-12-13 09:24:38 +0000259 case 'w':
260 write_sock_str = optarg;
261 break;
Lorenzo Colittieb92f482019-01-04 14:59:11 +0900262 case 'h':
263 print_help();
264 exit(0);
265 default:
266 logmsg(ANDROID_LOG_FATAL, "Unknown option -%c. Exiting.", (char)optopt);
267 exit(1);
268 }
269 }
270
271 if (uplink_interface == NULL) {
272 logmsg(ANDROID_LOG_FATAL, "clatd called without an interface");
273 exit(1);
274 }
275
Maciej Żenczykowski716518d2019-04-08 17:46:48 -0700276 if (tunfd_str != NULL && !parse_int(tunfd_str, &tunnel.fd4)) {
277 logmsg(ANDROID_LOG_FATAL, "invalid tunfd %s", tunfd_str);
278 exit(1);
279 }
280 if (!tunnel.fd4) {
281 logmsg(ANDROID_LOG_FATAL, "no tunfd specified on commandline.");
282 exit(1);
283 }
284
Hungming Chen06367f32021-11-24 17:22:52 +0800285 if (read_sock_str != NULL && !parse_int(read_sock_str, &tunnel.read_fd6)) {
Hungming Chen9e718f42022-01-04 22:48:54 +0800286 logmsg(ANDROID_LOG_FATAL, "invalid read socket %s", read_sock_str);
Hungming Chen06367f32021-11-24 17:22:52 +0800287 exit(1);
288 }
289 if (!tunnel.read_fd6) {
290 logmsg(ANDROID_LOG_FATAL, "no read_fd6 specified on commandline.");
291 exit(1);
292 }
293
Nucca Chen0714a182021-12-13 09:24:38 +0000294 if (write_sock_str != NULL && !parse_int(write_sock_str, &tunnel.write_fd6)) {
Hungming Chen9e718f42022-01-04 22:48:54 +0800295 logmsg(ANDROID_LOG_FATAL, "invalid write socket %s", write_sock_str);
Nucca Chen0714a182021-12-13 09:24:38 +0000296 exit(1);
297 }
298 if (!tunnel.write_fd6) {
299 logmsg(ANDROID_LOG_FATAL, "no write_fd6 specified on commandline.");
300 exit(1);
301 }
302
Lorenzo Colittieb92f482019-01-04 14:59:11 +0900303 len = snprintf(tunnel.device4, sizeof(tunnel.device4), "%s%s", DEVICEPREFIX, uplink_interface);
304 if (len >= sizeof(tunnel.device4)) {
305 logmsg(ANDROID_LOG_FATAL, "interface name too long '%s'", tunnel.device4);
306 exit(1);
307 }
308
Hungming Chen5c112132021-11-25 09:40:17 +0800309 Global_Clatd_Config.native_ipv6_interface = uplink_interface;
310 if (!plat_prefix || inet_pton(AF_INET6, plat_prefix, &Global_Clatd_Config.plat_subnet) <= 0) {
311 logmsg(ANDROID_LOG_FATAL, "invalid IPv6 address specified for plat prefix: %s", plat_prefix);
312 exit(1);
313 }
314
Hungming Chen5dafb0e2021-11-24 20:19:43 +0800315 if (!v4_addr || !inet_pton(AF_INET, v4_addr, &Global_Clatd_Config.ipv4_local_subnet.s_addr)) {
316 logmsg(ANDROID_LOG_FATAL, "Invalid IPv4 address %s", v4_addr);
317 exit(1);
318 }
319
Hungming Chen5c112132021-11-25 09:40:17 +0800320 if (!v6_addr || !inet_pton(AF_INET6, v6_addr, &Global_Clatd_Config.ipv6_local_subnet)) {
321 logmsg(ANDROID_LOG_FATAL, "Invalid source address %s", v6_addr);
322 exit(1);
323 }
324
Maciej Żenczykowski1961f672025-03-14 13:42:33 -0700325 logmsg(ANDROID_LOG_INFO, "Starting clat version " CLATD_VERSION " on %s plat=%s v4=%s v6=%s",
Hungming Chen9e718f42022-01-04 22:48:54 +0800326 uplink_interface, plat_prefix ? plat_prefix : "(none)", v4_addr ? v4_addr : "(none)",
Lorenzo Colitti27da0ad2020-06-01 12:15:20 +0900327 v6_addr ? v6_addr : "(none)");
Lorenzo Colittieb92f482019-01-04 14:59:11 +0900328
Maciej Żenczykowski88cb78e2023-01-30 22:30:55 +0000329 {
330 // Compile time detection of 32 vs 64-bit build. (note: C does not have 'constexpr')
331 // Avoid use of preprocessor macros to get compile time syntax checking even on 64-bit.
332 const int user_bits = sizeof(void*) * 8;
333 const bool user32 = (user_bits == 32);
334
335 // Note that on 64-bit all this personality related code simply compile optimizes out.
336 // 32-bit: fetch current personality (see 'man personality': 0xFFFFFFFF means retrieve only)
337 // On Linux fetching personality cannot fail.
338 const int prev_personality = user32 ? personality(0xFFFFFFFFuL) : PER_LINUX;
339 // 32-bit: attempt to get rid of kernel spoofing of 'uts.machine' architecture,
340 // In theory this cannot fail, as PER_LINUX should always be supported.
341 if (user32) (void)personality((prev_personality & ~PER_MASK) | PER_LINUX);
342 // 64-bit: this will compile time evaluate to false.
343 const bool was_linux32 = (prev_personality & PER_MASK) == PER_LINUX32;
344
345 struct utsname uts = {};
346 if (uname(&uts)) exit(1); // only possible error is EFAULT, but 'uts' is on stack
347
348 // sysname is likely 'Linux', release is 'kver', machine is kernel's *true* architecture
349 logmsg(ANDROID_LOG_INFO, "%d-bit userspace on %s kernel %s for %s%s.", user_bits,
350 uts.sysname, uts.release, uts.machine, was_linux32 ? " (was spoofed)" : "");
351
352 // 32-bit: try to return to the 'default' personality
353 // In theory this cannot fail, because it was already previously in use.
354 if (user32) (void)personality(prev_personality);
355 }
356
Lorenzo Colittieb92f482019-01-04 14:59:11 +0900357 // Loop until someone sends us a signal or brings down the tun interface.
Maciej Żenczykowskib984f312025-03-14 17:32:48 -0700358 if (signal(SIGTERM, handle_sigterm) == SIG_ERR) {
Lorenzo Colittieb92f482019-01-04 14:59:11 +0900359 logmsg(ANDROID_LOG_FATAL, "sigterm handler failed: %s", strerror(errno));
360 exit(1);
361 }
362
Maciej Żenczykowski2f96f2c2025-03-14 15:41:30 -0700363 // Apparently some network gear will refuse to perform NS for IPs that aren't DAD'ed,
364 // this would then result in an ipv6-only network with working native ipv6, working
365 // IPv4 via DNS64, but non-functioning IPv4 via CLAT (ie. IPv4 literals + IPv4 only apps).
366 // The kernel itself doesn't do DAD for anycast ips (but does handle IPV6 MLD and handle ND).
367 // So we'll spoof dad here, and yeah, we really should check for a response and in
368 // case of failure pick a different IP. Seeing as 48-bits of the IP are utterly random
369 // (with the other 16 chosen to guarantee checksum neutrality) this seems like a remote
370 // concern...
371 // TODO: actually perform true DAD
372 send_dad(tunnel.write_fd6, &Global_Clatd_Config.ipv6_local_subnet);
373
Maciej Żenczykowski5ae193a2025-03-14 12:44:50 -0700374 enable_seccomp(); // WARNING: from this point forward very limited system calls available.
375
Lorenzo Colittieb92f482019-01-04 14:59:11 +0900376 event_loop(&tunnel);
377
Maciej Żenczykowskib984f312025-03-14 17:32:48 -0700378 if (sigterm) {
Maciej Żenczykowski0372f292025-03-14 19:21:00 -0700379 logmsg(ANDROID_LOG_INFO, "Shutting down clatd on %s, already received SIGTERM", uplink_interface);
Maciej Żenczykowskib984f312025-03-14 17:32:48 -0700380 } else {
381 // this implies running == false, ie. we received EOF or ENETDOWN error.
Maciej Żenczykowski0372f292025-03-14 19:21:00 -0700382 logmsg(ANDROID_LOG_INFO, "Shutting down clatd on %s, waiting for SIGTERM", uplink_interface);
Maciej Żenczykowski9c05f752023-04-13 09:15:01 +0000383 // let's give higher level java code 15 seconds to kill us,
384 // but eventually terminate anyway, in case system server forgets about us...
Maciej Żenczykowskib984f312025-03-14 17:32:48 -0700385 // sleep() should be interrupted by SIGTERM, the handler should set 'sigterm'
Maciej Żenczykowski9c05f752023-04-13 09:15:01 +0000386 sleep(15);
387 logmsg(ANDROID_LOG_INFO, "Clatd on %s %s SIGTERM", uplink_interface,
Maciej Żenczykowskib984f312025-03-14 17:32:48 -0700388 sigterm ? "received" : "timed out waiting for");
Maciej Żenczykowski05b05412021-04-01 05:06:14 -0700389 }
Maciej Żenczykowski5ae193a2025-03-14 12:44:50 -0700390
391 // Using _exit() here avoids 4 mprotect() syscalls triggered via 'exit(0)' or 'return 0'
392 _exit(0);
Lorenzo Colittieb92f482019-01-04 14:59:11 +0900393}