blob: 9b1d987a04b6e4cb910485b66227f9355519e983 [file] [log] [blame]
Daniel Drowna45056e2012-03-23 10:42:54 -05001/*
2 * Copyright 2012 Daniel Drown
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * clatd.c - tun interface setup and main event loop
17 */
junyulaic4e591a2018-11-26 22:36:10 +090018#include <arpa/inet.h>
19#include <errno.h>
20#include <fcntl.h>
Daniel Drowna45056e2012-03-23 10:42:54 -050021#include <poll.h>
22#include <signal.h>
Maciej Żenczykowski1f395ef2023-02-16 05:11:54 +000023#include <stdbool.h>
Daniel Drowna45056e2012-03-23 10:42:54 -050024#include <stdio.h>
junyulaic4e591a2018-11-26 22:36:10 +090025#include <stdlib.h>
26#include <string.h>
Daniel Drowna45056e2012-03-23 10:42:54 -050027#include <sys/ioctl.h>
Elliott Hughes3afe9ae2014-07-18 17:25:26 -070028#include <sys/prctl.h>
Daniel Drowna45056e2012-03-23 10:42:54 -050029#include <sys/stat.h>
junyulaic4e591a2018-11-26 22:36:10 +090030#include <sys/types.h>
31#include <time.h>
Daniel Drowna45056e2012-03-23 10:42:54 -050032#include <unistd.h>
Daniel Drowna45056e2012-03-23 10:42:54 -050033
Lorenzo Colittif08c5aa2014-06-03 12:56:38 +090034#include <linux/filter.h>
Daniel Drowna45056e2012-03-23 10:42:54 -050035#include <linux/if.h>
Daniel Drowna45056e2012-03-23 10:42:54 -050036#include <linux/if_ether.h>
Lorenzo Colittif08c5aa2014-06-03 12:56:38 +090037#include <linux/if_packet.h>
junyulaic4e591a2018-11-26 22:36:10 +090038#include <linux/if_tun.h>
Maciej Żenczykowski8eb48882023-03-14 03:55:22 +000039#include <linux/virtio_net.h>
Lorenzo Colittif08c5aa2014-06-03 12:56:38 +090040#include <net/if.h>
junyulaic4e591a2018-11-26 22:36:10 +090041#include <sys/uio.h>
Daniel Drowna45056e2012-03-23 10:42:54 -050042
Daniel Drowna45056e2012-03-23 10:42:54 -050043#include "clatd.h"
Maciej Żenczykowskif6ec94e2022-07-12 16:17:33 -070044#include "checksum.h"
Daniel Drowna45056e2012-03-23 10:42:54 -050045#include "config.h"
Daniel Drowna45056e2012-03-23 10:42:54 -050046#include "dump.h"
junyulaic4e591a2018-11-26 22:36:10 +090047#include "logging.h"
junyulaic4e591a2018-11-26 22:36:10 +090048#include "translate.h"
Daniel Drowna45056e2012-03-23 10:42:54 -050049
// Process-wide clatd configuration. Within this file only send_dad() reads it
// (the native_ipv6_interface field, to pick the outgoing interface scope).
struct clat_config Global_Clatd_Config;

// Termination request flag polled by event_loop(); the loop ends once it is non-zero.
// It is never written in this file - presumably a signal handler elsewhere sets it.
volatile sig_atomic_t sigterm = 0;
// Cleared by process_packet_6_to_4()/process_packet_4_to_6() when their file descriptor
// reports ENETDOWN or a zero length read; event_loop() stops once it is false.
bool running = true;
Daniel Drowna45056e2012-03-23 10:42:54 -050054
Maciej Żenczykowski206b2382023-03-14 03:50:28 +000055// reads IPv6 packet from AF_PACKET socket, translates to IPv4, writes to tun
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +000056void process_packet_6_to_4(struct tun_data *tunnel) {
Maciej Żenczykowski4e764172023-03-13 21:55:54 +000057 // ethernet header is 14 bytes, plus 4 for a normal VLAN tag or 8 for Q-in-Q
58 // we don't really support vlans (or especially Q-in-Q)...
59 // but a few bytes of extra buffer space doesn't hurt...
Maciej Żenczykowski206b2382023-03-14 03:50:28 +000060 struct {
Maciej Żenczykowski8eb48882023-03-14 03:55:22 +000061 struct virtio_net_hdr vnet;
Maciej Żenczykowski206b2382023-03-14 03:50:28 +000062 uint8_t payload[22 + MAXMTU];
63 char pad; // +1 to make packet truncation obvious
64 } buf;
Maciej Żenczykowskife7a1672023-01-17 21:28:22 +000065 struct iovec iov = {
Maciej Żenczykowski206b2382023-03-14 03:50:28 +000066 .iov_base = &buf,
Maciej Żenczykowskif3eeff92023-03-13 21:50:01 +000067 .iov_len = sizeof(buf),
Maciej Żenczykowskife7a1672023-01-17 21:28:22 +000068 };
Maciej Żenczykowski4e764172023-03-13 21:55:54 +000069 char cmsg_buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
Maciej Żenczykowskife7a1672023-01-17 21:28:22 +000070 struct msghdr msgh = {
71 .msg_iov = &iov,
72 .msg_iovlen = 1,
73 .msg_control = cmsg_buf,
74 .msg_controllen = sizeof(cmsg_buf),
75 };
76 ssize_t readlen = recvmsg(tunnel->read_fd6, &msgh, /*flags*/ 0);
Daniel Drowna45056e2012-03-23 10:42:54 -050077
junyulaic4e591a2018-11-26 22:36:10 +090078 if (readlen < 0) {
Lorenzo Colitti49454812015-01-31 19:18:47 +090079 if (errno != EAGAIN) {
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +000080 logmsg(ANDROID_LOG_WARN, "%s: read error: %s", __func__, strerror(errno));
Lorenzo Colitti49454812015-01-31 19:18:47 +090081 }
Maciej Żenczykowskib984f312025-03-14 17:32:48 -070082 if (errno == ENETDOWN) running = false;
Daniel Drowna45056e2012-03-23 10:42:54 -050083 return;
junyulaic4e591a2018-11-26 22:36:10 +090084 } else if (readlen == 0) {
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +000085 logmsg(ANDROID_LOG_WARN, "%s: packet socket removed?", __func__);
Maciej Żenczykowskib984f312025-03-14 17:32:48 -070086 running = false;
Lorenzo Colittif08c5aa2014-06-03 12:56:38 +090087 return;
Maciej Żenczykowskif3eeff92023-03-13 21:50:01 +000088 } else if (readlen >= sizeof(buf)) {
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +000089 logmsg(ANDROID_LOG_WARN, "%s: read truncation - ignoring pkt", __func__);
90 return;
Lorenzo Colittif08c5aa2014-06-03 12:56:38 +090091 }
92
Maciej Żenczykowski8fe16b02023-06-01 08:58:43 +000093 bool ok = false;
Maciej Żenczykowskife7a1672023-01-17 21:28:22 +000094 __u32 tp_status = 0;
Maciej Żenczykowski4e764172023-03-13 21:55:54 +000095 __u16 tp_net = 0;
Maciej Żenczykowskife7a1672023-01-17 21:28:22 +000096
97 for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; cmsg = CMSG_NXTHDR(&msgh,cmsg)) {
98 if (cmsg->cmsg_level == SOL_PACKET && cmsg->cmsg_type == PACKET_AUXDATA) {
99 struct tpacket_auxdata *aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
Maciej Żenczykowski8fe16b02023-06-01 08:58:43 +0000100 ok = true;
Maciej Żenczykowskife7a1672023-01-17 21:28:22 +0000101 tp_status = aux->tp_status;
Maciej Żenczykowski4e764172023-03-13 21:55:54 +0000102 tp_net = aux->tp_net;
Maciej Żenczykowskife7a1672023-01-17 21:28:22 +0000103 break;
104 }
105 }
106
Maciej Żenczykowski8fe16b02023-06-01 08:58:43 +0000107 if (!ok) {
108 // theoretically this should not happen...
109 static bool logged = false;
110 if (!logged) {
111 logmsg(ANDROID_LOG_ERROR, "%s: failed to fetch tpacket_auxdata cmsg", __func__);
112 logged = true;
113 }
114 }
115
Maciej Żenczykowskia4df1012023-03-16 02:42:37 +0000116 const int payload_offset = offsetof(typeof(buf), payload);
117 if (readlen < payload_offset + tp_net) {
118 logmsg(ANDROID_LOG_WARN, "%s: ignoring %zd byte pkt shorter than %d+%u L2 header",
119 __func__, readlen, payload_offset, tp_net);
Maciej Żenczykowski4e764172023-03-13 21:55:54 +0000120 return;
121 }
122
Maciej Żenczykowskia4df1012023-03-16 02:42:37 +0000123 const int pkt_len = readlen - payload_offset;
Maciej Żenczykowskiaae2fb92023-03-15 00:51:54 +0000124
Maciej Żenczykowskife7a1672023-01-17 21:28:22 +0000125 // This will detect a skb->ip_summed == CHECKSUM_PARTIAL packet with non-final L4 checksum
126 if (tp_status & TP_STATUS_CSUMNOTREADY) {
Maciej Żenczykowski1f395ef2023-02-16 05:11:54 +0000127 static bool logged = false;
128 if (!logged) {
Maciej Żenczykowskiaae2fb92023-03-15 00:51:54 +0000129 logmsg(ANDROID_LOG_WARN, "%s: L4 checksum calculation required", __func__);
Maciej Żenczykowski1f395ef2023-02-16 05:11:54 +0000130 logged = true;
131 }
Maciej Żenczykowskiaae2fb92023-03-15 00:51:54 +0000132
133 // These are non-negative by virtue of csum_start/offset being u16
134 const int cs_start = buf.vnet.csum_start;
135 const int cs_offset = cs_start + buf.vnet.csum_offset;
136 if (cs_start > pkt_len) {
137 logmsg(ANDROID_LOG_ERROR, "%s: out of range - checksum start %d > %d",
138 __func__, cs_start, pkt_len);
139 } else if (cs_offset + 1 >= pkt_len) {
140 logmsg(ANDROID_LOG_ERROR, "%s: out of range - checksum offset %d + 1 >= %d",
141 __func__, cs_offset, pkt_len);
142 } else {
143 uint16_t csum = ip_checksum(buf.payload + cs_start, pkt_len - cs_start);
144 if (!csum) csum = 0xFFFF; // required fixup for UDP, TCP must live with it
145 buf.payload[cs_offset] = csum & 0xFF;
146 buf.payload[cs_offset + 1] = csum >> 8;
147 }
Maciej Żenczykowskife7a1672023-01-17 21:28:22 +0000148 }
149
Maciej Żenczykowskiaae2fb92023-03-15 00:51:54 +0000150 translate_packet(tunnel->fd4, 0 /* to_ipv6 */, buf.payload + tp_net, pkt_len - tp_net);
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +0000151}
152
153// reads TUN_PI + L3 IPv4 packet from tun, translates to IPv6, writes to AF_INET6/RAW socket
154void process_packet_4_to_6(struct tun_data *tunnel) {
Maciej Żenczykowskicfa100f2023-03-14 03:04:50 +0000155 struct {
156 struct tun_pi pi;
157 uint8_t payload[MAXMTU];
158 char pad; // +1 byte to make packet truncation obvious
159 } buf;
160 ssize_t readlen = read(tunnel->fd4, &buf, sizeof(buf));
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +0000161
162 if (readlen < 0) {
163 if (errno != EAGAIN) {
164 logmsg(ANDROID_LOG_WARN, "%s: read error: %s", __func__, strerror(errno));
165 }
Maciej Żenczykowskib984f312025-03-14 17:32:48 -0700166 if (errno == ENETDOWN) running = false; // not sure if this can happen
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +0000167 return;
168 } else if (readlen == 0) {
169 logmsg(ANDROID_LOG_WARN, "%s: tun interface removed", __func__);
Maciej Żenczykowskib984f312025-03-14 17:32:48 -0700170 running = false;
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +0000171 return;
Maciej Żenczykowskif3eeff92023-03-13 21:50:01 +0000172 } else if (readlen >= sizeof(buf)) {
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +0000173 logmsg(ANDROID_LOG_WARN, "%s: read truncation - ignoring pkt", __func__);
Maciej Żenczykowski50303532020-06-02 14:46:45 -0700174 return;
175 }
176
Maciej Żenczykowskia4df1012023-03-16 02:42:37 +0000177 const int payload_offset = offsetof(typeof(buf), payload);
178
179 if (readlen < payload_offset) {
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +0000180 logmsg(ANDROID_LOG_WARN, "%s: short read: got %ld bytes", __func__, readlen);
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900181 return;
Daniel Drowna45056e2012-03-23 10:42:54 -0500182 }
Lorenzo Colittif08c5aa2014-06-03 12:56:38 +0900183
Maciej Żenczykowskia4df1012023-03-16 02:42:37 +0000184 const int pkt_len = readlen - payload_offset;
185
Maciej Żenczykowskicfa100f2023-03-14 03:04:50 +0000186 uint16_t proto = ntohs(buf.pi.proto);
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900187 if (proto != ETH_P_IP) {
188 logmsg(ANDROID_LOG_WARN, "%s: unknown packet type = 0x%x", __func__, proto);
189 return;
190 }
191
Maciej Żenczykowskicfa100f2023-03-14 03:04:50 +0000192 if (buf.pi.flags != 0) {
193 logmsg(ANDROID_LOG_WARN, "%s: unexpected flags = %d", __func__, buf.pi.flags);
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900194 }
195
Maciej Żenczykowskia4df1012023-03-16 02:42:37 +0000196 translate_packet(tunnel->write_fd6, 1 /* to_ipv6 */, buf.payload, pkt_len);
Daniel Drowna45056e2012-03-23 10:42:54 -0500197}
198
Maciej Żenczykowskif6ec94e2022-07-12 16:17:33 -0700199// IPv6 DAD packet format:
200// Ethernet header (if needed) will be added by the kernel:
201// u8[6] src_mac; u8[6] dst_mac '33:33:ff:XX:XX:XX'; be16 ethertype '0x86DD'
202// IPv6 header:
203// be32 0x60000000 - ipv6, tclass 0, flowlabel 0
204// be16 payload_length '32'; u8 nxt_hdr ICMPv6 '58'; u8 hop limit '255'
205// u128 src_ip6 '::'
206// u128 dst_ip6 'ff02::1:ffXX:XXXX'
207// ICMPv6 header:
208// u8 type '135'; u8 code '0'; u16 icmp6 checksum; u32 reserved '0'
209// ICMPv6 neighbour solicitation payload:
210// u128 tgt_ip6
211// ICMPv6 ND options:
212// u8 opt nr '14'; u8 length '1'; u8[6] nonce '6 random bytes'
Maciej Żenczykowskia1cb0f32022-07-19 09:22:58 -0700213void send_dad(int fd, const struct in6_addr* tgt) {
Maciej Żenczykowskif6ec94e2022-07-12 16:17:33 -0700214 struct {
215 struct ip6_hdr ip6h;
216 struct nd_neighbor_solicit ns;
217 uint8_t ns_opt_nr;
218 uint8_t ns_opt_len;
219 uint8_t ns_opt_nonce[6];
220 } dad_pkt = {
221 .ip6h = {
222 .ip6_flow = htonl(6 << 28), // v6, 0 tclass, 0 flowlabel
223 .ip6_plen = htons(sizeof(dad_pkt) - sizeof(struct ip6_hdr)), // payload length, ie. 32
224 .ip6_nxt = IPPROTO_ICMPV6, // 58
225 .ip6_hlim = 255,
226 .ip6_src = {}, // ::
227 .ip6_dst.s6_addr = {
228 0xFF, 0x02, 0, 0,
229 0, 0, 0, 0,
230 0, 0, 0, 1,
231 0xFF, tgt->s6_addr[13], tgt->s6_addr[14], tgt->s6_addr[15],
232 }, // ff02::1:ffXX:XXXX - multicast group address derived from bottom 24-bits of tgt
233 },
234 .ns = {
235 .nd_ns_type = ND_NEIGHBOR_SOLICIT, // 135
236 .nd_ns_code = 0,
237 .nd_ns_cksum = 0, // will be calculated later
238 .nd_ns_reserved = 0,
239 .nd_ns_target = *tgt,
240 },
241 .ns_opt_nr = 14, // icmp6 option 'nonce' from RFC3971
242 .ns_opt_len = 1, // in units of 8 bytes, including option nr and len
Maciej Żenczykowskia1cb0f32022-07-19 09:22:58 -0700243 .ns_opt_nonce = {}, // opt_len *8 - sizeof u8(opt_nr) - sizeof u8(opt_len) = 6 ranodmized bytes
Maciej Żenczykowskif6ec94e2022-07-12 16:17:33 -0700244 };
Maciej Żenczykowskia1cb0f32022-07-19 09:22:58 -0700245 arc4random_buf(&dad_pkt.ns_opt_nonce, sizeof(dad_pkt.ns_opt_nonce));
Maciej Żenczykowskif6ec94e2022-07-12 16:17:33 -0700246
247 // 40 byte IPv6 header + 8 byte ICMPv6 header + 16 byte ipv6 target address + 8 byte nonce option
248 _Static_assert(sizeof(dad_pkt) == 40 + 8 + 16 + 8, "sizeof dad packet != 72");
249
250 // IPv6 header checksum is standard negated 16-bit one's complement sum over the icmpv6 pseudo
251 // header (which includes payload length, nextheader, and src/dst ip) and the icmpv6 payload.
252 //
253 // Src/dst ip immediately prefix the icmpv6 header itself, so can be handled along
254 // with the payload. We thus only need to manually account for payload len & next header.
255 //
256 // The magic '8' is simply the offset of the ip6_src field in the ipv6 header,
257 // ie. we're skipping over the ipv6 version, tclass, flowlabel, payload length, next header
258 // and hop limit fields, because they're not quite where we want them to be.
259 //
260 // ip6_plen is already in network order, while ip6_nxt is a single byte and thus needs htons().
261 uint32_t csum = dad_pkt.ip6h.ip6_plen + htons(dad_pkt.ip6h.ip6_nxt);
262 csum = ip_checksum_add(csum, &dad_pkt.ip6h.ip6_src, sizeof(dad_pkt) - 8);
263 dad_pkt.ns.nd_ns_cksum = ip_checksum_finish(csum);
264
265 const struct sockaddr_in6 dst = {
266 .sin6_family = AF_INET6,
267 .sin6_addr = dad_pkt.ip6h.ip6_dst,
268 .sin6_scope_id = if_nametoindex(Global_Clatd_Config.native_ipv6_interface),
269 };
270
Maciej Żenczykowskia1cb0f32022-07-19 09:22:58 -0700271 sendto(fd, &dad_pkt, sizeof(dad_pkt), 0 /*flags*/, (const struct sockaddr *)&dst, sizeof(dst));
Maciej Żenczykowskif6ec94e2022-07-12 16:17:33 -0700272}
273
Daniel Drowna45056e2012-03-23 10:42:54 -0500274/* function: event_loop
275 * reads packets from the tun network interface and passes them down the stack
junyulaic4e591a2018-11-26 22:36:10 +0900276 * tunnel - tun device data
Daniel Drowna45056e2012-03-23 10:42:54 -0500277 */
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900278void event_loop(struct tun_data *tunnel) {
Lorenzo Colittidce3ddf2014-08-25 16:07:12 -0700279 struct pollfd wait_fd[] = {
280 { tunnel->read_fd6, POLLIN, 0 },
281 { tunnel->fd4, POLLIN, 0 },
282 };
Daniel Drowna45056e2012-03-23 10:42:54 -0500283
Maciej Żenczykowskib984f312025-03-14 17:32:48 -0700284 while (running && !sigterm) {
Maciej Żenczykowskica9466c2023-03-15 17:40:45 +0000285 if (poll(wait_fd, ARRAY_SIZE(wait_fd), -1) == -1) {
Bernie Innocenti69dc60d2018-05-14 20:40:49 +0900286 if (errno != EINTR) {
junyulaic4e591a2018-11-26 22:36:10 +0900287 logmsg(ANDROID_LOG_WARN, "event_loop/poll returned an error: %s", strerror(errno));
Daniel Drowna45056e2012-03-23 10:42:54 -0500288 }
289 } else {
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +0000290 // Call process_packet if the socket has data to be read, but also if an
Lorenzo Colitti9353be22014-12-03 15:18:29 +0900291 // error is waiting. If we don't call read() after getting POLLERR, a
292 // subsequent poll() will return immediately with POLLERR again,
293 // causing this code to spin in a loop. Calling read() will clear the
294 // socket error flag instead.
Maciej Żenczykowskie6e0c002023-01-18 23:57:35 +0000295 if (wait_fd[0].revents) process_packet_6_to_4(tunnel);
296 if (wait_fd[1].revents) process_packet_4_to_6(tunnel);
Daniel Drowna45056e2012-03-23 10:42:54 -0500297 }
Daniel Drowna45056e2012-03-23 10:42:54 -0500298 }
299}