blob: cdfb763e63ea9a27bf351ca342fa980de6ae30c5 [file] [log] [blame]
Hungming Chen8ebdb6f2022-01-16 14:44:11 +08001/*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "TcUtils"
18
19#include "libclat/TcUtils.h"
20
21#include <arpa/inet.h>
22#include <linux/if.h>
23#include <linux/if_arp.h>
24#include <linux/netlink.h>
25#include <linux/pkt_cls.h>
26#include <linux/pkt_sched.h>
27#include <sys/ioctl.h>
28#include <sys/socket.h>
29#include <sys/types.h>
30#include <unistd.h>
31
32#include <log/log.h>
33
34#include "android-base/unique_fd.h"
35
36namespace android {
37namespace net {
38
39using std::max;
40
// Sync from system/netd/server/NetlinkCommands.h
//
// Netlink address used for both bind() and connect() on the rtnetlink socket:
// every field other than the address family is zero.
const sockaddr_nl KERNEL_NLADDR = {
        .nl_family = AF_NETLINK,
        .nl_pad = 0,
        .nl_pid = 0,
        .nl_groups = 0,
};

// Flags OR-ed into every request header built in this file: mark the message as
// a request and ask for an acknowledgement.
const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
44
45static int doSIOCGIF(const std::string& interface, int opt) {
46 base::unique_fd ufd(socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0));
47
48 if (ufd < 0) {
49 const int err = errno;
50 ALOGE("socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0)");
51 return -err;
52 };
53
54 struct ifreq ifr = {};
55 // We use strncpy() instead of strlcpy() since kernel has to be able
56 // to handle non-zero terminated junk passed in by userspace anyway,
57 // and this way too long interface names (more than IFNAMSIZ-1 = 15
58 // characters plus terminating NULL) will not get truncated to 15
59 // characters and zero-terminated and thus potentially erroneously
60 // match a truncated interface if one were to exist.
61 strncpy(ifr.ifr_name, interface.c_str(), sizeof(ifr.ifr_name));
62
63 if (ioctl(ufd, opt, &ifr, sizeof(ifr))) return -errno;
64
65 if (opt == SIOCGIFHWADDR) return ifr.ifr_hwaddr.sa_family;
66 if (opt == SIOCGIFMTU) return ifr.ifr_mtu;
67 return -EINVAL;
68}
69
70int hardwareAddressType(const std::string& interface) {
71 return doSIOCGIF(interface, SIOCGIFHWADDR);
72}
73
74int deviceMTU(const std::string& interface) {
75 return doSIOCGIF(interface, SIOCGIFMTU);
76}
77
78base::Result<bool> isEthernet(const std::string& interface) {
79 int rv = hardwareAddressType(interface);
80 if (rv < 0) {
81 errno = -rv;
82 return ErrnoErrorf("Get hardware address type of interface {} failed", interface);
83 }
84
85 switch (rv) {
86 case ARPHRD_ETHER:
87 return true;
88 case ARPHRD_NONE:
89 case ARPHRD_RAWIP: // in Linux 4.14+ rmnet support was upstreamed and this is 519
90 case 530: // this is ARPHRD_RAWIP on some Android 4.9 kernels with rmnet
91 return false;
92 default:
93 errno = EAFNOSUPPORT; // Address family not supported
94 return ErrnoErrorf("Unknown hardware address type {} on interface {}", rv, interface);
95 }
96}
97
98// TODO: use //system/netd/server/NetlinkCommands.cpp:openNetlinkSocket(protocol)
99// and //system/netd/server/SockDiag.cpp:checkError(fd)
100static int sendAndProcessNetlinkResponse(const void* req, int len) {
101 base::unique_fd fd(socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE));
102 if (fd == -1) {
103 const int err = errno;
104 ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE)");
105 return -err;
106 }
107
108 static constexpr int on = 1;
109 int rv = setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on));
110 if (rv) ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, %d)", on);
111
112 // this is needed to get sane strace netlink parsing, it allocates the pid
113 rv = bind(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
114 if (rv) {
115 const int err = errno;
116 ALOGE("bind(fd, {AF_NETLINK, 0, 0})");
117 return -err;
118 }
119
120 // we do not want to receive messages from anyone besides the kernel
121 rv = connect(fd, (const struct sockaddr*)&KERNEL_NLADDR, sizeof(KERNEL_NLADDR));
122 if (rv) {
123 const int err = errno;
124 ALOGE("connect(fd, {AF_NETLINK, 0, 0})");
125 return -err;
126 }
127
128 rv = send(fd, req, len, 0);
129 if (rv == -1) return -errno;
130 if (rv != len) return -EMSGSIZE;
131
132 struct {
133 nlmsghdr h;
134 nlmsgerr e;
135 char buf[256];
136 } resp = {};
137
138 rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
139
140 if (rv == -1) {
141 const int err = errno;
142 ALOGE("recv() failed");
143 return -err;
144 }
145
146 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
147 ALOGE("recv() returned short packet: %d", rv);
148 return -EMSGSIZE;
149 }
150
151 if (resp.h.nlmsg_len != (unsigned)rv) {
152 ALOGE("recv() returned invalid header length: %d != %d", resp.h.nlmsg_len, rv);
153 return -EBADMSG;
154 }
155
156 if (resp.h.nlmsg_type != NLMSG_ERROR) {
157 ALOGE("recv() did not return NLMSG_ERROR message: %d", resp.h.nlmsg_type);
158 return -EBADMSG;
159 }
160
161 return resp.e.error; // returns 0 on success
162}
163
164// ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
165// REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
166// DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
167int doTcQdiscClsact(int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
168 // This is the name of the qdisc we are attaching.
169 // Some hoop jumping to make this compile time constant with known size,
170 // so that the structure declaration is well defined at compile time.
171#define CLSACT "clsact"
172 // sizeof() includes the terminating NULL
173 static constexpr size_t ASCIIZ_LEN_CLSACT = sizeof(CLSACT);
174
175 const struct {
176 nlmsghdr n;
177 tcmsg t;
178 struct {
179 nlattr attr;
180 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
181 } kind;
182 } req = {
183 .n =
184 {
185 .nlmsg_len = sizeof(req),
186 .nlmsg_type = nlMsgType,
187 .nlmsg_flags = static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
188 },
189 .t =
190 {
191 .tcm_family = AF_UNSPEC,
192 .tcm_ifindex = ifIndex,
193 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
194 .tcm_parent = TC_H_CLSACT,
195 },
196 .kind =
197 {
198 .attr =
199 {
200 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
201 .nla_type = TCA_KIND,
202 },
203 .str = CLSACT,
204 },
205 };
206#undef CLSACT
207
208 return sendAndProcessNetlinkResponse(&req, sizeof(req));
209}
210
211// tc filter add dev .. in/egress prio 4 protocol ipv6/ip bpf object-pinned /sys/fs/bpf/...
212// direct-action
213int tcFilterAddDevBpf(int ifIndex, bool ingress, uint16_t proto, int bpfFd, bool ethernet) {
214 // This is the name of the filter we're attaching (ie. this is the 'bpf'
215 // packet classifier enabled by kernel config option CONFIG_NET_CLS_BPF.
216 //
217 // We go through some hoops in order to make this compile time constants
218 // so that we can define the struct further down the function with the
219 // field for this sized correctly already during the build.
220#define BPF "bpf"
221 // sizeof() includes the terminating NULL
222 static constexpr size_t ASCIIZ_LEN_BPF = sizeof(BPF);
223
224 // This is to replicate program name suffix used by 'tc' Linux cli
225 // when it attaches programs.
226#define FSOBJ_SUFFIX ":[*fsobj]"
227
228 // This macro expands (from header files) to:
229 // prog_clatd_schedcls_ingress6_clat_rawip:[*fsobj]
230 // and is the name of the pinned ingress ebpf program for ARPHRD_RAWIP interfaces.
231 // (also compatible with anything that has 0 size L2 header)
232 static constexpr char name_clat_rx_rawip[] = CLAT_INGRESS6_PROG_RAWIP_NAME FSOBJ_SUFFIX;
233
234 // This macro expands (from header files) to:
235 // prog_clatd_schedcls_ingress6_clat_ether:[*fsobj]
236 // and is the name of the pinned ingress ebpf program for ARPHRD_ETHER interfaces.
237 // (also compatible with anything that has standard ethernet header)
238 static constexpr char name_clat_rx_ether[] = CLAT_INGRESS6_PROG_ETHER_NAME FSOBJ_SUFFIX;
239
240 // This macro expands (from header files) to:
241 // prog_clatd_schedcls_egress4_clat_rawip:[*fsobj]
242 // and is the name of the pinned egress ebpf program for ARPHRD_RAWIP interfaces.
243 // (also compatible with anything that has 0 size L2 header)
244 static constexpr char name_clat_tx_rawip[] = CLAT_EGRESS4_PROG_RAWIP_NAME FSOBJ_SUFFIX;
245
246 // This macro expands (from header files) to:
247 // prog_clatd_schedcls_egress4_clat_ether:[*fsobj]
248 // and is the name of the pinned egress ebpf program for ARPHRD_ETHER interfaces.
249 // (also compatible with anything that has standard ethernet header)
250 static constexpr char name_clat_tx_ether[] = CLAT_EGRESS4_PROG_ETHER_NAME FSOBJ_SUFFIX;
251
252#undef FSOBJ_SUFFIX
253
254 // The actual name we'll use is determined at run time via 'ethernet' and 'ingress'
255 // booleans. We need to compile time allocate enough space in the struct
256 // hence this macro magic to make sure we have enough space for either
257 // possibility. In practice some of these are actually the same size.
258 static constexpr size_t ASCIIZ_MAXLEN_NAME = max({
259 sizeof(name_clat_rx_rawip),
260 sizeof(name_clat_rx_ether),
261 sizeof(name_clat_tx_rawip),
262 sizeof(name_clat_tx_ether),
263 });
264
265 // These are not compile time constants: 'name' is used in strncpy below
266 const char* const name_clat_rx = ethernet ? name_clat_rx_ether : name_clat_rx_rawip;
267 const char* const name_clat_tx = ethernet ? name_clat_tx_ether : name_clat_tx_rawip;
268 const char* const name = ingress ? name_clat_rx : name_clat_tx;
269
270 struct {
271 nlmsghdr n;
272 tcmsg t;
273 struct {
274 nlattr attr;
275 char str[NLMSG_ALIGN(ASCIIZ_LEN_BPF)];
276 } kind;
277 struct {
278 nlattr attr;
279 struct {
280 nlattr attr;
281 __u32 u32;
282 } fd;
283 struct {
284 nlattr attr;
285 char str[NLMSG_ALIGN(ASCIIZ_MAXLEN_NAME)];
286 } name;
287 struct {
288 nlattr attr;
289 __u32 u32;
290 } flags;
291 } options;
292 } req = {
293 .n =
294 {
295 .nlmsg_len = sizeof(req),
296 .nlmsg_type = RTM_NEWTFILTER,
297 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
298 },
299 .t =
300 {
301 .tcm_family = AF_UNSPEC,
302 .tcm_ifindex = ifIndex,
303 .tcm_handle = TC_H_UNSPEC,
304 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
305 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
306 .tcm_info = static_cast<__u32>((PRIO_CLAT << 16) | htons(proto)),
307 },
308 .kind =
309 {
310 .attr =
311 {
312 .nla_len = sizeof(req.kind),
313 .nla_type = TCA_KIND,
314 },
315 .str = BPF,
316 },
317 .options =
318 {
319 .attr =
320 {
321 .nla_len = sizeof(req.options),
322 .nla_type = NLA_F_NESTED | TCA_OPTIONS,
323 },
324 .fd =
325 {
326 .attr =
327 {
328 .nla_len = sizeof(req.options.fd),
329 .nla_type = TCA_BPF_FD,
330 },
331 .u32 = static_cast<__u32>(bpfFd),
332 },
333 .name =
334 {
335 .attr =
336 {
337 .nla_len = sizeof(req.options.name),
338 .nla_type = TCA_BPF_NAME,
339 },
340 // Visible via 'tc filter show', but
341 // is overwritten by strncpy below
342 .str = "placeholder",
343 },
344 .flags =
345 {
346 .attr =
347 {
348 .nla_len = sizeof(req.options.flags),
349 .nla_type = TCA_BPF_FLAGS,
350 },
351 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
352 },
353 },
354 };
355#undef BPF
356
357 strncpy(req.options.name.str, name, sizeof(req.options.name.str));
358
359 return sendAndProcessNetlinkResponse(&req, sizeof(req));
360}
361
362// tc filter del dev .. in/egress prio 4 protocol ..
363int tcFilterDelDev(int ifIndex, bool ingress, uint16_t prio, uint16_t proto) {
364 const struct {
365 nlmsghdr n;
366 tcmsg t;
367 } req = {
368 .n =
369 {
370 .nlmsg_len = sizeof(req),
371 .nlmsg_type = RTM_DELTFILTER,
372 .nlmsg_flags = NETLINK_REQUEST_FLAGS,
373 },
374 .t =
375 {
376 .tcm_family = AF_UNSPEC,
377 .tcm_ifindex = ifIndex,
378 .tcm_handle = TC_H_UNSPEC,
379 .tcm_parent = TC_H_MAKE(TC_H_CLSACT,
380 ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS),
381 .tcm_info = (static_cast<uint32_t>(prio) << 16) |
382 static_cast<uint32_t>(htons(proto)),
383 },
384 };
385
386 return sendAndProcessNetlinkResponse(&req, sizeof(req));
387}
388
389} // namespace net
390} // namespace android