blob: 9b8c843052e743a63564adda1ec1c643fff129a3 [file] [log] [blame]
/*
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#define LOG_TAG "TcUtils"
18
19#include "tcutils/tcutils.h"
20
Patrick Rohre7f26e22022-01-12 22:13:12 +010021#include "kernelversion.h"
Patrick Rohr776c40c2022-01-12 21:05:26 +010022#include "scopeguard.h"
23
24#include <android/log.h>
25#include <arpa/inet.h>
26#include <cerrno>
Patrick Rohr776c40c2022-01-12 21:05:26 +010027#include <cstring>
28#include <libgen.h>
29#include <linux/if_arp.h>
30#include <linux/if_ether.h>
31#include <linux/netlink.h>
32#include <linux/pkt_cls.h>
33#include <linux/pkt_sched.h>
34#include <linux/rtnetlink.h>
35#include <net/if.h>
36#include <stdarg.h>
Patrick Rohr0c34e9a02022-01-17 13:59:09 +010037#include <stdio.h>
Patrick Rohr776c40c2022-01-12 21:05:26 +010038#include <sys/socket.h>
Patrick Rohr776c40c2022-01-12 21:05:26 +010039#include <unistd.h>
40#include <utility>
41
42#define BPF_FD_JUST_USE_INT
43#include <BpfSyscallWrappers.h>
44#undef BPF_FD_JUST_USE_INT
45
46// The maximum length of TCA_BPF_NAME. Sync from net/sched/cls_bpf.c.
47#define CLS_BPF_NAME_LEN 256
48
49// Classifier name. See cls_bpf_ops in net/sched/cls_bpf.c.
50#define CLS_BPF_KIND_NAME "bpf"
51
52namespace android {
53namespace {
54
55void logError(const char *fmt...) {
56 va_list args;
57 va_start(args, fmt);
58 __android_log_vprint(ANDROID_LOG_ERROR, LOG_TAG, fmt, args);
59 va_end(args);
60}
61
// Netlink address used for both bind() and connect(): AF_NETLINK with a zero
// port id and no multicast groups.
const sockaddr_nl KERNEL_NLADDR = {
    .nl_family = AF_NETLINK,
    .nl_pad = 0,
    .nl_pid = 0,
    .nl_groups = 0,
};

// Flags OR-ed into every request header: mark the message as a request and
// ask for an acknowledgement.
const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
64
65int sendAndProcessNetlinkResponse(const void *req, int len) {
66 // TODO: use unique_fd instead of ScopeGuard
67 int fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
68 if (fd == -1) {
69 int error = errno;
70 logError("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE): %d",
71 error);
72 return -error;
73 }
74 auto scopeGuard = base::make_scope_guard([fd] { close(fd); });
75
76 static constexpr int on = 1;
77 if (setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on))) {
78 int error = errno;
79 logError("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, 1): %d", error);
80 return -error;
81 }
82
83 // this is needed to get valid strace netlink parsing, it allocates the pid
84 if (bind(fd, (const struct sockaddr *)&KERNEL_NLADDR,
85 sizeof(KERNEL_NLADDR))) {
86 int error = errno;
87 logError("bind(fd, {AF_NETLINK, 0, 0}: %d)", error);
88 return -error;
89 }
90
91 // we do not want to receive messages from anyone besides the kernel
92 if (connect(fd, (const struct sockaddr *)&KERNEL_NLADDR,
93 sizeof(KERNEL_NLADDR))) {
94 int error = errno;
95 logError("connect(fd, {AF_NETLINK, 0, 0}): %d", error);
96 return -error;
97 }
98
99 int rv = send(fd, req, len, 0);
100
101 if (rv == -1) {
102 int error = errno;
103 logError("send(fd, req, len, 0) failed: %d", error);
104 return -error;
105 }
106
107 if (rv != len) {
108 logError("send(fd, req, len = %d, 0) returned invalid message size %d", len,
109 rv);
110 return -EMSGSIZE;
111 }
112
113 struct {
114 nlmsghdr h;
115 nlmsgerr e;
116 char buf[256];
117 } resp = {};
118
119 rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
120
121 if (rv == -1) {
122 int error = errno;
123 logError("recv() failed: %d", error);
124 return -error;
125 }
126
127 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
128 logError("recv() returned short packet: %d", rv);
129 return -EBADMSG;
130 }
131
132 if (resp.h.nlmsg_len != (unsigned)rv) {
133 logError("recv() returned invalid header length: %d != %d",
134 resp.h.nlmsg_len, rv);
135 return -EBADMSG;
136 }
137
138 if (resp.h.nlmsg_type != NLMSG_ERROR) {
139 logError("recv() did not return NLMSG_ERROR message: %d",
140 resp.h.nlmsg_type);
141 return -ENOMSG;
142 }
143
144 if (resp.e.error) {
145 logError("NLMSG_ERROR message return error: %d", resp.e.error);
146 }
147 return resp.e.error; // returns 0 on success
148}
149
150int hardwareAddressType(const char *interface) {
151 int fd = socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
152 if (fd < 0)
153 return -errno;
154 auto scopeGuard = base::make_scope_guard([fd] { close(fd); });
155
156 struct ifreq ifr = {};
157 // We use strncpy() instead of strlcpy() since kernel has to be able
158 // to handle non-zero terminated junk passed in by userspace anyway,
159 // and this way too long interface names (more than IFNAMSIZ-1 = 15
160 // characters plus terminating NULL) will not get truncated to 15
161 // characters and zero-terminated and thus potentially erroneously
162 // match a truncated interface if one were to exist.
163 strncpy(ifr.ifr_name, interface, sizeof(ifr.ifr_name));
164
165 if (ioctl(fd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) {
166 return -errno;
167 }
168 return ifr.ifr_hwaddr.sa_family;
169}
170
Patrick Rohr776c40c2022-01-12 21:05:26 +0100171} // namespace
172
173int isEthernet(const char *iface, bool &isEthernet) {
174 int rv = hardwareAddressType(iface);
175 if (rv < 0) {
176 logError("Get hardware address type of interface %s failed: %s", iface,
177 strerror(-rv));
Patrick Rohr27846ff2022-01-17 12:22:51 +0100178 return rv;
Patrick Rohr776c40c2022-01-12 21:05:26 +0100179 }
180
181 // Backwards compatibility with pre-GKI kernels that use various custom
182 // ARPHRD_* for their cellular interface
183 switch (rv) {
184 // ARPHRD_PUREIP on at least some Mediatek Android kernels
185 // example: wembley with 4.19 kernel
186 case 520:
187 // in Linux 4.14+ rmnet support was upstreamed and ARHRD_RAWIP became 519,
188 // but it is 530 on at least some Qualcomm Android 4.9 kernels with rmnet
189 // example: Pixel 3 family
190 case 530:
191 // >5.4 kernels are GKI2.0 and thus upstream compatible, however 5.10
192 // shipped with Android S, so (for safety) let's limit ourselves to
193 // >5.10, ie. 5.11+ as a guarantee we're on Android T+ and thus no
194 // longer need this non-upstream compatibility logic
195 static bool is_pre_5_11_kernel = !isAtLeastKernelVersion(5, 11, 0);
196 if (is_pre_5_11_kernel)
197 return false;
198 }
199
200 switch (rv) {
201 case ARPHRD_ETHER:
202 isEthernet = true;
203 return 0;
204 case ARPHRD_NONE:
205 case ARPHRD_PPP:
206 case ARPHRD_RAWIP:
207 isEthernet = false;
208 return 0;
209 default:
210 logError("Unknown hardware address type %d on interface %s", rv, iface);
Patrick Rohr27846ff2022-01-17 12:22:51 +0100211 return -EAFNOSUPPORT;
Patrick Rohr776c40c2022-01-12 21:05:26 +0100212 }
213}
214
Patrick Rohr42b58ae2022-01-17 13:09:12 +0100215// ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
216// REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
217// DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
218int doTcQdiscClsact(int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
219 // This is the name of the qdisc we are attaching.
220 // Some hoop jumping to make this compile time constant with known size,
221 // so that the structure declaration is well defined at compile time.
222#define CLSACT "clsact"
223 // sizeof() includes the terminating NULL
224 static constexpr size_t ASCIIZ_LEN_CLSACT = sizeof(CLSACT);
225
226 const struct {
227 nlmsghdr n;
228 tcmsg t;
229 struct {
230 nlattr attr;
231 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
232 } kind;
233 } req = {
234 .n =
235 {
236 .nlmsg_len = sizeof(req),
237 .nlmsg_type = nlMsgType,
238 .nlmsg_flags =
239 static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
240 },
241 .t =
242 {
243 .tcm_family = AF_UNSPEC,
244 .tcm_ifindex = ifIndex,
245 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
246 .tcm_parent = TC_H_CLSACT,
247 },
248 .kind =
249 {
250 .attr =
251 {
252 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
253 .nla_type = TCA_KIND,
254 },
255 .str = CLSACT,
256 },
257 };
258#undef CLSACT
259
260 return sendAndProcessNetlinkResponse(&req, sizeof(req));
261}
262
Patrick Rohr776c40c2022-01-12 21:05:26 +0100263// tc filter add dev .. in/egress prio 1 protocol ipv6/ip bpf object-pinned
264// /sys/fs/bpf/... direct-action
265int tcAddBpfFilter(int ifIndex, bool ingress, uint16_t prio, uint16_t proto,
266 const char *bpfProgPath) {
267 const int bpfFd = bpf::retrieveProgram(bpfProgPath);
268 if (bpfFd == -1) {
269 logError("retrieveProgram failed: %d", errno);
270 return -errno;
271 }
272 auto scopeGuard = base::make_scope_guard([bpfFd] { close(bpfFd); });
273
274 struct {
275 nlmsghdr n;
276 tcmsg t;
277 struct {
278 nlattr attr;
279 // The maximum classifier name length is defined in
280 // tcf_proto_ops in include/net/sch_generic.h.
281 char str[NLMSG_ALIGN(sizeof(CLS_BPF_KIND_NAME))];
282 } kind;
283 struct {
284 nlattr attr;
285 struct {
286 nlattr attr;
287 __u32 u32;
288 } fd;
289 struct {
290 nlattr attr;
291 char str[NLMSG_ALIGN(CLS_BPF_NAME_LEN)];
292 } name;
293 struct {
294 nlattr attr;
295 __u32 u32;
296 } flags;
297 } options;
298 } req = {
299 .n =
300 {
301 .nlmsg_len = sizeof(req),
302 .nlmsg_type = RTM_NEWTFILTER,
303 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
304 },
305 .t =
306 {
307 .tcm_family = AF_UNSPEC,
308 .tcm_ifindex = ifIndex,
309 .tcm_handle = TC_H_UNSPEC,
310 .tcm_parent = TC_H_MAKE(TC_H_CLSACT, ingress ? TC_H_MIN_INGRESS
311 : TC_H_MIN_EGRESS),
312 .tcm_info =
313 static_cast<__u32>((static_cast<uint16_t>(prio) << 16) |
314 htons(static_cast<uint16_t>(proto))),
315 },
316 .kind =
317 {
318 .attr =
319 {
320 .nla_len = sizeof(req.kind),
321 .nla_type = TCA_KIND,
322 },
323 .str = CLS_BPF_KIND_NAME,
324 },
325 .options =
326 {
327 .attr =
328 {
329 .nla_len = sizeof(req.options),
330 .nla_type = NLA_F_NESTED | TCA_OPTIONS,
331 },
332 .fd =
333 {
334 .attr =
335 {
336 .nla_len = sizeof(req.options.fd),
337 .nla_type = TCA_BPF_FD,
338 },
339 .u32 = static_cast<__u32>(bpfFd),
340 },
341 .name =
342 {
343 .attr =
344 {
345 .nla_len = sizeof(req.options.name),
346 .nla_type = TCA_BPF_NAME,
347 },
348 // Visible via 'tc filter show', but
349 // is overwritten by strncpy below
350 .str = "placeholder",
351 },
352 .flags =
353 {
354 .attr =
355 {
356 .nla_len = sizeof(req.options.flags),
357 .nla_type = TCA_BPF_FLAGS,
358 },
359 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
360 },
361 },
362 };
363
364 snprintf(req.options.name.str, sizeof(req.options.name.str), "%s:[*fsobj]",
365 basename(bpfProgPath));
366
367 int error = sendAndProcessNetlinkResponse(&req, sizeof(req));
368 return error;
369}
370
371// tc filter del dev .. in/egress prio .. protocol ..
372int tcDeleteFilter(int ifIndex, bool ingress, uint16_t prio, uint16_t proto) {
373 const struct {
374 nlmsghdr n;
375 tcmsg t;
376 } req = {
377 .n =
378 {
379 .nlmsg_len = sizeof(req),
380 .nlmsg_type = RTM_DELTFILTER,
381 .nlmsg_flags = NETLINK_REQUEST_FLAGS,
382 },
383 .t =
384 {
385 .tcm_family = AF_UNSPEC,
386 .tcm_ifindex = ifIndex,
387 .tcm_handle = TC_H_UNSPEC,
388 .tcm_parent = TC_H_MAKE(TC_H_CLSACT, ingress ? TC_H_MIN_INGRESS
389 : TC_H_MIN_EGRESS),
390 .tcm_info =
391 static_cast<__u32>((static_cast<uint16_t>(prio) << 16) |
392 htons(static_cast<uint16_t>(proto))),
393 },
394 };
395
396 return sendAndProcessNetlinkResponse(&req, sizeof(req));
397}
398
399} // namespace android