blob: c82390fae5d1d6f904092198936deda4bf4c5412 [file] [log] [blame]
/*
 * Copyright (C) 2022 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#define LOG_TAG "TcUtils"
18
19#include "tcutils/tcutils.h"
20
Patrick Rohr28c717a2022-01-19 14:51:35 +010021#include "logging.h"
Patrick Rohr66452f52023-05-17 11:35:08 -070022#include "bpf/KernelUtils.h"
Patrick Rohr776c40c2022-01-12 21:05:26 +010023#include "scopeguard.h"
24
Patrick Rohr776c40c2022-01-12 21:05:26 +010025#include <arpa/inet.h>
26#include <cerrno>
Patrick Rohr776c40c2022-01-12 21:05:26 +010027#include <cstring>
28#include <libgen.h>
29#include <linux/if_arp.h>
30#include <linux/if_ether.h>
31#include <linux/netlink.h>
32#include <linux/pkt_cls.h>
33#include <linux/pkt_sched.h>
34#include <linux/rtnetlink.h>
Patrick Rohre815a742022-01-17 10:37:40 +010035#include <linux/tc_act/tc_bpf.h>
Patrick Rohr776c40c2022-01-12 21:05:26 +010036#include <net/if.h>
Patrick Rohr0c34e9a02022-01-17 13:59:09 +010037#include <stdio.h>
Patrick Rohr776c40c2022-01-12 21:05:26 +010038#include <sys/socket.h>
Patrick Rohr776c40c2022-01-12 21:05:26 +010039#include <unistd.h>
40#include <utility>
41
42#define BPF_FD_JUST_USE_INT
43#include <BpfSyscallWrappers.h>
44#undef BPF_FD_JUST_USE_INT
45
46// The maximum length of TCA_BPF_NAME. Sync from net/sched/cls_bpf.c.
47#define CLS_BPF_NAME_LEN 256
48
49// Classifier name. See cls_bpf_ops in net/sched/cls_bpf.c.
50#define CLS_BPF_KIND_NAME "bpf"
51
52namespace android {
53namespace {
54
Patrick Rohre815a742022-01-17 10:37:40 +010055/**
56 * IngressPoliceFilterBuilder builds a nlmsg request equivalent to the following
57 * tc command:
58 *
59 * tc filter add dev .. ingress prio .. protocol .. matchall \
60 * action police rate .. burst .. conform-exceed pipe/continue \
61 * action bpf object-pinned .. \
62 * drop
63 */
64class IngressPoliceFilterBuilder final {
65 // default mtu is 2047, so the cell logarithm factor (cell_log) is 3.
66 // 0x7FF >> 0x3FF x 2^1 >> 0x1FF x 2^2 >> 0xFF x 2^3
67 static constexpr int RTAB_CELL_LOGARITHM = 3;
68 static constexpr size_t RTAB_SIZE = 256;
69 static constexpr unsigned TIME_UNITS_PER_SEC = 1000000;
70
71 struct Request {
72 nlmsghdr n;
73 tcmsg t;
74 struct {
75 nlattr attr;
76 char str[NLMSG_ALIGN(sizeof("matchall"))];
77 } kind;
78 struct {
79 nlattr attr;
80 struct {
81 nlattr attr;
82 struct {
83 nlattr attr;
84 struct {
85 nlattr attr;
86 char str[NLMSG_ALIGN(sizeof("police"))];
87 } kind;
88 struct {
89 nlattr attr;
90 struct {
91 nlattr attr;
92 struct tc_police obj;
93 } police;
94 struct {
95 nlattr attr;
96 uint32_t u32[RTAB_SIZE];
97 } rtab;
98 struct {
99 nlattr attr;
100 int32_t s32;
101 } notexceedact;
102 } opt;
103 } act1;
104 struct {
105 nlattr attr;
106 struct {
107 nlattr attr;
108 char str[NLMSG_ALIGN(sizeof("bpf"))];
109 } kind;
110 struct {
111 nlattr attr;
112 struct {
113 nlattr attr;
114 uint32_t u32;
115 } fd;
116 struct {
117 nlattr attr;
118 char str[NLMSG_ALIGN(CLS_BPF_NAME_LEN)];
119 } name;
120 struct {
121 nlattr attr;
122 struct tc_act_bpf obj;
123 } parms;
124 } opt;
125 } act2;
126 } acts;
127 } opt;
128 };
129
130 // class members
131 const unsigned mBurstInBytes;
132 const char *mBpfProgPath;
133 int mBpfFd;
134 Request mRequest;
135
136 static double getTickInUsec() {
137 FILE *fp = fopen("/proc/net/psched", "re");
138 if (!fp) {
Patrick Rohr28c717a2022-01-19 14:51:35 +0100139 ALOGE("fopen(\"/proc/net/psched\"): %s", strerror(errno));
Patrick Rohre815a742022-01-17 10:37:40 +0100140 return 0.0;
141 }
142 auto scopeGuard = base::make_scope_guard([fp] { fclose(fp); });
143
144 uint32_t t2us;
145 uint32_t us2t;
146 uint32_t clockRes;
147 const bool isError =
148 fscanf(fp, "%08x%08x%08x", &t2us, &us2t, &clockRes) != 3;
149
150 if (isError) {
Patrick Rohr28c717a2022-01-19 14:51:35 +0100151 ALOGE("fscanf(/proc/net/psched, \"%%08x%%08x%%08x\"): %s",
Patrick Rohre815a742022-01-17 10:37:40 +0100152 strerror(errno));
153 return 0.0;
154 }
155
156 const double clockFactor =
157 static_cast<double>(clockRes) / TIME_UNITS_PER_SEC;
158 return static_cast<double>(t2us) / static_cast<double>(us2t) * clockFactor;
159 }
160
161 static inline const double kTickInUsec = getTickInUsec();
162
163public:
164 // clang-format off
165 IngressPoliceFilterBuilder(int ifIndex, uint16_t prio, uint16_t proto, unsigned rateInBytesPerSec,
166 unsigned burstInBytes, const char* bpfProgPath)
167 : mBurstInBytes(burstInBytes),
168 mBpfProgPath(bpfProgPath),
169 mBpfFd(-1),
170 mRequest{
171 .n = {
172 .nlmsg_len = sizeof(mRequest),
173 .nlmsg_type = RTM_NEWTFILTER,
174 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE,
175 },
176 .t = {
177 .tcm_family = AF_UNSPEC,
178 .tcm_ifindex = ifIndex,
179 .tcm_handle = TC_H_UNSPEC,
180 .tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS),
181 .tcm_info = (static_cast<uint32_t>(prio) << 16)
182 | static_cast<uint32_t>(htons(proto)),
183 },
184 .kind = {
185 .attr = {
186 .nla_len = sizeof(mRequest.kind),
187 .nla_type = TCA_KIND,
188 },
189 .str = "matchall",
190 },
191 .opt = {
192 .attr = {
193 .nla_len = sizeof(mRequest.opt),
194 .nla_type = TCA_OPTIONS,
195 },
196 .acts = {
197 .attr = {
198 .nla_len = sizeof(mRequest.opt.acts),
Patrick Rohr20dcaf52022-02-11 13:14:45 +0100199 .nla_type = TCA_MATCHALL_ACT,
Patrick Rohre815a742022-01-17 10:37:40 +0100200 },
201 .act1 = {
202 .attr = {
203 .nla_len = sizeof(mRequest.opt.acts.act1),
204 .nla_type = 1, // action priority
205 },
206 .kind = {
207 .attr = {
208 .nla_len = sizeof(mRequest.opt.acts.act1.kind),
209 .nla_type = TCA_ACT_KIND,
210 },
211 .str = "police",
212 },
213 .opt = {
214 .attr = {
215 .nla_len = sizeof(mRequest.opt.acts.act1.opt),
216 .nla_type = TCA_ACT_OPTIONS | NLA_F_NESTED,
217 },
218 .police = {
219 .attr = {
220 .nla_len = sizeof(mRequest.opt.acts.act1.opt.police),
221 .nla_type = TCA_POLICE_TBF,
222 },
223 .obj = {
224 .action = TC_ACT_PIPE,
225 .burst = 0,
226 .rate = {
227 .cell_log = RTAB_CELL_LOGARITHM,
228 .linklayer = TC_LINKLAYER_ETHERNET,
229 .cell_align = -1,
230 .rate = rateInBytesPerSec,
231 },
232 },
233 },
234 .rtab = {
235 .attr = {
236 .nla_len = sizeof(mRequest.opt.acts.act1.opt.rtab),
237 .nla_type = TCA_POLICE_RATE,
238 },
239 .u32 = {},
240 },
241 .notexceedact = {
242 .attr = {
243 .nla_len = sizeof(mRequest.opt.acts.act1.opt.notexceedact),
244 .nla_type = TCA_POLICE_RESULT,
245 },
246 .s32 = TC_ACT_UNSPEC,
247 },
248 },
249 },
250 .act2 = {
251 .attr = {
252 .nla_len = sizeof(mRequest.opt.acts.act2),
253 .nla_type = 2, // action priority
254 },
255 .kind = {
256 .attr = {
257 .nla_len = sizeof(mRequest.opt.acts.act2.kind),
258 .nla_type = TCA_ACT_KIND,
259 },
260 .str = "bpf",
261 },
262 .opt = {
263 .attr = {
264 .nla_len = sizeof(mRequest.opt.acts.act2.opt),
265 .nla_type = TCA_ACT_OPTIONS | NLA_F_NESTED,
266 },
267 .fd = {
268 .attr = {
269 .nla_len = sizeof(mRequest.opt.acts.act2.opt.fd),
270 .nla_type = TCA_ACT_BPF_FD,
271 },
272 .u32 = 0, // set during build()
273 },
274 .name = {
275 .attr = {
276 .nla_len = sizeof(mRequest.opt.acts.act2.opt.name),
277 .nla_type = TCA_ACT_BPF_NAME,
278 },
279 .str = "placeholder",
280 },
281 .parms = {
282 .attr = {
283 .nla_len = sizeof(mRequest.opt.acts.act2.opt.parms),
284 .nla_type = TCA_ACT_BPF_PARMS,
285 },
286 .obj = {
287 // default action to be executed when bpf prog
288 // returns TC_ACT_UNSPEC.
289 .action = TC_ACT_SHOT,
290 },
291 },
292 },
293 },
294 },
295 },
296 } {
297 // constructor body
298 }
299 // clang-format on
300
301 ~IngressPoliceFilterBuilder() {
302 // TODO: use unique_fd
303 if (mBpfFd != -1) {
304 close(mBpfFd);
305 }
306 }
307
308 constexpr unsigned getRequestSize() const { return sizeof(Request); }
309
310private:
311 unsigned calculateXmitTime(unsigned size) {
312 const uint32_t rate = mRequest.opt.acts.act1.opt.police.obj.rate.rate;
313 return (static_cast<double>(size) / static_cast<double>(rate)) *
314 TIME_UNITS_PER_SEC * kTickInUsec;
315 }
316
317 void initBurstRate() {
318 mRequest.opt.acts.act1.opt.police.obj.burst =
319 calculateXmitTime(mBurstInBytes);
320 }
321
322 // Calculates a table with 256 transmission times for different packet sizes
323 // (all the way up to MTU). RTAB_CELL_LOGARITHM is used as a scaling factor.
324 // In this case, MTU size is always 2048, so RTAB_CELL_LOGARITHM is always
325 // 3. Therefore, this function generates the transmission times for packets
326 // of size 1..256 x 2^3.
327 void initRateTable() {
328 for (unsigned i = 0; i < RTAB_SIZE; ++i) {
329 unsigned adjustedSize = (i + 1) << RTAB_CELL_LOGARITHM;
330 mRequest.opt.acts.act1.opt.rtab.u32[i] = calculateXmitTime(adjustedSize);
331 }
332 }
333
334 int initBpfFd() {
335 mBpfFd = bpf::retrieveProgram(mBpfProgPath);
336 if (mBpfFd == -1) {
337 int error = errno;
Patrick Rohr28c717a2022-01-19 14:51:35 +0100338 ALOGE("retrieveProgram failed: %d", error);
Patrick Rohre815a742022-01-17 10:37:40 +0100339 return -error;
340 }
341
342 mRequest.opt.acts.act2.opt.fd.u32 = static_cast<uint32_t>(mBpfFd);
343 snprintf(mRequest.opt.acts.act2.opt.name.str,
344 sizeof(mRequest.opt.acts.act2.opt.name.str), "%s:[*fsobj]",
345 basename(mBpfProgPath));
346
347 return 0;
348 }
349
350public:
351 int build() {
352 if (kTickInUsec == 0.0) {
353 return -EINVAL;
354 }
355
356 initBurstRate();
357 initRateTable();
358 return initBpfFd();
359 }
360
361 const Request *getRequest() const {
362 // Make sure to call build() before calling this function. Otherwise, the
363 // request will be invalid.
364 return &mRequest;
365 }
366};
367
Patrick Rohr776c40c2022-01-12 21:05:26 +0100368const sockaddr_nl KERNEL_NLADDR = {AF_NETLINK, 0, 0, 0};
369const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
370
371int sendAndProcessNetlinkResponse(const void *req, int len) {
372 // TODO: use unique_fd instead of ScopeGuard
373 int fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
374 if (fd == -1) {
375 int error = errno;
Patrick Rohr28c717a2022-01-19 14:51:35 +0100376 ALOGE("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE): %d",
Patrick Rohr776c40c2022-01-12 21:05:26 +0100377 error);
378 return -error;
379 }
380 auto scopeGuard = base::make_scope_guard([fd] { close(fd); });
381
382 static constexpr int on = 1;
383 if (setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on))) {
384 int error = errno;
Patrick Rohr28c717a2022-01-19 14:51:35 +0100385 ALOGE("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, 1): %d", error);
Patrick Rohr776c40c2022-01-12 21:05:26 +0100386 return -error;
387 }
388
Maciej Żenczykowskie8dcedd2022-06-09 23:27:32 +0000389 if (setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &on, sizeof(on))) {
390 int error = errno;
391 ALOGW("setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, 1): %d", error);
392 // will fail on 4.9 kernels so don't: return -error;
393 }
394
Patrick Rohr776c40c2022-01-12 21:05:26 +0100395 // this is needed to get valid strace netlink parsing, it allocates the pid
396 if (bind(fd, (const struct sockaddr *)&KERNEL_NLADDR,
397 sizeof(KERNEL_NLADDR))) {
398 int error = errno;
Patrick Rohr28c717a2022-01-19 14:51:35 +0100399 ALOGE("bind(fd, {AF_NETLINK, 0, 0}: %d)", error);
Patrick Rohr776c40c2022-01-12 21:05:26 +0100400 return -error;
401 }
402
403 // we do not want to receive messages from anyone besides the kernel
404 if (connect(fd, (const struct sockaddr *)&KERNEL_NLADDR,
405 sizeof(KERNEL_NLADDR))) {
406 int error = errno;
Patrick Rohr28c717a2022-01-19 14:51:35 +0100407 ALOGE("connect(fd, {AF_NETLINK, 0, 0}): %d", error);
Patrick Rohr776c40c2022-01-12 21:05:26 +0100408 return -error;
409 }
410
411 int rv = send(fd, req, len, 0);
412
413 if (rv == -1) {
414 int error = errno;
Patrick Rohr28c717a2022-01-19 14:51:35 +0100415 ALOGE("send(fd, req, len, 0) failed: %d", error);
Patrick Rohr776c40c2022-01-12 21:05:26 +0100416 return -error;
417 }
418
419 if (rv != len) {
Patrick Rohr28c717a2022-01-19 14:51:35 +0100420 ALOGE("send(fd, req, len = %d, 0) returned invalid message size %d", len,
Patrick Rohr776c40c2022-01-12 21:05:26 +0100421 rv);
422 return -EMSGSIZE;
423 }
424
425 struct {
426 nlmsghdr h;
427 nlmsgerr e;
428 char buf[256];
429 } resp = {};
430
431 rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
432
433 if (rv == -1) {
434 int error = errno;
Patrick Rohr28c717a2022-01-19 14:51:35 +0100435 ALOGE("recv() failed: %d", error);
Patrick Rohr776c40c2022-01-12 21:05:26 +0100436 return -error;
437 }
438
439 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
Patrick Rohr28c717a2022-01-19 14:51:35 +0100440 ALOGE("recv() returned short packet: %d", rv);
Patrick Rohr776c40c2022-01-12 21:05:26 +0100441 return -EBADMSG;
442 }
443
444 if (resp.h.nlmsg_len != (unsigned)rv) {
Patrick Rohr28c717a2022-01-19 14:51:35 +0100445 ALOGE("recv() returned invalid header length: %d != %d",
Patrick Rohr776c40c2022-01-12 21:05:26 +0100446 resp.h.nlmsg_len, rv);
447 return -EBADMSG;
448 }
449
450 if (resp.h.nlmsg_type != NLMSG_ERROR) {
Patrick Rohr28c717a2022-01-19 14:51:35 +0100451 ALOGE("recv() did not return NLMSG_ERROR message: %d",
Patrick Rohr776c40c2022-01-12 21:05:26 +0100452 resp.h.nlmsg_type);
453 return -ENOMSG;
454 }
455
456 if (resp.e.error) {
Patrick Rohr28c717a2022-01-19 14:51:35 +0100457 ALOGE("NLMSG_ERROR message return error: %d", resp.e.error);
Patrick Rohr776c40c2022-01-12 21:05:26 +0100458 }
459 return resp.e.error; // returns 0 on success
460}
461
462int hardwareAddressType(const char *interface) {
463 int fd = socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
464 if (fd < 0)
465 return -errno;
466 auto scopeGuard = base::make_scope_guard([fd] { close(fd); });
467
468 struct ifreq ifr = {};
469 // We use strncpy() instead of strlcpy() since kernel has to be able
470 // to handle non-zero terminated junk passed in by userspace anyway,
471 // and this way too long interface names (more than IFNAMSIZ-1 = 15
472 // characters plus terminating NULL) will not get truncated to 15
473 // characters and zero-terminated and thus potentially erroneously
474 // match a truncated interface if one were to exist.
475 strncpy(ifr.ifr_name, interface, sizeof(ifr.ifr_name));
476
477 if (ioctl(fd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) {
478 return -errno;
479 }
480 return ifr.ifr_hwaddr.sa_family;
481}
482
Patrick Rohr776c40c2022-01-12 21:05:26 +0100483} // namespace
484
485int isEthernet(const char *iface, bool &isEthernet) {
486 int rv = hardwareAddressType(iface);
487 if (rv < 0) {
Patrick Rohr28c717a2022-01-19 14:51:35 +0100488 ALOGE("Get hardware address type of interface %s failed: %s", iface,
Patrick Rohr776c40c2022-01-12 21:05:26 +0100489 strerror(-rv));
Patrick Rohr27846ff2022-01-17 12:22:51 +0100490 return rv;
Patrick Rohr776c40c2022-01-12 21:05:26 +0100491 }
492
493 // Backwards compatibility with pre-GKI kernels that use various custom
494 // ARPHRD_* for their cellular interface
495 switch (rv) {
496 // ARPHRD_PUREIP on at least some Mediatek Android kernels
497 // example: wembley with 4.19 kernel
498 case 520:
499 // in Linux 4.14+ rmnet support was upstreamed and ARHRD_RAWIP became 519,
500 // but it is 530 on at least some Qualcomm Android 4.9 kernels with rmnet
501 // example: Pixel 3 family
502 case 530:
503 // >5.4 kernels are GKI2.0 and thus upstream compatible, however 5.10
504 // shipped with Android S, so (for safety) let's limit ourselves to
505 // >5.10, ie. 5.11+ as a guarantee we're on Android T+ and thus no
506 // longer need this non-upstream compatibility logic
Maciej Żenczykowskid7d3b032022-12-22 17:03:18 +0000507 static bool is_pre_5_11_kernel = !bpf::isAtLeastKernelVersion(5, 11, 0);
Patrick Rohr776c40c2022-01-12 21:05:26 +0100508 if (is_pre_5_11_kernel)
509 return false;
510 }
511
512 switch (rv) {
513 case ARPHRD_ETHER:
514 isEthernet = true;
515 return 0;
516 case ARPHRD_NONE:
517 case ARPHRD_PPP:
518 case ARPHRD_RAWIP:
519 isEthernet = false;
520 return 0;
521 default:
Patrick Rohr28c717a2022-01-19 14:51:35 +0100522 ALOGE("Unknown hardware address type %d on interface %s", rv, iface);
Patrick Rohr27846ff2022-01-17 12:22:51 +0100523 return -EAFNOSUPPORT;
Patrick Rohr776c40c2022-01-12 21:05:26 +0100524 }
525}
526
Patrick Rohr42b58ae2022-01-17 13:09:12 +0100527// ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
528// REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
529// DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
530int doTcQdiscClsact(int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
531 // This is the name of the qdisc we are attaching.
532 // Some hoop jumping to make this compile time constant with known size,
533 // so that the structure declaration is well defined at compile time.
534#define CLSACT "clsact"
535 // sizeof() includes the terminating NULL
536 static constexpr size_t ASCIIZ_LEN_CLSACT = sizeof(CLSACT);
537
538 const struct {
539 nlmsghdr n;
540 tcmsg t;
541 struct {
542 nlattr attr;
543 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
544 } kind;
545 } req = {
546 .n =
547 {
548 .nlmsg_len = sizeof(req),
549 .nlmsg_type = nlMsgType,
550 .nlmsg_flags =
551 static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
552 },
553 .t =
554 {
555 .tcm_family = AF_UNSPEC,
556 .tcm_ifindex = ifIndex,
557 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
558 .tcm_parent = TC_H_CLSACT,
559 },
560 .kind =
561 {
562 .attr =
563 {
564 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
565 .nla_type = TCA_KIND,
566 },
567 .str = CLSACT,
568 },
569 };
570#undef CLSACT
571
572 return sendAndProcessNetlinkResponse(&req, sizeof(req));
573}
574
Patrick Rohr776c40c2022-01-12 21:05:26 +0100575// tc filter add dev .. in/egress prio 1 protocol ipv6/ip bpf object-pinned
576// /sys/fs/bpf/... direct-action
577int tcAddBpfFilter(int ifIndex, bool ingress, uint16_t prio, uint16_t proto,
578 const char *bpfProgPath) {
579 const int bpfFd = bpf::retrieveProgram(bpfProgPath);
580 if (bpfFd == -1) {
Patrick Rohr28c717a2022-01-19 14:51:35 +0100581 ALOGE("retrieveProgram failed: %d", errno);
Patrick Rohr776c40c2022-01-12 21:05:26 +0100582 return -errno;
583 }
584 auto scopeGuard = base::make_scope_guard([bpfFd] { close(bpfFd); });
585
586 struct {
587 nlmsghdr n;
588 tcmsg t;
589 struct {
590 nlattr attr;
591 // The maximum classifier name length is defined in
592 // tcf_proto_ops in include/net/sch_generic.h.
593 char str[NLMSG_ALIGN(sizeof(CLS_BPF_KIND_NAME))];
594 } kind;
595 struct {
596 nlattr attr;
597 struct {
598 nlattr attr;
599 __u32 u32;
600 } fd;
601 struct {
602 nlattr attr;
603 char str[NLMSG_ALIGN(CLS_BPF_NAME_LEN)];
604 } name;
605 struct {
606 nlattr attr;
607 __u32 u32;
608 } flags;
609 } options;
610 } req = {
611 .n =
612 {
613 .nlmsg_len = sizeof(req),
614 .nlmsg_type = RTM_NEWTFILTER,
615 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
616 },
617 .t =
618 {
619 .tcm_family = AF_UNSPEC,
620 .tcm_ifindex = ifIndex,
621 .tcm_handle = TC_H_UNSPEC,
622 .tcm_parent = TC_H_MAKE(TC_H_CLSACT, ingress ? TC_H_MIN_INGRESS
623 : TC_H_MIN_EGRESS),
624 .tcm_info =
625 static_cast<__u32>((static_cast<uint16_t>(prio) << 16) |
626 htons(static_cast<uint16_t>(proto))),
627 },
628 .kind =
629 {
630 .attr =
631 {
632 .nla_len = sizeof(req.kind),
633 .nla_type = TCA_KIND,
634 },
635 .str = CLS_BPF_KIND_NAME,
636 },
637 .options =
638 {
639 .attr =
640 {
641 .nla_len = sizeof(req.options),
642 .nla_type = NLA_F_NESTED | TCA_OPTIONS,
643 },
644 .fd =
645 {
646 .attr =
647 {
648 .nla_len = sizeof(req.options.fd),
649 .nla_type = TCA_BPF_FD,
650 },
651 .u32 = static_cast<__u32>(bpfFd),
652 },
653 .name =
654 {
655 .attr =
656 {
657 .nla_len = sizeof(req.options.name),
658 .nla_type = TCA_BPF_NAME,
659 },
660 // Visible via 'tc filter show', but
661 // is overwritten by strncpy below
662 .str = "placeholder",
663 },
664 .flags =
665 {
666 .attr =
667 {
668 .nla_len = sizeof(req.options.flags),
669 .nla_type = TCA_BPF_FLAGS,
670 },
671 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
672 },
673 },
674 };
675
676 snprintf(req.options.name.str, sizeof(req.options.name.str), "%s:[*fsobj]",
677 basename(bpfProgPath));
678
679 int error = sendAndProcessNetlinkResponse(&req, sizeof(req));
680 return error;
681}
682
Patrick Rohre815a742022-01-17 10:37:40 +0100683// tc filter add dev .. ingress prio .. protocol .. matchall \
684// action police rate .. burst .. conform-exceed pipe/continue \
685// action bpf object-pinned .. \
686// drop
687//
688// TODO: tc-police does not do ECN marking, so in the future, we should consider
689// adding a second tc-police filter at a lower priority that rate limits traffic
690// at something like 0.8 times the global rate limit and ecn marks exceeding
691// packets inside a bpf program (but does not drop them).
692int tcAddIngressPoliceFilter(int ifIndex, uint16_t prio, uint16_t proto,
693 unsigned rateInBytesPerSec,
694 const char *bpfProgPath) {
695 // TODO: this value needs to be validated.
696 // TCP IW10 (initial congestion window) means servers will send 10 mtus worth
697 // of data on initial connect.
698 // If nic is LRO capable it could aggregate up to 64KiB, so again probably a
699 // bad idea to set burst below that, because ingress packets could get
700 // aggregated to 64KiB at the nic.
701 // I don't know, but I wonder whether we shouldn't just do 128KiB and not do
702 // any math.
703 static constexpr unsigned BURST_SIZE_IN_BYTES = 128 * 1024; // 128KiB
704 IngressPoliceFilterBuilder filter(ifIndex, prio, proto, rateInBytesPerSec,
705 BURST_SIZE_IN_BYTES, bpfProgPath);
706 const int error = filter.build();
707 if (error) {
708 return error;
709 }
710 return sendAndProcessNetlinkResponse(filter.getRequest(),
711 filter.getRequestSize());
712}
713
Patrick Rohr776c40c2022-01-12 21:05:26 +0100714// tc filter del dev .. in/egress prio .. protocol ..
715int tcDeleteFilter(int ifIndex, bool ingress, uint16_t prio, uint16_t proto) {
716 const struct {
717 nlmsghdr n;
718 tcmsg t;
719 } req = {
720 .n =
721 {
722 .nlmsg_len = sizeof(req),
723 .nlmsg_type = RTM_DELTFILTER,
724 .nlmsg_flags = NETLINK_REQUEST_FLAGS,
725 },
726 .t =
727 {
728 .tcm_family = AF_UNSPEC,
729 .tcm_ifindex = ifIndex,
730 .tcm_handle = TC_H_UNSPEC,
731 .tcm_parent = TC_H_MAKE(TC_H_CLSACT, ingress ? TC_H_MIN_INGRESS
732 : TC_H_MIN_EGRESS),
733 .tcm_info =
734 static_cast<__u32>((static_cast<uint16_t>(prio) << 16) |
735 htons(static_cast<uint16_t>(proto))),
736 },
737 };
738
739 return sendAndProcessNetlinkResponse(&req, sizeof(req));
740}
741
742} // namespace android