blob: ad9d9e4ab34d77273d2b040bcd28066b3711b0ac [file] [log] [blame]
Patrick Rohr776c40c2022-01-12 21:05:26 +01001/*
2 * Copyright (C) 2022 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#define LOG_TAG "TcUtils"
18
19#include "tcutils/tcutils.h"
20
Patrick Rohre7f26e22022-01-12 22:13:12 +010021#include "kernelversion.h"
Patrick Rohr776c40c2022-01-12 21:05:26 +010022#include "scopeguard.h"
23
24#include <android/log.h>
25#include <arpa/inet.h>
26#include <cerrno>
Patrick Rohr776c40c2022-01-12 21:05:26 +010027#include <cstring>
28#include <libgen.h>
29#include <linux/if_arp.h>
30#include <linux/if_ether.h>
31#include <linux/netlink.h>
32#include <linux/pkt_cls.h>
33#include <linux/pkt_sched.h>
34#include <linux/rtnetlink.h>
Patrick Rohre815a742022-01-17 10:37:40 +010035#include <linux/tc_act/tc_bpf.h>
Patrick Rohr776c40c2022-01-12 21:05:26 +010036#include <net/if.h>
37#include <stdarg.h>
Patrick Rohr0c34e9a02022-01-17 13:59:09 +010038#include <stdio.h>
Patrick Rohr776c40c2022-01-12 21:05:26 +010039#include <sys/socket.h>
Patrick Rohr776c40c2022-01-12 21:05:26 +010040#include <unistd.h>
41#include <utility>
42
43#define BPF_FD_JUST_USE_INT
44#include <BpfSyscallWrappers.h>
45#undef BPF_FD_JUST_USE_INT
46
47// The maximum length of TCA_BPF_NAME. Sync from net/sched/cls_bpf.c.
48#define CLS_BPF_NAME_LEN 256
49
50// Classifier name. See cls_bpf_ops in net/sched/cls_bpf.c.
51#define CLS_BPF_KIND_NAME "bpf"
52
53namespace android {
54namespace {
55
56void logError(const char *fmt...) {
57 va_list args;
58 va_start(args, fmt);
59 __android_log_vprint(ANDROID_LOG_ERROR, LOG_TAG, fmt, args);
60 va_end(args);
61}
62
Patrick Rohre815a742022-01-17 10:37:40 +010063/**
64 * IngressPoliceFilterBuilder builds a nlmsg request equivalent to the following
65 * tc command:
66 *
67 * tc filter add dev .. ingress prio .. protocol .. matchall \
68 * action police rate .. burst .. conform-exceed pipe/continue \
69 * action bpf object-pinned .. \
70 * drop
71 */
72class IngressPoliceFilterBuilder final {
73 // default mtu is 2047, so the cell logarithm factor (cell_log) is 3.
74 // 0x7FF >> 0x3FF x 2^1 >> 0x1FF x 2^2 >> 0xFF x 2^3
75 static constexpr int RTAB_CELL_LOGARITHM = 3;
76 static constexpr size_t RTAB_SIZE = 256;
77 static constexpr unsigned TIME_UNITS_PER_SEC = 1000000;
78
79 struct Request {
80 nlmsghdr n;
81 tcmsg t;
82 struct {
83 nlattr attr;
84 char str[NLMSG_ALIGN(sizeof("matchall"))];
85 } kind;
86 struct {
87 nlattr attr;
88 struct {
89 nlattr attr;
90 struct {
91 nlattr attr;
92 struct {
93 nlattr attr;
94 char str[NLMSG_ALIGN(sizeof("police"))];
95 } kind;
96 struct {
97 nlattr attr;
98 struct {
99 nlattr attr;
100 struct tc_police obj;
101 } police;
102 struct {
103 nlattr attr;
104 uint32_t u32[RTAB_SIZE];
105 } rtab;
106 struct {
107 nlattr attr;
108 int32_t s32;
109 } notexceedact;
110 } opt;
111 } act1;
112 struct {
113 nlattr attr;
114 struct {
115 nlattr attr;
116 char str[NLMSG_ALIGN(sizeof("bpf"))];
117 } kind;
118 struct {
119 nlattr attr;
120 struct {
121 nlattr attr;
122 uint32_t u32;
123 } fd;
124 struct {
125 nlattr attr;
126 char str[NLMSG_ALIGN(CLS_BPF_NAME_LEN)];
127 } name;
128 struct {
129 nlattr attr;
130 struct tc_act_bpf obj;
131 } parms;
132 } opt;
133 } act2;
134 } acts;
135 } opt;
136 };
137
138 // class members
139 const unsigned mBurstInBytes;
140 const char *mBpfProgPath;
141 int mBpfFd;
142 Request mRequest;
143
144 static double getTickInUsec() {
145 FILE *fp = fopen("/proc/net/psched", "re");
146 if (!fp) {
147 logError("fopen(\"/proc/net/psched\"): %s", strerror(errno));
148 return 0.0;
149 }
150 auto scopeGuard = base::make_scope_guard([fp] { fclose(fp); });
151
152 uint32_t t2us;
153 uint32_t us2t;
154 uint32_t clockRes;
155 const bool isError =
156 fscanf(fp, "%08x%08x%08x", &t2us, &us2t, &clockRes) != 3;
157
158 if (isError) {
159 logError("fscanf(/proc/net/psched, \"%%08x%%08x%%08x\"): %s",
160 strerror(errno));
161 return 0.0;
162 }
163
164 const double clockFactor =
165 static_cast<double>(clockRes) / TIME_UNITS_PER_SEC;
166 return static_cast<double>(t2us) / static_cast<double>(us2t) * clockFactor;
167 }
168
169 static inline const double kTickInUsec = getTickInUsec();
170
171public:
172 // clang-format off
173 IngressPoliceFilterBuilder(int ifIndex, uint16_t prio, uint16_t proto, unsigned rateInBytesPerSec,
174 unsigned burstInBytes, const char* bpfProgPath)
175 : mBurstInBytes(burstInBytes),
176 mBpfProgPath(bpfProgPath),
177 mBpfFd(-1),
178 mRequest{
179 .n = {
180 .nlmsg_len = sizeof(mRequest),
181 .nlmsg_type = RTM_NEWTFILTER,
182 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE,
183 },
184 .t = {
185 .tcm_family = AF_UNSPEC,
186 .tcm_ifindex = ifIndex,
187 .tcm_handle = TC_H_UNSPEC,
188 .tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS),
189 .tcm_info = (static_cast<uint32_t>(prio) << 16)
190 | static_cast<uint32_t>(htons(proto)),
191 },
192 .kind = {
193 .attr = {
194 .nla_len = sizeof(mRequest.kind),
195 .nla_type = TCA_KIND,
196 },
197 .str = "matchall",
198 },
199 .opt = {
200 .attr = {
201 .nla_len = sizeof(mRequest.opt),
202 .nla_type = TCA_OPTIONS,
203 },
204 .acts = {
205 .attr = {
206 .nla_len = sizeof(mRequest.opt.acts),
207 .nla_type = TCA_U32_ACT,
208 },
209 .act1 = {
210 .attr = {
211 .nla_len = sizeof(mRequest.opt.acts.act1),
212 .nla_type = 1, // action priority
213 },
214 .kind = {
215 .attr = {
216 .nla_len = sizeof(mRequest.opt.acts.act1.kind),
217 .nla_type = TCA_ACT_KIND,
218 },
219 .str = "police",
220 },
221 .opt = {
222 .attr = {
223 .nla_len = sizeof(mRequest.opt.acts.act1.opt),
224 .nla_type = TCA_ACT_OPTIONS | NLA_F_NESTED,
225 },
226 .police = {
227 .attr = {
228 .nla_len = sizeof(mRequest.opt.acts.act1.opt.police),
229 .nla_type = TCA_POLICE_TBF,
230 },
231 .obj = {
232 .action = TC_ACT_PIPE,
233 .burst = 0,
234 .rate = {
235 .cell_log = RTAB_CELL_LOGARITHM,
236 .linklayer = TC_LINKLAYER_ETHERNET,
237 .cell_align = -1,
238 .rate = rateInBytesPerSec,
239 },
240 },
241 },
242 .rtab = {
243 .attr = {
244 .nla_len = sizeof(mRequest.opt.acts.act1.opt.rtab),
245 .nla_type = TCA_POLICE_RATE,
246 },
247 .u32 = {},
248 },
249 .notexceedact = {
250 .attr = {
251 .nla_len = sizeof(mRequest.opt.acts.act1.opt.notexceedact),
252 .nla_type = TCA_POLICE_RESULT,
253 },
254 .s32 = TC_ACT_UNSPEC,
255 },
256 },
257 },
258 .act2 = {
259 .attr = {
260 .nla_len = sizeof(mRequest.opt.acts.act2),
261 .nla_type = 2, // action priority
262 },
263 .kind = {
264 .attr = {
265 .nla_len = sizeof(mRequest.opt.acts.act2.kind),
266 .nla_type = TCA_ACT_KIND,
267 },
268 .str = "bpf",
269 },
270 .opt = {
271 .attr = {
272 .nla_len = sizeof(mRequest.opt.acts.act2.opt),
273 .nla_type = TCA_ACT_OPTIONS | NLA_F_NESTED,
274 },
275 .fd = {
276 .attr = {
277 .nla_len = sizeof(mRequest.opt.acts.act2.opt.fd),
278 .nla_type = TCA_ACT_BPF_FD,
279 },
280 .u32 = 0, // set during build()
281 },
282 .name = {
283 .attr = {
284 .nla_len = sizeof(mRequest.opt.acts.act2.opt.name),
285 .nla_type = TCA_ACT_BPF_NAME,
286 },
287 .str = "placeholder",
288 },
289 .parms = {
290 .attr = {
291 .nla_len = sizeof(mRequest.opt.acts.act2.opt.parms),
292 .nla_type = TCA_ACT_BPF_PARMS,
293 },
294 .obj = {
295 // default action to be executed when bpf prog
296 // returns TC_ACT_UNSPEC.
297 .action = TC_ACT_SHOT,
298 },
299 },
300 },
301 },
302 },
303 },
304 } {
305 // constructor body
306 }
307 // clang-format on
308
309 ~IngressPoliceFilterBuilder() {
310 // TODO: use unique_fd
311 if (mBpfFd != -1) {
312 close(mBpfFd);
313 }
314 }
315
316 constexpr unsigned getRequestSize() const { return sizeof(Request); }
317
318private:
319 unsigned calculateXmitTime(unsigned size) {
320 const uint32_t rate = mRequest.opt.acts.act1.opt.police.obj.rate.rate;
321 return (static_cast<double>(size) / static_cast<double>(rate)) *
322 TIME_UNITS_PER_SEC * kTickInUsec;
323 }
324
325 void initBurstRate() {
326 mRequest.opt.acts.act1.opt.police.obj.burst =
327 calculateXmitTime(mBurstInBytes);
328 }
329
330 // Calculates a table with 256 transmission times for different packet sizes
331 // (all the way up to MTU). RTAB_CELL_LOGARITHM is used as a scaling factor.
332 // In this case, MTU size is always 2048, so RTAB_CELL_LOGARITHM is always
333 // 3. Therefore, this function generates the transmission times for packets
334 // of size 1..256 x 2^3.
335 void initRateTable() {
336 for (unsigned i = 0; i < RTAB_SIZE; ++i) {
337 unsigned adjustedSize = (i + 1) << RTAB_CELL_LOGARITHM;
338 mRequest.opt.acts.act1.opt.rtab.u32[i] = calculateXmitTime(adjustedSize);
339 }
340 }
341
342 int initBpfFd() {
343 mBpfFd = bpf::retrieveProgram(mBpfProgPath);
344 if (mBpfFd == -1) {
345 int error = errno;
346 logError("retrieveProgram failed: %d", error);
347 return -error;
348 }
349
350 mRequest.opt.acts.act2.opt.fd.u32 = static_cast<uint32_t>(mBpfFd);
351 snprintf(mRequest.opt.acts.act2.opt.name.str,
352 sizeof(mRequest.opt.acts.act2.opt.name.str), "%s:[*fsobj]",
353 basename(mBpfProgPath));
354
355 return 0;
356 }
357
358public:
359 int build() {
360 if (kTickInUsec == 0.0) {
361 return -EINVAL;
362 }
363
364 initBurstRate();
365 initRateTable();
366 return initBpfFd();
367 }
368
369 const Request *getRequest() const {
370 // Make sure to call build() before calling this function. Otherwise, the
371 // request will be invalid.
372 return &mRequest;
373 }
374};
375
// Netlink address used for both bind() and connect(): AF_NETLINK with a zero
// port id and no multicast groups.
const sockaddr_nl KERNEL_NLADDR = {
    .nl_family = AF_NETLINK,
    .nl_pad = 0,
    .nl_pid = 0,
    .nl_groups = 0,
};

// Flags OR-ed into the requests built in this file: it is a request, and the
// kernel must acknowledge it.
const uint16_t NETLINK_REQUEST_FLAGS = NLM_F_REQUEST | NLM_F_ACK;
378
379int sendAndProcessNetlinkResponse(const void *req, int len) {
380 // TODO: use unique_fd instead of ScopeGuard
381 int fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
382 if (fd == -1) {
383 int error = errno;
384 logError("socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE): %d",
385 error);
386 return -error;
387 }
388 auto scopeGuard = base::make_scope_guard([fd] { close(fd); });
389
390 static constexpr int on = 1;
391 if (setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &on, sizeof(on))) {
392 int error = errno;
393 logError("setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, 1): %d", error);
394 return -error;
395 }
396
397 // this is needed to get valid strace netlink parsing, it allocates the pid
398 if (bind(fd, (const struct sockaddr *)&KERNEL_NLADDR,
399 sizeof(KERNEL_NLADDR))) {
400 int error = errno;
401 logError("bind(fd, {AF_NETLINK, 0, 0}: %d)", error);
402 return -error;
403 }
404
405 // we do not want to receive messages from anyone besides the kernel
406 if (connect(fd, (const struct sockaddr *)&KERNEL_NLADDR,
407 sizeof(KERNEL_NLADDR))) {
408 int error = errno;
409 logError("connect(fd, {AF_NETLINK, 0, 0}): %d", error);
410 return -error;
411 }
412
413 int rv = send(fd, req, len, 0);
414
415 if (rv == -1) {
416 int error = errno;
417 logError("send(fd, req, len, 0) failed: %d", error);
418 return -error;
419 }
420
421 if (rv != len) {
422 logError("send(fd, req, len = %d, 0) returned invalid message size %d", len,
423 rv);
424 return -EMSGSIZE;
425 }
426
427 struct {
428 nlmsghdr h;
429 nlmsgerr e;
430 char buf[256];
431 } resp = {};
432
433 rv = recv(fd, &resp, sizeof(resp), MSG_TRUNC);
434
435 if (rv == -1) {
436 int error = errno;
437 logError("recv() failed: %d", error);
438 return -error;
439 }
440
441 if (rv < (int)NLMSG_SPACE(sizeof(struct nlmsgerr))) {
442 logError("recv() returned short packet: %d", rv);
443 return -EBADMSG;
444 }
445
446 if (resp.h.nlmsg_len != (unsigned)rv) {
447 logError("recv() returned invalid header length: %d != %d",
448 resp.h.nlmsg_len, rv);
449 return -EBADMSG;
450 }
451
452 if (resp.h.nlmsg_type != NLMSG_ERROR) {
453 logError("recv() did not return NLMSG_ERROR message: %d",
454 resp.h.nlmsg_type);
455 return -ENOMSG;
456 }
457
458 if (resp.e.error) {
459 logError("NLMSG_ERROR message return error: %d", resp.e.error);
460 }
461 return resp.e.error; // returns 0 on success
462}
463
464int hardwareAddressType(const char *interface) {
465 int fd = socket(AF_INET6, SOCK_DGRAM | SOCK_CLOEXEC, 0);
466 if (fd < 0)
467 return -errno;
468 auto scopeGuard = base::make_scope_guard([fd] { close(fd); });
469
470 struct ifreq ifr = {};
471 // We use strncpy() instead of strlcpy() since kernel has to be able
472 // to handle non-zero terminated junk passed in by userspace anyway,
473 // and this way too long interface names (more than IFNAMSIZ-1 = 15
474 // characters plus terminating NULL) will not get truncated to 15
475 // characters and zero-terminated and thus potentially erroneously
476 // match a truncated interface if one were to exist.
477 strncpy(ifr.ifr_name, interface, sizeof(ifr.ifr_name));
478
479 if (ioctl(fd, SIOCGIFHWADDR, &ifr, sizeof(ifr))) {
480 return -errno;
481 }
482 return ifr.ifr_hwaddr.sa_family;
483}
484
Patrick Rohr776c40c2022-01-12 21:05:26 +0100485} // namespace
486
487int isEthernet(const char *iface, bool &isEthernet) {
488 int rv = hardwareAddressType(iface);
489 if (rv < 0) {
490 logError("Get hardware address type of interface %s failed: %s", iface,
491 strerror(-rv));
Patrick Rohr27846ff2022-01-17 12:22:51 +0100492 return rv;
Patrick Rohr776c40c2022-01-12 21:05:26 +0100493 }
494
495 // Backwards compatibility with pre-GKI kernels that use various custom
496 // ARPHRD_* for their cellular interface
497 switch (rv) {
498 // ARPHRD_PUREIP on at least some Mediatek Android kernels
499 // example: wembley with 4.19 kernel
500 case 520:
501 // in Linux 4.14+ rmnet support was upstreamed and ARHRD_RAWIP became 519,
502 // but it is 530 on at least some Qualcomm Android 4.9 kernels with rmnet
503 // example: Pixel 3 family
504 case 530:
505 // >5.4 kernels are GKI2.0 and thus upstream compatible, however 5.10
506 // shipped with Android S, so (for safety) let's limit ourselves to
507 // >5.10, ie. 5.11+ as a guarantee we're on Android T+ and thus no
508 // longer need this non-upstream compatibility logic
509 static bool is_pre_5_11_kernel = !isAtLeastKernelVersion(5, 11, 0);
510 if (is_pre_5_11_kernel)
511 return false;
512 }
513
514 switch (rv) {
515 case ARPHRD_ETHER:
516 isEthernet = true;
517 return 0;
518 case ARPHRD_NONE:
519 case ARPHRD_PPP:
520 case ARPHRD_RAWIP:
521 isEthernet = false;
522 return 0;
523 default:
524 logError("Unknown hardware address type %d on interface %s", rv, iface);
Patrick Rohr27846ff2022-01-17 12:22:51 +0100525 return -EAFNOSUPPORT;
Patrick Rohr776c40c2022-01-12 21:05:26 +0100526 }
527}
528
Patrick Rohr42b58ae2022-01-17 13:09:12 +0100529// ADD: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_EXCL|NLM_F_CREATE
530// REPLACE: nlMsgType=RTM_NEWQDISC nlMsgFlags=NLM_F_CREATE|NLM_F_REPLACE
531// DEL: nlMsgType=RTM_DELQDISC nlMsgFlags=0
532int doTcQdiscClsact(int ifIndex, uint16_t nlMsgType, uint16_t nlMsgFlags) {
533 // This is the name of the qdisc we are attaching.
534 // Some hoop jumping to make this compile time constant with known size,
535 // so that the structure declaration is well defined at compile time.
536#define CLSACT "clsact"
537 // sizeof() includes the terminating NULL
538 static constexpr size_t ASCIIZ_LEN_CLSACT = sizeof(CLSACT);
539
540 const struct {
541 nlmsghdr n;
542 tcmsg t;
543 struct {
544 nlattr attr;
545 char str[NLMSG_ALIGN(ASCIIZ_LEN_CLSACT)];
546 } kind;
547 } req = {
548 .n =
549 {
550 .nlmsg_len = sizeof(req),
551 .nlmsg_type = nlMsgType,
552 .nlmsg_flags =
553 static_cast<__u16>(NETLINK_REQUEST_FLAGS | nlMsgFlags),
554 },
555 .t =
556 {
557 .tcm_family = AF_UNSPEC,
558 .tcm_ifindex = ifIndex,
559 .tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0),
560 .tcm_parent = TC_H_CLSACT,
561 },
562 .kind =
563 {
564 .attr =
565 {
566 .nla_len = NLA_HDRLEN + ASCIIZ_LEN_CLSACT,
567 .nla_type = TCA_KIND,
568 },
569 .str = CLSACT,
570 },
571 };
572#undef CLSACT
573
574 return sendAndProcessNetlinkResponse(&req, sizeof(req));
575}
576
Patrick Rohr776c40c2022-01-12 21:05:26 +0100577// tc filter add dev .. in/egress prio 1 protocol ipv6/ip bpf object-pinned
578// /sys/fs/bpf/... direct-action
579int tcAddBpfFilter(int ifIndex, bool ingress, uint16_t prio, uint16_t proto,
580 const char *bpfProgPath) {
581 const int bpfFd = bpf::retrieveProgram(bpfProgPath);
582 if (bpfFd == -1) {
583 logError("retrieveProgram failed: %d", errno);
584 return -errno;
585 }
586 auto scopeGuard = base::make_scope_guard([bpfFd] { close(bpfFd); });
587
588 struct {
589 nlmsghdr n;
590 tcmsg t;
591 struct {
592 nlattr attr;
593 // The maximum classifier name length is defined in
594 // tcf_proto_ops in include/net/sch_generic.h.
595 char str[NLMSG_ALIGN(sizeof(CLS_BPF_KIND_NAME))];
596 } kind;
597 struct {
598 nlattr attr;
599 struct {
600 nlattr attr;
601 __u32 u32;
602 } fd;
603 struct {
604 nlattr attr;
605 char str[NLMSG_ALIGN(CLS_BPF_NAME_LEN)];
606 } name;
607 struct {
608 nlattr attr;
609 __u32 u32;
610 } flags;
611 } options;
612 } req = {
613 .n =
614 {
615 .nlmsg_len = sizeof(req),
616 .nlmsg_type = RTM_NEWTFILTER,
617 .nlmsg_flags = NETLINK_REQUEST_FLAGS | NLM_F_EXCL | NLM_F_CREATE,
618 },
619 .t =
620 {
621 .tcm_family = AF_UNSPEC,
622 .tcm_ifindex = ifIndex,
623 .tcm_handle = TC_H_UNSPEC,
624 .tcm_parent = TC_H_MAKE(TC_H_CLSACT, ingress ? TC_H_MIN_INGRESS
625 : TC_H_MIN_EGRESS),
626 .tcm_info =
627 static_cast<__u32>((static_cast<uint16_t>(prio) << 16) |
628 htons(static_cast<uint16_t>(proto))),
629 },
630 .kind =
631 {
632 .attr =
633 {
634 .nla_len = sizeof(req.kind),
635 .nla_type = TCA_KIND,
636 },
637 .str = CLS_BPF_KIND_NAME,
638 },
639 .options =
640 {
641 .attr =
642 {
643 .nla_len = sizeof(req.options),
644 .nla_type = NLA_F_NESTED | TCA_OPTIONS,
645 },
646 .fd =
647 {
648 .attr =
649 {
650 .nla_len = sizeof(req.options.fd),
651 .nla_type = TCA_BPF_FD,
652 },
653 .u32 = static_cast<__u32>(bpfFd),
654 },
655 .name =
656 {
657 .attr =
658 {
659 .nla_len = sizeof(req.options.name),
660 .nla_type = TCA_BPF_NAME,
661 },
662 // Visible via 'tc filter show', but
663 // is overwritten by strncpy below
664 .str = "placeholder",
665 },
666 .flags =
667 {
668 .attr =
669 {
670 .nla_len = sizeof(req.options.flags),
671 .nla_type = TCA_BPF_FLAGS,
672 },
673 .u32 = TCA_BPF_FLAG_ACT_DIRECT,
674 },
675 },
676 };
677
678 snprintf(req.options.name.str, sizeof(req.options.name.str), "%s:[*fsobj]",
679 basename(bpfProgPath));
680
681 int error = sendAndProcessNetlinkResponse(&req, sizeof(req));
682 return error;
683}
684
Patrick Rohre815a742022-01-17 10:37:40 +0100685// tc filter add dev .. ingress prio .. protocol .. matchall \
686// action police rate .. burst .. conform-exceed pipe/continue \
687// action bpf object-pinned .. \
688// drop
689//
690// TODO: tc-police does not do ECN marking, so in the future, we should consider
691// adding a second tc-police filter at a lower priority that rate limits traffic
692// at something like 0.8 times the global rate limit and ecn marks exceeding
693// packets inside a bpf program (but does not drop them).
694int tcAddIngressPoliceFilter(int ifIndex, uint16_t prio, uint16_t proto,
695 unsigned rateInBytesPerSec,
696 const char *bpfProgPath) {
697 // TODO: this value needs to be validated.
698 // TCP IW10 (initial congestion window) means servers will send 10 mtus worth
699 // of data on initial connect.
700 // If nic is LRO capable it could aggregate up to 64KiB, so again probably a
701 // bad idea to set burst below that, because ingress packets could get
702 // aggregated to 64KiB at the nic.
703 // I don't know, but I wonder whether we shouldn't just do 128KiB and not do
704 // any math.
705 static constexpr unsigned BURST_SIZE_IN_BYTES = 128 * 1024; // 128KiB
706 IngressPoliceFilterBuilder filter(ifIndex, prio, proto, rateInBytesPerSec,
707 BURST_SIZE_IN_BYTES, bpfProgPath);
708 const int error = filter.build();
709 if (error) {
710 return error;
711 }
712 return sendAndProcessNetlinkResponse(filter.getRequest(),
713 filter.getRequestSize());
714}
715
Patrick Rohr776c40c2022-01-12 21:05:26 +0100716// tc filter del dev .. in/egress prio .. protocol ..
717int tcDeleteFilter(int ifIndex, bool ingress, uint16_t prio, uint16_t proto) {
718 const struct {
719 nlmsghdr n;
720 tcmsg t;
721 } req = {
722 .n =
723 {
724 .nlmsg_len = sizeof(req),
725 .nlmsg_type = RTM_DELTFILTER,
726 .nlmsg_flags = NETLINK_REQUEST_FLAGS,
727 },
728 .t =
729 {
730 .tcm_family = AF_UNSPEC,
731 .tcm_ifindex = ifIndex,
732 .tcm_handle = TC_H_UNSPEC,
733 .tcm_parent = TC_H_MAKE(TC_H_CLSACT, ingress ? TC_H_MIN_INGRESS
734 : TC_H_MIN_EGRESS),
735 .tcm_info =
736 static_cast<__u32>((static_cast<uint16_t>(prio) << 16) |
737 htons(static_cast<uint16_t>(proto))),
738 },
739 };
740
741 return sendAndProcessNetlinkResponse(&req, sizeof(req));
742}
743
744} // namespace android