/*
 * Copyright (C) 2021 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

Maciej Żenczykowski11141da2024-03-15 18:21:33 -070017// The resulting .o needs to load on Android T+
Maciej Żenczykowski4e4f8722024-06-15 06:38:08 -070018#define BPFLOADER_MIN_VER BPFLOADER_MAINLINE_T_VERSION
Maciej Żenczykowskiacebffb2022-05-16 16:05:15 -070019
Maciej Żenczykowski85c9c992024-08-16 17:57:36 -070020#include "bpf_net_helpers.h"
Ken Chen74ff3ee2022-07-14 16:46:39 +080021#include "dscpPolicy.h"
Tyler Wear72388212021-09-09 14:49:02 -070022
// The two low bits of the IPv4 TOS byte carry ECN and are kept when DSCP is rewritten.
#define ECN_MASK 3
// Builds a TOS value from a DSCP value (placed above the two ECN bits) and the ECN bits
// of an existing TOS byte. The whole expansion is parenthesized so the macro composes
// correctly inside larger expressions (comparisons, arithmetic, further bit operations).
#define UPDATE_TOS(dscp, tos) (((dscp) << 2) | ((tos) & ECN_MASK))
Tyler Wear11f494f2022-06-14 16:04:49 -070025
Maciej Żenczykowski52ff2b62024-08-27 18:17:33 -070026// The cache is never read nor written by userspace and is indexed by socket cookie % CACHE_MAP_SIZE
27#define CACHE_MAP_SIZE 32 // should be a power of two so we can % cheaply
28DEFINE_BPF_MAP_GRO(socket_policy_cache_map, PERCPU_ARRAY, uint32_t, RuleEntry, CACHE_MAP_SIZE,
29 AID_SYSTEM)
Tyler Wear72388212021-09-09 14:49:02 -070030
Tyler Wear11f494f2022-06-14 16:04:49 -070031DEFINE_BPF_MAP_GRW(ipv4_dscp_policies_map, ARRAY, uint32_t, DscpPolicy, MAX_POLICIES, AID_SYSTEM)
32DEFINE_BPF_MAP_GRW(ipv6_dscp_policies_map, ARRAY, uint32_t, DscpPolicy, MAX_POLICIES, AID_SYSTEM)
Tyler Wear3ad80892022-02-03 15:14:44 -080033
Maciej Żenczykowski1ab3ad82024-08-22 17:30:20 +000034static inline __always_inline void match_policy(struct __sk_buff* skb, const bool ipv4) {
Tyler Wear3ad80892022-02-03 15:14:44 -080035 void* data = (void*)(long)skb->data;
36 const void* data_end = (void*)(long)skb->data_end;
37
Patrick Rohr7f325cc2022-07-25 10:15:02 -070038 const int l2_header_size = sizeof(struct ethhdr);
39 struct ethhdr* eth = data;
Tyler Wear3ad80892022-02-03 15:14:44 -080040
41 if (data + l2_header_size > data_end) return;
42
Tyler Wear3ad80892022-02-03 15:14:44 -080043 int hdr_size = 0;
Tyler Wear72388212021-09-09 14:49:02 -070044
45 // used for map lookup
46 uint64_t cookie = bpf_get_socket_cookie(skb);
Tyler Wear11f494f2022-06-14 16:04:49 -070047 if (!cookie) return;
Tyler Wear72388212021-09-09 14:49:02 -070048
Maciej Żenczykowski52ff2b62024-08-27 18:17:33 -070049 uint32_t cacheid = cookie % CACHE_MAP_SIZE;
50
Maciej Żenczykowski640752b2022-08-09 23:02:57 +000051 __be16 sport = 0;
Tyler Wear3ad80892022-02-03 15:14:44 -080052 uint16_t dport = 0;
Tyler Wear11f494f2022-06-14 16:04:49 -070053 uint8_t protocol = 0; // TODO: Use are reserved value? Or int (-1) and cast to uint below?
Tyler Wear92281052022-06-22 15:32:14 -070054 struct in6_addr src_ip = {};
55 struct in6_addr dst_ip = {};
Maciej Żenczykowski242af392022-08-22 09:11:10 +000056 uint8_t tos = 0; // Only used for IPv4
57 __be32 old_first_be32 = 0; // Only used for IPv6
Tyler Wear3ad80892022-02-03 15:14:44 -080058 if (ipv4) {
Patrick Rohr7f325cc2022-07-25 10:15:02 -070059 const struct iphdr* const iph = (void*)(eth + 1);
Tyler Wear11f494f2022-06-14 16:04:49 -070060 hdr_size = l2_header_size + sizeof(struct iphdr);
Tyler Wear72388212021-09-09 14:49:02 -070061 // Must have ipv4 header
Tyler Wear11f494f2022-06-14 16:04:49 -070062 if (data + hdr_size > data_end) return;
Tyler Wear72388212021-09-09 14:49:02 -070063
64 // IP version must be 4
Tyler Wear3ad80892022-02-03 15:14:44 -080065 if (iph->version != 4) return;
Tyler Wear72388212021-09-09 14:49:02 -070066
67 // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
Tyler Wear3ad80892022-02-03 15:14:44 -080068 if (iph->ihl != 5) return;
Tyler Wear72388212021-09-09 14:49:02 -070069
Tyler Wear3ad80892022-02-03 15:14:44 -080070 // V4 mapped address in in6_addr sets 10/11 position to 0xff.
Tyler Wear92281052022-06-22 15:32:14 -070071 src_ip.s6_addr32[2] = htonl(0x0000ffff);
72 dst_ip.s6_addr32[2] = htonl(0x0000ffff);
Tyler Wear72388212021-09-09 14:49:02 -070073
Tyler Wear3ad80892022-02-03 15:14:44 -080074 // Copy IPv4 address into in6_addr for easy comparison below.
Tyler Wear92281052022-06-22 15:32:14 -070075 src_ip.s6_addr32[3] = iph->saddr;
76 dst_ip.s6_addr32[3] = iph->daddr;
Tyler Wear3ad80892022-02-03 15:14:44 -080077 protocol = iph->protocol;
78 tos = iph->tos;
Tyler Wear3ad80892022-02-03 15:14:44 -080079 } else {
Patrick Rohr7f325cc2022-07-25 10:15:02 -070080 struct ipv6hdr* ip6h = (void*)(eth + 1);
Tyler Wear11f494f2022-06-14 16:04:49 -070081 hdr_size = l2_header_size + sizeof(struct ipv6hdr);
Tyler Wear3ad80892022-02-03 15:14:44 -080082 // Must have ipv6 header
Tyler Wear11f494f2022-06-14 16:04:49 -070083 if (data + hdr_size > data_end) return;
Tyler Wear72388212021-09-09 14:49:02 -070084
Tyler Wear3ad80892022-02-03 15:14:44 -080085 if (ip6h->version != 6) return;
Tyler Wear72388212021-09-09 14:49:02 -070086
Tyler Wear92281052022-06-22 15:32:14 -070087 src_ip = ip6h->saddr;
88 dst_ip = ip6h->daddr;
Tyler Wear3ad80892022-02-03 15:14:44 -080089 protocol = ip6h->nexthdr;
Maciej Żenczykowski242af392022-08-22 09:11:10 +000090 old_first_be32 = *(__be32*)ip6h;
Tyler Wear3ad80892022-02-03 15:14:44 -080091 }
Tyler Wear72388212021-09-09 14:49:02 -070092
Tyler Wear3ad80892022-02-03 15:14:44 -080093 switch (protocol) {
94 case IPPROTO_UDP:
Tyler Wear11f494f2022-06-14 16:04:49 -070095 case IPPROTO_UDPLITE: {
96 struct udphdr* udp;
Tyler Wear3ad80892022-02-03 15:14:44 -080097 udp = data + hdr_size;
98 if ((void*)(udp + 1) > data_end) return;
99 sport = udp->source;
Maciej Żenczykowski640752b2022-08-09 23:02:57 +0000100 dport = ntohs(udp->dest);
Tyler Wear11f494f2022-06-14 16:04:49 -0700101 } break;
102 case IPPROTO_TCP: {
103 struct tcphdr* tcp;
Tyler Wear3ad80892022-02-03 15:14:44 -0800104 tcp = data + hdr_size;
105 if ((void*)(tcp + 1) > data_end) return;
106 sport = tcp->source;
Maciej Żenczykowski640752b2022-08-09 23:02:57 +0000107 dport = ntohs(tcp->dest);
Tyler Wear11f494f2022-06-14 16:04:49 -0700108 } break;
Tyler Wear3ad80892022-02-03 15:14:44 -0800109 default:
110 return;
111 }
112
Maciej Żenczykowski52ff2b62024-08-27 18:17:33 -0700113 // this array lookup cannot actually fail
114 RuleEntry* existing_rule = bpf_socket_policy_cache_map_lookup_elem(&cacheid);
Tyler Wear3ad80892022-02-03 15:14:44 -0800115
Maciej Żenczykowskibbb54992024-08-29 18:15:30 -0700116 if (!existing_rule) return; // impossible
117
118 if (v6_equal(src_ip, existing_rule->src_ip) &&
Maciej Żenczykowski1feaa432022-07-29 21:17:07 +0000119 v6_equal(dst_ip, existing_rule->dst_ip) &&
120 skb->ifindex == existing_rule->ifindex &&
121 sport == existing_rule->src_port &&
122 dport == existing_rule->dst_port &&
123 protocol == existing_rule->proto) {
Maciej Żenczykowskid7b92c02022-07-27 19:57:15 +0000124 if (existing_rule->dscp_val < 0) return;
Tyler Wear3ad80892022-02-03 15:14:44 -0800125 if (ipv4) {
Tyler Wear92281052022-06-22 15:32:14 -0700126 uint8_t newTos = UPDATE_TOS(existing_rule->dscp_val, tos);
Maciej Żenczykowski85c9c992024-08-16 17:57:36 -0700127 bpf_l3_csum_replace(skb, l2_header_size + IP4_OFFSET(check), htons(tos), htons(newTos),
Tyler Wear11f494f2022-06-14 16:04:49 -0700128 sizeof(uint16_t));
Maciej Żenczykowski85c9c992024-08-16 17:57:36 -0700129 bpf_skb_store_bytes(skb, l2_header_size + IP4_OFFSET(tos), &newTos, sizeof(newTos), 0);
Tyler Wear3ad80892022-02-03 15:14:44 -0800130 } else {
Maciej Żenczykowski242af392022-08-22 09:11:10 +0000131 __be32 new_first_be32 =
132 htonl(ntohl(old_first_be32) & 0xF03FFFFF | (existing_rule->dscp_val << 22));
133 bpf_skb_store_bytes(skb, l2_header_size, &new_first_be32, sizeof(__be32),
Tyler Wear4e8949b2022-06-23 14:15:58 -0700134 BPF_F_RECOMPUTE_CSUM);
Tyler Wear3ad80892022-02-03 15:14:44 -0800135 }
136 return;
137 }
138
139 // Linear scan ipv4_dscp_policies_map since no stored params match skb.
Maciej Żenczykowskid7b92c02022-07-27 19:57:15 +0000140 int best_score = 0;
141 int8_t new_dscp = -1;
Tyler Wear3ad80892022-02-03 15:14:44 -0800142
143 for (register uint64_t i = 0; i < MAX_POLICIES; i++) {
Tyler Wear3ad80892022-02-03 15:14:44 -0800144 // Using a uint64 in for loop prevents infinite loop during BPF load,
145 // but the key is uint32, so convert back.
146 uint32_t key = i;
147
148 DscpPolicy* policy;
149 if (ipv4) {
150 policy = bpf_ipv4_dscp_policies_map_lookup_elem(&key);
151 } else {
152 policy = bpf_ipv6_dscp_policies_map_lookup_elem(&key);
Tyler Wear72388212021-09-09 14:49:02 -0700153 }
154
Maciej Żenczykowski1ab3ad82024-08-22 17:30:20 +0000155 // Lookup failure cannot happen on an array with MAX_POLICIES entries.
156 // While 'continue' would make logical sense here, 'return' should be
157 // easier for the verifier to analyze.
158 if (!policy) return;
Tyler Wear72388212021-09-09 14:49:02 -0700159
Maciej Żenczykowski1feaa432022-07-29 21:17:07 +0000160 // If policy iface index does not match skb, then skip to next policy.
161 if (policy->ifindex != skb->ifindex) continue;
Tyler Wear72388212021-09-09 14:49:02 -0700162
Maciej Żenczykowski1feaa432022-07-29 21:17:07 +0000163 int score = 0;
164
Maciej Żenczykowskid6c0db32024-08-22 18:11:30 +0000165 if (policy->match_proto) {
Maciej Żenczykowski1feaa432022-07-29 21:17:07 +0000166 if (protocol != policy->proto) continue;
167 score += 0xFFFF;
168 }
Maciej Żenczykowskid6c0db32024-08-22 18:11:30 +0000169 if (policy->match_src_ip) {
Maciej Żenczykowski1feaa432022-07-29 21:17:07 +0000170 if (v6_not_equal(src_ip, policy->src_ip)) continue;
171 score += 0xFFFF;
172 }
Maciej Żenczykowskid6c0db32024-08-22 18:11:30 +0000173 if (policy->match_dst_ip) {
Maciej Żenczykowski1feaa432022-07-29 21:17:07 +0000174 if (v6_not_equal(dst_ip, policy->dst_ip)) continue;
175 score += 0xFFFF;
176 }
Maciej Żenczykowskid6c0db32024-08-22 18:11:30 +0000177 if (policy->match_src_port) {
Maciej Żenczykowski1feaa432022-07-29 21:17:07 +0000178 if (sport != policy->src_port) continue;
179 score += 0xFFFF;
180 }
Maciej Żenczykowski640752b2022-08-09 23:02:57 +0000181 if (dport < policy->dst_port_start) continue;
182 if (dport > policy->dst_port_end) continue;
Maciej Żenczykowskia44510b2022-08-09 14:59:25 +0000183 score += 0xFFFF + policy->dst_port_start - policy->dst_port_end;
Maciej Żenczykowski1feaa432022-07-29 21:17:07 +0000184
185 if (score > best_score) {
Tyler Wear92281052022-06-22 15:32:14 -0700186 best_score = score;
Tyler Wear92281052022-06-22 15:32:14 -0700187 new_dscp = policy->dscp_val;
Tyler Wear72388212021-09-09 14:49:02 -0700188 }
Maciej Żenczykowskid7b92c02022-07-27 19:57:15 +0000189 }
Tyler Wear72388212021-09-09 14:49:02 -0700190
Maciej Żenczykowskibbb54992024-08-29 18:15:30 -0700191 // Update cache with found policy.
192 *existing_rule = (RuleEntry){
Tyler Wear92281052022-06-22 15:32:14 -0700193 .src_ip = src_ip,
194 .dst_ip = dst_ip,
Tyler Wear3ad80892022-02-03 15:14:44 -0800195 .ifindex = skb->ifindex,
Tyler Wear92281052022-06-22 15:32:14 -0700196 .src_port = sport,
197 .dst_port = dport,
Tyler Wear3ad80892022-02-03 15:14:44 -0800198 .proto = protocol,
Tyler Wear92281052022-06-22 15:32:14 -0700199 .dscp_val = new_dscp,
Tyler Wear3ad80892022-02-03 15:14:44 -0800200 };
Tyler Wear72388212021-09-09 14:49:02 -0700201
Maciej Żenczykowskid7b92c02022-07-27 19:57:15 +0000202 if (new_dscp < 0) return;
203
Tyler Wear3ad80892022-02-03 15:14:44 -0800204 // Need to store bytes after updating map or program will not load.
Tyler Wear4e8949b2022-06-23 14:15:58 -0700205 if (ipv4) {
206 uint8_t new_tos = UPDATE_TOS(new_dscp, tos);
Maciej Żenczykowski85c9c992024-08-16 17:57:36 -0700207 bpf_l3_csum_replace(skb, l2_header_size + IP4_OFFSET(check), htons(tos), htons(new_tos), 2);
208 bpf_skb_store_bytes(skb, l2_header_size + IP4_OFFSET(tos), &new_tos, sizeof(new_tos), 0);
Tyler Wear4e8949b2022-06-23 14:15:58 -0700209 } else {
Maciej Żenczykowski242af392022-08-22 09:11:10 +0000210 __be32 new_first_be32 = htonl(ntohl(old_first_be32) & 0xF03FFFFF | (new_dscp << 22));
211 bpf_skb_store_bytes(skb, l2_header_size, &new_first_be32, sizeof(__be32),
Tyler Wear4e8949b2022-06-23 14:15:58 -0700212 BPF_F_RECOMPUTE_CSUM);
Tyler Wear3ad80892022-02-03 15:14:44 -0800213 }
214 return;
215}
Tyler Wear72388212021-09-09 14:49:02 -0700216
Tyler Wear4e8949b2022-06-23 14:15:58 -0700217DEFINE_BPF_PROG_KVER("schedcls/set_dscp_ether", AID_ROOT, AID_SYSTEM, schedcls_set_dscp_ether,
Maciej Żenczykowski901c7102023-10-06 15:47:46 -0700218 KVER_5_15)
Tyler Wear3ad80892022-02-03 15:14:44 -0800219(struct __sk_buff* skb) {
Tyler Wear3ad80892022-02-03 15:14:44 -0800220 if (skb->pkt_type != PACKET_HOST) return TC_ACT_PIPE;
221
222 if (skb->protocol == htons(ETH_P_IP)) {
Patrick Rohr7f325cc2022-07-25 10:15:02 -0700223 match_policy(skb, true);
Tyler Wear3ad80892022-02-03 15:14:44 -0800224 } else if (skb->protocol == htons(ETH_P_IPV6)) {
Patrick Rohr7f325cc2022-07-25 10:15:02 -0700225 match_policy(skb, false);
Tyler Wear3ad80892022-02-03 15:14:44 -0800226 }
227
228 // Always return TC_ACT_PIPE
229 return TC_ACT_PIPE;
230}
231
Tyler Wear72388212021-09-09 14:49:02 -0700232LICENSE("Apache 2.0");
233CRITICAL("Connectivity");