blob: 73cef8913da1fe4d7f54f53324c41aa86de80298 [file] [log] [blame]
Maciej Żenczykowskia728a702021-01-11 19:08:33 -08001/*
2 * Copyright (C) 2021 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#pragma once
18
Maciej Żenczykowski52018c82024-06-04 16:05:16 +000019#include <stdlib.h>
20#include <unistd.h>
Maciej Żenczykowskia728a702021-01-11 19:08:33 -080021#include <linux/bpf.h>
22#include <linux/unistd.h>
Maciej Żenczykowski52018c82024-06-04 16:05:16 +000023#include <sys/file.h>
24
Maciej Żenczykowskia728a702021-01-11 19:08:33 -080025#ifdef BPF_FD_JUST_USE_INT
26 #define BPF_FD_TYPE int
27 #define BPF_FD_TO_U32(x) static_cast<__u32>(x)
28#else
29 #include <android-base/unique_fd.h>
30 #define BPF_FD_TYPE base::unique_fd&
31 #define BPF_FD_TO_U32(x) static_cast<__u32>((x).get())
32#endif
33
Maciej Żenczykowskia728a702021-01-11 19:08:33 -080034namespace android {
35namespace bpf {
36
// Losslessly widen a pointer into the __u64 representation that the
// kernel's bpf_attr fields expect (pointers are passed as u64 in bpf_attr).
inline uint64_t ptr_to_u64(const void * const x) {
    const uintptr_t asInteger = reinterpret_cast<uintptr_t>(x);
    return static_cast<uint64_t>(asInteger);
}
40
/* Note: bpf_attr is a union which might have a much larger size than the anonymous struct
 * portion of it that we are using. The kernel's bpf() system call will perform a strict check
 * to ensure all unused portions are zero. It will fail with E2BIG if we don't fully zero
 * bpf_attr.
 */
45
// Invoke the bpf() system call with a fully-initialized, read-only bpf_attr.
// Returns the raw syscall result: >= 0 on success, -1 with errno set on failure.
inline int bpf(enum bpf_cmd cmd, const bpf_attr& attr) {
    return syscall(__NR_bpf, cmd, &attr, sizeof(bpf_attr));
}
49
// Overload for commands which write results back into the argument
// (e.g. BPF_PROG_QUERY fills in output fields of *attr).
inline int bpf(enum bpf_cmd cmd, bpf_attr *attr) {
    return syscall(__NR_bpf, cmd, attr, sizeof(bpf_attr));
}
54
Maciej Żenczykowskia728a702021-01-11 19:08:33 -080055inline int createMap(bpf_map_type map_type, uint32_t key_size, uint32_t value_size,
56 uint32_t max_entries, uint32_t map_flags) {
57 return bpf(BPF_MAP_CREATE, {
58 .map_type = map_type,
59 .key_size = key_size,
60 .value_size = value_size,
61 .max_entries = max_entries,
62 .map_flags = map_flags,
63 });
64}
65
Maciej Żenczykowskic6e41222023-06-12 22:50:02 -070066// Note:
67// 'map_type' must be one of BPF_MAP_TYPE_{ARRAY,HASH}_OF_MAPS
68// 'value_size' must be sizeof(u32), ie. 4
69// 'inner_map_fd' is basically a template specifying {map_type, key_size, value_size, max_entries, map_flags}
70// of the inner map type (and possibly only key_size/value_size actually matter?).
71inline int createOuterMap(bpf_map_type map_type, uint32_t key_size, uint32_t value_size,
72 uint32_t max_entries, uint32_t map_flags, const BPF_FD_TYPE inner_map_fd) {
73 return bpf(BPF_MAP_CREATE, {
74 .map_type = map_type,
75 .key_size = key_size,
76 .value_size = value_size,
77 .max_entries = max_entries,
78 .map_flags = map_flags,
79 .inner_map_fd = BPF_FD_TO_U32(inner_map_fd),
80 });
81}
82
Maciej Żenczykowskia728a702021-01-11 19:08:33 -080083inline int writeToMapEntry(const BPF_FD_TYPE map_fd, const void* key, const void* value,
84 uint64_t flags) {
85 return bpf(BPF_MAP_UPDATE_ELEM, {
86 .map_fd = BPF_FD_TO_U32(map_fd),
87 .key = ptr_to_u64(key),
88 .value = ptr_to_u64(value),
89 .flags = flags,
90 });
91}
92
93inline int findMapEntry(const BPF_FD_TYPE map_fd, const void* key, void* value) {
94 return bpf(BPF_MAP_LOOKUP_ELEM, {
95 .map_fd = BPF_FD_TO_U32(map_fd),
96 .key = ptr_to_u64(key),
97 .value = ptr_to_u64(value),
98 });
99}
100
101inline int deleteMapEntry(const BPF_FD_TYPE map_fd, const void* key) {
102 return bpf(BPF_MAP_DELETE_ELEM, {
103 .map_fd = BPF_FD_TO_U32(map_fd),
104 .key = ptr_to_u64(key),
105 });
106}
107
108inline int getNextMapKey(const BPF_FD_TYPE map_fd, const void* key, void* next_key) {
109 return bpf(BPF_MAP_GET_NEXT_KEY, {
110 .map_fd = BPF_FD_TO_U32(map_fd),
111 .key = ptr_to_u64(key),
112 .next_key = ptr_to_u64(next_key),
113 });
114}
115
116inline int getFirstMapKey(const BPF_FD_TYPE map_fd, void* firstKey) {
117 return getNextMapKey(map_fd, NULL, firstKey);
118}
119
120inline int bpfFdPin(const BPF_FD_TYPE map_fd, const char* pathname) {
121 return bpf(BPF_OBJ_PIN, {
122 .pathname = ptr_to_u64(pathname),
123 .bpf_fd = BPF_FD_TO_U32(map_fd),
124 });
125}
126
127inline int bpfFdGet(const char* pathname, uint32_t flag) {
128 return bpf(BPF_OBJ_GET, {
129 .pathname = ptr_to_u64(pathname),
130 .file_flags = flag,
131 });
132}
133
Maciej Żenczykowski52018c82024-06-04 16:05:16 +0000134int bpfGetFdMapId(const BPF_FD_TYPE map_fd);
135
// Take an advisory open-file-description (OFD) lock tied to the bpf map's
// kernel-assigned id (used as the byte offset being locked).
//
// On success returns 'fd' (with the lock held via that fd); on failure the fd
// is closed and a negative value is returned. Negative inputs pass straight
// through so this can be chained onto bpfFdGet()-style calls.
//
// 'type' is one of F_RDLCK (shared), F_WRLCK (exclusive), F_UNLCK.
inline int bpfLock(int fd, short type) {
    if (fd < 0) return fd;  // pass any errors straight through
#ifdef BPF_MAP_LOCKLESS_FOR_TEST
    // Test builds skip locking entirely.
    return fd;
#endif
#ifdef BPF_FD_JUST_USE_INT
    int mapId = bpfGetFdMapId(fd);
    int saved_errno = errno;  // capture before any later call can clobber errno
#else
    // Temporarily wrap 'fd' so it can be passed by unique_fd reference;
    // release() afterwards so we do not close the caller's fd here.
    base::unique_fd ufd(fd);
    int mapId = bpfGetFdMapId(ufd);
    int saved_errno = errno;  // capture before release() can clobber errno
    (void)ufd.release();
#endif
    // 4.14+ required to fetch map id, but we don't want to call isAtLeastKernelVersion
    if (mapId == -1 && saved_errno == EINVAL) return fd;
    if (mapId <= 0) abort();  // should not be possible

    // on __LP64__ (aka. 64-bit userspace) 'struct flock64' is the same as 'struct flock'
    struct flock64 fl = {
        .l_type = type,        // short: F_{RD,WR,UN}LCK
        .l_whence = SEEK_SET,  // short: SEEK_{SET,CUR,END}
        .l_start = mapId,      // off_t: start offset
        .l_len = 1,            // off_t: number of bytes
    };

    // see: bionic/libc/bionic/fcntl.cpp: iff !__LP64__ this uses fcntl64
    int ret = fcntl(fd, F_OFD_SETLK, &fl);
    if (!ret) return fd;  // success
    close(fd);
    return ret;  // most likely -1 with errno == EAGAIN, due to already held lock
}
168
Maciej Żenczykowski0fff8392024-06-15 02:43:12 -0700169inline int mapRetrieveLocklessRW(const char* pathname) {
170 return bpfFdGet(pathname, 0);
Maciej Żenczykowski52018c82024-06-04 16:05:16 +0000171}
Maciej Żenczykowski0fff8392024-06-15 02:43:12 -0700172
173inline int mapRetrieveExclusiveRW(const char* pathname) {
174 return bpfLock(mapRetrieveLocklessRW(pathname), F_WRLCK);
175}
176
Maciej Żenczykowskia728a702021-01-11 19:08:33 -0800177inline int mapRetrieveRW(const char* pathname) {
Maciej Żenczykowski0fff8392024-06-15 02:43:12 -0700178 return bpfLock(mapRetrieveLocklessRW(pathname), F_RDLCK);
Maciej Żenczykowskia728a702021-01-11 19:08:33 -0800179}
180
181inline int mapRetrieveRO(const char* pathname) {
Maciej Żenczykowskidfef2292024-06-04 13:48:36 +0000182 return bpfFdGet(pathname, BPF_F_RDONLY);
Maciej Żenczykowskia728a702021-01-11 19:08:33 -0800183}
184
Maciej Żenczykowski52018c82024-06-04 16:05:16 +0000185// WARNING: it's impossible to grab a shared (ie. read) lock on a write-only fd,
Maciej Żenczykowski7eb7d672024-06-14 13:55:09 -0700186// so we instead choose to grab an exclusive (ie. write) lock.
Maciej Żenczykowskia728a702021-01-11 19:08:33 -0800187inline int mapRetrieveWO(const char* pathname) {
Maciej Żenczykowski7eb7d672024-06-14 13:55:09 -0700188 return bpfLock(bpfFdGet(pathname, BPF_F_WRONLY), F_WRLCK);
Maciej Żenczykowskia728a702021-01-11 19:08:33 -0800189}
190
191inline int retrieveProgram(const char* pathname) {
192 return bpfFdGet(pathname, BPF_F_RDONLY);
193}
194
Maciej Żenczykowskie950f6d2024-04-26 11:52:25 -0700195inline bool usableProgram(const char* pathname) {
196 int fd = retrieveProgram(pathname);
197 bool ok = (fd >= 0);
198 if (ok) close(fd);
199 return ok;
200}
201
Maciej Żenczykowskia728a702021-01-11 19:08:33 -0800202inline int attachProgram(bpf_attach_type type, const BPF_FD_TYPE prog_fd,
KaiWen Zhengcfe2f2a2022-02-08 09:38:50 +0800203 const BPF_FD_TYPE cg_fd, uint32_t flags = 0) {
Maciej Żenczykowskia728a702021-01-11 19:08:33 -0800204 return bpf(BPF_PROG_ATTACH, {
205 .target_fd = BPF_FD_TO_U32(cg_fd),
206 .attach_bpf_fd = BPF_FD_TO_U32(prog_fd),
207 .attach_type = type,
KaiWen Zhengcfe2f2a2022-02-08 09:38:50 +0800208 .attach_flags = flags,
Maciej Żenczykowskia728a702021-01-11 19:08:33 -0800209 });
210}
211
212inline int detachProgram(bpf_attach_type type, const BPF_FD_TYPE cg_fd) {
213 return bpf(BPF_PROG_DETACH, {
214 .target_fd = BPF_FD_TO_U32(cg_fd),
215 .attach_type = type,
216 });
217}
218
Maciej Żenczykowski340e2ff2023-10-03 07:25:38 +0000219inline int queryProgram(const BPF_FD_TYPE cg_fd,
220 enum bpf_attach_type attach_type,
221 __u32 query_flags = 0,
222 __u32 attach_flags = 0) {
223 int prog_id = -1; // equivalent to an array of one integer.
224 bpf_attr arg = {
225 .query = {
226 .target_fd = BPF_FD_TO_U32(cg_fd),
227 .attach_type = attach_type,
228 .query_flags = query_flags,
229 .attach_flags = attach_flags,
230 .prog_ids = ptr_to_u64(&prog_id), // pointer to output array
231 .prog_cnt = 1, // in: space - nr of ints in the array, out: used
232 }
233 };
234 int v = bpf(BPF_PROG_QUERY, &arg);
235 if (v) return v; // error case
236 if (!arg.query.prog_cnt) return 0; // no program, kernel never returns zero id
237 return prog_id; // return actual id
238}
239
KaiWen Zhengcfe2f2a2022-02-08 09:38:50 +0800240inline int detachSingleProgram(bpf_attach_type type, const BPF_FD_TYPE prog_fd,
241 const BPF_FD_TYPE cg_fd) {
242 return bpf(BPF_PROG_DETACH, {
243 .target_fd = BPF_FD_TO_U32(cg_fd),
244 .attach_bpf_fd = BPF_FD_TO_U32(prog_fd),
245 .attach_type = type,
246 });
247}
248
Ryan Zuklie2669e242022-11-30 11:12:41 -0800249// Available in 4.12 and later kernels.
250inline int runProgram(const BPF_FD_TYPE prog_fd, const void* data,
251 const uint32_t data_size) {
252 return bpf(BPF_PROG_RUN, {
253 .test = {
254 .prog_fd = BPF_FD_TO_U32(prog_fd),
Ryan Zuklie2669e242022-11-30 11:12:41 -0800255 .data_size_in = data_size,
Maciej Żenczykowski325f6752023-09-06 23:50:47 +0000256 .data_in = ptr_to_u64(data),
Ryan Zuklie2669e242022-11-30 11:12:41 -0800257 },
258 });
259}
260
// BPF_OBJ_GET_INFO_BY_FD requires 4.14+ kernel
//
// Note: some fields are only defined in newer kernels (ie. the map_info struct grows
// over time), so we need to check that the field we're interested in is actually
// supported/returned by the running kernel. We do this by checking it is fully
// within the bounds of the struct size as reported by the kernel.
//
// Each expansion defines 'inline int bpfGetFd<NAME>(const BPF_FD_TYPE fd)' which
// returns info.<FIELD> from bpf_<TYPE>_info, or a negative value on failure
// (errno == EOPNOTSUPP when the running kernel's info struct is too small to
// contain FIELD).
#define DEFINE_BPF_GET_FD(TYPE, NAME, FIELD) \
inline int bpfGetFd ## NAME(const BPF_FD_TYPE fd) { \
    struct bpf_ ## TYPE ## _info info = {}; \
    union bpf_attr attr = { .info = { \
        .bpf_fd = BPF_FD_TO_U32(fd), \
        .info_len = sizeof(info), \
        .info = ptr_to_u64(&info), \
    }}; \
    int rv = bpf(BPF_OBJ_GET_INFO_BY_FD, attr); \
    if (rv) return rv; \
    if (attr.info.info_len < offsetof(bpf_ ## TYPE ## _info, FIELD) + sizeof(info.FIELD)) { \
        errno = EOPNOTSUPP; \
        return -1; \
    }; \
    return info.FIELD; \
}

// All 7 of these fields are already present in Linux v4.14 (even ACK 4.14-P)
// while BPF_OBJ_GET_INFO_BY_FD is not implemented at all in v4.9 (even ACK 4.9-Q)
DEFINE_BPF_GET_FD(map, MapType, type)           // int bpfGetFdMapType(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(map, MapId, id)               // int bpfGetFdMapId(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(map, KeySize, key_size)       // int bpfGetFdKeySize(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(map, ValueSize, value_size)   // int bpfGetFdValueSize(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(map, MaxEntries, max_entries) // int bpfGetFdMaxEntries(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(map, MapFlags, map_flags)     // int bpfGetFdMapFlags(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(prog, ProgId, id)             // int bpfGetFdProgId(const BPF_FD_TYPE prog_fd)

#undef DEFINE_BPF_GET_FD
Maciej Żenczykowski5c5fae72022-05-25 12:58:31 -0700295
Maciej Żenczykowskia728a702021-01-11 19:08:33 -0800296} // namespace bpf
297} // namespace android
298
Maciej Żenczykowskia728a702021-01-11 19:08:33 -0800299#undef BPF_FD_TO_U32
300#undef BPF_FD_TYPE
301#undef BPF_FD_JUST_USE_INT