Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2021 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #pragma once |
| 18 | |
Maciej Żenczykowski | 52018c8 | 2024-06-04 16:05:16 +0000 | [diff] [blame] | 19 | #include <stdlib.h> |
| 20 | #include <unistd.h> |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 21 | #include <linux/bpf.h> |
| 22 | #include <linux/unistd.h> |
Maciej Żenczykowski | 52018c8 | 2024-06-04 16:05:16 +0000 | [diff] [blame] | 23 | #include <sys/file.h> |
| 24 | |
// This header is built in one of two modes:
//  - with BPF_FD_JUST_USE_INT defined, fds are passed around as plain 'int's
//    (presumably for callers that cannot depend on libbase -- TODO confirm);
//  - otherwise fds travel as 'android::base::unique_fd&' references.
// BPF_FD_TO_U32(x) extracts the raw fd number as the __u32 that the kernel's
// 'union bpf_attr' fields expect, in either mode.
// Both macros are #undef'ed at the bottom of this header.
#ifdef BPF_FD_JUST_USE_INT
#define BPF_FD_TYPE int
#define BPF_FD_TO_U32(x) static_cast<__u32>(x)
#else
#include <android-base/unique_fd.h>
#define BPF_FD_TYPE base::unique_fd&
#define BPF_FD_TO_U32(x) static_cast<__u32>((x).get())
#endif
| 33 | |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 34 | namespace android { |
| 35 | namespace bpf { |
| 36 | |
// Converts a pointer to the 64-bit integer representation used by the bpf
// syscall ABI (pointers inside 'union bpf_attr' are always passed as u64,
// even from 32-bit userspace).
inline uint64_t ptr_to_u64(const void * const x) {
    // Named casts instead of the original C-style cast: pointer -> uintptr_t
    // is the portable way to round-trip a pointer through an integer.
    return static_cast<uint64_t>(reinterpret_cast<uintptr_t>(x));
}
| 40 | |
/* Note: bpf_attr is a union which might have a much larger size than the anonymous struct portion
 * of it that we are using.  The kernel's bpf() system call will perform a strict check to ensure
 * all unused portions are zero.  It will fail with E2BIG if we don't fully zero bpf_attr.
 */
| 45 | |
// Thin wrapper around the bpf() system call, for commands that only read 'attr'.
inline int bpf(enum bpf_cmd cmd, const bpf_attr& attr) {
    const long rv = syscall(__NR_bpf, cmd, &attr, sizeof(attr));
    return static_cast<int>(rv);
}
| 49 | |
// Overload for commands where the kernel writes results back into '*attr'
// (e.g. BPF_PROG_QUERY).
inline int bpf(enum bpf_cmd cmd, bpf_attr *attr) {
    const long rv = syscall(__NR_bpf, cmd, attr, sizeof(*attr));
    return static_cast<int>(rv);
}
| 54 | |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 55 | inline int createMap(bpf_map_type map_type, uint32_t key_size, uint32_t value_size, |
| 56 | uint32_t max_entries, uint32_t map_flags) { |
| 57 | return bpf(BPF_MAP_CREATE, { |
| 58 | .map_type = map_type, |
| 59 | .key_size = key_size, |
| 60 | .value_size = value_size, |
| 61 | .max_entries = max_entries, |
| 62 | .map_flags = map_flags, |
| 63 | }); |
| 64 | } |
| 65 | |
Maciej Żenczykowski | c6e4122 | 2023-06-12 22:50:02 -0700 | [diff] [blame] | 66 | // Note: |
| 67 | // 'map_type' must be one of BPF_MAP_TYPE_{ARRAY,HASH}_OF_MAPS |
| 68 | // 'value_size' must be sizeof(u32), ie. 4 |
| 69 | // 'inner_map_fd' is basically a template specifying {map_type, key_size, value_size, max_entries, map_flags} |
| 70 | // of the inner map type (and possibly only key_size/value_size actually matter?). |
| 71 | inline int createOuterMap(bpf_map_type map_type, uint32_t key_size, uint32_t value_size, |
| 72 | uint32_t max_entries, uint32_t map_flags, const BPF_FD_TYPE inner_map_fd) { |
| 73 | return bpf(BPF_MAP_CREATE, { |
| 74 | .map_type = map_type, |
| 75 | .key_size = key_size, |
| 76 | .value_size = value_size, |
| 77 | .max_entries = max_entries, |
| 78 | .map_flags = map_flags, |
| 79 | .inner_map_fd = BPF_FD_TO_U32(inner_map_fd), |
| 80 | }); |
| 81 | } |
| 82 | |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 83 | inline int writeToMapEntry(const BPF_FD_TYPE map_fd, const void* key, const void* value, |
| 84 | uint64_t flags) { |
| 85 | return bpf(BPF_MAP_UPDATE_ELEM, { |
| 86 | .map_fd = BPF_FD_TO_U32(map_fd), |
| 87 | .key = ptr_to_u64(key), |
| 88 | .value = ptr_to_u64(value), |
| 89 | .flags = flags, |
| 90 | }); |
| 91 | } |
| 92 | |
| 93 | inline int findMapEntry(const BPF_FD_TYPE map_fd, const void* key, void* value) { |
| 94 | return bpf(BPF_MAP_LOOKUP_ELEM, { |
| 95 | .map_fd = BPF_FD_TO_U32(map_fd), |
| 96 | .key = ptr_to_u64(key), |
| 97 | .value = ptr_to_u64(value), |
| 98 | }); |
| 99 | } |
| 100 | |
| 101 | inline int deleteMapEntry(const BPF_FD_TYPE map_fd, const void* key) { |
| 102 | return bpf(BPF_MAP_DELETE_ELEM, { |
| 103 | .map_fd = BPF_FD_TO_U32(map_fd), |
| 104 | .key = ptr_to_u64(key), |
| 105 | }); |
| 106 | } |
| 107 | |
| 108 | inline int getNextMapKey(const BPF_FD_TYPE map_fd, const void* key, void* next_key) { |
| 109 | return bpf(BPF_MAP_GET_NEXT_KEY, { |
| 110 | .map_fd = BPF_FD_TO_U32(map_fd), |
| 111 | .key = ptr_to_u64(key), |
| 112 | .next_key = ptr_to_u64(next_key), |
| 113 | }); |
| 114 | } |
| 115 | |
| 116 | inline int getFirstMapKey(const BPF_FD_TYPE map_fd, void* firstKey) { |
| 117 | return getNextMapKey(map_fd, NULL, firstKey); |
| 118 | } |
| 119 | |
| 120 | inline int bpfFdPin(const BPF_FD_TYPE map_fd, const char* pathname) { |
| 121 | return bpf(BPF_OBJ_PIN, { |
| 122 | .pathname = ptr_to_u64(pathname), |
| 123 | .bpf_fd = BPF_FD_TO_U32(map_fd), |
| 124 | }); |
| 125 | } |
| 126 | |
| 127 | inline int bpfFdGet(const char* pathname, uint32_t flag) { |
| 128 | return bpf(BPF_OBJ_GET, { |
| 129 | .pathname = ptr_to_u64(pathname), |
| 130 | .file_flags = flag, |
| 131 | }); |
| 132 | } |
| 133 | |
// Forward declaration; the inline definition is generated further down by
// DEFINE_BPF_GET_FD(map, MapId, id).
int bpfGetFdMapId(const BPF_FD_TYPE map_fd);

// Takes ownership of 'fd' (a bpf map fd) and tries to place an open-file-
// description (OFD) advisory lock on byte range [mapId, mapId+1) of the fd.
// 'type' is F_RDLCK (shared) or F_WRLCK (exclusive).
// Returns:
//   'fd' on success, or when locking is not possible/not wanted
//   (lockless test build, or pre-4.14 kernel that can't report map ids);
//   'fd' unchanged if it was already negative (error passthrough);
//   -1 (after closing 'fd') if the lock is already held elsewhere.
inline int bpfLock(int fd, short type) {
    if (fd < 0) return fd;  // pass any errors straight through
#ifdef BPF_MAP_LOCKLESS_FOR_TEST
    return fd;
#endif
#ifdef BPF_FD_JUST_USE_INT
    int mapId = bpfGetFdMapId(fd);
    int saved_errno = errno;  // capture before any later call can clobber it
#else
    // Temporarily wrap 'fd' so we can call the unique_fd& flavour of
    // bpfGetFdMapId() ...
    base::unique_fd ufd(fd);
    int mapId = bpfGetFdMapId(ufd);
    int saved_errno = errno;
    // ... then release so 'ufd' doesn't close the fd we still own.
    (void)ufd.release();
#endif
    // 4.14+ required to fetch map id, but we don't want to call isAtLeastKernelVersion
    if (mapId == -1 && saved_errno == EINVAL) return fd;
    if (mapId <= 0) abort();  // should not be possible

    // on __LP64__ (aka. 64-bit userspace) 'struct flock64' is the same as 'struct flock'
    struct flock64 fl = {
        .l_type = type,        // short: F_{RD,WR,UN}LCK
        .l_whence = SEEK_SET,  // short: SEEK_{SET,CUR,END}
        .l_start = mapId,      // off_t: start offset
        .l_len = 1,            // off_t: number of bytes
    };

    // see: bionic/libc/bionic/fcntl.cpp: iff !__LP64__ this uses fcntl64
    int ret = fcntl(fd, F_OFD_SETLK, &fl);
    if (!ret) return fd;  // success
    close(fd);
    return ret;  // most likely -1 with errno == EAGAIN, due to already held lock
}
| 168 | |
Maciej Żenczykowski | 0fff839 | 2024-06-15 02:43:12 -0700 | [diff] [blame] | 169 | inline int mapRetrieveLocklessRW(const char* pathname) { |
| 170 | return bpfFdGet(pathname, 0); |
Maciej Żenczykowski | 52018c8 | 2024-06-04 16:05:16 +0000 | [diff] [blame] | 171 | } |
Maciej Żenczykowski | 0fff839 | 2024-06-15 02:43:12 -0700 | [diff] [blame] | 172 | |
| 173 | inline int mapRetrieveExclusiveRW(const char* pathname) { |
| 174 | return bpfLock(mapRetrieveLocklessRW(pathname), F_WRLCK); |
| 175 | } |
| 176 | |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 177 | inline int mapRetrieveRW(const char* pathname) { |
Maciej Żenczykowski | 0fff839 | 2024-06-15 02:43:12 -0700 | [diff] [blame] | 178 | return bpfLock(mapRetrieveLocklessRW(pathname), F_RDLCK); |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 179 | } |
| 180 | |
| 181 | inline int mapRetrieveRO(const char* pathname) { |
Maciej Żenczykowski | dfef229 | 2024-06-04 13:48:36 +0000 | [diff] [blame] | 182 | return bpfFdGet(pathname, BPF_F_RDONLY); |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 183 | } |
| 184 | |
Maciej Żenczykowski | 52018c8 | 2024-06-04 16:05:16 +0000 | [diff] [blame] | 185 | // WARNING: it's impossible to grab a shared (ie. read) lock on a write-only fd, |
Maciej Żenczykowski | 7eb7d67 | 2024-06-14 13:55:09 -0700 | [diff] [blame] | 186 | // so we instead choose to grab an exclusive (ie. write) lock. |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 187 | inline int mapRetrieveWO(const char* pathname) { |
Maciej Żenczykowski | 7eb7d67 | 2024-06-14 13:55:09 -0700 | [diff] [blame] | 188 | return bpfLock(bpfFdGet(pathname, BPF_F_WRONLY), F_WRLCK); |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 189 | } |
| 190 | |
| 191 | inline int retrieveProgram(const char* pathname) { |
| 192 | return bpfFdGet(pathname, BPF_F_RDONLY); |
| 193 | } |
| 194 | |
Maciej Żenczykowski | e950f6d | 2024-04-26 11:52:25 -0700 | [diff] [blame] | 195 | inline bool usableProgram(const char* pathname) { |
| 196 | int fd = retrieveProgram(pathname); |
| 197 | bool ok = (fd >= 0); |
| 198 | if (ok) close(fd); |
| 199 | return ok; |
| 200 | } |
| 201 | |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 202 | inline int attachProgram(bpf_attach_type type, const BPF_FD_TYPE prog_fd, |
KaiWen Zheng | cfe2f2a | 2022-02-08 09:38:50 +0800 | [diff] [blame] | 203 | const BPF_FD_TYPE cg_fd, uint32_t flags = 0) { |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 204 | return bpf(BPF_PROG_ATTACH, { |
| 205 | .target_fd = BPF_FD_TO_U32(cg_fd), |
| 206 | .attach_bpf_fd = BPF_FD_TO_U32(prog_fd), |
| 207 | .attach_type = type, |
KaiWen Zheng | cfe2f2a | 2022-02-08 09:38:50 +0800 | [diff] [blame] | 208 | .attach_flags = flags, |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 209 | }); |
| 210 | } |
| 211 | |
| 212 | inline int detachProgram(bpf_attach_type type, const BPF_FD_TYPE cg_fd) { |
| 213 | return bpf(BPF_PROG_DETACH, { |
| 214 | .target_fd = BPF_FD_TO_U32(cg_fd), |
| 215 | .attach_type = type, |
| 216 | }); |
| 217 | } |
| 218 | |
Maciej Żenczykowski | 340e2ff | 2023-10-03 07:25:38 +0000 | [diff] [blame] | 219 | inline int queryProgram(const BPF_FD_TYPE cg_fd, |
| 220 | enum bpf_attach_type attach_type, |
| 221 | __u32 query_flags = 0, |
| 222 | __u32 attach_flags = 0) { |
| 223 | int prog_id = -1; // equivalent to an array of one integer. |
| 224 | bpf_attr arg = { |
| 225 | .query = { |
| 226 | .target_fd = BPF_FD_TO_U32(cg_fd), |
| 227 | .attach_type = attach_type, |
| 228 | .query_flags = query_flags, |
| 229 | .attach_flags = attach_flags, |
| 230 | .prog_ids = ptr_to_u64(&prog_id), // pointer to output array |
| 231 | .prog_cnt = 1, // in: space - nr of ints in the array, out: used |
| 232 | } |
| 233 | }; |
| 234 | int v = bpf(BPF_PROG_QUERY, &arg); |
| 235 | if (v) return v; // error case |
| 236 | if (!arg.query.prog_cnt) return 0; // no program, kernel never returns zero id |
| 237 | return prog_id; // return actual id |
| 238 | } |
| 239 | |
KaiWen Zheng | cfe2f2a | 2022-02-08 09:38:50 +0800 | [diff] [blame] | 240 | inline int detachSingleProgram(bpf_attach_type type, const BPF_FD_TYPE prog_fd, |
| 241 | const BPF_FD_TYPE cg_fd) { |
| 242 | return bpf(BPF_PROG_DETACH, { |
| 243 | .target_fd = BPF_FD_TO_U32(cg_fd), |
| 244 | .attach_bpf_fd = BPF_FD_TO_U32(prog_fd), |
| 245 | .attach_type = type, |
| 246 | }); |
| 247 | } |
| 248 | |
Ryan Zuklie | 2669e24 | 2022-11-30 11:12:41 -0800 | [diff] [blame] | 249 | // Available in 4.12 and later kernels. |
| 250 | inline int runProgram(const BPF_FD_TYPE prog_fd, const void* data, |
| 251 | const uint32_t data_size) { |
| 252 | return bpf(BPF_PROG_RUN, { |
| 253 | .test = { |
| 254 | .prog_fd = BPF_FD_TO_U32(prog_fd), |
Ryan Zuklie | 2669e24 | 2022-11-30 11:12:41 -0800 | [diff] [blame] | 255 | .data_size_in = data_size, |
Maciej Żenczykowski | 325f675 | 2023-09-06 23:50:47 +0000 | [diff] [blame] | 256 | .data_in = ptr_to_u64(data), |
Ryan Zuklie | 2669e24 | 2022-11-30 11:12:41 -0800 | [diff] [blame] | 257 | }, |
| 258 | }); |
| 259 | } |
| 260 | |
// BPF_OBJ_GET_INFO_BY_FD requires 4.14+ kernel
//
// Note: some fields are only defined in newer kernels (ie. the map_info struct grows
// over time), so we need to check that the field we're interested in is actually
// supported/returned by the running kernel.  We do this by checking it is fully
// within the bounds of the struct size as reported by the kernel.
//
// DEFINE_BPF_GET_FD(TYPE, NAME, FIELD) expands to:
//   inline int bpfGetFd##NAME(const BPF_FD_TYPE fd)
// which fetches 'struct bpf_##TYPE##_info' for 'fd' and returns its FIELD
// member, or -1 with errno set to EOPNOTSUPP when the running kernel's info
// struct is too old to contain FIELD.
//
// Although 'attr' is passed through the const-ref bpf() overload, the syscall
// still receives its address, so the kernel writes back the actual info_len
// (and fills 'info') -- that written-back attr.info.info_len is what the
// bounds check below inspects.  (Comments cannot go inside the macro itself:
// a '//' comment would swallow the backslash line-continuations.)
#define DEFINE_BPF_GET_FD(TYPE, NAME, FIELD)                                                \
    inline int bpfGetFd ## NAME(const BPF_FD_TYPE fd) {                                     \
        struct bpf_ ## TYPE ## _info info = {};                                             \
        union bpf_attr attr = { .info = {                                                   \
            .bpf_fd = BPF_FD_TO_U32(fd),                                                    \
            .info_len = sizeof(info),                                                       \
            .info = ptr_to_u64(&info),                                                      \
        }};                                                                                 \
        int rv = bpf(BPF_OBJ_GET_INFO_BY_FD, attr);                                         \
        if (rv) return rv;                                                                  \
        if (attr.info.info_len < offsetof(bpf_ ## TYPE ## _info, FIELD) + sizeof(info.FIELD)) { \
            errno = EOPNOTSUPP;                                                             \
            return -1;                                                                      \
        };                                                                                  \
        return info.FIELD;                                                                  \
    }
| 283 | |
// All 7 of these fields are already present in Linux v4.14 (even ACK 4.14-P)
// while BPF_OBJ_GET_INFO_BY_FD is not implemented at all in v4.9 (even ACK 4.9-Q)
//
// Note: the MapId instantiation provides the definition for the
// bpfGetFdMapId() forward declaration above (used by bpfLock()).
DEFINE_BPF_GET_FD(map, MapType, type)            // int bpfGetFdMapType(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(map, MapId, id)                // int bpfGetFdMapId(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(map, KeySize, key_size)        // int bpfGetFdKeySize(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(map, ValueSize, value_size)    // int bpfGetFdValueSize(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(map, MaxEntries, max_entries)  // int bpfGetFdMaxEntries(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(map, MapFlags, map_flags)      // int bpfGetFdMapFlags(const BPF_FD_TYPE map_fd)
DEFINE_BPF_GET_FD(prog, ProgId, id)              // int bpfGetFdProgId(const BPF_FD_TYPE prog_fd)

#undef DEFINE_BPF_GET_FD
Maciej Żenczykowski | 5c5fae7 | 2022-05-25 12:58:31 -0700 | [diff] [blame] | 295 | |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 296 | } // namespace bpf |
| 297 | } // namespace android |
| 298 | |
Maciej Żenczykowski | a728a70 | 2021-01-11 19:08:33 -0800 | [diff] [blame] | 299 | #undef BPF_FD_TO_U32 |
| 300 | #undef BPF_FD_TYPE |
| 301 | #undef BPF_FD_JUST_USE_INT |